aboutsummaryrefslogtreecommitdiff
path: root/files/unit_test
diff options
context:
space:
mode:
authorVignesh Venkatasubramanian <vigneshv@google.com>2022-08-02 13:26:21 -0700
committerVignesh Venkatasubramanian <vigneshv@google.com>2022-08-25 13:27:22 -0700
commit51738d52b6c91d4573b75f6483dd045d0affc00b (patch)
treee8cf5f7cdc94a5abfe84a758a4245d7269b65f8f /files/unit_test
parentfe33d276d689446c03db3f1f761797b01aa06e1f (diff)
downloadlibyuv-51738d52b6c91d4573b75f6483dd045d0affc00b.tar.gz
Update external/libyuv to r1837
Update external/libyuv to version r1837 (d53f1bee) from upstream. This brings in some new functions that will be used by libavif. Also update README.version with local modifications. The files/ subdirectory is a pristine copy of the upstream checkout except for the local modifications that are listed in README.version. Bug: b/241008246 Bug: b/228492909 Test: Builds. Media and Camera CTS tests pass. Merged-In: I1bd041e475666e13fafd8c3792d7142a022d435a Change-Id: I85df21181e582ff835aab1e9ef13ad3ac8421d30
Diffstat (limited to 'files/unit_test')
-rw-r--r--files/unit_test/color_test.cc305
-rw-r--r--files/unit_test/compare_test.cc9
-rw-r--r--files/unit_test/convert_test.cc2784
-rw-r--r--files/unit_test/cpu_test.cc143
-rw-r--r--files/unit_test/cpu_thread_test.cc4
-rw-r--r--files/unit_test/math_test.cc5
-rw-r--r--files/unit_test/planar_test.cc1548
-rw-r--r--files/unit_test/rotate_argb_test.cc58
-rw-r--r--files/unit_test/rotate_test.cc253
-rw-r--r--files/unit_test/scale_argb_test.cc167
-rw-r--r--files/unit_test/scale_rgb_test.cc280
-rw-r--r--files/unit_test/scale_test.cc835
-rw-r--r--files/unit_test/scale_uv_test.cc278
-rw-r--r--files/unit_test/testdata/mips.txt7
-rw-r--r--files/unit_test/testdata/mips_loongson2k.txt5
-rw-r--r--files/unit_test/testdata/mips_loongson3.txt10
-rw-r--r--files/unit_test/testdata/mips_loongson_mmi.txt7
-rw-r--r--files/unit_test/testdata/mips_msa.txt7
-rw-r--r--files/unit_test/unit_test.cc194
-rw-r--r--files/unit_test/unit_test.h7
-rw-r--r--files/unit_test/video_common_test.cc11
21 files changed, 5587 insertions, 1330 deletions
diff --git a/files/unit_test/color_test.cc b/files/unit_test/color_test.cc
index 4bb448d5..01267ff1 100644
--- a/files/unit_test/color_test.cc
+++ b/files/unit_test/color_test.cc
@@ -20,20 +20,22 @@
namespace libyuv {
-// TODO(fbarchard): Port high accuracy YUV to RGB to Neon.
-#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define ERROR_R 1
-#define ERROR_G 1
-#define ERROR_B 3
-#define ERROR_FULL 6
-#define ERROR_J420 5
+// TODO(fbarchard): clang x86 has a higher accuracy YUV to RGB.
+// Port to Visual C and other CPUs
+#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || defined(__i386__))
+#define ERROR_FULL 5
+#define ERROR_J420 4
#else
+#define ERROR_FULL 6
+#define ERROR_J420 6
+#endif
#define ERROR_R 1
#define ERROR_G 1
-#define ERROR_B 3
-#define ERROR_FULL 5
-#define ERROR_J420 3
+#ifdef LIBYUV_UNLIMITED_DATA
+#define ERROR_B 1
+#else
+#define ERROR_B 18
#endif
#define TESTCS(TESTNAME, YUVTOARGB, ARGBTOYUV, HS1, HS, HN, DIFF) \
@@ -187,6 +189,104 @@ static void YUVJToRGB(int y, int u, int v, int* r, int* g, int* b) {
*r = orig_pixels[2];
}
+static void YUVHToRGB(int y, int u, int v, int* r, int* g, int* b) {
+ const int kWidth = 16;
+ const int kHeight = 1;
+ const int kPixels = kWidth * kHeight;
+ const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2);
+
+ SIMD_ALIGNED(uint8_t orig_y[16]);
+ SIMD_ALIGNED(uint8_t orig_u[8]);
+ SIMD_ALIGNED(uint8_t orig_v[8]);
+ SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]);
+ memset(orig_y, y, kPixels);
+ memset(orig_u, u, kHalfPixels);
+ memset(orig_v, v, kHalfPixels);
+
+ /* YUV converted to ARGB. */
+ H422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2,
+ orig_pixels, kWidth * 4, kWidth, kHeight);
+
+ *b = orig_pixels[0];
+ *g = orig_pixels[1];
+ *r = orig_pixels[2];
+}
+
+#define F422ToARGB(a, b, c, d, e, f, g, h, i, j) \
+ I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j)
+
+static void YUVFToRGB(int y, int u, int v, int* r, int* g, int* b) {
+ const int kWidth = 16;
+ const int kHeight = 1;
+ const int kPixels = kWidth * kHeight;
+ const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2);
+
+ SIMD_ALIGNED(uint8_t orig_y[16]);
+ SIMD_ALIGNED(uint8_t orig_u[8]);
+ SIMD_ALIGNED(uint8_t orig_v[8]);
+ SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]);
+ memset(orig_y, y, kPixels);
+ memset(orig_u, u, kHalfPixels);
+ memset(orig_v, v, kHalfPixels);
+
+ /* YUV converted to ARGB. */
+ F422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2,
+ orig_pixels, kWidth * 4, kWidth, kHeight);
+
+ *b = orig_pixels[0];
+ *g = orig_pixels[1];
+ *r = orig_pixels[2];
+}
+
+static void YUVUToRGB(int y, int u, int v, int* r, int* g, int* b) {
+ const int kWidth = 16;
+ const int kHeight = 1;
+ const int kPixels = kWidth * kHeight;
+ const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2);
+
+ SIMD_ALIGNED(uint8_t orig_y[16]);
+ SIMD_ALIGNED(uint8_t orig_u[8]);
+ SIMD_ALIGNED(uint8_t orig_v[8]);
+ SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]);
+ memset(orig_y, y, kPixels);
+ memset(orig_u, u, kHalfPixels);
+ memset(orig_v, v, kHalfPixels);
+
+ /* YUV converted to ARGB. */
+ U422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2,
+ orig_pixels, kWidth * 4, kWidth, kHeight);
+
+ *b = orig_pixels[0];
+ *g = orig_pixels[1];
+ *r = orig_pixels[2];
+}
+
+#define V422ToARGB(a, b, c, d, e, f, g, h, i, j) \
+ I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
+
+static void YUVVToRGB(int y, int u, int v, int* r, int* g, int* b) {
+ const int kWidth = 16;
+ const int kHeight = 1;
+ const int kPixels = kWidth * kHeight;
+ const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2);
+
+ SIMD_ALIGNED(uint8_t orig_y[16]);
+ SIMD_ALIGNED(uint8_t orig_u[8]);
+ SIMD_ALIGNED(uint8_t orig_v[8]);
+ SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]);
+ memset(orig_y, y, kPixels);
+ memset(orig_u, u, kHalfPixels);
+ memset(orig_v, v, kHalfPixels);
+
+ /* YUV converted to ARGB. */
+ V422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2,
+ orig_pixels, kWidth * 4, kWidth, kHeight);
+
+ *b = orig_pixels[0];
+ *g = orig_pixels[1];
+ *r = orig_pixels[2];
+}
+
static void YToRGB(int y, int* r, int* g, int* b) {
const int kWidth = 16;
const int kHeight = 1;
@@ -335,18 +435,50 @@ TEST_F(LibYUVColorTest, TestRoundToByte) {
EXPECT_LE(allb, 255);
}
+// BT.601 limited range YUV to RGB reference
static void YUVToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
*r = RoundToByte((y - 16) * 1.164 - (v - 128) * -1.596);
*g = RoundToByte((y - 16) * 1.164 - (u - 128) * 0.391 - (v - 128) * 0.813);
*b = RoundToByte((y - 16) * 1.164 - (u - 128) * -2.018);
}
+// BT.601 full range YUV to RGB reference (aka JPEG)
static void YUVJToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
*r = RoundToByte(y - (v - 128) * -1.40200);
*g = RoundToByte(y - (u - 128) * 0.34414 - (v - 128) * 0.71414);
*b = RoundToByte(y - (u - 128) * -1.77200);
}
+// BT.709 limited range YUV to RGB reference
+// See also http://www.equasys.de/colorconversion.html
+static void YUVHToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
+ *r = RoundToByte((y - 16) * 1.164 - (v - 128) * -1.793);
+ *g = RoundToByte((y - 16) * 1.164 - (u - 128) * 0.213 - (v - 128) * 0.533);
+ *b = RoundToByte((y - 16) * 1.164 - (u - 128) * -2.112);
+}
+
+// BT.709 full range YUV to RGB reference
+static void YUVFToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
+ *r = RoundToByte(y - (v - 128) * -1.5748);
+ *g = RoundToByte(y - (u - 128) * 0.18732 - (v - 128) * 0.46812);
+ *b = RoundToByte(y - (u - 128) * -1.8556);
+}
+
+// BT.2020 limited range YUV to RGB reference
+static void YUVUToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
+ *r = RoundToByte((y - 16) * 1.164384 - (v - 128) * -1.67867);
+ *g = RoundToByte((y - 16) * 1.164384 - (u - 128) * 0.187326 -
+ (v - 128) * 0.65042);
+ *b = RoundToByte((y - 16) * 1.164384 - (u - 128) * -2.14177);
+}
+
+// BT.2020 full range YUV to RGB reference
+static void YUVVToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
+ *r = RoundToByte(y + (v - 128) * 1.474600);
+ *g = RoundToByte(y - (u - 128) * 0.164553 - (v - 128) * 0.571353);
+ *b = RoundToByte(y + (u - 128) * 1.881400);
+}
+
TEST_F(LibYUVColorTest, TestYUV) {
int r0, g0, b0, r1, g1, b1;
@@ -370,7 +502,11 @@ TEST_F(LibYUVColorTest, TestYUV) {
YUVToRGB(240, 0, 0, &r1, &g1, &b1);
EXPECT_EQ(57, r1);
EXPECT_EQ(255, g1);
+#ifdef LIBYUV_UNLIMITED_DATA
+ EXPECT_EQ(3, b1);
+#else
EXPECT_EQ(5, b1);
+#endif
for (int i = 0; i < 256; ++i) {
YUVToRGBReference(i, 128, 128, &r0, &g0, &b0);
@@ -444,28 +580,28 @@ TEST_F(LibYUVColorTest, TestGreyYUV) {
static void PrintHistogram(int rh[256], int gh[256], int bh[256]) {
int i;
- printf("hist");
+ printf("hist ");
for (i = 0; i < 256; ++i) {
if (rh[i] || gh[i] || bh[i]) {
- printf("\t%8d", i - 128);
+ printf(" %8d", i - 128);
}
}
- printf("\nred");
+ printf("\nred ");
for (i = 0; i < 256; ++i) {
if (rh[i] || gh[i] || bh[i]) {
- printf("\t%8d", rh[i]);
+ printf(" %8d", rh[i]);
}
}
printf("\ngreen");
for (i = 0; i < 256; ++i) {
if (rh[i] || gh[i] || bh[i]) {
- printf("\t%8d", gh[i]);
+ printf(" %8d", gh[i]);
}
}
- printf("\nblue");
+ printf("\nblue ");
for (i = 0; i < 256; ++i) {
if (rh[i] || gh[i] || bh[i]) {
- printf("\t%8d", bh[i]);
+ printf(" %8d", bh[i]);
}
}
printf("\n");
@@ -473,7 +609,13 @@ static void PrintHistogram(int rh[256], int gh[256], int bh[256]) {
// Step by 5 on inner loop goes from 0 to 255 inclusive.
// Set to 1 for better converage. 3, 5 or 17 for faster testing.
+#ifdef DISABLE_SLOW_TESTS
#define FASTSTEP 5
+#else
+#define FASTSTEP 1
+#endif
+
+// BT.601 limited range.
TEST_F(LibYUVColorTest, TestFullYUV) {
int rh[256] = {
0,
@@ -503,6 +645,7 @@ TEST_F(LibYUVColorTest, TestFullYUV) {
PrintHistogram(rh, gh, bh);
}
+// BT.601 full range.
TEST_F(LibYUVColorTest, TestFullYUVJ) {
int rh[256] = {
0,
@@ -520,9 +663,129 @@ TEST_F(LibYUVColorTest, TestFullYUVJ) {
int y = RANDOM256(y2);
YUVJToRGBReference(y, u, v, &r0, &g0, &b0);
YUVJToRGB(y, u, v, &r1, &g1, &b1);
- EXPECT_NEAR(r0, r1, 1);
- EXPECT_NEAR(g0, g1, 1);
- EXPECT_NEAR(b0, b1, 1);
+ EXPECT_NEAR(r0, r1, ERROR_R);
+ EXPECT_NEAR(g0, g1, ERROR_G);
+ EXPECT_NEAR(b0, b1, ERROR_B);
+ ++rh[r1 - r0 + 128];
+ ++gh[g1 - g0 + 128];
+ ++bh[b1 - b0 + 128];
+ }
+ }
+ }
+ PrintHistogram(rh, gh, bh);
+}
+
+// BT.709 limited range.
+TEST_F(LibYUVColorTest, TestFullYUVH) {
+ int rh[256] = {
+ 0,
+ };
+ int gh[256] = {
+ 0,
+ };
+ int bh[256] = {
+ 0,
+ };
+ for (int u = 0; u < 256; ++u) {
+ for (int v = 0; v < 256; ++v) {
+ for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
+ int r0, g0, b0, r1, g1, b1;
+ int y = RANDOM256(y2);
+ YUVHToRGBReference(y, u, v, &r0, &g0, &b0);
+ YUVHToRGB(y, u, v, &r1, &g1, &b1);
+ EXPECT_NEAR(r0, r1, ERROR_R);
+ EXPECT_NEAR(g0, g1, ERROR_G);
+ EXPECT_NEAR(b0, b1, ERROR_B);
+ ++rh[r1 - r0 + 128];
+ ++gh[g1 - g0 + 128];
+ ++bh[b1 - b0 + 128];
+ }
+ }
+ }
+ PrintHistogram(rh, gh, bh);
+}
+
+// BT.709 full range.
+TEST_F(LibYUVColorTest, TestFullYUVF) {
+ int rh[256] = {
+ 0,
+ };
+ int gh[256] = {
+ 0,
+ };
+ int bh[256] = {
+ 0,
+ };
+ for (int u = 0; u < 256; ++u) {
+ for (int v = 0; v < 256; ++v) {
+ for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
+ int r0, g0, b0, r1, g1, b1;
+ int y = RANDOM256(y2);
+ YUVFToRGBReference(y, u, v, &r0, &g0, &b0);
+ YUVFToRGB(y, u, v, &r1, &g1, &b1);
+ EXPECT_NEAR(r0, r1, ERROR_R);
+ EXPECT_NEAR(g0, g1, ERROR_G);
+ EXPECT_NEAR(b0, b1, ERROR_B);
+ ++rh[r1 - r0 + 128];
+ ++gh[g1 - g0 + 128];
+ ++bh[b1 - b0 + 128];
+ }
+ }
+ }
+ PrintHistogram(rh, gh, bh);
+}
+
+// BT.2020 limited range.
+TEST_F(LibYUVColorTest, TestFullYUVU) {
+ int rh[256] = {
+ 0,
+ };
+ int gh[256] = {
+ 0,
+ };
+ int bh[256] = {
+ 0,
+ };
+ for (int u = 0; u < 256; ++u) {
+ for (int v = 0; v < 256; ++v) {
+ for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
+ int r0, g0, b0, r1, g1, b1;
+ int y = RANDOM256(y2);
+ YUVUToRGBReference(y, u, v, &r0, &g0, &b0);
+ YUVUToRGB(y, u, v, &r1, &g1, &b1);
+ EXPECT_NEAR(r0, r1, ERROR_R);
+ EXPECT_NEAR(g0, g1, ERROR_G);
+ EXPECT_NEAR(b0, b1, ERROR_B);
+ ++rh[r1 - r0 + 128];
+ ++gh[g1 - g0 + 128];
+ ++bh[b1 - b0 + 128];
+ }
+ }
+ }
+ PrintHistogram(rh, gh, bh);
+}
+
+// BT.2020 full range.
+TEST_F(LibYUVColorTest, TestFullYUVV) {
+ int rh[256] = {
+ 0,
+ };
+ int gh[256] = {
+ 0,
+ };
+ int bh[256] = {
+ 0,
+ };
+ for (int u = 0; u < 256; ++u) {
+ for (int v = 0; v < 256; ++v) {
+ for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
+ int r0, g0, b0, r1, g1, b1;
+ int y = RANDOM256(y2);
+ YUVVToRGBReference(y, u, v, &r0, &g0, &b0);
+ YUVVToRGB(y, u, v, &r1, &g1, &b1);
+ EXPECT_NEAR(r0, r1, ERROR_R);
+ EXPECT_NEAR(g0, g1, 2);
+ EXPECT_NEAR(b0, b1, ERROR_B);
++rh[r1 - r0 + 128];
++gh[g1 - g0 + 128];
++bh[b1 - b0 + 128];
diff --git a/files/unit_test/compare_test.cc b/files/unit_test/compare_test.cc
index 136254e1..c29562cb 100644
--- a/files/unit_test/compare_test.cc
+++ b/files/unit_test/compare_test.cc
@@ -15,10 +15,13 @@
#include "../unit_test/unit_test.h"
#include "libyuv/basic_types.h"
#include "libyuv/compare.h"
-#include "libyuv/compare_row.h" /* For HammingDistance_C */
#include "libyuv/cpu_id.h"
#include "libyuv/video_common.h"
+#ifdef ENABLE_ROW_TESTS
+#include "libyuv/compare_row.h" /* For HammingDistance_C */
+#endif
+
namespace libyuv {
// hash seed of 5381 recommended.
@@ -206,6 +209,7 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Unaligned) {
free_aligned_buffer_page_end(src_a);
}
+#ifdef ENABLE_ROW_TESTS
TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) {
const int kMaxWidth = 4096 * 3;
align_buffer_page_end(src_a, kMaxWidth);
@@ -340,7 +344,7 @@ static const int kMaxOptCount = (1 << (32 - 3)) - 64; // 536870848
TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) {
uint32_t h1 = 0;
- const int kMaxWidth = (benchmark_width_ * benchmark_height_ + 31) & ~31;
+ const int kMaxWidth = (benchmark_width_ * benchmark_height_ + 63) & ~63;
align_buffer_page_end(src_a, kMaxWidth);
align_buffer_page_end(src_b, kMaxWidth);
memset(src_a, 255u, kMaxWidth);
@@ -403,6 +407,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) {
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
}
+#endif // ENABLE_ROW_TESTS
TEST_F(LibYUVCompareTest, TestHammingDistance) {
align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_);
diff --git a/files/unit_test/convert_test.cc b/files/unit_test/convert_test.cc
index 32a4cd1c..1f975825 100644
--- a/files/unit_test/convert_test.cc
+++ b/files/unit_test/convert_test.cc
@@ -12,8 +12,6 @@
#include <stdlib.h>
#include <time.h>
-#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */
-
#include "libyuv/basic_types.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
@@ -29,12 +27,19 @@
#include "libyuv/rotate.h"
#include "libyuv/video_common.h"
-#if defined(__arm__) || defined(__aarch64__)
-// arm version subsamples by summing 4 pixels then multiplying by matrix with
-// 4x smaller coefficients which are rounded to nearest integer.
-#define ARM_YUV_ERROR 4
-#else
-#define ARM_YUV_ERROR 0
+#ifdef ENABLE_ROW_TESTS
+#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */
+#endif
+
+// Some functions fail on big endian. Enable these tests on all cpus except
+// PowerPC, but they are not optimized so disabled by default.
+#if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__)
+#define LITTLE_ENDIAN_ONLY_TEST 1
+#endif
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+// SLOW TESTS are those that are unoptimized C code.
+// FULL TESTS are optimized but test many variations of the same code.
+#define ENABLE_FULL_TESTS
#endif
namespace libyuv {
@@ -49,19 +54,20 @@ namespace libyuv {
#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF) \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
+ SRC_DEPTH) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
- "DST SRC_SUBSAMP_X unsupported"); \
+ "SRC_SUBSAMP_X unsupported"); \
static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
- "DST SRC_SUBSAMP_Y unsupported"); \
+ "SRC_SUBSAMP_Y unsupported"); \
static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
- "DST DST_SUBSAMP_X unsupported"); \
+ "DST_SUBSAMP_X unsupported"); \
static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
- "DST DST_SUBSAMP_Y unsupported"); \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ "DST_SUBSAMP_Y unsupported"); \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
@@ -81,6 +87,16 @@ namespace libyuv {
MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
+ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
+ SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \
+ SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \
+ } \
+ for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \
+ src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \
+ src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \
+ } \
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
@@ -89,9 +105,7 @@ namespace libyuv {
memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
- reinterpret_cast<SRC_T*>(src_y + OFF), kWidth, \
- reinterpret_cast<SRC_T*>(src_u + OFF), kSrcHalfWidth, \
- reinterpret_cast<SRC_T*>(src_v + OFF), kSrcHalfWidth, \
+ src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \
reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \
reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \
@@ -99,9 +113,7 @@ namespace libyuv {
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
- reinterpret_cast<SRC_T*>(src_y + OFF), kWidth, \
- reinterpret_cast<SRC_T*>(src_u + OFF), kSrcHalfWidth, \
- reinterpret_cast<SRC_T*>(src_v + OFF), kSrcHalfWidth, \
+ src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \
reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \
reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \
reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \
@@ -127,41 +139,58 @@ namespace libyuv {
#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y) \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
- benchmark_width_ - 4, _Any, +, 0) \
+ benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
- benchmark_width_, _Unaligned, +, 1) \
+ benchmark_width_, _Unaligned, +, 2, SRC_DEPTH) \
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
- benchmark_width_, _Invert, -, 0) \
+ benchmark_width_, _Invert, -, 0, SRC_DEPTH) \
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
- benchmark_width_, _Opt, +, 0)
-
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2)
-TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2)
-TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2)
-TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1)
-TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1)
-TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2)
-TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2)
-TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2)
-TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2)
-TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2)
+ benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1, 8)
+TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I444, uint8_t, 1, 1, 1, 8)
+TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1, 8)
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I012, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2, 10)
+TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H012, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I410, uint16_t, 2, 1, 1, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I410, uint16_t, 2, 1, 1, 10)
+TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I412, uint16_t, 2, 1, 1, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I412, uint16_t, 2, 1, 1, 12)
+TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12)
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10)
+TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10)
+TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12)
+TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12)
// Test Android 420 to I420
#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
W1280, N, NEG, OFF, PN, OFF_U, OFF_V) \
- TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##_##PN##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##To##PN##N) { \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kSizeUV = \
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
@@ -216,41 +245,23 @@ TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2)
dst_y_opt, kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
} \
- int max_diff = 0; \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
- int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
- static_cast<int>(dst_y_opt[i * kWidth + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
} \
} \
- EXPECT_EQ(0, max_diff); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
- int abs_diff = abs( \
- static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
- static_cast<int>( \
- dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
+ EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
+ dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
} \
} \
- EXPECT_LE(max_diff, 3); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
- int abs_diff = abs( \
- static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
- static_cast<int>( \
- dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
+ EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
+ dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
} \
} \
- EXPECT_LE(max_diff, 3); \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_u_c); \
free_aligned_buffer_page_end(dst_v_c); \
@@ -265,11 +276,11 @@ TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2)
SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \
SUBSAMP_Y) \
TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, \
+ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1, \
_Any, +, 0, PN, OFF_U, OFF_V) \
TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, \
- _Unaligned, +, 1, PN, OFF_U, OFF_V) \
+ _Unaligned, +, 2, PN, OFF_U, OFF_V) \
TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \
-, 0, PN, OFF_U, OFF_V) \
@@ -280,6 +291,8 @@ TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2)
TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2)
TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2)
TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
+#undef TESTAPLANARTOP
+#undef TESTAPLANARTOPI
// wrapper to keep API the same
int I400ToNV21(const uint8_t* src_y,
@@ -298,77 +311,75 @@ int I400ToNV21(const uint8_t* src_y,
dst_stride_vu, width, height);
}
-#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
+#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
+ SRC_DEPTH) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
+ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
+ static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
+ "SRC_SUBSAMP_X unsupported"); \
+ static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
+ "SRC_SUBSAMP_Y unsupported"); \
+ static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
+ "DST_SUBSAMP_X unsupported"); \
+ static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
+ "DST_SUBSAMP_Y unsupported"); \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
- align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
- align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
- OFF); \
- align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
- OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- for (int i = 0; i < kHeight; ++i) \
- for (int j = 0; j < kWidth; ++j) \
- src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
- for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
- src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
- (fastrand() & 0xff); \
- src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
- (fastrand() & 0xff); \
- } \
+ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
+ const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
+ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
+ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(src_u, \
+ kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(src_v, \
+ kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_c, \
+ kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_opt, \
+ kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
+ MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
+ MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
+ MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
+ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
+ SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \
+ SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \
} \
- memset(dst_y_c, 1, kWidth* kHeight); \
- memset(dst_uv_c, 2, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_y_opt, 101, kWidth* kHeight); \
- memset(dst_uv_opt, 102, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \
+ src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \
+ src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \
+ } \
+ memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
+ memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
MaskCpuFlags(disable_cpu_flags_); \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
- src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \
- dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
+ SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth, \
+ src_v_p, kSrcHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_c), \
+ kDstHalfWidth * 2, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
- src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \
- dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
+ src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_opt), kDstHalfWidth * 2, kWidth, \
+ NEG kHeight); \
} \
- int max_diff = 0; \
- for (int i = 0; i < kHeight; ++i) { \
- for (int j = 0; j < kWidth; ++j) { \
- int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
- static_cast<int>(dst_y_opt[i * kWidth + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
- } \
+ for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \
+ EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
} \
- EXPECT_LE(max_diff, 1); \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
- int abs_diff = \
- abs(static_cast<int>( \
- dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \
- static_cast<int>( \
- dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
- } \
+ for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC * 2; ++i) { \
+ EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]); \
} \
- EXPECT_LE(max_diff, 1); \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_uv_c); \
free_aligned_buffer_page_end(dst_y_opt); \
@@ -378,92 +389,110 @@ int I400ToNV21(const uint8_t* src_y,
free_aligned_buffer_page_end(src_v); \
}
-#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
- TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \
- TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1) \
- TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \
- TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0)
-
-TESTPLANARTOBP(I420, 2, 2, NV12, 2, 2)
-TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2)
-TESTPLANARTOBP(I422, 2, 1, NV21, 2, 2)
-TESTPLANARTOBP(I444, 1, 1, NV21, 2, 2)
-TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)
-
-#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, \
- OFF) \
+#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \
+ SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+
+TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I422, uint8_t, 1, 2, 1, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV12, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I400, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I010, uint16_t, 2, 2, 2, P010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
+TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
+TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
+
+#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
+ DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
+ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
+ static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
+ "SRC_SUBSAMP_X unsupported"); \
+ static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
+ "SRC_SUBSAMP_Y unsupported"); \
+ static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
+ "DST_SUBSAMP_X unsupported"); \
+ static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
+ "DST_SUBSAMP_Y unsupported"); \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
- align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
- align_buffer_page_end(src_uv, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2 * \
- SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
- OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- for (int i = 0; i < kHeight; ++i) \
- for (int j = 0; j < kWidth; ++j) \
- src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
- for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
- src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 0 + OFF] = \
- (fastrand() & 0xff); \
- src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 1 + OFF] = \
- (fastrand() & 0xff); \
- } \
+ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
+ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
+ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
+ const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
+ const int kPaddedHeight = \
+ (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
+ const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
+ const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
+ align_buffer_page_end( \
+ src_uv, \
+ 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_c, \
+ 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_opt, \
+ 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
+ SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
+ for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
+ src_y_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
- memset(dst_y_c, 1, kWidth* kHeight); \
- memset(dst_uv_c, 2, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_y_opt, 101, kWidth* kHeight); \
- memset(dst_uv_opt, 102, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
+ src_uv_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
+ } \
+ memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y + OFF, kWidth, src_uv + OFF, \
- SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_c, kWidth, dst_uv_c, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
+ src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
+ DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
+ NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y + OFF, kWidth, src_uv + OFF, \
- SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_opt, kWidth, dst_uv_opt, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
+ src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
+ DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
+ NEG kHeight); \
} \
- int max_diff = 0; \
- for (int i = 0; i < kHeight; ++i) { \
- for (int j = 0; j < kWidth; ++j) { \
- int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
- static_cast<int>(dst_y_opt[i * kWidth + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
+ if (DOY) { \
+ for (int i = 0; i < kHeight; ++i) { \
+ for (int j = 0; j < kWidth; ++j) { \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
} \
} \
} \
- EXPECT_LE(max_diff, 1); \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
- int abs_diff = \
- abs(static_cast<int>( \
- dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \
- static_cast<int>( \
- dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
+ for (int i = 0; i < kDstHalfHeight; ++i) { \
+ for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
+ EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
+ dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
} \
} \
- EXPECT_LE(max_diff, 1); \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_uv_c); \
free_aligned_buffer_page_end(dst_y_opt); \
@@ -472,112 +501,116 @@ TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)
free_aligned_buffer_page_end(src_uv); \
}
-#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width, _Unaligned, +, 1) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0)
-
-// TODO(fbarchard): Fix msan on this unittest
-// TESTBIPLANARTOBP(NV21, 2, 2, NV12, 2, 2)
-
-#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF, \
- DOY) \
+#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1, \
+ SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1, \
+ SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1, \
+ SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \
+ TILE_WIDTH, TILE_HEIGHT) \
+ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0, \
+ SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
+
+TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
+TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
+TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
+
+#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
+ SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
+ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
+ static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
+ "SRC_SUBSAMP_X unsupported"); \
+ static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
+ "SRC_SUBSAMP_Y unsupported"); \
+ static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
+ "DST_SUBSAMP_X unsupported"); \
+ static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
+ "DST_SUBSAMP_Y unsupported"); \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
- align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
- align_buffer_page_end(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
- OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- for (int i = 0; i < kHeight; ++i) \
- for (int j = 0; j < kWidth; ++j) \
- src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
- for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
- for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
- src_uv[(i * 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
- (fastrand() & 0xff); \
- } \
+ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
+ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
+ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
+ const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
+ const int kPaddedHeight = \
+ (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
+ const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
+ const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
+ align_buffer_page_end( \
+ src_uv, kSrcHalfPaddedWidth* kSrcHalfPaddedHeight* SRC_BPC * 2 + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
+ SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
+ for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
+ src_y_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
- memset(dst_y_c, 1, kWidth* kHeight); \
- memset(dst_u_c, 2, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_v_c, 3, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_y_opt, 101, kWidth* kHeight); \
- memset(dst_u_opt, 102, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_v_opt, 103, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
+ src_uv_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
+ } \
+ memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
+ memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
+ memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y + OFF, kWidth, src_uv + OFF, \
- 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), DOY ? dst_y_c : NULL, kWidth, \
- dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \
- SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
+ src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \
+ reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
+ reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \
+ NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y + OFF, kWidth, src_uv + OFF, \
- 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), DOY ? dst_y_opt : NULL, \
- kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_opt, \
- SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
- } \
- int max_diff = 0; \
- if (DOY) { \
- for (int i = 0; i < kHeight; ++i) { \
- for (int j = 0; j < kWidth; ++j) { \
- int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
- static_cast<int>(dst_y_opt[i * kWidth + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
- } \
- } \
- EXPECT_LE(max_diff, 1); \
+ src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \
+ reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \
+ reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \
+ NEG kHeight); \
} \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
- int abs_diff = abs( \
- static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
- static_cast<int>( \
- dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
- } \
+ for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \
+ EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
} \
- EXPECT_LE(max_diff, 1); \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
- int abs_diff = abs( \
- static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
- static_cast<int>( \
- dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
- } \
+ for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \
+ EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \
+ EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \
} \
- EXPECT_LE(max_diff, 1); \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_u_c); \
free_aligned_buffer_page_end(dst_v_c); \
@@ -588,29 +621,72 @@ TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)
free_aligned_buffer_page_end(src_uv); \
}
-#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0, 1) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1, \
- 1) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0)
-
-TESTBIPLANARTOP(NV12, 2, 2, I420, 2, 2)
-TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2)
+#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \
+ TILE_WIDTH, TILE_HEIGHT) \
+ TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \
+ SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+ TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH, \
+ TILE_WIDTH, TILE_HEIGHT) \
+ TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH, \
+ TILE_WIDTH, TILE_HEIGHT)
+
+TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
+
+// Provide matrix wrappers for full range bt.709
+#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
+ I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j)
+#define F420ToARGB(a, b, c, d, e, f, g, h, i, j) \
+ I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j)
+#define F422ToABGR(a, b, c, d, e, f, g, h, i, j) \
+ I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j)
+#define F422ToARGB(a, b, c, d, e, f, g, h, i, j) \
+ I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j)
+#define F444ToABGR(a, b, c, d, e, f, g, h, i, j) \
+ I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j)
+#define F444ToARGB(a, b, c, d, e, f, g, h, i, j) \
+ I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j)
+
+// Provide matrix wrappers for full range bt.2020
+#define V420ToABGR(a, b, c, d, e, f, g, h, i, j) \
+ I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j)
+#define V420ToARGB(a, b, c, d, e, f, g, h, i, j) \
+ I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
+#define V422ToABGR(a, b, c, d, e, f, g, h, i, j) \
+ I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j)
+#define V422ToARGB(a, b, c, d, e, f, g, h, i, j) \
+ I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
+#define V444ToABGR(a, b, c, d, e, f, g, h, i, j) \
+ I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j)
+#define V444ToARGB(a, b, c, d, e, f, g, h, i, j) \
+ I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
+
+#define I420ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \
+ I420ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+ kFilterBilinear)
+#define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \
+ I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+ kFilterBilinear)
#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ const int kWidth = W1280; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
@@ -655,59 +731,123 @@ TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2)
free_aligned_buffer_page_end(dst_argb_opt); \
}
+#if defined(ENABLE_FULL_TESTS)
#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN) \
TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_ - 4, _Any, +, 0) \
+ YALIGN, benchmark_width_ + 1, _Any, +, 0) \
TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Unaligned, +, 1) \
+ YALIGN, benchmark_width_, _Unaligned, +, 4) \
TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Invert, -, 0) \
TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Opt, +, 0)
+#else
+#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN) \
+ TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_ + 1, _Any, +, 0) \
+ TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Opt, +, 0)
+#endif
+#if defined(ENABLE_FULL_TESTS)
TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1)
TESTPLANARTOB(J420, 2, 2, ARGB, 4, 4, 1)
TESTPLANARTOB(J420, 2, 2, ABGR, 4, 4, 1)
+TESTPLANARTOB(F420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(F420, 2, 2, ABGR, 4, 4, 1)
TESTPLANARTOB(H420, 2, 2, ARGB, 4, 4, 1)
TESTPLANARTOB(H420, 2, 2, ABGR, 4, 4, 1)
+TESTPLANARTOB(U420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(U420, 2, 2, ABGR, 4, 4, 1)
+TESTPLANARTOB(V420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(V420, 2, 2, ABGR, 4, 4, 1)
TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1)
-TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1)
TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1)
TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1)
TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1)
+TESTPLANARTOB(J420, 2, 2, RAW, 3, 3, 1)
+TESTPLANARTOB(J420, 2, 2, RGB24, 3, 3, 1)
TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1)
TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1)
TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1)
TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1)
TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1)
TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1)
-TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1)
+#endif
+TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(J422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(J422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(U422, 2, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(U422, 2, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
-TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
-TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(H444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(U444, 1, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(U444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(V444, 1, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(V444, 1, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1)
TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1)
TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1)
TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, AB30, 4, 4, 1)
+TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1)
+#endif
+TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
+#else
+TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1)
+TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1)
+TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1)
+TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1)
+TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1)
+TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1)
+#endif
+TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
+TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1)
+TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1)
+TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
+TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
+TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
+TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
+TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
+#endif
#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, W1280, DIFF, N, NEG, OFF, ATTEN) \
+ YALIGN, W1280, N, NEG, OFF, ATTEN) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ const int kWidth = W1280; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
@@ -740,15 +880,9 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \
ATTEN); \
} \
- int max_diff = 0; \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
- int abs_diff = abs(static_cast<int>(dst_argb_c[i + OFF]) - \
- static_cast<int>(dst_argb_opt[i + OFF])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
+ EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \
} \
- EXPECT_LE(max_diff, DIFF); \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
free_aligned_buffer_page_end(src_v); \
@@ -757,26 +891,175 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
free_aligned_buffer_page_end(dst_argb_opt); \
}
+#if defined(ENABLE_FULL_TESTS)
#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, DIFF) \
+ YALIGN) \
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, 0) \
+ YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, 0) \
+ YALIGN, benchmark_width_, _Unaligned, +, 2, 0) \
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Invert, -, 0, 0) \
+ YALIGN, benchmark_width_, _Invert, -, 0, 0) \
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Opt, +, 0, 0) \
+ YALIGN, benchmark_width_, _Opt, +, 0, 0) \
+ TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Premult, +, 0, 1)
+#else
+#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN) \
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Premult, +, 0, 1)
+ YALIGN, benchmark_width_, _Opt, +, 0, 0)
+#endif
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2)
-TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
+#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+
+#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \
+ &kYuvI601Constants, k, l, m, kFilterBilinear)
+#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \
+ &kYuvI601Constants, k, l, m, kFilterBilinear)
+
+#if defined(ENABLE_FULL_TESTS)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
+#else
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
+#endif
#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \
- BPP_B, W1280, DIFF, N, NEG, OFF) \
+ BPP_B, W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kStrideB = kWidth * BPP_B; \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
@@ -813,18 +1096,12 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
kHeight); \
FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
kHeight); \
- int max_diff = 0; \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth * 4; ++j) { \
- int abs_diff = \
- abs(static_cast<int>(dst_argb32_c[i * kWidth * 4 + j]) - \
- static_cast<int>(dst_argb32_opt[i * kWidth * 4 + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
+ EXPECT_EQ(dst_argb32_c[i * kWidth * 4 + j], \
+ dst_argb32_opt[i * kWidth * 4 + j]); \
} \
} \
- EXPECT_LE(max_diff, DIFF); \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
free_aligned_buffer_page_end(dst_argb_c); \
@@ -833,95 +1110,64 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
free_aligned_buffer_page_end(dst_argb32_opt); \
}
-#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- DIFF) \
+#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_ - 4, DIFF, _Any, +, 0) \
+ benchmark_width_ + 1, _Any, +, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_, DIFF, _Unaligned, +, 1) \
+ benchmark_width_, _Unaligned, +, 2) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_, DIFF, _Invert, -, 0) \
+ benchmark_width_, _Invert, -, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_, DIFF, _Opt, +, 0)
-
-TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4, 2)
-TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4, 2)
-TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4, 2)
-TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4, 2)
-TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3, 2)
-TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3, 2)
-TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3, 2)
-TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3, 2)
-TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2, 9)
-TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2)
-
-#ifdef DO_THREE_PLANES
-// Do 3 allocations for yuv. conventional but slower.
-#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- W1280, DIFF, N, NEG, OFF) \
- TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
- const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
- const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
- const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
- align_buffer_page_end(src_argb, kStride* kHeight + OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_u_c, kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_c, kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_u_opt, \
- kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_opt, \
- kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_y_c, 1, kWidth* kHeight); \
- memset(dst_u_c, 2, kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_v_c, 3, kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_y_opt, 101, kWidth* kHeight); \
- memset(dst_u_opt, 102, kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_v_opt, 103, kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- for (int i = 0; i < kHeight; ++i) \
- for (int j = 0; j < kStride; ++j) \
- src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
- MaskCpuFlags(disable_cpu_flags_); \
- FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_u_c, \
- kStrideUV, dst_v_c, kStrideUV, kWidth, NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \
- dst_u_opt, kStrideUV, dst_v_opt, kStrideUV, \
- kWidth, NEG kHeight); \
- } \
- for (int i = 0; i < kHeight; ++i) { \
- for (int j = 0; j < kWidth; ++j) { \
- EXPECT_NEAR(static_cast<int>(dst_y_c[i * kWidth + j]), \
- static_cast<int>(dst_y_opt[i * kWidth + j]), DIFF); \
- } \
- } \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < kStrideUV; ++j) { \
- EXPECT_NEAR(static_cast<int>(dst_u_c[i * kStrideUV + j]), \
- static_cast<int>(dst_u_opt[i * kStrideUV + j]), DIFF); \
- } \
- } \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < kStrideUV; ++j) { \
- EXPECT_NEAR(static_cast<int>(dst_v_c[i * kStrideUV + j]), \
- static_cast<int>(dst_v_opt[i * kStrideUV + j]), DIFF); \
- } \
- } \
- free_aligned_buffer_page_end(dst_y_c); \
- free_aligned_buffer_page_end(dst_u_c); \
- free_aligned_buffer_page_end(dst_v_c); \
- free_aligned_buffer_page_end(dst_y_opt); \
- free_aligned_buffer_page_end(dst_u_opt); \
- free_aligned_buffer_page_end(dst_v_opt); \
- free_aligned_buffer_page_end(src_argb); \
- }
-#else
+ benchmark_width_, _Opt, +, 0)
+
+#define JNV12ToARGB(a, b, c, d, e, f, g, h) \
+ NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
+#define JNV21ToARGB(a, b, c, d, e, f, g, h) \
+ NV21ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
+#define JNV12ToABGR(a, b, c, d, e, f, g, h) \
+ NV21ToARGBMatrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h)
+#define JNV21ToABGR(a, b, c, d, e, f, g, h) \
+ NV12ToARGBMatrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h)
+#define JNV12ToRGB24(a, b, c, d, e, f, g, h) \
+ NV12ToRGB24Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
+#define JNV21ToRGB24(a, b, c, d, e, f, g, h) \
+ NV21ToRGB24Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
+#define JNV12ToRAW(a, b, c, d, e, f, g, h) \
+ NV21ToRGB24Matrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h)
+#define JNV21ToRAW(a, b, c, d, e, f, g, h) \
+ NV12ToRGB24Matrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h)
+#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \
+ NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
+
+TESTBIPLANARTOB(JNV12, 2, 2, ARGB, ARGB, 4)
+TESTBIPLANARTOB(JNV21, 2, 2, ARGB, ARGB, 4)
+TESTBIPLANARTOB(JNV12, 2, 2, ABGR, ABGR, 4)
+TESTBIPLANARTOB(JNV21, 2, 2, ABGR, ABGR, 4)
+TESTBIPLANARTOB(JNV12, 2, 2, RGB24, RGB24, 3)
+TESTBIPLANARTOB(JNV21, 2, 2, RGB24, RGB24, 3)
+TESTBIPLANARTOB(JNV12, 2, 2, RAW, RAW, 3)
+TESTBIPLANARTOB(JNV21, 2, 2, RAW, RAW, 3)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTBIPLANARTOB(JNV12, 2, 2, RGB565, RGB565, 2)
+#endif
+
+TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4)
+TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4)
+TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4)
+TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4)
+TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3)
+TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3)
+TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3)
+TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3)
+TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2)
+#endif
+
#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- W1280, DIFF, N, NEG, OFF) \
+ W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ const int kWidth = W1280; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
@@ -951,14 +1197,12 @@ TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2)
} \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
- EXPECT_NEAR(static_cast<int>(dst_y_c[i * kWidth + j]), \
- static_cast<int>(dst_y_opt[i * kWidth + j]), DIFF); \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
} \
} \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \
for (int j = 0; j < kStrideUV; ++j) { \
- EXPECT_NEAR(static_cast<int>(dst_uv_c[i * kStrideUV + j]), \
- static_cast<int>(dst_uv_opt[i * kStrideUV + j]), DIFF); \
+ EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \
} \
} \
free_aligned_buffer_page_end(dst_y_c); \
@@ -967,45 +1211,53 @@ TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2)
free_aligned_buffer_page_end(dst_uv_opt); \
free_aligned_buffer_page_end(src_argb); \
}
-#endif
-#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- DIFF) \
+#if defined(ENABLE_FULL_TESTS)
+#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_ - 4, DIFF, _Any, +, 0) \
+ benchmark_width_ + 1, _Any, +, 0) \
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, DIFF, _Unaligned, +, 1) \
+ benchmark_width_, _Unaligned, +, 2) \
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, DIFF, _Invert, -, 0) \
+ benchmark_width_, _Invert, -, 0) \
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, DIFF, _Opt, +, 0)
-
-TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4)
-TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4)
-TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2)
-TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
-TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR)
-TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR)
-TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
-TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
-TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4)
-TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2)
-TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
-TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
-TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
-// TODO(fbarchard): Investigate J420 error of 11 on Windows.
-TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 11)
-TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
-TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
-TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2)
-TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2)
-TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2)
-TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
+ benchmark_width_, _Opt, +, 0)
+#else
+#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+#endif
+
+TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2)
+TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2)
+TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1)
+TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1)
+TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2)
+TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2)
+TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2)
+TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2)
+#endif
+TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2)
+TESTATOPLANAR(I400, 1, 1, I420, 2, 2)
+TESTATOPLANAR(J400, 1, 1, J420, 2, 2)
+TESTATOPLANAR(RAW, 3, 1, I420, 2, 2)
+TESTATOPLANAR(RAW, 3, 1, J420, 2, 2)
+TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2)
+TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2)
+TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2)
+TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2)
+TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
+TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
+TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
SUBSAMP_Y, W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
@@ -1031,28 +1283,17 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \
dst_uv_opt, kStrideUV * 2, kWidth, NEG kHeight); \
} \
- int max_diff = 0; \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
- int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
- static_cast<int>(dst_y_opt[i * kWidth + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
} \
} \
- EXPECT_LE(max_diff, 4); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < kStrideUV * 2; ++j) { \
- int abs_diff = \
- abs(static_cast<int>(dst_uv_c[i * kStrideUV * 2 + j]) - \
- static_cast<int>(dst_uv_opt[i * kStrideUV * 2 + j])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
+ EXPECT_EQ(dst_uv_c[i * kStrideUV * 2 + j], \
+ dst_uv_opt[i * kStrideUV * 2 + j]); \
} \
} \
- EXPECT_LE(max_diff, 4); \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_uv_c); \
free_aligned_buffer_page_end(dst_y_opt); \
@@ -1062,9 +1303,9 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_ - 4, _Any, +, 0) \
+ benchmark_width_ + 1, _Any, +, 0) \
TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Unaligned, +, 1) \
+ benchmark_width_, _Unaligned, +, 2) \
TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, _Invert, -, 0) \
TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
@@ -1072,146 +1313,181 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
+TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2)
+TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2)
+TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2)
TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
-#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, W1280, DIFF, N, NEG, OFF) \
- TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
- const int kHeight = benchmark_height_; \
- const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
- const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
- const int kStrideA = \
- (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
- const int kStrideB = \
- (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
- align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF); \
- align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \
- align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \
- for (int i = 0; i < kStrideA * kHeightA; ++i) { \
- src_argb[i + OFF] = (fastrand() & 0xff); \
- } \
- memset(dst_argb_c, 1, kStrideB* kHeightB); \
- memset(dst_argb_opt, 101, kStrideB* kHeightB); \
- MaskCpuFlags(disable_cpu_flags_); \
- FMT_A##To##FMT_B(src_argb + OFF, kStrideA, dst_argb_c, kStrideB, kWidth, \
- NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- FMT_A##To##FMT_B(src_argb + OFF, kStrideA, dst_argb_opt, kStrideB, \
- kWidth, NEG kHeight); \
- } \
- int max_diff = 0; \
- for (int i = 0; i < kStrideB * kHeightB; ++i) { \
- int abs_diff = abs(static_cast<int>(dst_argb_c[i]) - \
- static_cast<int>(dst_argb_opt[i])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
- } \
- EXPECT_LE(max_diff, DIFF); \
- free_aligned_buffer_page_end(src_argb); \
- free_aligned_buffer_page_end(dst_argb_c); \
- free_aligned_buffer_page_end(dst_argb_opt); \
+#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
+ EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = benchmark_height_; \
+ const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
+ const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
+ const int kStrideA = \
+ (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
+ const int kStrideB = \
+ (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
+ align_buffer_page_end(src_argb, \
+ kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
+ align_buffer_page_end(dst_argb_c, kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
+ align_buffer_page_end(dst_argb_opt, \
+ kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
+ for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
+ src_argb[i + OFF] = (fastrand() & 0xff); \
+ } \
+ memset(dst_argb_c, 1, kStrideB* kHeightB); \
+ memset(dst_argb_opt, 101, kStrideB* kHeightB); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_B*)dst_argb_c, \
+ kStrideB, kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, \
+ (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
+ } \
+ for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
+ EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(src_argb); \
+ free_aligned_buffer_page_end(dst_argb_c); \
+ free_aligned_buffer_page_end(dst_argb_opt); \
}
-#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, \
- STRIDE_B, HEIGHT_B, DIFF) \
- TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \
- for (int times = 0; times < benchmark_iterations_; ++times) { \
- const int kWidth = (fastrand() & 63) + 1; \
- const int kHeight = (fastrand() & 31) + 1; \
- const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
- const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
- const int kStrideA = \
- (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
- const int kStrideB = \
- (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
- align_buffer_page_end(src_argb, kStrideA* kHeightA); \
- align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \
- align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \
- for (int i = 0; i < kStrideA * kHeightA; ++i) { \
- src_argb[i] = (fastrand() & 0xff); \
- } \
- memset(dst_argb_c, 123, kStrideB* kHeightB); \
- memset(dst_argb_opt, 123, kStrideB* kHeightB); \
- MaskCpuFlags(disable_cpu_flags_); \
- FMT_A##To##FMT_B(src_argb, kStrideA, dst_argb_c, kStrideB, kWidth, \
- kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- FMT_A##To##FMT_B(src_argb, kStrideA, dst_argb_opt, kStrideB, kWidth, \
- kHeight); \
- for (int i = 0; i < kStrideB * kHeightB; ++i) { \
- EXPECT_NEAR(dst_argb_c[i], dst_argb_opt[i], DIFF); \
- } \
- free_aligned_buffer_page_end(src_argb); \
- free_aligned_buffer_page_end(dst_argb_c); \
- free_aligned_buffer_page_end(dst_argb_opt); \
- } \
- }
-
-#define TESTATOB(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, DIFF) \
- TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, benchmark_width_ - 4, DIFF, _Any, +, 0) \
- TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, benchmark_width_, DIFF, _Unaligned, +, 1) \
- TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, benchmark_width_, DIFF, _Invert, -, 0) \
- TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, benchmark_width_, DIFF, _Opt, +, 0) \
- TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, DIFF)
-
-// TODO(fbarchard): make ARM version of C code that matches NEON.
-TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0)
-TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
-TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0)
-TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0)
-TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0)
-TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
-TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2)
-TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
-TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
-TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0)
-TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0)
-TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0)
-TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0)
-TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)
-TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
-TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
-TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0)
+#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \
+ TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \
+ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \
+ for (int times = 0; times < benchmark_iterations_; ++times) { \
+ const int kWidth = (fastrand() & 63) + 1; \
+ const int kHeight = (fastrand() & 31) + 1; \
+ const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
+ const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
+ const int kStrideA = \
+ (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
+ const int kStrideB = \
+ (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
+ align_buffer_page_end(src_argb, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
+ align_buffer_page_end(dst_argb_c, \
+ kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
+ align_buffer_page_end(dst_argb_opt, \
+ kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
+ for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
+ src_argb[i] = 0xfe; \
+ } \
+ memset(dst_argb_c, 123, kStrideB* kHeightB); \
+ memset(dst_argb_opt, 123, kStrideB* kHeightB); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \
+ kStrideB, kWidth, kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \
+ kStrideB, kWidth, kHeight); \
+ for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
+ EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(src_argb); \
+ free_aligned_buffer_page_end(dst_argb_c); \
+ free_aligned_buffer_page_end(dst_argb_opt); \
+ } \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
+ EPP_B, STRIDE_B, HEIGHT_B) \
+ TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+ STRIDE_B, HEIGHT_B, benchmark_width_ + 1, _Any, +, 0) \
+ TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+ STRIDE_B, HEIGHT_B, benchmark_width_, _Unaligned, +, 4) \
+ TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+ STRIDE_B, HEIGHT_B, benchmark_width_, _Invert, -, 0) \
+ TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+ STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0) \
+ TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
+ EPP_B, STRIDE_B, HEIGHT_B)
+#else
+#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
+ EPP_B, STRIDE_B, HEIGHT_B) \
+ TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+ STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0)
+#endif
+
+TESTATOB(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOB(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOB(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+#endif
+TESTATOB(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOB(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1)
+#endif
+TESTATOB(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOB(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+TESTATOB(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#endif
+TESTATOB(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOB(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+#endif
+TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
+TESTATOB(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
+TESTATOB(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
+#endif
+TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) // 4
+TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1)
+TESTATOB(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1)
+TESTATOB(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1)
+TESTATOB(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1)
+TESTATOB(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1)
+TESTATOB(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
+TESTATOB(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1)
+TESTATOB(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOB(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+#endif
+TESTATOB(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+TESTATOB(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOB(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+TESTATOB(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOB(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOB(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, W1280, DIFF, N, NEG, OFF) \
+ HEIGHT_B, W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
@@ -1235,22 +1511,16 @@ TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0)
FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_opt, \
kStrideB, NULL, kWidth, NEG kHeight); \
} \
- int max_diff = 0; \
for (int i = 0; i < kStrideB * kHeightB; ++i) { \
- int abs_diff = abs(static_cast<int>(dst_argb_c[i]) - \
- static_cast<int>(dst_argb_opt[i])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
+ EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
- EXPECT_LE(max_diff, DIFF); \
free_aligned_buffer_page_end(src_argb); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
}
#define TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, \
- STRIDE_B, HEIGHT_B, DIFF) \
+ STRIDE_B, HEIGHT_B) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither_Random) { \
for (int times = 0; times < benchmark_iterations_; ++times) { \
const int kWidth = (fastrand() & 63) + 1; \
@@ -1275,15 +1545,9 @@ TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0)
MaskCpuFlags(benchmark_cpu_info_); \
FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_opt, kStrideB, \
NULL, kWidth, kHeight); \
- int max_diff = 0; \
for (int i = 0; i < kStrideB * kHeightB; ++i) { \
- int abs_diff = abs(static_cast<int>(dst_argb_c[i]) - \
- static_cast<int>(dst_argb_opt[i])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
+ EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
- EXPECT_LE(max_diff, DIFF); \
free_aligned_buffer_page_end(src_argb); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
@@ -1291,49 +1555,57 @@ TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0)
}
#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, DIFF) \
+ HEIGHT_B) \
TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, benchmark_width_ - 4, DIFF, _Any, +, 0) \
+ HEIGHT_B, benchmark_width_ + 1, _Any, +, 0) \
TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, benchmark_width_, DIFF, _Unaligned, +, 1) \
+ HEIGHT_B, benchmark_width_, _Unaligned, +, 2) \
TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, benchmark_width_, DIFF, _Invert, -, 0) \
+ HEIGHT_B, benchmark_width_, _Invert, -, 0) \
TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, benchmark_width_, DIFF, _Opt, +, 0) \
+ HEIGHT_B, benchmark_width_, _Opt, +, 0) \
TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
- HEIGHT_B, DIFF)
+ HEIGHT_B)
-TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
+#endif
-#define TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG, OFF) \
- TEST_F(LibYUVConvertTest, FMT_ATOB##_Symetric##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+// These conversions called twice, produce the original result.
+// e.g. endian swap twice.
+#define TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG, \
+ OFF) \
+ TEST_F(LibYUVConvertTest, FMT_ATOB##_Endswap##N) { \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
const int kStrideA = \
- (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
- align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF); \
- align_buffer_page_end(dst_argb_c, kStrideA* kHeightA); \
- align_buffer_page_end(dst_argb_opt, kStrideA* kHeightA); \
- for (int i = 0; i < kStrideA * kHeightA; ++i) { \
+ (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
+ align_buffer_page_end(src_argb, \
+ kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
+ align_buffer_page_end(dst_argb_c, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
+ align_buffer_page_end(dst_argb_opt, \
+ kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
+ for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
src_argb[i + OFF] = (fastrand() & 0xff); \
} \
memset(dst_argb_c, 1, kStrideA* kHeightA); \
memset(dst_argb_opt, 101, kStrideA* kHeightA); \
MaskCpuFlags(disable_cpu_flags_); \
- FMT_ATOB(src_argb + OFF, kStrideA, dst_argb_c, kStrideA, kWidth, \
- NEG kHeight); \
+ FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_c, \
+ kStrideA, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
- FMT_ATOB(src_argb + OFF, kStrideA, dst_argb_opt, kStrideA, kWidth, \
- NEG kHeight); \
+ FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_opt, \
+ kStrideA, kWidth, NEG kHeight); \
} \
MaskCpuFlags(disable_cpu_flags_); \
- FMT_ATOB(dst_argb_c, kStrideA, dst_argb_c, kStrideA, kWidth, NEG kHeight); \
+ FMT_ATOB((TYPE_A*)dst_argb_c, kStrideA, (TYPE_A*)dst_argb_c, kStrideA, \
+ kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
- FMT_ATOB(dst_argb_opt, kStrideA, dst_argb_opt, kStrideA, kWidth, \
- NEG kHeight); \
- for (int i = 0; i < kStrideA * kHeightA; ++i) { \
+ FMT_ATOB((TYPE_A*)dst_argb_opt, kStrideA, (TYPE_A*)dst_argb_opt, kStrideA, \
+ kWidth, NEG kHeight); \
+ for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]); \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
@@ -1342,32 +1614,25 @@ TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
free_aligned_buffer_page_end(dst_argb_opt); \
}
-#define TESTSYM(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A) \
- TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, benchmark_width_ - 4, _Any, +, \
- 0) \
- TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, _Unaligned, \
- +, 1) \
- TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, _Opt, +, 0)
-
-TESTSYM(ARGBToARGB, 4, 4, 1)
-TESTSYM(ARGBToBGRA, 4, 4, 1)
-TESTSYM(ARGBToABGR, 4, 4, 1)
-TESTSYM(BGRAToARGB, 4, 4, 1)
-TESTSYM(ABGRToARGB, 4, 4, 1)
+#if defined(ENABLE_FULL_TESTS)
+#define TESTEND(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A) \
+ TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_ + 1, \
+ _Any, +, 0) \
+ TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \
+ _Unaligned, +, 2) \
+ TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \
+ _Opt, +, 0)
+#else
+#define TESTEND(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A) \
+ TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \
+ _Opt, +, 0)
+#endif
-TEST_F(LibYUVConvertTest, Test565) {
- SIMD_ALIGNED(uint8_t orig_pixels[256][4]);
- SIMD_ALIGNED(uint8_t pixels565[256][2]);
-
- for (int i = 0; i < 256; ++i) {
- for (int j = 0; j < 4; ++j) {
- orig_pixels[i][j] = i;
- }
- }
- ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1);
- uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
- EXPECT_EQ(610919429u, checksum);
-}
+TESTEND(ARGBToBGRA, uint8_t, 4, 4, 1)
+TESTEND(ARGBToABGR, uint8_t, 4, 4, 1)
+TESTEND(BGRAToARGB, uint8_t, 4, 4, 1)
+TESTEND(ABGRToARGB, uint8_t, 4, 4, 1)
+TESTEND(AB64ToAR64, uint16_t, 4, 4, 1)
#ifdef HAVE_JPEG
TEST_F(LibYUVConvertTest, ValidateJpeg) {
@@ -1883,6 +2148,65 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) {
free_aligned_buffer_page_end(dst_vu);
}
+TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+
+ // Convert to NV12
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+ // Expect sucesss
+ EXPECT_EQ(0, ret);
+
+ // Convert to I420
+ align_buffer_page_end(dst2_y, width * height);
+ align_buffer_page_end(dst2_u, half_width * half_height);
+ align_buffer_page_end(dst2_v, half_width * half_height);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width,
+ dst2_v, half_width, width, height, width, height);
+ }
+ // Expect sucesss
+ EXPECT_EQ(0, ret);
+
+ // Convert I420 to NV12
+ align_buffer_page_end(dst3_y, width * height);
+ align_buffer_page_end(dst3_uv, half_width * half_height * 2);
+
+ I420ToNV12(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y,
+ width, dst3_uv, half_width * 2, width, height);
+
+ for (int i = 0; i < width * height; ++i) {
+ EXPECT_EQ(dst_y[i], dst3_y[i]);
+ }
+ for (int i = 0; i < half_width * half_height * 2; ++i) {
+ EXPECT_EQ(dst_uv[i], dst3_uv[i]);
+ EXPECT_EQ(dst_uv[i], dst3_uv[i]);
+ }
+
+ free_aligned_buffer_page_end(dst3_y);
+ free_aligned_buffer_page_end(dst3_uv);
+
+ free_aligned_buffer_page_end(dst2_y);
+ free_aligned_buffer_page_end(dst2_u);
+ free_aligned_buffer_page_end(dst2_v);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+}
+
TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) {
int width = 0;
int height = 0;
@@ -1913,7 +2237,42 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) {
free_aligned_buffer_page_end(dst_uv);
}
-TEST_F(LibYUVConvertTest, TestMJPGToNV21_422) {
+TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+ // Expect sucesss
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value. Hashes are for VU so flip the plane.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ align_buffer_page_end(dst_vu, half_width * half_height * 2);
+ SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
+ half_height);
+ uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
+ EXPECT_EQ(dst_y_hash, 2682851208u);
+ EXPECT_EQ(dst_vu_hash, 1069662856u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+ free_aligned_buffer_page_end(dst_vu);
+}
+
+// TODO(fbarchard): Improve test to compare against I422, not checksum
+TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) {
int width = 0;
int height = 0;
int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
@@ -1937,10 +2296,44 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_422) {
uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
EXPECT_EQ(dst_y_hash, 2682851208u);
- EXPECT_EQ(dst_uv_hash, 3543430771u);
+ EXPECT_EQ(dst_uv_hash, 493520167u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+}
+
+TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV12(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+ // Expect sucesss
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value. Hashes are for VU so flip the plane.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ align_buffer_page_end(dst_vu, half_width * half_height * 2);
+ SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
+ half_height);
+ uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
+ EXPECT_EQ(dst_y_hash, 2682851208u);
+ EXPECT_EQ(dst_vu_hash, 493520167u);
free_aligned_buffer_page_end(dst_y);
free_aligned_buffer_page_end(dst_uv);
+ free_aligned_buffer_page_end(dst_vu);
}
TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) {
@@ -1973,6 +2366,40 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) {
free_aligned_buffer_page_end(dst_uv);
}
+TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV12(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+ // Expect sucesss
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value. Hashes are for VU so flip the plane.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ align_buffer_page_end(dst_vu, half_width * half_height * 2);
+ SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
+ half_height);
+ uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
+ EXPECT_EQ(dst_y_hash, 330644005u);
+ EXPECT_EQ(dst_vu_hash, 135214341u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+ free_aligned_buffer_page_end(dst_vu);
+}
+
TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) {
int width = 0;
int height = 0;
@@ -2003,6 +2430,40 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) {
free_aligned_buffer_page_end(dst_uv);
}
+TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV12(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+ // Expect sucesss
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value. Hashes are for VU so flip the plane.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ align_buffer_page_end(dst_vu, half_width * half_height * 2);
+ SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
+ half_height);
+ uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
+ EXPECT_EQ(dst_y_hash, 2682851208u);
+ EXPECT_EQ(dst_vu_hash, 506143297u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+ free_aligned_buffer_page_end(dst_vu);
+}
+
TEST_F(LibYUVConvertTest, TestMJPGToARGB) {
int width = 0;
int height = 0;
@@ -2022,7 +2483,11 @@ TEST_F(LibYUVConvertTest, TestMJPGToARGB) {
// Test result matches known hash value.
uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381);
+#ifdef LIBYUV_UNLIMITED_DATA
+ EXPECT_EQ(dst_argb_hash, 3900633302u);
+#else
EXPECT_EQ(dst_argb_hash, 2355976473u);
+#endif
free_aligned_buffer_page_end(dst_argb);
}
@@ -2178,7 +2643,7 @@ TEST_F(LibYUVConvertTest, I420CropOddY) {
const int SUBSAMP_Y = 2;
const int kWidth = benchmark_width_;
const int kHeight = benchmark_height_;
- const int crop_y = 1;
+ const int crop_y = benchmark_height_ > 1 ? 1 : 0;
const int kDestWidth = benchmark_width_;
const int kDestHeight = benchmark_height_ - crop_y * 2;
const int kStrideU = SUBSAMPLE(kWidth, SUBSAMP_X);
@@ -2329,9 +2794,9 @@ TEST_F(LibYUVConvertTest, TestDither) {
}
#define TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C) \
+ YALIGN, W1280, N, NEG, OFF, FMT_C, BPP_C) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ const int kWidth = W1280; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
@@ -2360,7 +2825,6 @@ TEST_F(LibYUVConvertTest, TestDither) {
src_y + OFF, kWidth, src_u + OFF, kStrideUV, src_v + OFF, kStrideUV, \
dst_argb_opt + OFF, kStrideB, NULL, kWidth, NEG kHeight); \
} \
- int max_diff = 0; \
/* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight); \
align_buffer_page_end(dst_argb32_opt, kWidth* BPP_C* kHeight); \
@@ -2371,13 +2835,8 @@ TEST_F(LibYUVConvertTest, TestDither) {
FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt, \
kWidth * BPP_C, kWidth, kHeight); \
for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \
- int abs_diff = abs(static_cast<int>(dst_argb32_c[i]) - \
- static_cast<int>(dst_argb32_opt[i])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
+ EXPECT_EQ(dst_argb32_c[i], dst_argb32_opt[i]); \
} \
- EXPECT_LE(max_diff, DIFF); \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
free_aligned_buffer_page_end(src_v); \
@@ -2387,20 +2846,20 @@ TEST_F(LibYUVConvertTest, TestDither) {
free_aligned_buffer_page_end(dst_argb32_opt); \
}
-#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, DIFF, FMT_C, BPP_C) \
- TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, \
- BPP_C) \
- TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, \
- BPP_C) \
- TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C) \
- TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
-
-TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4)
+#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, FMT_C, BPP_C) \
+ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \
+ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C) \
+ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \
+ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
+#endif
#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \
TEST_F(LibYUVConvertTest, NAME) { \
@@ -2462,12 +2921,12 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4)
TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12)
TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12)
-// Transitive tests. A to B to C is same as A to C.
-
+// Transitive test. A to B to C is same as A to C.
+// Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere.
#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
W1280, N, NEG, OFF, FMT_C, BPP_C) \
- TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##To##FMT_C##N) { \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
@@ -2484,23 +2943,23 @@ TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12)
src_v[i + OFF] = (fastrand() & 0xff); \
} \
memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \
- src_v + OFF, kStrideUV, dst_argb_b + OFF, \
- kStrideB, kWidth, NEG kHeight); \
- } \
+ FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \
+ src_v + OFF, kStrideUV, dst_argb_b + OFF, kStrideB, \
+ kWidth, NEG kHeight); \
/* Convert to a 3rd format in 1 step and 2 steps and compare */ \
const int kStrideC = kWidth * BPP_C; \
align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \
align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
- FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \
- src_v + OFF, kStrideUV, dst_argb_c + OFF, kStrideC, \
- kWidth, NEG kHeight); \
- /* Convert B to C */ \
- FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, kStrideC, \
- kWidth, kHeight); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \
+ src_v + OFF, kStrideUV, dst_argb_c + OFF, \
+ kStrideC, kWidth, NEG kHeight); \
+ /* Convert B to C */ \
+ FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, \
+ kStrideC, kWidth, kHeight); \
+ } \
for (int i = 0; i < kStrideC * kHeight; ++i) { \
EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \
} \
@@ -2512,58 +2971,109 @@ TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12)
free_aligned_buffer_page_end(dst_argb_bc); \
}
+#if defined(ENABLE_FULL_TESTS)
#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
FMT_C, BPP_C) \
TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
- benchmark_width_ - 4, _Any, +, 0, FMT_C, BPP_C) \
+ benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \
TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
- benchmark_width_, _Unaligned, +, 1, FMT_C, BPP_C) \
+ benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C) \
TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \
TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+#else
+#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+ FMT_C, BPP_C) \
+ TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+ benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+#endif
+#if defined(ENABLE_FULL_TESTS)
+TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4)
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ABGR, 4)
-TESTPLANARTOE(J420, 2, 2, ARGB, 1, 4, ARGB, 4)
-TESTPLANARTOE(J420, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, ARGB, 4)
-TESTPLANARTOE(H420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB24, 3)
TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4)
-TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4)
TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3)
+TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4)
TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3)
-TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3)
-TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4)
-TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, ARGB, 4)
-TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3)
-TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3)
+TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, ABGR, 4)
TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3)
+TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RGB24, 3)
TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3)
+TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3)
+TESTPLANARTOE(J420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(J420, 2, 2, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(U420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(U420, 2, 2, ARGB, 1, 4, ARGB, 4)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2)
+#endif
+TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4)
TESTPLANARTOE(J422, 2, 1, ARGB, 1, 4, ARGB, 4)
TESTPLANARTOE(J422, 2, 1, ABGR, 1, 4, ARGB, 4)
TESTPLANARTOE(H422, 2, 1, ARGB, 1, 4, ARGB, 4)
TESTPLANARTOE(H422, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(U422, 2, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(U422, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(V422, 2, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(V422, 2, 1, ABGR, 1, 4, ARGB, 4)
TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4)
-TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4)
TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4)
-TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ARGB, 4)
-TESTPLANARTOE(J444, 1, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ABGR, 4)
TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(J444, 1, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(J444, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(H444, 1, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(H444, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(U444, 1, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(U444, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(V444, 1, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(V444, 1, 1, ABGR, 1, 4, ARGB, 4)
TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4)
TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4)
TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4)
TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4)
+#else
+TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB24, 3)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
+TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3)
+TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3)
+TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2)
+TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4)
+TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4)
+TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4)
+#endif
+// Transitive test: Compare 1 step vs 2 step conversion for YUVA to ARGB.
+// Benchmark 2 step conversion for comparison to 1 step conversion.
#define TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
W1280, N, NEG, OFF, FMT_C, BPP_C, ATTEN) \
- TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##To##FMT_C##N) { \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \
const int kSizeUV = \
@@ -2573,6 +3083,12 @@ TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4)
align_buffer_page_end(src_v, kSizeUV + OFF); \
align_buffer_page_end(src_a, kWidth* kHeight + OFF); \
align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \
+ const int kStrideC = kWidth * BPP_C; \
+ align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \
+ align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \
+ memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
+ memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
+ memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
src_y[i + OFF] = (fastrand() & 0xff); \
src_a[i + OFF] = (fastrand() & 0xff); \
@@ -2581,26 +3097,21 @@ TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4)
src_u[i + OFF] = (fastrand() & 0xff); \
src_v[i + OFF] = (fastrand() & 0xff); \
} \
- memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
+ /* Convert A to B */ \
FMT_PLANAR##To##FMT_B( \
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth, \
dst_argb_b + OFF, kStrideB, kWidth, NEG kHeight, ATTEN); \
+ /* Convert B to C */ \
+ FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, \
+ kStrideC, kWidth, kHeight); \
} \
- /* Convert to a 3rd format in 1 step and 2 steps and compare */ \
- const int kStrideC = kWidth * BPP_C; \
- align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \
- align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \
- memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
- memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
+ /* Convert A to C */ \
FMT_PLANAR##To##FMT_C( \
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth, \
dst_argb_c + OFF, kStrideC, kWidth, NEG kHeight, ATTEN); \
- /* Convert B to C */ \
- FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, kStrideC, \
- kWidth, kHeight); \
for (int i = 0; i < kStrideC * kHeight; ++i) { \
EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \
} \
@@ -2613,26 +3124,71 @@ TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4)
free_aligned_buffer_page_end(dst_argb_bc); \
}
+#if defined(ENABLE_FULL_TESTS)
#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
FMT_C, BPP_C) \
TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
- benchmark_width_ - 4, _Any, +, 0, FMT_C, BPP_C, 0) \
+ benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C, 0) \
TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
- benchmark_width_, _Unaligned, +, 1, FMT_C, BPP_C, 0) \
+ benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C, 0) \
TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
benchmark_width_, _Invert, -, 0, FMT_C, BPP_C, 0) \
TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0) \
TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
benchmark_width_, _Premult, +, 0, FMT_C, BPP_C, 1)
+#else
+#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+ FMT_C, BPP_C) \
+ TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+ benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0)
+#endif
+#if defined(ENABLE_FULL_TESTS)
TESTQPLANARTOE(I420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(J420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(J420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(H420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(H420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(F420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(F420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(U420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(U420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(V420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(V420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(I422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(J422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(J422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(F422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(F422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(H422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(H422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(U422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(U422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(V422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(V422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(I444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(J444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(J444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(H444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(H444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(U444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(U444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(V444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(V444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+#else
+TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+#endif
#define TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, W1280, N, NEG, \
OFF, FMT_C, BPP_C) \
- TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_##FMT_C##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##To##FMT_C##N) { \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kStrideA = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \
const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \
@@ -2640,21 +3196,21 @@ TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \
MemRandomize(src_argb_a + OFF, kStrideA * kHeight); \
memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB, \
- kWidth, NEG kHeight); \
- } \
+ FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB, \
+ kWidth, NEG kHeight); \
/* Convert to a 3rd format in 1 step and 2 steps and compare */ \
const int kStrideC = kWidth * BPP_C; \
align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \
align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
- FMT_A##To##FMT_C(src_argb_a + OFF, kStrideA, dst_argb_c + OFF, kStrideC, \
- kWidth, NEG kHeight); \
- /* Convert B to C */ \
- FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, kStrideC, \
- kWidth, kHeight); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_A##To##FMT_C(src_argb_a + OFF, kStrideA, dst_argb_c + OFF, kStrideC, \
+ kWidth, NEG kHeight); \
+ /* Convert B to C */ \
+ FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, \
+ kStrideC, kWidth, kHeight); \
+ } \
for (int i = 0; i < kStrideC * kHeight; i += 4) { \
EXPECT_EQ(dst_argb_c[i + OFF + 0], dst_argb_bc[i + OFF + 0]); \
EXPECT_EQ(dst_argb_c[i + OFF + 1], dst_argb_bc[i + OFF + 1]); \
@@ -2669,15 +3225,16 @@ TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \
TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, \
- benchmark_width_ - 4, _Any, +, 0, FMT_C, BPP_C) \
+ benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \
TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \
- _Unaligned, +, 1, FMT_C, BPP_C) \
+ _Unaligned, +, 4, FMT_C, BPP_C) \
TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \
_Invert, -, 0, FMT_C, BPP_C) \
TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \
_Opt, +, 0, FMT_C, BPP_C)
// Caveat: Destination needs to be 4 bytes
+#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4)
TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4)
TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4)
@@ -2686,6 +3243,7 @@ TESTPLANETOE(ARGB, 1, 4, AB30, 1, 4, ARGB, 4)
TESTPLANETOE(ABGR, 1, 4, AB30, 1, 4, ABGR, 4)
TESTPLANETOE(AB30, 1, 4, ARGB, 1, 4, ABGR, 4)
TESTPLANETOE(AB30, 1, 4, ABGR, 1, 4, ARGB, 4)
+#endif
TEST_F(LibYUVConvertTest, RotateWithARGBSource) {
// 2x2 frames
@@ -2790,79 +3348,506 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
}
#endif // HAS_ABGRTOAR30ROW_AVX2
+// Provide matrix wrappers for 12 bit YUV
+#define I012ToARGB(a, b, c, d, e, f, g, h, i, j) \
+ I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \
+ I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+
+#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \
+ I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define I410ToABGR(a, b, c, d, e, f, g, h, i, j) \
+ I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define H410ToARGB(a, b, c, d, e, f, g, h, i, j) \
+ I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
+#define H410ToABGR(a, b, c, d, e, f, g, h, i, j) \
+ I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
+#define U410ToARGB(a, b, c, d, e, f, g, h, i, j) \
+ I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
+#define U410ToABGR(a, b, c, d, e, f, g, h, i, j) \
+ I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
+#define I410ToAR30(a, b, c, d, e, f, g, h, i, j) \
+ I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define I410ToAB30(a, b, c, d, e, f, g, h, i, j) \
+ I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define H410ToAR30(a, b, c, d, e, f, g, h, i, j) \
+ I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
+#define H410ToAB30(a, b, c, d, e, f, g, h, i, j) \
+ I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
+#define U410ToAR30(a, b, c, d, e, f, g, h, i, j) \
+ I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
+#define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \
+ I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
+
+#define I010ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \
+ I010ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+ kFilterBilinear)
+#define I010ToAR30Filter(a, b, c, d, e, f, g, h, i, j) \
+ I010ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+ kFilterBilinear)
+#define I210ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \
+ I210ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+ kFilterBilinear)
+#define I210ToAR30Filter(a, b, c, d, e, f, g, h, i, j) \
+ I210ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+ kFilterBilinear)
+
// TODO(fbarchard): Fix clamping issue affected by U channel.
-#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
- ALIGN, YALIGN, W1280, DIFF, N, NEG, SOFF, DOFF) \
- TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
- const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
- const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
- const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
- const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
- const int kBpc = 2; \
- align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
- align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
- align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
- align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
- align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
- for (int i = 0; i < kWidth * kHeight; ++i) { \
- reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & 0x3ff); \
- } \
- for (int i = 0; i < kSizeUV; ++i) { \
- reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & 0x3ff); \
- reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & 0x3ff); \
- } \
- memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
- memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
- MaskCpuFlags(disable_cpu_flags_); \
- FMT_PLANAR##To##FMT_B( \
- reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
- reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
- reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
- dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- FMT_PLANAR##To##FMT_B( \
- reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
- reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
- reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
- dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \
- } \
- int max_diff = 0; \
- for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
- int abs_diff = abs(static_cast<int>(dst_argb_c[i + DOFF]) - \
- static_cast<int>(dst_argb_opt[i + DOFF])); \
- if (abs_diff > max_diff) { \
- max_diff = abs_diff; \
- } \
- } \
- EXPECT_LE(max_diff, DIFF); \
- free_aligned_buffer_page_end(src_y); \
- free_aligned_buffer_page_end(src_u); \
- free_aligned_buffer_page_end(src_v); \
- free_aligned_buffer_page_end(dst_argb_c); \
- free_aligned_buffer_page_end(dst_argb_opt); \
- }
-
-#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, DIFF) \
- TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, 0) \
- TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, 1) \
- TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Invert, -, 0, 0) \
- TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, DIFF, _Opt, +, 0, 0)
-
-TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 2)
-TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 2)
-TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 2)
-TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 2)
-TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 2)
-TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 2)
-TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2)
-TESTPLANAR16TOB(H010, 2, 2, AB30, 4, 4, 1, 2)
+#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \
+ BPP_B, ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF) \
+ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+ const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
+ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
+ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
+ const int kBpc = 2; \
+ align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
+ align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
+ align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
+ align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
+ align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & FMT_MASK); \
+ } \
+ for (int i = 0; i < kSizeUV; ++i) { \
+ reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & FMT_MASK); \
+ reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & FMT_MASK); \
+ } \
+ memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
+ memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_PLANAR##To##FMT_B( \
+ reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
+ reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
+ reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
+ dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_PLANAR##To##FMT_B( \
+ reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
+ reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
+ reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
+ dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \
+ } \
+ for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
+ EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
+ } \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_u); \
+ free_aligned_buffer_page_end(src_v); \
+ free_aligned_buffer_page_end(dst_argb_c); \
+ free_aligned_buffer_page_end(dst_argb_opt); \
+ }
+
+#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \
+ BPP_B, ALIGN, YALIGN) \
+ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
+ ALIGN, YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \
+ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
+ ALIGN, YALIGN, benchmark_width_, _Unaligned, +, 4, 4) \
+ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
+ ALIGN, YALIGN, benchmark_width_, _Invert, -, 0, 0) \
+ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
+ ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0)
+
+// These conversions are only optimized for x86
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
+TESTPLANAR16TOB(I012, 2, 2, 0xfff, ARGB, 4, 4, 1)
+TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGBFilter, 4, 4, 1)
+TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGBFilter, 4, 4, 1)
+
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1)
+TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30Filter, 4, 4, 1)
+TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1)
+#endif // LITTLE_ENDIAN_ONLY_TEST
+#endif // DISABLE_SLOW_TESTS
+
+#define TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+ ALIGN, YALIGN, W1280, N, NEG, OFF, ATTEN, S_DEPTH) \
+ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+ const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
+ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
+ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
+ const int kBpc = 2; \
+ align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF); \
+ align_buffer_page_end(src_u, kSizeUV* kBpc + OFF); \
+ align_buffer_page_end(src_v, kSizeUV* kBpc + OFF); \
+ align_buffer_page_end(src_a, kWidth* kHeight* kBpc + OFF); \
+ align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
+ align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ reinterpret_cast<uint16_t*>(src_y + OFF)[i] = \
+ (fastrand() & ((1 << S_DEPTH) - 1)); \
+ reinterpret_cast<uint16_t*>(src_a + OFF)[i] = \
+ (fastrand() & ((1 << S_DEPTH) - 1)); \
+ } \
+ for (int i = 0; i < kSizeUV; ++i) { \
+ reinterpret_cast<uint16_t*>(src_u + OFF)[i] = \
+ (fastrand() & ((1 << S_DEPTH) - 1)); \
+ reinterpret_cast<uint16_t*>(src_v + OFF)[i] = \
+ (fastrand() & ((1 << S_DEPTH) - 1)); \
+ } \
+ memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \
+ memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + OFF), kWidth, \
+ reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV, \
+ reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV, \
+ reinterpret_cast<uint16_t*>(src_a + OFF), kWidth, \
+ dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \
+ ATTEN); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_PLANAR##To##FMT_B( \
+ reinterpret_cast<uint16_t*>(src_y + OFF), kWidth, \
+ reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV, \
+ reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV, \
+ reinterpret_cast<uint16_t*>(src_a + OFF), kWidth, \
+ dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, ATTEN); \
+ } \
+ for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
+ EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \
+ } \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_u); \
+ free_aligned_buffer_page_end(src_v); \
+ free_aligned_buffer_page_end(src_a); \
+ free_aligned_buffer_page_end(dst_argb_c); \
+ free_aligned_buffer_page_end(dst_argb_opt); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+ ALIGN, YALIGN, S_DEPTH) \
+ TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \
+ TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Unaligned, +, 2, 0, S_DEPTH) \
+ TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
+ TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH) \
+ TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Premult, +, 0, 1, S_DEPTH)
+#else
+#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+ ALIGN, YALIGN, S_DEPTH) \
+ TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+#endif
+
+#define I010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
+ l, m)
+#define I010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
+ l, m)
+#define J010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define J010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define F010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define F010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define H010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define H010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define U010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define U010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define V010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define V010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define I210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
+ l, m)
+#define I210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
+ l, m)
+#define J210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define J210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define F210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define F210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define H210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define H210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define U210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define U210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define V210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define V210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define I410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
+ l, m)
+#define I410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
+ l, m)
+#define J410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define J410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define F410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define F410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define H410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define H410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define U410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define U410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define V410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define V410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define I010AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \
+ &kYuvI601Constants, k, l, m, kFilterBilinear)
+#define I210AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I010AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \
+ &kYuvI601Constants, k, l, m, kFilterBilinear)
+
+// These conversions are only optimized for x86
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(J010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(J010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(H010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(H010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(F010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(F010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(U010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(U010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(V010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(V010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(J210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(J210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(H210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(H210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(F210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(F210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(U210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(U210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(V210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(V210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(J410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(J410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(H410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(H410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(F410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(F410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(U410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(U410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(V410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
+TESTQPLANAR16TOB(V410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10)
+TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
+#endif // DISABLE_SLOW_TESTS
+
+#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+ ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
+ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+ const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
+ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \
+ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; \
+ const int kBpc = 2; \
+ align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
+ align_buffer_page_end(src_uv, kSizeUV* kBpc + SOFF); \
+ align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
+ align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = \
+ (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \
+ } \
+ for (int i = 0; i < kSizeUV; ++i) { \
+ reinterpret_cast<uint16_t*>(src_uv + SOFF)[i] = \
+ (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \
+ } \
+ memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
+ memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
+ reinterpret_cast<uint16_t*>(src_uv + SOFF), \
+ kStrideUV, dst_argb_c + DOFF, kStrideB, kWidth, \
+ NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
+ reinterpret_cast<uint16_t*>(src_uv + SOFF), \
+ kStrideUV, dst_argb_opt + DOFF, kStrideB, kWidth, \
+ NEG kHeight); \
+ } \
+ for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
+ EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
+ } \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_uv); \
+ free_aligned_buffer_page_end(dst_argb_c); \
+ free_aligned_buffer_page_end(dst_argb_opt); \
+ }
+
+#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+ ALIGN, YALIGN, S_DEPTH) \
+ TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \
+ TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \
+ TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
+ TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+
+#define P010ToARGB(a, b, c, d, e, f, g, h) \
+ P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P210ToARGB(a, b, c, d, e, f, g, h) \
+ P210ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P010ToAR30(a, b, c, d, e, f, g, h) \
+ P010ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P210ToAR30(a, b, c, d, e, f, g, h) \
+ P210ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+
+#define P012ToARGB(a, b, c, d, e, f, g, h) \
+ P012ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P212ToARGB(a, b, c, d, e, f, g, h) \
+ P212ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P012ToAR30(a, b, c, d, e, f, g, h) \
+ P012ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P212ToAR30(a, b, c, d, e, f, g, h) \
+ P212ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+
+#define P016ToARGB(a, b, c, d, e, f, g, h) \
+ P016ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P216ToARGB(a, b, c, d, e, f, g, h) \
+ P216ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P016ToAR30(a, b, c, d, e, f, g, h) \
+ P016ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+#define P216ToAR30(a, b, c, d, e, f, g, h) \
+ P216ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
+
+#define P010ToARGBFilter(a, b, c, d, e, f, g, h) \
+ P010ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \
+ kFilterBilinear)
+#define P210ToARGBFilter(a, b, c, d, e, f, g, h) \
+ P210ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \
+ kFilterBilinear)
+#define P010ToAR30Filter(a, b, c, d, e, f, g, h) \
+ P010ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \
+ kFilterBilinear)
+#define P210ToAR30Filter(a, b, c, d, e, f, g, h) \
+ P210ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \
+ kFilterBilinear)
+
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
+TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
+TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
+TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
+TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
+TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
+TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
+TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
+TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
+TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
+TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
+TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
+TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
+TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
+TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
+#endif // LITTLE_ENDIAN_ONLY_TEST
+#endif // DISABLE_SLOW_TESTS
static int Clamp(int y) {
if (y < 0) {
@@ -2918,10 +3903,11 @@ TEST_F(LibYUVConvertTest, TestH420ToARGB) {
++histogram_b[b];
++histogram_g[g];
++histogram_r[r];
- int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f));
- EXPECT_NEAR(b, expected_y, 1);
- EXPECT_NEAR(g, expected_y, 1);
- EXPECT_NEAR(r, expected_y, 1);
+ // Reference formula for Y channel contribution in YUV to RGB conversions:
+ int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f + 0.5f));
+ EXPECT_EQ(b, expected_y);
+ EXPECT_EQ(g, expected_y);
+ EXPECT_EQ(r, expected_y);
EXPECT_EQ(a, 255);
}
@@ -3043,7 +4029,7 @@ TEST_F(LibYUVConvertTest, TestH010ToAR30) {
++histogram_b[b10];
++histogram_g[g10];
++histogram_r[r10];
- int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f));
+ int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f + 0.5));
EXPECT_NEAR(b10, expected_y, 4);
EXPECT_NEAR(g10, expected_y, 4);
EXPECT_NEAR(r10, expected_y, 4);
@@ -3196,6 +4182,68 @@ TEST_F(LibYUVConvertTest, TestH420ToAR30) {
free_aligned_buffer_page_end(ar30_pixels);
}
+// Test I400 with jpeg matrix is same as J400
+TEST_F(LibYUVConvertTest, TestI400) {
+ const int kSize = 256;
+ align_buffer_page_end(orig_i400, kSize);
+ align_buffer_page_end(argb_pixels_i400, kSize * 4);
+ align_buffer_page_end(argb_pixels_j400, kSize * 4);
+ align_buffer_page_end(argb_pixels_jpeg_i400, kSize * 4);
+ align_buffer_page_end(argb_pixels_h709_i400, kSize * 4);
+ align_buffer_page_end(argb_pixels_2020_i400, kSize * 4);
+
+ // Test grey scale
+ for (int i = 0; i < kSize; ++i) {
+ orig_i400[i] = i;
+ }
+
+ J400ToARGB(orig_i400, 0, argb_pixels_j400, 0, kSize, 1);
+ I400ToARGB(orig_i400, 0, argb_pixels_i400, 0, kSize, 1);
+ I400ToARGBMatrix(orig_i400, 0, argb_pixels_jpeg_i400, 0, &kYuvJPEGConstants,
+ kSize, 1);
+ I400ToARGBMatrix(orig_i400, 0, argb_pixels_h709_i400, 0, &kYuvH709Constants,
+ kSize, 1);
+ I400ToARGBMatrix(orig_i400, 0, argb_pixels_2020_i400, 0, &kYuv2020Constants,
+ kSize, 1);
+
+ EXPECT_EQ(0, argb_pixels_i400[0]);
+ EXPECT_EQ(0, argb_pixels_j400[0]);
+ EXPECT_EQ(0, argb_pixels_jpeg_i400[0]);
+ EXPECT_EQ(0, argb_pixels_h709_i400[0]);
+ EXPECT_EQ(0, argb_pixels_2020_i400[0]);
+ EXPECT_EQ(0, argb_pixels_i400[16 * 4]);
+ EXPECT_EQ(16, argb_pixels_j400[16 * 4]);
+ EXPECT_EQ(16, argb_pixels_jpeg_i400[16 * 4]);
+ EXPECT_EQ(0, argb_pixels_h709_i400[16 * 4]);
+ EXPECT_EQ(0, argb_pixels_2020_i400[16 * 4]);
+ EXPECT_EQ(130, argb_pixels_i400[128 * 4]);
+ EXPECT_EQ(128, argb_pixels_j400[128 * 4]);
+ EXPECT_EQ(128, argb_pixels_jpeg_i400[128 * 4]);
+ EXPECT_EQ(130, argb_pixels_h709_i400[128 * 4]);
+ EXPECT_EQ(130, argb_pixels_2020_i400[128 * 4]);
+ EXPECT_EQ(255, argb_pixels_i400[255 * 4]);
+ EXPECT_EQ(255, argb_pixels_j400[255 * 4]);
+ EXPECT_EQ(255, argb_pixels_jpeg_i400[255 * 4]);
+ EXPECT_EQ(255, argb_pixels_h709_i400[255 * 4]);
+ EXPECT_EQ(255, argb_pixels_2020_i400[255 * 4]);
+
+ for (int i = 0; i < kSize * 4; ++i) {
+ if ((i & 3) == 3) {
+ EXPECT_EQ(255, argb_pixels_j400[i]);
+ } else {
+ EXPECT_EQ(i / 4, argb_pixels_j400[i]);
+ }
+ EXPECT_EQ(argb_pixels_jpeg_i400[i], argb_pixels_j400[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_i400);
+ free_aligned_buffer_page_end(argb_pixels_i400);
+ free_aligned_buffer_page_end(argb_pixels_j400);
+ free_aligned_buffer_page_end(argb_pixels_jpeg_i400);
+ free_aligned_buffer_page_end(argb_pixels_h709_i400);
+ free_aligned_buffer_page_end(argb_pixels_2020_i400);
+}
+
// Test RGB24 to ARGB and back to RGB24
TEST_F(LibYUVConvertTest, TestARGBToRGB24) {
const int kSize = 256;
@@ -3220,4 +4268,74 @@ TEST_F(LibYUVConvertTest, TestARGBToRGB24) {
free_aligned_buffer_page_end(dest_rgb24);
}
+TEST_F(LibYUVConvertTest, Test565) {
+ SIMD_ALIGNED(uint8_t orig_pixels[256][4]);
+ SIMD_ALIGNED(uint8_t pixels565[256][2]);
+
+ for (int i = 0; i < 256; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ orig_pixels[i][j] = i;
+ }
+ }
+ ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1);
+ uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
+ EXPECT_EQ(610919429u, checksum);
+}
+
+// Test RGB24 to J420 is exact
+#if defined(LIBYUV_BIT_EXACT)
+TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
+ const int kSize = 256;
+ align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24
+ align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2);
+ int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
+ (kSize * 2) * benchmark_iterations_;
+
+ for (int i = 0; i < kSize * 3 * 2; ++i) {
+ orig_rgb24[i] = i;
+ }
+
+ for (int i = 0; i < iterations256; ++i) {
+ RGB24ToJ420(orig_rgb24, kSize * 3, dest_j420, kSize, // Y plane
+ dest_j420 + kSize * 2, kSize / 2, // U plane
+ dest_j420 + kSize * 5 / 2, kSize / 2, // V plane
+ kSize, 2);
+ }
+
+ uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381);
+ EXPECT_EQ(2755440272u, checksum);
+
+ free_aligned_buffer_page_end(orig_rgb24);
+ free_aligned_buffer_page_end(dest_j420);
+}
+#endif
+
+// Test RGB24 to I420 is exact
+#if defined(LIBYUV_BIT_EXACT)
+TEST_F(LibYUVConvertTest, TestRGB24ToI420) {
+ const int kSize = 256;
+ align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24
+ align_buffer_page_end(dest_i420, kSize * 3 / 2 * 2);
+ int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
+ (kSize * 2) * benchmark_iterations_;
+
+ for (int i = 0; i < kSize * 3 * 2; ++i) {
+ orig_rgb24[i] = i;
+ }
+
+ for (int i = 0; i < iterations256; ++i) {
+ RGB24ToI420(orig_rgb24, kSize * 3, dest_i420, kSize, // Y plane
+ dest_i420 + kSize * 2, kSize / 2, // U plane
+ dest_i420 + kSize * 5 / 2, kSize / 2, // V plane
+ kSize, 2);
+ }
+
+ uint32_t checksum = HashDjb2(dest_i420, kSize * 3 / 2 * 2, 5381);
+ EXPECT_EQ(1526656597u, checksum);
+
+ free_aligned_buffer_page_end(orig_rgb24);
+ free_aligned_buffer_page_end(dest_i420);
+}
+#endif
+
} // namespace libyuv
diff --git a/files/unit_test/cpu_test.cc b/files/unit_test/cpu_test.cc
index a7991d2b..080778f5 100644
--- a/files/unit_test/cpu_test.cc
+++ b/files/unit_test/cpu_test.cc
@@ -40,6 +40,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
int has_gfni = TestCpuFlag(kCpuHasGFNI);
int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL);
+ int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI);
int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI);
int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
@@ -57,6 +58,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
printf("Has GFNI %d\n", has_gfni);
printf("Has AVX512BW %d\n", has_avx512bw);
printf("Has AVX512VL %d\n", has_avx512vl);
+ printf("Has AVX512VNNI %d\n", has_avx512vnni);
printf("Has AVX512VBMI %d\n", has_avx512vbmi);
printf("Has AVX512VBMI2 %d\n", has_avx512vbmi2);
printf("Has AVX512VBITALG %d\n", has_avx512vbitalg);
@@ -67,31 +69,113 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
printf("Has MIPS %d\n", has_mips);
int has_msa = TestCpuFlag(kCpuHasMSA);
printf("Has MSA %d\n", has_msa);
- int has_mmi = TestCpuFlag(kCpuHasMMI);
- printf("Has MMI %d\n", has_mmi);
+#endif
+
+#if defined(__loongarch__)
+ int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH);
+ printf("Has LOONGARCH %d\n", has_loongarch);
+ int has_lsx = TestCpuFlag(kCpuHasLSX);
+ printf("Has LSX %d\n", has_lsx);
+ int has_lasx = TestCpuFlag(kCpuHasLASX);
+ printf("Has LASX %d\n", has_lasx);
#endif
}
-TEST_F(LibYUVBaseTest, TestCpuCompilerEnabled) {
-#if defined(__aarch64__)
- printf("Arm64 build\n");
+TEST_F(LibYUVBaseTest, TestCompilerMacros) {
+ // Tests all macros used in public headers.
+#ifdef __ATOMIC_RELAXED
+ printf("__ATOMIC_RELAXED %d\n", __ATOMIC_RELAXED);
+#endif
+#ifdef __cplusplus
+ printf("__cplusplus %ld\n", __cplusplus);
+#endif
+#ifdef __clang_major__
+ printf("__clang_major__ %d\n", __clang_major__);
+#endif
+#ifdef __clang_minor__
+ printf("__clang_minor__ %d\n", __clang_minor__);
#endif
-#if defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)
- printf("Neon build enabled\n");
+#ifdef __GNUC__
+ printf("__GNUC__ %d\n", __GNUC__);
#endif
-#if defined(__x86_64__) || defined(_M_X64)
- printf("x64 build\n");
+#ifdef __GNUC_MINOR__
+ printf("__GNUC_MINOR__ %d\n", __GNUC_MINOR__);
+#endif
+#ifdef __i386__
+ printf("__i386__ %d\n", __i386__);
+#endif
+#ifdef __mips
+ printf("__mips %d\n", __mips);
+#endif
+#ifdef __mips_isa_rev
+ printf("__mips_isa_rev %d\n", __mips_isa_rev);
+#endif
+#ifdef __x86_64__
+ printf("__x86_64__ %d\n", __x86_64__);
#endif
#ifdef _MSC_VER
printf("_MSC_VER %d\n", _MSC_VER);
#endif
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(GCC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
- defined(VISUALC_HAS_AVX2))
- printf("Has AVX2 1\n");
-#else
- printf("Has AVX2 0\n");
-// If compiler does not support AVX2, the following function not expected:
+#ifdef __aarch64__
+ printf("__aarch64__ %d\n", __aarch64__);
+#endif
+#ifdef __APPLE__
+ printf("__APPLE__ %d\n", __APPLE__);
+#endif
+#ifdef __arm__
+ printf("__arm__ %d\n", __arm__);
+#endif
+#ifdef __clang__
+ printf("__clang__ %d\n", __clang__);
+#endif
+#ifdef __CLR_VER
+ printf("__CLR_VER %d\n", __CLR_VER);
+#endif
+#ifdef __CYGWIN__
+ printf("__CYGWIN__ %d\n", __CYGWIN__);
+#endif
+#ifdef __llvm__
+ printf("__llvm__ %d\n", __llvm__);
+#endif
+#ifdef __mips_msa
+ printf("__mips_msa %d\n", __mips_msa);
+#endif
+#ifdef __native_client__
+ printf("__native_client__ %d\n", __native_client__);
+#endif
+#ifdef __pic__
+ printf("__pic__ %d\n", __pic__);
+#endif
+#ifdef __pnacl__
+ printf("__pnacl__ %d\n", __pnacl__);
+#endif
+#ifdef _M_IX86
+ printf("_M_IX86 %d\n", _M_IX86);
+#endif
+#ifdef _M_X64
+ printf("_M_X64 %d\n", _M_X64);
+#endif
+#ifdef _MIPS_ARCH_LOONGSON3A
+ printf("_MIPS_ARCH_LOONGSON3A %d\n", _MIPS_ARCH_LOONGSON3A);
+#endif
+#ifdef __loongarch__
+ printf("__loongarch__ %d\n", __loongarch__);
+#endif
+#ifdef _WIN32
+ printf("_WIN32 %d\n", _WIN32);
+#endif
+#ifdef GG_LONGLONG
+ printf("GG_LONGLONG %d\n", GG_LONGLONG);
+#endif
+#ifdef INT_TYPES_DEFINED
+ printf("INT_TYPES_DEFINED\n");
+#endif
+#ifdef __has_feature
+ printf("__has_feature\n");
+#if __has_feature(memory_sanitizer)
+ printf("__has_feature(memory_sanitizer) %d\n",
+ __has_feature(memory_sanitizer));
+#endif
#endif
}
@@ -156,11 +240,36 @@ TEST_F(LibYUVBaseTest, TestLinuxNeon) {
printf("WARNING: unable to load \"../../unit_test/testdata/arm_v7.txt\"\n");
}
#if defined(__linux__) && defined(__ARM_NEON__)
- EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("/proc/cpuinfo"));
+ if (FileExists("/proc/cpuinfo")) {
+ if (kCpuHasNEON != ArmCpuCaps("/proc/cpuinfo")) {
+ // This can happen on ARM emulator but /proc/cpuinfo is from host.
+ printf("WARNING: Neon build enabled but CPU does not have NEON\n");
+ }
+ } else {
+ printf("WARNING: unable to load \"/proc/cpuinfo\"\n");
+ }
#endif
}
+TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) {
+ if (FileExists("../../unit_test/testdata/mips.txt")) {
+ printf("Note: testing to load \"../../unit_test/testdata/mips.txt\"\n");
+
+ EXPECT_EQ(0, MipsCpuCaps("../../unit_test/testdata/mips.txt"));
+ EXPECT_EQ(kCpuHasMSA, MipsCpuCaps("../../unit_test/testdata/mips_msa.txt"));
+ EXPECT_EQ(kCpuHasMSA,
+ MipsCpuCaps("../../unit_test/testdata/mips_loongson2k.txt"));
+ } else {
+ printf("WARNING: unable to load \"../../unit_test/testdata/mips.txt\"\n");
+ }
+}
+
+// TODO(fbarchard): Fix clangcl test of cpuflags.
+#ifdef _MSC_VER
+TEST_F(LibYUVBaseTest, DISABLED_TestSetCpuFlags) {
+#else
TEST_F(LibYUVBaseTest, TestSetCpuFlags) {
+#endif
// Reset any masked flags that may have been set so auto init is enabled.
MaskCpuFlags(0);
diff --git a/files/unit_test/cpu_thread_test.cc b/files/unit_test/cpu_thread_test.cc
index 59061b98..69aab74e 100644
--- a/files/unit_test/cpu_thread_test.cc
+++ b/files/unit_test/cpu_thread_test.cc
@@ -12,7 +12,7 @@
#include "libyuv/cpu_id.h"
-#if defined(__clang__)
+#if defined(__clang__) && !defined(__wasm__)
#if __has_include(<pthread.h>)
#define LIBYUV_HAVE_PTHREAD 1
#endif
@@ -30,7 +30,7 @@ namespace libyuv {
void* ThreadMain(void* arg) {
int* flags = static_cast<int*>(arg);
- *flags = TestCpuFlag(kCpuHasSSSE3);
+ *flags = TestCpuFlag(kCpuInitialized);
return nullptr;
}
#endif // LIBYUV_HAVE_PTHREAD
diff --git a/files/unit_test/math_test.cc b/files/unit_test/math_test.cc
index 0abbad51..a1544c12 100644
--- a/files/unit_test/math_test.cc
+++ b/files/unit_test/math_test.cc
@@ -16,10 +16,14 @@
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/scale.h"
+
+#ifdef ENABLE_ROW_TESTS
#include "libyuv/scale_row.h"
+#endif
namespace libyuv {
+#ifdef ENABLE_ROW_TESTS
TEST_F(LibYUVBaseTest, TestFixedDiv) {
int num[1280];
int div[1280];
@@ -151,5 +155,6 @@ TEST_F(LibYUVBaseTest, TestFixedDiv1_Opt) {
EXPECT_NEAR(result_c[j], result_opt[j], 1);
}
}
+#endif // ENABLE_ROW_TESTS
} // namespace libyuv
diff --git a/files/unit_test/planar_test.cc b/files/unit_test/planar_test.cc
index 70f8966e..3a8c470b 100644
--- a/files/unit_test/planar_test.cc
+++ b/files/unit_test/planar_test.cc
@@ -12,9 +12,6 @@
#include <stdlib.h>
#include <time.h>
-// row.h defines SIMD_ALIGNED, overriding unit_test.h
-#include "libyuv/row.h" /* For ScaleSumSamples_Neon */
-
#include "../unit_test/unit_test.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
@@ -24,6 +21,19 @@
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
+#include "libyuv/scale.h"
+
+#ifdef ENABLE_ROW_TESTS
+// row.h defines SIMD_ALIGNED, overriding unit_test.h
+// TODO(fbarchard): Remove row.h from unittests. Test public functions.
+#include "libyuv/row.h" /* For ScaleSumSamples_Neon */
+#endif
+
+#if defined(LIBYUV_BIT_EXACT)
+#define EXPECTED_ATTENUATE_DIFF 0
+#else
+#define EXPECTED_ATTENUATE_DIFF 2
+#endif
namespace libyuv {
@@ -96,9 +106,9 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
- EXPECT_NEAR(255, atten_pixels[255 * 4 + 0], 1);
- EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], 1);
- EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1);
+ EXPECT_NEAR(254, atten_pixels[255 * 4 + 0], EXPECTED_ATTENUATE_DIFF);
+ EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], EXPECTED_ATTENUATE_DIFF);
+ EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], EXPECTED_ATTENUATE_DIFF);
EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
free_aligned_buffer_page_end(atten2_pixels);
@@ -151,31 +161,32 @@ static int TestAttenuateI(int width,
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
- int max_diff = TestAttenuateI(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestAttenuateI(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, 2);
+
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
static int TestUnattenuateI(int width,
@@ -224,31 +235,31 @@ static int TestUnattenuateI(int width,
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
- int max_diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 1);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, -1, 0);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, 2);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
@@ -277,6 +288,7 @@ TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
}
}
+// near is for legacy platforms.
TEST_F(LibYUVPlanarTest, TestARGBGray) {
SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
memset(orig_pixels, 0, sizeof(orig_pixels));
@@ -313,17 +325,17 @@ TEST_F(LibYUVPlanarTest, TestARGBGray) {
orig_pixels[5][3] = 224u;
// Do 16 to test asm version.
ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
- EXPECT_EQ(30u, orig_pixels[0][0]);
- EXPECT_EQ(30u, orig_pixels[0][1]);
- EXPECT_EQ(30u, orig_pixels[0][2]);
+ EXPECT_NEAR(29u, orig_pixels[0][0], 1);
+ EXPECT_NEAR(29u, orig_pixels[0][1], 1);
+ EXPECT_NEAR(29u, orig_pixels[0][2], 1);
EXPECT_EQ(128u, orig_pixels[0][3]);
EXPECT_EQ(149u, orig_pixels[1][0]);
EXPECT_EQ(149u, orig_pixels[1][1]);
EXPECT_EQ(149u, orig_pixels[1][2]);
EXPECT_EQ(0u, orig_pixels[1][3]);
- EXPECT_EQ(76u, orig_pixels[2][0]);
- EXPECT_EQ(76u, orig_pixels[2][1]);
- EXPECT_EQ(76u, orig_pixels[2][2]);
+ EXPECT_NEAR(77u, orig_pixels[2][0], 1);
+ EXPECT_NEAR(77u, orig_pixels[2][1], 1);
+ EXPECT_NEAR(77u, orig_pixels[2][2], 1);
EXPECT_EQ(255u, orig_pixels[2][3]);
EXPECT_EQ(0u, orig_pixels[3][0]);
EXPECT_EQ(0u, orig_pixels[3][1]);
@@ -333,9 +345,9 @@ TEST_F(LibYUVPlanarTest, TestARGBGray) {
EXPECT_EQ(255u, orig_pixels[4][1]);
EXPECT_EQ(255u, orig_pixels[4][2]);
EXPECT_EQ(255u, orig_pixels[4][3]);
- EXPECT_EQ(96u, orig_pixels[5][0]);
- EXPECT_EQ(96u, orig_pixels[5][1]);
- EXPECT_EQ(96u, orig_pixels[5][2]);
+ EXPECT_NEAR(97u, orig_pixels[5][0], 1);
+ EXPECT_NEAR(97u, orig_pixels[5][1], 1);
+ EXPECT_NEAR(97u, orig_pixels[5][2], 1);
EXPECT_EQ(224u, orig_pixels[5][3]);
for (int i = 0; i < 1280; ++i) {
orig_pixels[i][0] = i;
@@ -385,30 +397,30 @@ TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
orig_pixels[5][3] = 224u;
// Do 16 to test asm version.
ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
- EXPECT_EQ(30u, gray_pixels[0][0]);
- EXPECT_EQ(30u, gray_pixels[0][1]);
- EXPECT_EQ(30u, gray_pixels[0][2]);
- EXPECT_EQ(128u, gray_pixels[0][3]);
- EXPECT_EQ(149u, gray_pixels[1][0]);
- EXPECT_EQ(149u, gray_pixels[1][1]);
- EXPECT_EQ(149u, gray_pixels[1][2]);
- EXPECT_EQ(0u, gray_pixels[1][3]);
- EXPECT_EQ(76u, gray_pixels[2][0]);
- EXPECT_EQ(76u, gray_pixels[2][1]);
- EXPECT_EQ(76u, gray_pixels[2][2]);
- EXPECT_EQ(255u, gray_pixels[2][3]);
- EXPECT_EQ(0u, gray_pixels[3][0]);
- EXPECT_EQ(0u, gray_pixels[3][1]);
- EXPECT_EQ(0u, gray_pixels[3][2]);
- EXPECT_EQ(255u, gray_pixels[3][3]);
- EXPECT_EQ(255u, gray_pixels[4][0]);
- EXPECT_EQ(255u, gray_pixels[4][1]);
- EXPECT_EQ(255u, gray_pixels[4][2]);
- EXPECT_EQ(255u, gray_pixels[4][3]);
- EXPECT_EQ(96u, gray_pixels[5][0]);
- EXPECT_EQ(96u, gray_pixels[5][1]);
- EXPECT_EQ(96u, gray_pixels[5][2]);
- EXPECT_EQ(224u, gray_pixels[5][3]);
+ EXPECT_NEAR(30u, gray_pixels[0][0], 1);
+ EXPECT_NEAR(30u, gray_pixels[0][1], 1);
+ EXPECT_NEAR(30u, gray_pixels[0][2], 1);
+ EXPECT_NEAR(128u, gray_pixels[0][3], 1);
+ EXPECT_NEAR(149u, gray_pixels[1][0], 1);
+ EXPECT_NEAR(149u, gray_pixels[1][1], 1);
+ EXPECT_NEAR(149u, gray_pixels[1][2], 1);
+ EXPECT_NEAR(0u, gray_pixels[1][3], 1);
+ EXPECT_NEAR(76u, gray_pixels[2][0], 1);
+ EXPECT_NEAR(76u, gray_pixels[2][1], 1);
+ EXPECT_NEAR(76u, gray_pixels[2][2], 1);
+ EXPECT_NEAR(255u, gray_pixels[2][3], 1);
+ EXPECT_NEAR(0u, gray_pixels[3][0], 1);
+ EXPECT_NEAR(0u, gray_pixels[3][1], 1);
+ EXPECT_NEAR(0u, gray_pixels[3][2], 1);
+ EXPECT_NEAR(255u, gray_pixels[3][3], 1);
+ EXPECT_NEAR(255u, gray_pixels[4][0], 1);
+ EXPECT_NEAR(255u, gray_pixels[4][1], 1);
+ EXPECT_NEAR(255u, gray_pixels[4][2], 1);
+ EXPECT_NEAR(255u, gray_pixels[4][3], 1);
+ EXPECT_NEAR(96u, gray_pixels[5][0], 1);
+ EXPECT_NEAR(96u, gray_pixels[5][1], 1);
+ EXPECT_NEAR(96u, gray_pixels[5][2], 1);
+ EXPECT_NEAR(224u, gray_pixels[5][3], 1);
for (int i = 0; i < 1280; ++i) {
orig_pixels[i][0] = i;
orig_pixels[i][1] = i / 2;
@@ -418,6 +430,20 @@ TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1);
}
+
+ for (int i = 0; i < 256; ++i) {
+ orig_pixels[i][0] = i;
+ orig_pixels[i][1] = i;
+ orig_pixels[i][2] = i;
+ orig_pixels[i][3] = i;
+ }
+ ARGBGray(&orig_pixels[0][0], 0, 0, 0, 256, 1);
+ for (int i = 0; i < 256; ++i) {
+ EXPECT_EQ(i, orig_pixels[i][0]);
+ EXPECT_EQ(i, orig_pixels[i][1]);
+ EXPECT_EQ(i, orig_pixels[i][2]);
+ EXPECT_EQ(i, orig_pixels[i][3]);
+ }
}
TEST_F(LibYUVPlanarTest, TestARGBSepia) {
@@ -763,27 +789,75 @@ TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
}
}
-TEST_F(LibYUVPlanarTest, TestARGBMirror) {
- SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
- SIMD_ALIGNED(uint8_t dst_pixels[1280][4]);
+TEST_F(LibYUVPlanarTest, ARGBMirror_Opt) {
+ align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_ * 4);
+ align_buffer_page_end(dst_pixels_opt,
+ benchmark_width_ * benchmark_height_ * 4);
+ align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_ * 4);
- for (int i = 0; i < 1280; ++i) {
- orig_pixels[i][0] = i;
- orig_pixels[i][1] = i / 2;
- orig_pixels[i][2] = i / 3;
- orig_pixels[i][3] = i / 4;
+ MemRandomize(src_pixels, benchmark_width_ * benchmark_height_ * 4);
+ MaskCpuFlags(disable_cpu_flags_);
+ ARGBMirror(src_pixels, benchmark_width_ * 4, dst_pixels_c,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ ARGBMirror(src_pixels, benchmark_width_ * 4, dst_pixels_opt,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
}
- ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1);
+ for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
- for (int i = 0; i < 1280; ++i) {
- EXPECT_EQ(i & 255, dst_pixels[1280 - 1 - i][0]);
- EXPECT_EQ((i / 2) & 255, dst_pixels[1280 - 1 - i][1]);
- EXPECT_EQ((i / 3) & 255, dst_pixels[1280 - 1 - i][2]);
- EXPECT_EQ((i / 4) & 255, dst_pixels[1280 - 1 - i][3]);
+TEST_F(LibYUVPlanarTest, MirrorPlane_Opt) {
+ align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(dst_pixels_opt, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_);
+
+ MemRandomize(src_pixels, benchmark_width_ * benchmark_height_);
+ MaskCpuFlags(disable_cpu_flags_);
+ MirrorPlane(src_pixels, benchmark_width_, dst_pixels_c, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ MirrorPlane(src_pixels, benchmark_width_, dst_pixels_opt, benchmark_width_,
+ benchmark_width_, benchmark_height_);
}
- for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
- ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1);
+ for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
+
+TEST_F(LibYUVPlanarTest, MirrorUVPlane_Opt) {
+ align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_ * 2);
+ align_buffer_page_end(dst_pixels_opt,
+ benchmark_width_ * benchmark_height_ * 2);
+ align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_ * 2);
+
+ MemRandomize(src_pixels, benchmark_width_ * benchmark_height_ * 2);
+ MaskCpuFlags(disable_cpu_flags_);
+ MirrorUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c,
+ benchmark_width_ * 2, benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ MirrorUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt,
+ benchmark_width_ * 2, benchmark_width_, benchmark_height_);
}
+ for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
}
TEST_F(LibYUVPlanarTest, TestShade) {
@@ -1006,10 +1080,91 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
}
}
+TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) {
+ SIMD_ALIGNED(uint16_t orig_pixels_0[1280]);
+ SIMD_ALIGNED(uint16_t orig_pixels_1[1280]);
+ SIMD_ALIGNED(uint16_t interpolate_pixels[1280]);
+ memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
+ memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
+
+ orig_pixels_0[0] = 16u;
+ orig_pixels_0[1] = 32u;
+ orig_pixels_0[2] = 64u;
+ orig_pixels_0[3] = 128u;
+ orig_pixels_0[4] = 0u;
+ orig_pixels_0[5] = 0u;
+ orig_pixels_0[6] = 0u;
+ orig_pixels_0[7] = 255u;
+ orig_pixels_0[8] = 0u;
+ orig_pixels_0[9] = 0u;
+ orig_pixels_0[10] = 0u;
+ orig_pixels_0[11] = 0u;
+ orig_pixels_0[12] = 0u;
+ orig_pixels_0[13] = 0u;
+ orig_pixels_0[14] = 0u;
+ orig_pixels_0[15] = 0u;
+
+ orig_pixels_1[0] = 0u;
+ orig_pixels_1[1] = 0u;
+ orig_pixels_1[2] = 0u;
+ orig_pixels_1[3] = 0u;
+ orig_pixels_1[4] = 0u;
+ orig_pixels_1[5] = 0u;
+ orig_pixels_1[6] = 0u;
+ orig_pixels_1[7] = 0u;
+ orig_pixels_1[8] = 0u;
+ orig_pixels_1[9] = 0u;
+ orig_pixels_1[10] = 0u;
+ orig_pixels_1[11] = 0u;
+ orig_pixels_1[12] = 255u;
+ orig_pixels_1[13] = 255u;
+ orig_pixels_1[14] = 255u;
+ orig_pixels_1[15] = 255u;
+
+ InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
+ &interpolate_pixels[0], 0, 16, 1, 128);
+ EXPECT_EQ(8u, interpolate_pixels[0]);
+ EXPECT_EQ(16u, interpolate_pixels[1]);
+ EXPECT_EQ(32u, interpolate_pixels[2]);
+ EXPECT_EQ(64u, interpolate_pixels[3]);
+ EXPECT_EQ(0u, interpolate_pixels[4]);
+ EXPECT_EQ(0u, interpolate_pixels[5]);
+ EXPECT_EQ(0u, interpolate_pixels[6]);
+ EXPECT_EQ(128u, interpolate_pixels[7]);
+ EXPECT_EQ(0u, interpolate_pixels[8]);
+ EXPECT_EQ(0u, interpolate_pixels[9]);
+ EXPECT_EQ(0u, interpolate_pixels[10]);
+ EXPECT_EQ(0u, interpolate_pixels[11]);
+ EXPECT_EQ(128u, interpolate_pixels[12]);
+ EXPECT_EQ(128u, interpolate_pixels[13]);
+ EXPECT_EQ(128u, interpolate_pixels[14]);
+ EXPECT_EQ(128u, interpolate_pixels[15]);
+
+ InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
+ &interpolate_pixels[0], 0, 16, 1, 0);
+ EXPECT_EQ(16u, interpolate_pixels[0]);
+ EXPECT_EQ(32u, interpolate_pixels[1]);
+ EXPECT_EQ(64u, interpolate_pixels[2]);
+ EXPECT_EQ(128u, interpolate_pixels[3]);
+
+ InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
+ &interpolate_pixels[0], 0, 16, 1, 192);
+
+ EXPECT_EQ(4u, interpolate_pixels[0]);
+ EXPECT_EQ(8u, interpolate_pixels[1]);
+ EXPECT_EQ(16u, interpolate_pixels[2]);
+ EXPECT_EQ(32u, interpolate_pixels[3]);
+
+ for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
+ InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
+ &interpolate_pixels[0], 0, 1280, 1, 123);
+ }
+}
+
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
N, NEG, OFF) \
TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kStrideA = \
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
@@ -1041,7 +1196,7 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
}
#define TESTINTERPOLATE(TERP) \
- TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ - 1, TERP, _Any, +, 0) \
+ TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ + 1, TERP, _Any, +, 0) \
TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0) \
TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)
@@ -1058,7 +1213,8 @@ static int TestBlend(int width,
int disable_cpu_flags,
int benchmark_cpu_info,
int invert,
- int off) {
+ int off,
+ int attenuate) {
if (width < 1) {
width = 1;
}
@@ -1072,10 +1228,12 @@ static int TestBlend(int width,
src_argb_a[i + off] = (fastrand() & 0xff);
src_argb_b[i + off] = (fastrand() & 0xff);
}
- ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
- height);
- ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width,
- height);
+ MemRandomize(src_argb_a, kStride * height + off);
+ MemRandomize(src_argb_b, kStride * height + off);
+ if (attenuate) {
+ ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
+ height);
+ }
memset(dst_argb_c, 255, kStride * height);
memset(dst_argb_opt, 255, kStride * height);
@@ -1104,29 +1262,36 @@ static int TestBlend(int width,
TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
int max_diff =
- TestBlend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
- disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
+ TestBlend(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1);
EXPECT_LE(max_diff, 1);
}
TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
int max_diff =
TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
- disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 1);
EXPECT_LE(max_diff, 1);
}
TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
int max_diff =
TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
- disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
+ disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 1);
+ EXPECT_LE(max_diff, 1);
+}
+
+TEST_F(LibYUVPlanarTest, ARGBBlend_Unattenuated) {
+ int max_diff =
+ TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 0);
EXPECT_LE(max_diff, 1);
}
TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
int max_diff =
TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
- disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1);
EXPECT_LE(max_diff, 1);
}
@@ -1203,7 +1368,7 @@ TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
}
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
- TestBlendPlane(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
+ TestBlendPlane(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
}
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
@@ -1298,7 +1463,7 @@ TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
// TODO(fbarchard): DISABLED because _Any uses C. Avoid C and re-enable.
TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
- TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
+ TestI420Blend(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
}
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
@@ -1400,6 +1565,212 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) {
EXPECT_EQ(0, err);
}
+TEST_F(LibYUVPlanarTest, CopyPlane_Opt) {
+ int i;
+ int y_plane_size = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(orig_y, y_plane_size);
+ align_buffer_page_end(dst_c, y_plane_size);
+ align_buffer_page_end(dst_opt, y_plane_size);
+
+ MemRandomize(orig_y, y_plane_size);
+ memset(dst_c, 1, y_plane_size);
+ memset(dst_opt, 2, y_plane_size);
+
+ // Disable all optimizations.
+ MaskCpuFlags(disable_cpu_flags_);
+ for (i = 0; i < benchmark_iterations_; i++) {
+ CopyPlane(orig_y, benchmark_width_, dst_c, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ }
+
+ // Enable optimizations.
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (i = 0; i < benchmark_iterations_; i++) {
+ CopyPlane(orig_y, benchmark_width_, dst_opt, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ }
+
+ for (i = 0; i < y_plane_size; ++i) {
+ EXPECT_EQ(dst_c[i], dst_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(dst_c);
+ free_aligned_buffer_page_end(dst_opt);
+}
+
+TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) {
+ // Test to verify copying a rect with a zero height or width does
+ // not touch destination memory.
+ uint8_t src = 42;
+ uint8_t dst = 0;
+
+ // Disable all optimizations.
+ MaskCpuFlags(disable_cpu_flags_);
+ CopyPlane(&src, 0, &dst, 0, 0, 0);
+ EXPECT_EQ(src, 42);
+ EXPECT_EQ(dst, 0);
+
+ CopyPlane(&src, 1, &dst, 1, 1, 0);
+ EXPECT_EQ(src, 42);
+ EXPECT_EQ(dst, 0);
+
+ CopyPlane(&src, 1, &dst, 1, 0, 1);
+ EXPECT_EQ(src, 42);
+ EXPECT_EQ(dst, 0);
+
+ // Enable optimizations.
+ MaskCpuFlags(benchmark_cpu_info_);
+ CopyPlane(&src, 0, &dst, 0, 0, 0);
+ EXPECT_EQ(src, 42);
+ EXPECT_EQ(dst, 0);
+
+ CopyPlane(&src, 1, &dst, 1, 1, 0);
+ EXPECT_EQ(src, 42);
+ EXPECT_EQ(dst, 0);
+
+ CopyPlane(&src, 1, &dst, 1, 0, 1);
+ EXPECT_EQ(src, 42);
+ EXPECT_EQ(dst, 0);
+}
+
+TEST_F(LibYUVPlanarTest, TestDetilePlane) {
+ int i, j;
+
+ // orig is tiled. Allocate enough memory for tiles.
+ int orig_width = (benchmark_width_ + 15) & ~15;
+ int orig_height = (benchmark_height_ + 15) & ~15;
+ int orig_plane_size = orig_width * orig_height;
+ int y_plane_size = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(orig_y, orig_plane_size);
+ align_buffer_page_end(dst_c, y_plane_size);
+ align_buffer_page_end(dst_opt, y_plane_size);
+
+ MemRandomize(orig_y, orig_plane_size);
+ memset(dst_c, 0, y_plane_size);
+ memset(dst_opt, 0, y_plane_size);
+
+ // Disable all optimizations.
+ MaskCpuFlags(disable_cpu_flags_);
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_,
+ benchmark_height_, 16);
+ }
+
+ // Enable optimizations.
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_,
+ benchmark_height_, 16);
+ }
+
+ for (i = 0; i < y_plane_size; ++i) {
+ EXPECT_EQ(dst_c[i], dst_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(dst_c);
+ free_aligned_buffer_page_end(dst_opt);
+}
+
+// Compares DetileSplitUV to 2 step Detile + SplitUV
+TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
+ int i, j;
+
+ // orig is tiled. Allocate enough memory for tiles.
+ int orig_width = (benchmark_width_ + 15) & ~15;
+ int orig_height = (benchmark_height_ + 15) & ~15;
+ int orig_plane_size = orig_width * orig_height;
+ int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
+ align_buffer_page_end(orig_uv, orig_plane_size);
+ align_buffer_page_end(detiled_uv, orig_plane_size);
+ align_buffer_page_end(dst_u_two_stage, uv_plane_size);
+ align_buffer_page_end(dst_u_opt, uv_plane_size);
+ align_buffer_page_end(dst_v_two_stage, uv_plane_size);
+ align_buffer_page_end(dst_v_opt, uv_plane_size);
+
+ MemRandomize(orig_uv, orig_plane_size);
+ memset(detiled_uv, 0, orig_plane_size);
+ memset(dst_u_two_stage, 0, uv_plane_size);
+ memset(dst_u_opt, 0, uv_plane_size);
+ memset(dst_v_two_stage, 0, uv_plane_size);
+ memset(dst_v_opt, 0, uv_plane_size);
+
+ DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2,
+ dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
+ benchmark_height_, 16);
+
+ // Benchmark 2 step conversion for comparison.
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
+ benchmark_width_, benchmark_height_, 16);
+ SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
+ (benchmark_width_ + 1) / 2, dst_v_two_stage,
+ (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
+ benchmark_height_);
+ }
+
+ for (i = 0; i < uv_plane_size; ++i) {
+ EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
+ EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(detiled_uv);
+ free_aligned_buffer_page_end(dst_u_two_stage);
+ free_aligned_buffer_page_end(dst_u_opt);
+ free_aligned_buffer_page_end(dst_v_two_stage);
+ free_aligned_buffer_page_end(dst_v_opt);
+}
+
+TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
+ int i, j;
+
+ // orig is tiled. Allocate enough memory for tiles.
+ int orig_width = (benchmark_width_ + 15) & ~15;
+ int orig_height = (benchmark_height_ + 15) & ~15;
+ int orig_plane_size = orig_width * orig_height;
+ int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
+ align_buffer_page_end(orig_uv, orig_plane_size);
+ align_buffer_page_end(dst_u_c, uv_plane_size);
+ align_buffer_page_end(dst_u_opt, uv_plane_size);
+ align_buffer_page_end(dst_v_c, uv_plane_size);
+ align_buffer_page_end(dst_v_opt, uv_plane_size);
+
+ MemRandomize(orig_uv, orig_plane_size);
+ memset(dst_u_c, 0, uv_plane_size);
+ memset(dst_u_opt, 0, uv_plane_size);
+ memset(dst_v_c, 0, uv_plane_size);
+ memset(dst_v_opt, 0, uv_plane_size);
+
+ // Disable all optimizations.
+ MaskCpuFlags(disable_cpu_flags_);
+
+ DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2,
+ dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
+ benchmark_height_, 16);
+
+ // Enable optimizations.
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetileSplitUVPlane(
+ orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
+ (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
+ }
+
+ for (i = 0; i < uv_plane_size; ++i) {
+ EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
+ EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(dst_u_c);
+ free_aligned_buffer_page_end(dst_u_opt);
+ free_aligned_buffer_page_end(dst_v_c);
+ free_aligned_buffer_page_end(dst_v_opt);
+}
+
static int TestMultiply(int width,
int height,
int benchmark_iterations,
@@ -1447,7 +1818,7 @@ static int TestMultiply(int width,
}
TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
- int max_diff = TestMultiply(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestMultiply(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 1);
@@ -1522,7 +1893,7 @@ static int TestAdd(int width,
TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
int max_diff =
- TestAdd(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
+ TestAdd(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 1);
}
@@ -1595,7 +1966,7 @@ static int TestSubtract(int width,
}
TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
- int max_diff = TestSubtract(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestSubtract(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 1);
@@ -1668,7 +2039,7 @@ static int TestSobel(int width,
TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
int max_diff =
- TestSobel(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
+ TestSobel(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
EXPECT_EQ(0, max_diff);
}
@@ -1741,7 +2112,7 @@ static int TestSobelToPlane(int width,
}
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
- int max_diff = TestSobelToPlane(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestSobelToPlane(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_EQ(0, max_diff);
@@ -1813,7 +2184,7 @@ static int TestSobelXY(int width,
}
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
- int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestSobelXY(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_EQ(0, max_diff);
@@ -1889,29 +2260,35 @@ static int TestBlur(int width,
return max_diff;
}
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+#define DISABLED_ARM(name) name
+#else
+#define DISABLED_ARM(name) DISABLED_##name
+#endif
+
static const int kBlurSize = 55;
-TEST_F(LibYUVPlanarTest, ARGBBlur_Any) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Any)) {
int max_diff =
- TestBlur(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
+ TestBlur(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Unaligned)) {
int max_diff =
TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Invert)) {
int max_diff =
TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Opt)) {
int max_diff =
TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize);
@@ -1919,35 +2296,35 @@ TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) {
}
static const int kBlurSmallSize = 5;
-TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Any)) {
int max_diff =
- TestBlur(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
+ TestBlur(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Unaligned)) {
int max_diff =
TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSmallSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Invert)) {
int max_diff =
TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSmallSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Opt)) {
int max_diff =
TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize);
EXPECT_LE(max_diff, 1);
}
-TEST_F(LibYUVPlanarTest, TestARGBPolynomial) {
+TEST_F(LibYUVPlanarTest, DISABLED_ARM(TestARGBPolynomial)) {
SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
@@ -2426,7 +2803,7 @@ static int TestARGBRect(int width,
}
TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
- int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestARGBRect(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0, 4);
EXPECT_EQ(0, max_diff);
@@ -2454,7 +2831,7 @@ TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
}
TEST_F(LibYUVPlanarTest, SetPlane_Any) {
- int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_,
+ int max_diff = TestARGBRect(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0, 1);
EXPECT_EQ(0, max_diff);
@@ -2483,33 +2860,24 @@ TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
- align_buffer_page_end(src_pixels, kPixels * 2);
- align_buffer_page_end(tmp_pixels_u, kPixels);
- align_buffer_page_end(tmp_pixels_v, kPixels);
+ align_buffer_page_end(src_pixels_u, kPixels);
+ align_buffer_page_end(src_pixels_v, kPixels);
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_c, kPixels * 2);
- MemRandomize(src_pixels, kPixels * 2);
- MemRandomize(tmp_pixels_u, kPixels);
- MemRandomize(tmp_pixels_v, kPixels);
+ MemRandomize(src_pixels_u, kPixels);
+ MemRandomize(src_pixels_v, kPixels);
MemRandomize(dst_pixels_opt, kPixels * 2);
MemRandomize(dst_pixels_c, kPixels * 2);
MaskCpuFlags(disable_cpu_flags_);
- SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
- tmp_pixels_v, benchmark_width_, benchmark_width_,
- benchmark_height_);
- MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
+ MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_,
dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);
- SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
- tmp_pixels_v, benchmark_width_, benchmark_width_,
- benchmark_height_);
-
for (int i = 0; i < benchmark_iterations_; ++i) {
- MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
+ MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_,
dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
benchmark_height_);
}
@@ -2518,9 +2886,43 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
- free_aligned_buffer_page_end(src_pixels);
- free_aligned_buffer_page_end(tmp_pixels_u);
- free_aligned_buffer_page_end(tmp_pixels_v);
+ free_aligned_buffer_page_end(src_pixels_u);
+ free_aligned_buffer_page_end(src_pixels_v);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
+
+// 16 bit channel split and merge
+TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels_u, kPixels * 2);
+ align_buffer_page_end(src_pixels_v, kPixels * 2);
+ align_buffer_page_end(dst_pixels_opt, kPixels * 2 * 2);
+ align_buffer_page_end(dst_pixels_c, kPixels * 2 * 2);
+ MemRandomize(src_pixels_u, kPixels * 2);
+ MemRandomize(src_pixels_v, kPixels * 2);
+ MemRandomize(dst_pixels_opt, kPixels * 2 * 2);
+ MemRandomize(dst_pixels_c, kPixels * 2 * 2);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
+ (const uint16_t*)src_pixels_v, benchmark_width_,
+ (uint16_t*)dst_pixels_c, benchmark_width_ * 2,
+ benchmark_width_, benchmark_height_, 12);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
+ (const uint16_t*)src_pixels_v, benchmark_width_,
+ (uint16_t*)dst_pixels_opt, benchmark_width_ * 2,
+ benchmark_width_, benchmark_height_, 12);
+ }
+
+ for (int i = 0; i < kPixels * 2 * 2; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+ free_aligned_buffer_page_end(src_pixels_u);
+ free_aligned_buffer_page_end(src_pixels_v);
free_aligned_buffer_page_end(dst_pixels_opt);
free_aligned_buffer_page_end(dst_pixels_c);
}
@@ -2528,47 +2930,112 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 2);
- align_buffer_page_end(tmp_pixels_u, kPixels);
- align_buffer_page_end(tmp_pixels_v, kPixels);
+ align_buffer_page_end(dst_pixels_u_c, kPixels);
+ align_buffer_page_end(dst_pixels_v_c, kPixels);
+ align_buffer_page_end(dst_pixels_u_opt, kPixels);
+ align_buffer_page_end(dst_pixels_v_opt, kPixels);
+
+ MemRandomize(src_pixels, kPixels * 2);
+ MemRandomize(dst_pixels_u_c, kPixels);
+ MemRandomize(dst_pixels_v_c, kPixels);
+ MemRandomize(dst_pixels_u_opt, kPixels);
+ MemRandomize(dst_pixels_v_opt, kPixels);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ SplitUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_u_c,
+ benchmark_width_, dst_pixels_v_c, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ SplitUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_u_opt,
+ benchmark_width_, dst_pixels_v_opt, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ }
+
+ for (int i = 0; i < kPixels; ++i) {
+ EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
+ EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_u_c);
+ free_aligned_buffer_page_end(dst_pixels_v_c);
+ free_aligned_buffer_page_end(dst_pixels_u_opt);
+ free_aligned_buffer_page_end(dst_pixels_v_opt);
+}
+
+// 16 bit channel split
+TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels, kPixels * 2 * 2);
+ align_buffer_page_end(dst_pixels_u_c, kPixels * 2);
+ align_buffer_page_end(dst_pixels_v_c, kPixels * 2);
+ align_buffer_page_end(dst_pixels_u_opt, kPixels * 2);
+ align_buffer_page_end(dst_pixels_v_opt, kPixels * 2);
+ MemRandomize(src_pixels, kPixels * 2 * 2);
+ MemRandomize(dst_pixels_u_c, kPixels * 2);
+ MemRandomize(dst_pixels_v_c, kPixels * 2);
+ MemRandomize(dst_pixels_u_opt, kPixels * 2);
+ MemRandomize(dst_pixels_v_opt, kPixels * 2);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
+ (uint16_t*)dst_pixels_u_c, benchmark_width_,
+ (uint16_t*)dst_pixels_v_c, benchmark_width_, benchmark_width_,
+ benchmark_height_, 10);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
+ (uint16_t*)dst_pixels_u_opt, benchmark_width_,
+ (uint16_t*)dst_pixels_v_opt, benchmark_width_,
+ benchmark_width_, benchmark_height_, 10);
+ }
+
+ for (int i = 0; i < kPixels * 2; ++i) {
+ EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
+ EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
+ }
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_u_c);
+ free_aligned_buffer_page_end(dst_pixels_v_c);
+ free_aligned_buffer_page_end(dst_pixels_u_opt);
+ free_aligned_buffer_page_end(dst_pixels_v_opt);
+}
+
+TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
+ // Round count up to multiple of 16
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_c, kPixels * 2);
MemRandomize(src_pixels, kPixels * 2);
- MemRandomize(tmp_pixels_u, kPixels);
- MemRandomize(tmp_pixels_v, kPixels);
MemRandomize(dst_pixels_opt, kPixels * 2);
MemRandomize(dst_pixels_c, kPixels * 2);
MaskCpuFlags(disable_cpu_flags_);
- SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
- tmp_pixels_v, benchmark_width_, benchmark_width_,
- benchmark_height_);
- MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
- dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
- benchmark_height_);
+ SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c,
+ benchmark_width_ * 2, benchmark_width_, benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);
for (int i = 0; i < benchmark_iterations_; ++i) {
- SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u,
- benchmark_width_, tmp_pixels_v, benchmark_width_,
- benchmark_width_, benchmark_height_);
+ SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt,
+ benchmark_width_ * 2, benchmark_width_, benchmark_height_);
}
- MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
- dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
- benchmark_height_);
for (int i = 0; i < kPixels * 2; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
free_aligned_buffer_page_end(src_pixels);
- free_aligned_buffer_page_end(tmp_pixels_u);
- free_aligned_buffer_page_end(tmp_pixels_v);
free_aligned_buffer_page_end(dst_pixels_opt);
free_aligned_buffer_page_end(dst_pixels_c);
}
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
+ // Round count up to multiple of 16
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 3);
align_buffer_page_end(tmp_pixels_r, kPixels);
@@ -2617,6 +3084,7 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
}
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
+ // Round count up to multiple of 16
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 3);
align_buffer_page_end(tmp_pixels_r, kPixels);
@@ -2663,10 +3131,373 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
free_aligned_buffer_page_end(dst_pixels_c);
}
+TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels, kPixels * 4);
+ align_buffer_page_end(tmp_pixels_r, kPixels);
+ align_buffer_page_end(tmp_pixels_g, kPixels);
+ align_buffer_page_end(tmp_pixels_b, kPixels);
+ align_buffer_page_end(tmp_pixels_a, kPixels);
+ align_buffer_page_end(dst_pixels_opt, kPixels * 4);
+ align_buffer_page_end(dst_pixels_c, kPixels * 4);
+
+ MemRandomize(src_pixels, kPixels * 4);
+ MemRandomize(tmp_pixels_r, kPixels);
+ MemRandomize(tmp_pixels_g, kPixels);
+ MemRandomize(tmp_pixels_b, kPixels);
+ MemRandomize(tmp_pixels_a, kPixels);
+ MemRandomize(dst_pixels_opt, kPixels * 4);
+ MemRandomize(dst_pixels_c, kPixels * 4);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
+ benchmark_width_, tmp_pixels_a, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_,
+ dst_pixels_c, benchmark_width_ * 4, benchmark_width_,
+ benchmark_height_);
+
+ MaskCpuFlags(benchmark_cpu_info_);
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
+ benchmark_width_, tmp_pixels_a, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g,
+ benchmark_width_, tmp_pixels_b, benchmark_width_,
+ tmp_pixels_a, benchmark_width_, dst_pixels_opt,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+ }
+
+ for (int i = 0; i < kPixels * 4; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(tmp_pixels_r);
+ free_aligned_buffer_page_end(tmp_pixels_g);
+ free_aligned_buffer_page_end(tmp_pixels_b);
+ free_aligned_buffer_page_end(tmp_pixels_a);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
+
+TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels, kPixels * 4);
+ align_buffer_page_end(tmp_pixels_r, kPixels);
+ align_buffer_page_end(tmp_pixels_g, kPixels);
+ align_buffer_page_end(tmp_pixels_b, kPixels);
+ align_buffer_page_end(tmp_pixels_a, kPixels);
+ align_buffer_page_end(dst_pixels_opt, kPixels * 4);
+ align_buffer_page_end(dst_pixels_c, kPixels * 4);
+
+ MemRandomize(src_pixels, kPixels * 4);
+ MemRandomize(tmp_pixels_r, kPixels);
+ MemRandomize(tmp_pixels_g, kPixels);
+ MemRandomize(tmp_pixels_b, kPixels);
+ MemRandomize(tmp_pixels_a, kPixels);
+ MemRandomize(dst_pixels_opt, kPixels * 4);
+ MemRandomize(dst_pixels_c, kPixels * 4);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
+ benchmark_width_, tmp_pixels_a, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_,
+ dst_pixels_c, benchmark_width_ * 4, benchmark_width_,
+ benchmark_height_);
+
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, tmp_pixels_a,
+ benchmark_width_, benchmark_width_, benchmark_height_);
+ }
+
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_,
+ dst_pixels_opt, benchmark_width_ * 4, benchmark_width_,
+ benchmark_height_);
+
+ for (int i = 0; i < kPixels * 4; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(tmp_pixels_r);
+ free_aligned_buffer_page_end(tmp_pixels_g);
+ free_aligned_buffer_page_end(tmp_pixels_b);
+ free_aligned_buffer_page_end(tmp_pixels_a);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
+
+TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels, kPixels * 4);
+ align_buffer_page_end(tmp_pixels_r, kPixels);
+ align_buffer_page_end(tmp_pixels_g, kPixels);
+ align_buffer_page_end(tmp_pixels_b, kPixels);
+ align_buffer_page_end(dst_pixels_opt, kPixels * 4);
+ align_buffer_page_end(dst_pixels_c, kPixels * 4);
+
+ MemRandomize(src_pixels, kPixels * 4);
+ MemRandomize(tmp_pixels_r, kPixels);
+ MemRandomize(tmp_pixels_g, kPixels);
+ MemRandomize(tmp_pixels_b, kPixels);
+ MemRandomize(dst_pixels_opt, kPixels * 4);
+ MemRandomize(dst_pixels_c, kPixels * 4);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
+ benchmark_width_, NULL, 0, benchmark_width_,
+ benchmark_height_);
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_c,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+
+ MaskCpuFlags(benchmark_cpu_info_);
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
+ benchmark_width_, NULL, 0, benchmark_width_,
+ benchmark_height_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g,
+ benchmark_width_, tmp_pixels_b, benchmark_width_, NULL, 0,
+ dst_pixels_opt, benchmark_width_ * 4, benchmark_width_,
+ benchmark_height_);
+ }
+
+ for (int i = 0; i < kPixels * 4; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(tmp_pixels_r);
+ free_aligned_buffer_page_end(tmp_pixels_g);
+ free_aligned_buffer_page_end(tmp_pixels_b);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
+
+TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels, kPixels * 4);
+ align_buffer_page_end(tmp_pixels_r, kPixels);
+ align_buffer_page_end(tmp_pixels_g, kPixels);
+ align_buffer_page_end(tmp_pixels_b, kPixels);
+ align_buffer_page_end(dst_pixels_opt, kPixels * 4);
+ align_buffer_page_end(dst_pixels_c, kPixels * 4);
+
+ MemRandomize(src_pixels, kPixels * 4);
+ MemRandomize(tmp_pixels_r, kPixels);
+ MemRandomize(tmp_pixels_g, kPixels);
+ MemRandomize(tmp_pixels_b, kPixels);
+ MemRandomize(dst_pixels_opt, kPixels * 4);
+ MemRandomize(dst_pixels_c, kPixels * 4);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
+ benchmark_width_, NULL, 0, benchmark_width_,
+ benchmark_height_);
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_c,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
+ benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, NULL, 0, benchmark_width_,
+ benchmark_height_);
+ }
+
+ MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
+ tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_opt,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+
+ for (int i = 0; i < kPixels * 4; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(tmp_pixels_r);
+ free_aligned_buffer_page_end(tmp_pixels_g);
+ free_aligned_buffer_page_end(tmp_pixels_b);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(dst_pixels_c);
+}
+
+// Merge 4 channels
+#define TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
+ const int kWidth = W1280; \
+ const int kPixels = kWidth * benchmark_height_; \
+ align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_a, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
+ align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
+ MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_a, kPixels * sizeof(STYPE) + OFF); \
+ memset(dst_memory_c, 0, kPixels * 4 * sizeof(DTYPE)); \
+ memset(dst_memory_opt, 0, kPixels * 4 * sizeof(DTYPE)); \
+ STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
+ STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
+ STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
+ STYPE* src_pixels_a = reinterpret_cast<STYPE*>(src_memory_a + OFF); \
+ DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
+ DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
+ kWidth, src_pixels_a, kWidth, dst_pixels_c, kWidth * 4, \
+ kWidth, NEG benchmark_height_, DEPTH); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
+ kWidth, src_pixels_a, kWidth, dst_pixels_opt, kWidth * 4, \
+ kWidth, NEG benchmark_height_, DEPTH); \
+ } \
+ for (int i = 0; i < kPixels * 4; ++i) { \
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(src_memory_r); \
+ free_aligned_buffer_page_end(src_memory_g); \
+ free_aligned_buffer_page_end(src_memory_b); \
+ free_aligned_buffer_page_end(src_memory_a); \
+ free_aligned_buffer_page_end(dst_memory_c); \
+ free_aligned_buffer_page_end(dst_memory_opt); \
+ }
+
+// Merge 3 channel RGB into 4 channel XRGB with opaque alpha
+#define TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVPlanarTest, FUNC##Plane_Opaque_##DEPTH##N) { \
+ const int kWidth = W1280; \
+ const int kPixels = kWidth * benchmark_height_; \
+ align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
+ align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
+ MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+ memset(dst_memory_c, 0, kPixels * 4 * sizeof(DTYPE)); \
+ memset(dst_memory_opt, 0, kPixels * 4 * sizeof(DTYPE)); \
+ STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
+ STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
+ STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
+ DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
+ DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
+ kWidth, NULL, 0, dst_pixels_c, kWidth * 4, kWidth, \
+ NEG benchmark_height_, DEPTH); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
+ kWidth, NULL, 0, dst_pixels_opt, kWidth * 4, kWidth, \
+ NEG benchmark_height_, DEPTH); \
+ } \
+ for (int i = 0; i < kPixels * 4; ++i) { \
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(src_memory_r); \
+ free_aligned_buffer_page_end(src_memory_g); \
+ free_aligned_buffer_page_end(src_memory_b); \
+ free_aligned_buffer_page_end(dst_memory_c); \
+ free_aligned_buffer_page_end(dst_memory_opt); \
+ }
+
+#define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
+ TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
+ TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
+ 2) \
+ TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
+ TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0) \
+ TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, \
+ 0) \
+ TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
+ 2) \
+ TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
+ TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
+
+TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 10)
+TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 12)
+TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 16)
+TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 10)
+TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 12)
+TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
+
+#define TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
+ const int kWidth = W1280; \
+ const int kPixels = kWidth * benchmark_height_; \
+ align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+ align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
+ align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
+ MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+ MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+ STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
+ STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
+ STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
+ DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
+ DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
+ memset(dst_pixels_c, 1, kPixels * 4 * sizeof(DTYPE)); \
+ memset(dst_pixels_opt, 2, kPixels * 4 * sizeof(DTYPE)); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
+ kWidth, dst_pixels_c, kWidth * 4, kWidth, \
+ NEG benchmark_height_, DEPTH); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
+ kWidth, dst_pixels_opt, kWidth * 4, kWidth, \
+ NEG benchmark_height_, DEPTH); \
+ } \
+ for (int i = 0; i < kPixels * 4; ++i) { \
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(src_memory_r); \
+ free_aligned_buffer_page_end(src_memory_g); \
+ free_aligned_buffer_page_end(src_memory_b); \
+ free_aligned_buffer_page_end(dst_memory_c); \
+ free_aligned_buffer_page_end(dst_memory_opt); \
+ }
+
+#define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
+ TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
+ TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
+ 2) \
+ TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
+ TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
+
+TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 10)
+TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 12)
+TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
+
// TODO(fbarchard): improve test for platforms and cpu detect
#ifdef HAS_MERGEUVROW_16_AVX2
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
- const int kPixels = benchmark_width_ * benchmark_height_;
+ // Round count up to multiple of 16
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+
align_buffer_page_end(src_pixels_u, kPixels * 2);
align_buffer_page_end(src_pixels_v, kPixels * 2);
align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
@@ -2679,19 +3510,19 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u),
reinterpret_cast<const uint16_t*>(src_pixels_v),
- reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 64, kPixels);
+ reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 16, kPixels);
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
for (int i = 0; i < benchmark_iterations_; ++i) {
if (has_avx2) {
MergeUVRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_u),
reinterpret_cast<const uint16_t*>(src_pixels_v),
- reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64,
+ reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 16,
kPixels);
} else {
MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u),
reinterpret_cast<const uint16_t*>(src_pixels_v),
- reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64,
+ reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 16,
kPixels);
}
}
@@ -2710,7 +3541,9 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_MULTIPLYROW_16_AVX2
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
- const int kPixels = benchmark_width_ * benchmark_height_;
+ // Round count up to multiple of 32
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
+
align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
@@ -2776,6 +3609,65 @@ TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
free_aligned_buffer_page_end(dst_pixels_y_c);
}
+TEST_F(LibYUVPlanarTest, YUY2ToY) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels_y, kPixels * 2);
+ align_buffer_page_end(dst_pixels_y_opt, kPixels);
+ align_buffer_page_end(dst_pixels_y_c, kPixels);
+
+ MemRandomize(src_pixels_y, kPixels * 2);
+ memset(dst_pixels_y_opt, 0, kPixels);
+ memset(dst_pixels_y_c, 1, kPixels);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_opt,
+ benchmark_width_, benchmark_width_, benchmark_height_);
+ }
+
+ for (int i = 0; i < kPixels; ++i) {
+ EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_y);
+ free_aligned_buffer_page_end(dst_pixels_y_opt);
+ free_aligned_buffer_page_end(dst_pixels_y_c);
+}
+
+TEST_F(LibYUVPlanarTest, UYVYToY) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(src_pixels_y, kPixels * 2);
+ align_buffer_page_end(dst_pixels_y_opt, kPixels);
+ align_buffer_page_end(dst_pixels_y_c, kPixels);
+
+ MemRandomize(src_pixels_y, kPixels * 2);
+ memset(dst_pixels_y_opt, 0, kPixels);
+ memset(dst_pixels_y_c, 1, kPixels);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_opt,
+ benchmark_width_, benchmark_width_, benchmark_height_);
+ }
+
+ for (int i = 0; i < kPixels; ++i) {
+ EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_y);
+ free_aligned_buffer_page_end(dst_pixels_y_opt);
+ free_aligned_buffer_page_end(dst_pixels_y_c);
+}
+
+#ifdef ENABLE_ROW_TESTS
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT16TO8ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
@@ -2822,6 +3714,36 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
}
#endif // HAS_CONVERT16TO8ROW_AVX2
+#ifdef HAS_UYVYTOYROW_NEON
+TEST_F(LibYUVPlanarTest, UYVYToYRow_Opt) {
+ // NEON does multiple of 16, so round count up
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+ align_buffer_page_end(src_pixels_y, kPixels * 2);
+ align_buffer_page_end(dst_pixels_y_opt, kPixels);
+ align_buffer_page_end(dst_pixels_y_c, kPixels);
+
+ MemRandomize(src_pixels_y, kPixels * 2);
+ memset(dst_pixels_y_opt, 0, kPixels);
+ memset(dst_pixels_y_c, 1, kPixels);
+
+ UYVYToYRow_C(src_pixels_y, dst_pixels_y_c, kPixels);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ UYVYToYRow_NEON(src_pixels_y, dst_pixels_y_opt, kPixels);
+ }
+
+ for (int i = 0; i < kPixels; ++i) {
+ EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_y);
+ free_aligned_buffer_page_end(dst_pixels_y_opt);
+ free_aligned_buffer_page_end(dst_pixels_y_c);
+}
+#endif // HAS_UYVYTOYROW_NEON
+
+#endif // ENABLE_ROW_TESTS
+
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels_y, kPixels);
@@ -2855,6 +3777,7 @@ TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
free_aligned_buffer_page_end(dst_pixels_y_c);
}
+#ifdef ENABLE_ROW_TESTS
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT8TO16ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
@@ -3173,33 +4096,33 @@ extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width);
extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
- SIMD_ALIGNED(uint32_t orig_pixels[640 + 4]);
- SIMD_ALIGNED(uint16_t dst_pixels_c[640]);
- SIMD_ALIGNED(uint16_t dst_pixels_opt[640]);
+ SIMD_ALIGNED(uint32_t orig_pixels[1280 + 8]);
+ SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
+ SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
memset(orig_pixels, 0, sizeof(orig_pixels));
memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
- for (int i = 0; i < 640 + 4; ++i) {
+ for (int i = 0; i < 1280 + 8; ++i) {
orig_pixels[i] = i * 256;
}
- GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640);
- for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
+ GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 1280);
+ for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
- GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640);
+ GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 1280);
} else {
- GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 640);
+ GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
}
#else
- GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 640);
+ GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
#endif
}
- for (int i = 0; i < 640; ++i) {
+ for (int i = 0; i < 1280; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
@@ -3225,141 +4148,139 @@ extern "C" void GaussCol_C(const uint16_t* src0,
int width);
TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
- SIMD_ALIGNED(uint16_t orig_pixels[640 * 5]);
- SIMD_ALIGNED(uint32_t dst_pixels_c[640]);
- SIMD_ALIGNED(uint32_t dst_pixels_opt[640]);
+ SIMD_ALIGNED(uint16_t orig_pixels[1280 * 5]);
+ SIMD_ALIGNED(uint32_t dst_pixels_c[1280]);
+ SIMD_ALIGNED(uint32_t dst_pixels_opt[1280]);
memset(orig_pixels, 0, sizeof(orig_pixels));
memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
- for (int i = 0; i < 640 * 5; ++i) {
- orig_pixels[i] = i;
+ for (int i = 0; i < 1280 * 5; ++i) {
+ orig_pixels[i] = static_cast<float>(i);
}
- GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
- &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0],
- 640);
- for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
+ GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
+ &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], &dst_pixels_c[0],
+ 1280);
+ for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
- GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
- &orig_pixels[640 * 3], &orig_pixels[640 * 4],
- &dst_pixels_opt[0], 640);
+ GaussCol_NEON(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
+ &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
+ &dst_pixels_opt[0], 1280);
} else {
- GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
- &orig_pixels[640 * 3], &orig_pixels[640 * 4],
- &dst_pixels_opt[0], 640);
+ GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
+ &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
+ &dst_pixels_opt[0], 1280);
}
#else
- GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
- &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_opt[0],
- 640);
+ GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
+ &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
+ &dst_pixels_opt[0], 1280);
#endif
}
- for (int i = 0; i < 640; ++i) {
+ for (int i = 0; i < 1280; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
-
- EXPECT_EQ(dst_pixels_c[0],
- static_cast<uint32_t>(0 * 1 + 640 * 4 + 640 * 2 * 6 + 640 * 3 * 4 +
- 640 * 4 * 1));
- EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
}
-float TestFloatDivToByte(int benchmark_width,
- int benchmark_height,
- int benchmark_iterations,
- float scale,
- bool opt) {
- int i, j;
- // NEON does multiple of 8, so round count up
- const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
- align_buffer_page_end(src_weights, kPixels * 4);
- align_buffer_page_end(src_values, kPixels * 4);
- align_buffer_page_end(dst_out_c, kPixels);
- align_buffer_page_end(dst_out_opt, kPixels);
- align_buffer_page_end(dst_mask_c, kPixels);
- align_buffer_page_end(dst_mask_opt, kPixels);
-
- // Randomize works but may contain some denormals affecting performance.
- // MemRandomize(orig_y, kPixels * 4);
- // large values are problematic. audio is really -1 to 1.
- for (i = 0; i < kPixels; ++i) {
- (reinterpret_cast<float*>(src_weights))[i] = scale;
- (reinterpret_cast<float*>(src_values))[i] =
- sinf(static_cast<float>(i) * 0.1f);
- }
- memset(dst_out_c, 0, kPixels);
- memset(dst_out_opt, 1, kPixels);
- memset(dst_mask_c, 2, kPixels);
- memset(dst_mask_opt, 3, kPixels);
+TEST_F(LibYUVPlanarTest, TestGaussRow_F32_Opt) {
+ SIMD_ALIGNED(float orig_pixels[1280 + 4]);
+ SIMD_ALIGNED(float dst_pixels_c[1280]);
+ SIMD_ALIGNED(float dst_pixels_opt[1280]);
- FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
- reinterpret_cast<float*>(src_values), dst_out_c,
- dst_mask_c, kPixels);
+ memset(orig_pixels, 0, sizeof(orig_pixels));
+ memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
+ memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
- for (j = 0; j < benchmark_iterations; j++) {
- if (opt) {
-#ifdef HAS_FLOATDIVTOBYTEROW_NEON
- FloatDivToByteRow_NEON(reinterpret_cast<float*>(src_weights),
- reinterpret_cast<float*>(src_values), dst_out_opt,
- dst_mask_opt, kPixels);
-#else
- FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
- reinterpret_cast<float*>(src_values), dst_out_opt,
- dst_mask_opt, kPixels);
-#endif
+ for (int i = 0; i < 1280 + 4; ++i) {
+ orig_pixels[i] = static_cast<float>(i);
+ }
+ GaussRow_F32_C(&orig_pixels[0], &dst_pixels_c[0], 1280);
+ for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+ int has_neon = TestCpuFlag(kCpuHasNEON);
+ if (has_neon) {
+ GaussRow_F32_NEON(&orig_pixels[0], &dst_pixels_opt[0], 1280);
} else {
- FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
- reinterpret_cast<float*>(src_values), dst_out_opt,
- dst_mask_opt, kPixels);
+ GaussRow_F32_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
}
+#else
+ GaussRow_F32_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
+#endif
}
- uint8_t max_diff = 0;
- for (i = 0; i < kPixels; ++i) {
- uint8_t abs_diff = abs(dst_out_c[i] - dst_out_opt[i]) +
- abs(dst_mask_c[i] - dst_mask_opt[i]);
- if (abs_diff > max_diff) {
- max_diff = abs_diff;
- }
+ for (int i = 0; i < 1280; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
+}
- free_aligned_buffer_page_end(src_weights);
- free_aligned_buffer_page_end(src_values);
- free_aligned_buffer_page_end(dst_out_c);
- free_aligned_buffer_page_end(dst_out_opt);
- free_aligned_buffer_page_end(dst_mask_c);
- free_aligned_buffer_page_end(dst_mask_opt);
+TEST_F(LibYUVPlanarTest, TestGaussCol_F32_Opt) {
+ SIMD_ALIGNED(float dst_pixels_c[1280]);
+ SIMD_ALIGNED(float dst_pixels_opt[1280]);
+ align_buffer_page_end(orig_pixels_buf, 1280 * 5 * 4); // 5 rows
+ float* orig_pixels = reinterpret_cast<float*>(orig_pixels_buf);
- return max_diff;
-}
+ memset(orig_pixels, 0, 1280 * 5 * 4);
+ memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
+ memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
-TEST_F(LibYUVPlanarTest, TestFloatDivToByte_C) {
- float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_,
- benchmark_iterations_, 1.2f, false);
- EXPECT_EQ(0, diff);
-}
+ for (int i = 0; i < 1280 * 5; ++i) {
+ orig_pixels[i] = static_cast<float>(i);
+ }
+ GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
+ &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
+ &dst_pixels_c[0], 1280);
+ for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+ int has_neon = TestCpuFlag(kCpuHasNEON);
+ if (has_neon) {
+ GaussCol_F32_NEON(&orig_pixels[0], &orig_pixels[1280],
+ &orig_pixels[1280 * 2], &orig_pixels[1280 * 3],
+ &orig_pixels[1280 * 4], &dst_pixels_opt[0], 1280);
+ } else {
+ GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280],
+ &orig_pixels[1280 * 2], &orig_pixels[1280 * 3],
+ &orig_pixels[1280 * 4], &dst_pixels_opt[0], 1280);
+ }
+#else
+ GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
+ &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
+ &dst_pixels_opt[0], 1280);
+#endif
+ }
-TEST_F(LibYUVPlanarTest, TestFloatDivToByte_Opt) {
- float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_,
- benchmark_iterations_, 1.2f, true);
- EXPECT_EQ(0, diff);
+ for (int i = 0; i < 1280; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+ free_aligned_buffer_page_end(orig_pixels_buf);
}
-TEST_F(LibYUVPlanarTest, UVToVURow) {
+TEST_F(LibYUVPlanarTest, SwapUVRow) {
const int kPixels = benchmark_width_ * benchmark_height_;
+ void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
+ SwapUVRow_C;
+
align_buffer_page_end(src_pixels_vu, kPixels * 2);
align_buffer_page_end(dst_pixels_uv, kPixels * 2);
-
MemRandomize(src_pixels_vu, kPixels * 2);
memset(dst_pixels_uv, 1, kPixels * 2);
- UVToVURow_C(src_pixels_vu, dst_pixels_uv, kPixels);
+#if defined(HAS_SWAPUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SwapUVRow = SwapUVRow_Any_NEON;
+ if (IS_ALIGNED(kPixels, 16)) {
+ SwapUVRow = SwapUVRow_NEON;
+ }
+ }
+#endif
+ for (int j = 0; j < benchmark_iterations_; j++) {
+ SwapUVRow(src_pixels_vu, dst_pixels_uv, kPixels);
+ }
for (int i = 0; i < kPixels; ++i) {
EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
@@ -3368,5 +4289,144 @@ TEST_F(LibYUVPlanarTest, UVToVURow) {
free_aligned_buffer_page_end(src_pixels_vu);
free_aligned_buffer_page_end(dst_pixels_uv);
}
+#endif // ENABLE_ROW_TESTS
+
+TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) {
+ const int kSize = benchmark_width_ * benchmark_height_ * 4;
+ align_buffer_page_end(orig_pixels, kSize);
+ align_buffer_page_end(dst_pixels_opt, kSize);
+ align_buffer_page_end(dst_pixels_c, kSize);
+
+ for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
+ ((float*)(orig_pixels))[i] = (i & 1023) * 3.14f;
+ }
+ memset(dst_pixels_opt, 1, kSize);
+ memset(dst_pixels_c, 2, kSize);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ GaussPlane_F32((const float*)(orig_pixels), benchmark_width_,
+ (float*)(dst_pixels_c), benchmark_width_, benchmark_width_,
+ benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ GaussPlane_F32((const float*)(orig_pixels), benchmark_width_,
+ (float*)(dst_pixels_opt), benchmark_width_, benchmark_width_,
+ benchmark_height_);
+ }
+ for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
+ EXPECT_NEAR(((float*)(dst_pixels_c))[i], ((float*)(dst_pixels_opt))[i], 1.f)
+ << i;
+ }
+
+ free_aligned_buffer_page_end(dst_pixels_c);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) {
+ int dst_width = (benchmark_width_ + 1) / 2;
+ int dst_height = (benchmark_height_ + 1) / 2;
+ align_buffer_page_end(src_pixels_u, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(src_pixels_v, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(tmp_pixels_u, dst_width * dst_height);
+ align_buffer_page_end(tmp_pixels_v, dst_width * dst_height);
+ align_buffer_page_end(dst_pixels_uv_opt, dst_width * 2 * dst_height);
+ align_buffer_page_end(dst_pixels_uv_c, dst_width * 2 * dst_height);
+
+ MemRandomize(src_pixels_u, benchmark_width_ * benchmark_height_);
+ MemRandomize(src_pixels_v, benchmark_width_ * benchmark_height_);
+ MemRandomize(tmp_pixels_u, dst_width * dst_height);
+ MemRandomize(tmp_pixels_v, dst_width * dst_height);
+ MemRandomize(dst_pixels_uv_opt, dst_width * 2 * dst_height);
+ MemRandomize(dst_pixels_uv_c, dst_width * 2 * dst_height);
+
+ MaskCpuFlags(disable_cpu_flags_);
+ HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
+ benchmark_width_, dst_pixels_uv_c, dst_width * 2,
+ benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
+ benchmark_width_, dst_pixels_uv_opt, dst_width * 2,
+ benchmark_width_, benchmark_height_);
+ }
+
+ for (int i = 0; i < dst_width * 2 * dst_height; ++i) {
+ EXPECT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_u);
+ free_aligned_buffer_page_end(src_pixels_v);
+ free_aligned_buffer_page_end(tmp_pixels_u);
+ free_aligned_buffer_page_end(tmp_pixels_v);
+ free_aligned_buffer_page_end(dst_pixels_uv_opt);
+ free_aligned_buffer_page_end(dst_pixels_uv_c);
+}
+
+TEST_F(LibYUVPlanarTest, NV12Copy) {
+ const int halfwidth = (benchmark_width_ + 1) >> 1;
+ const int halfheight = (benchmark_height_ + 1) >> 1;
+ align_buffer_page_end(src_y, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(src_uv, halfwidth * 2 * halfheight);
+ align_buffer_page_end(dst_y, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(dst_uv, halfwidth * 2 * halfheight);
+
+ MemRandomize(src_y, benchmark_width_ * benchmark_height_);
+ MemRandomize(src_uv, halfwidth * 2 * halfheight);
+ MemRandomize(dst_y, benchmark_width_ * benchmark_height_);
+ MemRandomize(dst_uv, halfwidth * 2 * halfheight);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ NV12Copy(src_y, benchmark_width_, src_uv, halfwidth * 2, dst_y,
+ benchmark_width_, dst_uv, halfwidth * 2, benchmark_width_,
+ benchmark_height_);
+ }
+
+ for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
+ EXPECT_EQ(src_y[i], dst_y[i]);
+ }
+ for (int i = 0; i < halfwidth * 2 * halfheight; ++i) {
+ EXPECT_EQ(src_uv[i], dst_uv[i]);
+ }
+
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_uv);
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+}
+
+TEST_F(LibYUVPlanarTest, NV21Copy) {
+ const int halfwidth = (benchmark_width_ + 1) >> 1;
+ const int halfheight = (benchmark_height_ + 1) >> 1;
+ align_buffer_page_end(src_y, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(src_vu, halfwidth * 2 * halfheight);
+ align_buffer_page_end(dst_y, benchmark_width_ * benchmark_height_);
+ align_buffer_page_end(dst_vu, halfwidth * 2 * halfheight);
+
+ MemRandomize(src_y, benchmark_width_ * benchmark_height_);
+ MemRandomize(src_vu, halfwidth * 2 * halfheight);
+ MemRandomize(dst_y, benchmark_width_ * benchmark_height_);
+ MemRandomize(dst_vu, halfwidth * 2 * halfheight);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ NV21Copy(src_y, benchmark_width_, src_vu, halfwidth * 2, dst_y,
+ benchmark_width_, dst_vu, halfwidth * 2, benchmark_width_,
+ benchmark_height_);
+ }
+
+ for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
+ EXPECT_EQ(src_y[i], dst_y[i]);
+ }
+ for (int i = 0; i < halfwidth * 2 * halfheight; ++i) {
+ EXPECT_EQ(src_vu[i], dst_vu[i]);
+ }
+
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_vu);
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_vu);
+}
} // namespace libyuv
diff --git a/files/unit_test/rotate_argb_test.cc b/files/unit_test/rotate_argb_test.cc
index d2003895..01ed69ca 100644
--- a/files/unit_test/rotate_argb_test.cc
+++ b/files/unit_test/rotate_argb_test.cc
@@ -156,31 +156,73 @@ TEST_F(LibYUVRotateTest, RotatePlane270_Opt) {
}
TEST_F(LibYUVRotateTest, DISABLED_RotatePlane0_Odd) {
- TestRotatePlane(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_width_ - 3, benchmark_height_ - 1, kRotate0,
+ TestRotatePlane(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate0,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_RotatePlane90_Odd) {
- TestRotatePlane(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_height_ - 1, benchmark_width_ - 3, kRotate90,
+ TestRotatePlane(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate90,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_RotatePlane180_Odd) {
- TestRotatePlane(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_width_ - 3, benchmark_height_ - 1, kRotate180,
+ TestRotatePlane(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate180,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_RotatePlane270_Odd) {
- TestRotatePlane(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_height_ - 1, benchmark_width_ - 3, kRotate270,
+ TestRotatePlane(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate270,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
+TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) {
+ int argb_plane_size = benchmark_width_ * 4 * abs(benchmark_height_);
+
+ align_buffer_page_end(src_argb, argb_plane_size);
+ align_buffer_page_end(dst_argb, argb_plane_size);
+
+ EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb,
+ benchmark_width_ * 4, benchmark_width_,
+ benchmark_height_, kRotate0));
+
+ EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb,
+ benchmark_width_ * 4 - 1, benchmark_width_ - 1,
+ benchmark_height_, kRotate0));
+
+ EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb,
+ benchmark_width_ * 4, benchmark_width_,
+ benchmark_height_, kRotate180));
+
+ EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb,
+ benchmark_width_ * 4 - 1, benchmark_width_ - 1,
+ benchmark_height_, kRotate180));
+
+ EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb,
+ abs(benchmark_height_) * 4, benchmark_width_,
+ benchmark_height_, kRotate90));
+
+ EXPECT_EQ(-1, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb,
+ abs(benchmark_height_) * 4, benchmark_width_ - 1,
+ benchmark_height_, kRotate90));
+
+ EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb,
+ abs(benchmark_height_) * 4, benchmark_width_,
+ benchmark_height_, kRotate270));
+
+ EXPECT_EQ(-1, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb,
+ abs(benchmark_height_) * 4, benchmark_width_ - 1,
+ benchmark_height_, kRotate270));
+
+ free_aligned_buffer_page_end(dst_argb);
+ free_aligned_buffer_page_end(src_argb);
+}
+
} // namespace libyuv
diff --git a/files/unit_test/rotate_test.cc b/files/unit_test/rotate_test.cc
index 61941e63..d3887414 100644
--- a/files/unit_test/rotate_test.cc
+++ b/files/unit_test/rotate_test.cc
@@ -16,6 +16,8 @@
namespace libyuv {
+#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
+
static void I420TestRotate(int src_width,
int src_height,
int dst_width,
@@ -108,33 +110,121 @@ TEST_F(LibYUVRotateTest, I420Rotate270_Opt) {
// Odd width tests work but disabled because they use C code and can be
// tested by passing an odd width command line or environment variable.
TEST_F(LibYUVRotateTest, DISABLED_I420Rotate0_Odd) {
- I420TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_width_ - 3, benchmark_height_ - 1, kRotate0,
+ I420TestRotate(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate0,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_I420Rotate90_Odd) {
- I420TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_height_ - 1, benchmark_width_ - 3, kRotate90,
+ I420TestRotate(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate90,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_I420Rotate180_Odd) {
- I420TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_width_ - 3, benchmark_height_ - 1, kRotate180,
+ I420TestRotate(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate180,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) {
- I420TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_height_ - 1, benchmark_width_ - 3, kRotate270,
+ I420TestRotate(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate270,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
+static void I422TestRotate(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height == 0) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_i422_y_size = src_width * Abs(src_height);
+ int src_i422_uv_size = ((src_width + 1) / 2) * Abs(src_height);
+ int src_i422_size = src_i422_y_size + src_i422_uv_size * 2;
+ align_buffer_page_end(src_i422, src_i422_size);
+ for (int i = 0; i < src_i422_size; ++i) {
+ src_i422[i] = fastrand() & 0xff;
+ }
+
+ int dst_i422_y_size = dst_width * dst_height;
+ int dst_i422_uv_size = ((dst_width + 1) / 2) * dst_height;
+ int dst_i422_size = dst_i422_y_size + dst_i422_uv_size * 2;
+ align_buffer_page_end(dst_i422_c, dst_i422_size);
+ align_buffer_page_end(dst_i422_opt, dst_i422_size);
+ memset(dst_i422_c, 2, dst_i422_size);
+ memset(dst_i422_opt, 3, dst_i422_size);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I422Rotate(src_i422, src_width, src_i422 + src_i422_y_size,
+ (src_width + 1) / 2, src_i422 + src_i422_y_size + src_i422_uv_size,
+ (src_width + 1) / 2, dst_i422_c, dst_width,
+ dst_i422_c + dst_i422_y_size, (dst_width + 1) / 2,
+ dst_i422_c + dst_i422_y_size + dst_i422_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I422Rotate(
+ src_i422, src_width, src_i422 + src_i422_y_size, (src_width + 1) / 2,
+ src_i422 + src_i422_y_size + src_i422_uv_size, (src_width + 1) / 2,
+ dst_i422_opt, dst_width, dst_i422_opt + dst_i422_y_size,
+ (dst_width + 1) / 2, dst_i422_opt + dst_i422_y_size + dst_i422_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_i422_size; ++i) {
+ EXPECT_EQ(dst_i422_c[i], dst_i422_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(dst_i422_c);
+ free_aligned_buffer_page_end(dst_i422_opt);
+ free_aligned_buffer_page_end(src_i422);
+}
+
+TEST_F(LibYUVRotateTest, I422Rotate0_Opt) {
+ I422TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I422Rotate90_Opt) {
+ I422TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I422Rotate180_Opt) {
+ I422TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I422Rotate270_Opt) {
+ I422TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
static void I444TestRotate(int src_width,
int src_height,
int dst_width,
@@ -225,29 +315,29 @@ TEST_F(LibYUVRotateTest, I444Rotate270_Opt) {
// Odd width tests work but disabled because they use C code and can be
// tested by passing an odd width command line or environment variable.
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate0_Odd) {
- I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_width_ - 3, benchmark_height_ - 1, kRotate0,
+ I444TestRotate(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate0,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate90_Odd) {
- I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_height_ - 1, benchmark_width_ - 3, kRotate90,
+ I444TestRotate(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate90,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate180_Odd) {
- I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_width_ - 3, benchmark_height_ - 1, kRotate180,
+ I444TestRotate(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate180,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate270_Odd) {
- I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_height_ - 1, benchmark_width_ - 3, kRotate270,
+ I444TestRotate(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate270,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
@@ -340,29 +430,29 @@ TEST_F(LibYUVRotateTest, NV12Rotate270_Opt) {
}
TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate0_Odd) {
- NV12TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_width_ - 3, benchmark_height_ - 1, kRotate0,
+ NV12TestRotate(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate0,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate90_Odd) {
- NV12TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_height_ - 1, benchmark_width_ - 3, kRotate90,
+ NV12TestRotate(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate90,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate180_Odd) {
- NV12TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_width_ - 3, benchmark_height_ - 1, kRotate180,
+ NV12TestRotate(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate180,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate270_Odd) {
- NV12TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
- benchmark_height_ - 1, benchmark_width_ - 3, kRotate270,
+ NV12TestRotate(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate270,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
@@ -391,4 +481,119 @@ TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) {
disable_cpu_flags_, benchmark_cpu_info_);
}
+// Test Android 420 to I420 Rotate
+#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ W1280, N, NEG, OFF, PN, OFF_U, OFF_V, ROT) \
+ TEST_F(LibYUVRotateTest, \
+ SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate##ROT##To##PN##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = benchmark_height_; \
+ const int kSizeUV = \
+ SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
+ align_buffer_page_end(src_uv, \
+ kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight); \
+ align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
+ align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ uint8_t* src_u = src_uv + OFF_U; \
+ uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \
+ int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \
+ for (int i = 0; i < kHeight; ++i) \
+ for (int j = 0; j < kWidth; ++j) \
+ src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
+ for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
+ for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
+ src_u[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \
+ (fastrand() & 0xff); \
+ src_v[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \
+ (fastrand() & 0xff); \
+ } \
+ } \
+ memset(dst_y_c, 1, kWidth* kHeight); \
+ memset(dst_u_c, 2, \
+ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_v_c, 3, \
+ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_y_opt, 101, kWidth* kHeight); \
+ memset(dst_u_opt, 102, \
+ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_v_opt, 103, \
+ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate( \
+ src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+ src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, dst_y_c, \
+ kWidth, dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \
+ SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight, \
+ (libyuv::RotationMode)ROT); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate( \
+ src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+ src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, \
+ dst_y_opt, kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+ dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight, \
+ (libyuv::RotationMode)ROT); \
+ } \
+ for (int i = 0; i < kHeight; ++i) { \
+ for (int j = 0; j < kWidth; ++j) { \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
+ } \
+ } \
+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+ EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
+ dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
+ } \
+ } \
+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+ EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
+ dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
+ } \
+ } \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_u_c); \
+ free_aligned_buffer_page_end(dst_v_c); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_u_opt); \
+ free_aligned_buffer_page_end(dst_v_opt); \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_uv); \
+ }
+
+#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \
+ SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \
+ SUBSAMP_Y) \
+ TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1, \
+ _Any, +, 0, PN, OFF_U, OFF_V, 0) \
+ TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, \
+ _Unaligned, +, 2, PN, OFF_U, OFF_V, 0) \
+ TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \
+ -, 0, PN, OFF_U, OFF_V, 0) \
+ TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
+ 0, PN, OFF_U, OFF_V, 0) \
+ TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
+ 0, PN, OFF_U, OFF_V, 180)
+
+TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2)
+TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2)
+TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
+#undef TESTAPLANARTOP
+#undef TESTAPLANARTOPI
+
} // namespace libyuv
diff --git a/files/unit_test/scale_argb_test.cc b/files/unit_test/scale_argb_test.cc
index 94aef60e..f54a68f1 100644
--- a/files/unit_test/scale_argb_test.cc
+++ b/files/unit_test/scale_argb_test.cc
@@ -22,6 +22,12 @@ namespace libyuv {
#define STRINGIZE(line) #line
#define FILELINESTR(file, line) file ":" STRINGIZE(line)
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+// SLOW TESTS are those that are unoptimized C code.
+// FULL TESTS are optimized but test many variations of the same code.
+#define ENABLE_FULL_TESTS
+#endif
+
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
static int ARGBTestFilter(int src_width,
int src_height,
@@ -114,8 +120,8 @@ static int ARGBTestFilter(int src_width,
return max_diff;
}
-static const int kTileX = 8;
-static const int kTileY = 8;
+static const int kTileX = 64;
+static const int kTileY = 64;
static int TileARGBScale(const uint8_t* src_argb,
int src_stride_argb,
@@ -232,7 +238,7 @@ static int ARGBClipTestFilter(int src_width,
#define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom)
#define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
-#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
+#define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff) \
TEST_F(LibYUVScaleTest, ARGBScaleDownBy##name##_##filter) { \
int diff = ARGBTestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
@@ -241,7 +247,7 @@ static int ARGBClipTestFilter(int src_width,
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, ARGBScaleDownClipBy##name##_##filter) { \
+ TEST_F(LibYUVScaleTest, DISABLED_##ARGBScaleDownClipBy##name##_##filter) { \
int diff = ARGBClipTestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
@@ -251,15 +257,30 @@ static int ARGBClipTestFilter(int src_width,
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
// filtering is different fixed point implementations for SSSE3, Neon and C.
-#define TEST_FACTOR(name, nom, denom) \
- TEST_FACTOR1(name, None, nom, denom, 0) \
- TEST_FACTOR1(name, Linear, nom, denom, 3) \
- TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
- TEST_FACTOR1(name, Box, nom, denom, 3)
+#ifndef DISABLE_SLOW_TESTS
+#define TEST_FACTOR(name, nom, denom) \
+ TEST_FACTOR1(, name, None, nom, denom, 0) \
+ TEST_FACTOR1(, name, Linear, nom, denom, 3) \
+ TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
+ TEST_FACTOR1(, name, Box, nom, denom, 3)
+#else
+#if defined(ENABLE_FULL_TESTS)
+#define TEST_FACTOR(name, nom, denom) \
+ TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0) \
+ TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3) \
+ TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
+ TEST_FACTOR1(DISABLED_, name, Box, nom, denom, 3)
+#else
+#define TEST_FACTOR(name, nom, denom) \
+ TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3)
+#endif
+#endif
TEST_FACTOR(2, 1, 2)
TEST_FACTOR(4, 1, 4)
+#ifndef DISABLE_SLOW_TESTS
TEST_FACTOR(8, 1, 8)
+#endif
TEST_FACTOR(3by4, 3, 4)
TEST_FACTOR(3by8, 3, 8)
TEST_FACTOR(3, 1, 3)
@@ -268,7 +289,7 @@ TEST_FACTOR(3, 1, 3)
#undef SX
#undef DX
-#define TEST_SCALETO1(name, width, height, filter, max_diff) \
+#define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff) \
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
@@ -282,34 +303,70 @@ TEST_FACTOR(3, 1, 3)
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, name##ClipTo##width##x##height##_##filter) { \
+ TEST_F(LibYUVScaleTest, \
+ DISABLED_##name##ClipTo##width##x##height##_##filter) { \
int diff = \
ARGBClipTestFilter(benchmark_width_, benchmark_height_, width, height, \
kFilter##filter, benchmark_iterations_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, name##ClipFrom##width##x##height##_##filter) { \
+ TEST_F(LibYUVScaleTest, \
+ DISABLED_##name##ClipFrom##width##x##height##_##filter) { \
int diff = ARGBClipTestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_); \
EXPECT_LE(diff, max_diff); \
}
-/// Test scale to a specified size with all 4 filters.
-#define TEST_SCALETO(name, width, height) \
- TEST_SCALETO1(name, width, height, None, 0) \
- TEST_SCALETO1(name, width, height, Linear, 3) \
- TEST_SCALETO1(name, width, height, Bilinear, 3)
+#ifndef DISABLE_SLOW_TESTS
+// Test scale to a specified size with all 4 filters.
+#define TEST_SCALETO(name, width, height) \
+ TEST_SCALETO1(, name, width, height, None, 0) \
+ TEST_SCALETO1(, name, width, height, Linear, 3) \
+ TEST_SCALETO1(, name, width, height, Bilinear, 3)
+#else
+#if defined(ENABLE_FULL_TESTS)
+#define TEST_SCALETO(name, width, height) \
+ TEST_SCALETO1(DISABLED_, name, width, height, None, 0) \
+ TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \
+ TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3)
+#else
+#define TEST_SCALETO(name, width, height) \
+ TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3)
+#endif
+#endif
TEST_SCALETO(ARGBScale, 1, 1)
-TEST_SCALETO(ARGBScale, 320, 240)
TEST_SCALETO(ARGBScale, 569, 480)
TEST_SCALETO(ARGBScale, 640, 360)
+#ifndef DISABLE_SLOW_TESTS
+TEST_SCALETO(ARGBScale, 256, 144) /* 128x72 * 2 */
+TEST_SCALETO(ARGBScale, 320, 240)
TEST_SCALETO(ARGBScale, 1280, 720)
TEST_SCALETO(ARGBScale, 1920, 1080)
+#endif // DISABLE_SLOW_TESTS
#undef TEST_SCALETO1
#undef TEST_SCALETO
+#define TEST_SCALESWAPXY1(name, filter, max_diff) \
+ TEST_F(LibYUVScaleTest, name##SwapXY_##filter) { \
+ int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, \
+ benchmark_height_, benchmark_width_, \
+ kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+// Test scale with swapped width and height with all 3 filters.
+TEST_SCALESWAPXY1(ARGBScale, None, 0)
+TEST_SCALESWAPXY1(ARGBScale, Linear, 0)
+TEST_SCALESWAPXY1(ARGBScale, Bilinear, 0)
+#else
+TEST_SCALESWAPXY1(ARGBScale, Bilinear, 0)
+#endif
+#undef TEST_SCALESWAPXY1
+
// Scale with YUV conversion to ARGB and clipping.
// TODO(fbarchard): Add fourcc support. All 4 ARGB formats is easy to support.
LIBYUV_API
@@ -454,4 +511,78 @@ TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) {
EXPECT_LE(diff, 10);
}
+TEST_F(LibYUVScaleTest, ARGBTest3x) {
+ const int kSrcStride = 480 * 4;
+ const int kDstStride = 160 * 4;
+ const int kSize = kSrcStride * 3;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < 480 * 3; ++i) {
+ orig_pixels[i * 4 + 0] = i;
+ orig_pixels[i * 4 + 1] = 255 - i;
+ orig_pixels[i * 4 + 2] = i + 1;
+ orig_pixels[i * 4 + 3] = i + 10;
+ }
+ align_buffer_page_end(dest_pixels, kDstStride);
+
+ int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+ benchmark_iterations_;
+ for (int i = 0; i < iterations160; ++i) {
+ ARGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+ kFilterBilinear);
+ }
+
+ EXPECT_EQ(225, dest_pixels[0]);
+ EXPECT_EQ(255 - 225, dest_pixels[1]);
+ EXPECT_EQ(226, dest_pixels[2]);
+ EXPECT_EQ(235, dest_pixels[3]);
+
+ ARGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+ kFilterNone);
+
+ EXPECT_EQ(225, dest_pixels[0]);
+ EXPECT_EQ(255 - 225, dest_pixels[1]);
+ EXPECT_EQ(226, dest_pixels[2]);
+ EXPECT_EQ(235, dest_pixels[3]);
+
+ free_aligned_buffer_page_end(dest_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, ARGBTest4x) {
+ const int kSrcStride = 640 * 4;
+ const int kDstStride = 160 * 4;
+ const int kSize = kSrcStride * 4;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < 640 * 4; ++i) {
+ orig_pixels[i * 4 + 0] = i;
+ orig_pixels[i * 4 + 1] = 255 - i;
+ orig_pixels[i * 4 + 2] = i + 1;
+ orig_pixels[i * 4 + 3] = i + 10;
+ }
+ align_buffer_page_end(dest_pixels, kDstStride);
+
+ int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+ benchmark_iterations_;
+ for (int i = 0; i < iterations160; ++i) {
+ ARGBScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
+ kFilterBilinear);
+ }
+
+ EXPECT_NEAR(66, dest_pixels[0], 4);
+ EXPECT_NEAR(255 - 66, dest_pixels[1], 4);
+ EXPECT_NEAR(67, dest_pixels[2], 4);
+ EXPECT_NEAR(76, dest_pixels[3], 4);
+
+ ARGBScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
+ kFilterNone);
+
+ EXPECT_EQ(2, dest_pixels[0]);
+ EXPECT_EQ(255 - 2, dest_pixels[1]);
+ EXPECT_EQ(3, dest_pixels[2]);
+ EXPECT_EQ(12, dest_pixels[3]);
+
+ free_aligned_buffer_page_end(dest_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
} // namespace libyuv
diff --git a/files/unit_test/scale_rgb_test.cc b/files/unit_test/scale_rgb_test.cc
new file mode 100644
index 00000000..8296abe3
--- /dev/null
+++ b/files/unit_test/scale_rgb_test.cc
@@ -0,0 +1,280 @@
+/*
+ * Copyright 2022 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <time.h>
+
+#include "../unit_test/unit_test.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/scale_rgb.h"
+
+namespace libyuv {
+
+#define STRINGIZE(line) #line
+#define FILELINESTR(file, line) file ":" STRINGIZE(line)
+
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+// SLOW TESTS are those that are unoptimized C code.
+// FULL TESTS are optimized but test many variations of the same code.
+#define ENABLE_FULL_TESTS
+#endif
+
+// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
+static int RGBTestFilter(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+ return 0;
+ }
+
+ int i, j;
+ const int b = 0; // 128 to test for padding/stride.
+ int64_t src_rgb_plane_size =
+ (Abs(src_width) + b * 3) * (Abs(src_height) + b * 3) * 3LL;
+ int src_stride_rgb = (b * 3 + Abs(src_width)) * 3;
+
+ align_buffer_page_end(src_rgb, src_rgb_plane_size);
+ if (!src_rgb) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+ MemRandomize(src_rgb, src_rgb_plane_size);
+
+ int64_t dst_rgb_plane_size = (dst_width + b * 3) * (dst_height + b * 3) * 3LL;
+ int dst_stride_rgb = (b * 3 + dst_width) * 3;
+
+ align_buffer_page_end(dst_rgb_c, dst_rgb_plane_size);
+ align_buffer_page_end(dst_rgb_opt, dst_rgb_plane_size);
+ if (!dst_rgb_c || !dst_rgb_opt) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+ memset(dst_rgb_c, 2, dst_rgb_plane_size);
+ memset(dst_rgb_opt, 3, dst_rgb_plane_size);
+
+ // Warm up both versions for consistent benchmarks.
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ RGBScale(src_rgb + (src_stride_rgb * b) + b * 3, src_stride_rgb, src_width,
+ src_height, dst_rgb_c + (dst_stride_rgb * b) + b * 3, dst_stride_rgb,
+ dst_width, dst_height, f);
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ RGBScale(src_rgb + (src_stride_rgb * b) + b * 3, src_stride_rgb, src_width,
+ src_height, dst_rgb_opt + (dst_stride_rgb * b) + b * 3,
+ dst_stride_rgb, dst_width, dst_height, f);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ double c_time = get_time();
+ RGBScale(src_rgb + (src_stride_rgb * b) + b * 3, src_stride_rgb, src_width,
+ src_height, dst_rgb_c + (dst_stride_rgb * b) + b * 3, dst_stride_rgb,
+ dst_width, dst_height, f);
+
+ c_time = (get_time() - c_time);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ double opt_time = get_time();
+ for (i = 0; i < benchmark_iterations; ++i) {
+ RGBScale(src_rgb + (src_stride_rgb * b) + b * 3, src_stride_rgb, src_width,
+ src_height, dst_rgb_opt + (dst_stride_rgb * b) + b * 3,
+ dst_stride_rgb, dst_width, dst_height, f);
+ }
+ opt_time = (get_time() - opt_time) / benchmark_iterations;
+
+ // Report performance of C vs OPT
+ printf("filter %d - %8d us C - %8d us OPT\n", f,
+ static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
+
+ // C version may be a little off from the optimized. Order of
+ // operations may introduce rounding somewhere. So do a difference
+ // of the buffers and look to see that the max difference isn't
+ // over 2.
+ int max_diff = 0;
+ for (i = b; i < (dst_height + b); ++i) {
+ for (j = b * 3; j < (dst_width + b) * 3; ++j) {
+ int abs_diff = Abs(dst_rgb_c[(i * dst_stride_rgb) + j] -
+ dst_rgb_opt[(i * dst_stride_rgb) + j]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ }
+
+ free_aligned_buffer_page_end(dst_rgb_c);
+ free_aligned_buffer_page_end(dst_rgb_opt);
+ free_aligned_buffer_page_end(src_rgb);
+ return max_diff;
+}
+
+// The following adjustments in dimensions ensure the scale factor will be
+// exactly achieved.
+#define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom)
+#define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
+
+#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
+ TEST_F(LibYUVScaleTest, RGBScaleDownBy##name##_##filter) { \
+ int diff = RGBTestFilter( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
+// filtering is different fixed point implementations for SSSE3, Neon and C.
+#define TEST_FACTOR(name, nom, denom) \
+ TEST_FACTOR1(name, None, nom, denom, 0) \
+ TEST_FACTOR1(name, Linear, nom, denom, 3) \
+ TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
+ TEST_FACTOR1(name, Box, nom, denom, 3)
+#else
+// Test a scale factor with Bilinear.
+#define TEST_FACTOR(name, nom, denom) \
+ TEST_FACTOR1(name, Bilinear, nom, denom, 3)
+#endif
+
+TEST_FACTOR(2, 1, 2)
+#ifndef DISABLE_SLOW_TESTS
+TEST_FACTOR(4, 1, 4)
+// TEST_FACTOR(8, 1, 8) Disable for benchmark performance.
+TEST_FACTOR(3by4, 3, 4)
+TEST_FACTOR(3by8, 3, 8)
+TEST_FACTOR(3, 1, 3)
+#endif
+#undef TEST_FACTOR1
+#undef TEST_FACTOR
+#undef SX
+#undef DX
+
+#define TEST_SCALETO1(name, width, height, filter, max_diff) \
+ TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
+ int diff = RGBTestFilter(benchmark_width_, benchmark_height_, width, \
+ height, kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
+ int diff = RGBTestFilter(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+/// Test scale to a specified size with all 4 filters.
+#define TEST_SCALETO(name, width, height) \
+ TEST_SCALETO1(name, width, height, None, 0) \
+ TEST_SCALETO1(name, width, height, Linear, 3) \
+ TEST_SCALETO1(name, width, height, Bilinear, 3)
+#else
+#define TEST_SCALETO(name, width, height) \
+ TEST_SCALETO1(name, width, height, Bilinear, 3)
+#endif
+
+TEST_SCALETO(RGBScale, 640, 360)
+#ifndef DISABLE_SLOW_TESTS
+TEST_SCALETO(RGBScale, 1, 1)
+TEST_SCALETO(RGBScale, 256, 144) /* 128x72 * 3 */
+TEST_SCALETO(RGBScale, 320, 240)
+TEST_SCALETO(RGBScale, 569, 480)
+TEST_SCALETO(RGBScale, 1280, 720)
+TEST_SCALETO(RGBScale, 1920, 1080)
+#endif // DISABLE_SLOW_TESTS
+#undef TEST_SCALETO1
+#undef TEST_SCALETO
+
+#define TEST_SCALESWAPXY1(name, filter, max_diff) \
+ TEST_F(LibYUVScaleTest, name##SwapXY_##filter) { \
+ int diff = RGBTestFilter(benchmark_width_, benchmark_height_, \
+ benchmark_height_, benchmark_width_, \
+ kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+// Test scale with swapped width and height with all 3 filters.
+TEST_SCALESWAPXY1(RGBScale, None, 0)
+TEST_SCALESWAPXY1(RGBScale, Linear, 0)
+TEST_SCALESWAPXY1(RGBScale, Bilinear, 0)
+#else
+TEST_SCALESWAPXY1(RGBScale, Bilinear, 0)
+#endif
+#undef TEST_SCALESWAPXY1
+
+TEST_F(LibYUVScaleTest, RGBTest3x) {
+ const int kSrcStride = 480 * 3;
+ const int kDstStride = 160 * 3;
+ const int kSize = kSrcStride * 3;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < 480 * 3; ++i) {
+ orig_pixels[i * 3 + 0] = i;
+ orig_pixels[i * 3 + 1] = 255 - i;
+ }
+ align_buffer_page_end(dest_pixels, kDstStride);
+
+ int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+ benchmark_iterations_;
+ for (int i = 0; i < iterations160; ++i) {
+ RGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+ kFilterBilinear);
+ }
+
+ EXPECT_EQ(225, dest_pixels[0]);
+ EXPECT_EQ(255 - 225, dest_pixels[1]);
+
+ RGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+ kFilterNone);
+
+ EXPECT_EQ(225, dest_pixels[0]);
+ EXPECT_EQ(255 - 225, dest_pixels[1]);
+
+ free_aligned_buffer_page_end(dest_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, RGBTest4x) {
+ const int kSrcStride = 640 * 3;
+ const int kDstStride = 160 * 3;
+ const int kSize = kSrcStride * 4;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < 640 * 4; ++i) {
+ orig_pixels[i * 3 + 0] = i;
+ orig_pixels[i * 3 + 1] = 255 - i;
+ }
+ align_buffer_page_end(dest_pixels, kDstStride);
+
+ int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+ benchmark_iterations_;
+ for (int i = 0; i < iterations160; ++i) {
+ RGBScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
+ kFilterBilinear);
+ }
+
+ EXPECT_EQ(66, dest_pixels[0]);
+ EXPECT_EQ(190, dest_pixels[1]);
+
+ RGBScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
+ kFilterNone);
+
+ EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
+ EXPECT_EQ(255 - 2, dest_pixels[1]);
+
+ free_aligned_buffer_page_end(dest_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+} // namespace libyuv
diff --git a/files/unit_test/scale_test.cc b/files/unit_test/scale_test.cc
index 811b2d04..a8c95268 100644
--- a/files/unit_test/scale_test.cc
+++ b/files/unit_test/scale_test.cc
@@ -14,11 +14,20 @@
#include "../unit_test/unit_test.h"
#include "libyuv/cpu_id.h"
#include "libyuv/scale.h"
+
+#ifdef ENABLE_ROW_TESTS
#include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C
+#endif
#define STRINGIZE(line) #line
#define FILELINESTR(file, line) file ":" STRINGIZE(line)
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+// SLOW TESTS are those that are unoptimized C code.
+// FULL TESTS are optimized but test many variations of the same code.
+#define ENABLE_FULL_TESTS
+#endif
+
namespace libyuv {
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
@@ -139,6 +148,123 @@ static int I420TestFilter(int src_width,
return max_diff;
}
+// Test scaling with 8 bit C vs 12 bit C and return maximum pixel difference.
+// 0 = exact.
+static int I420TestFilter_12(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+ return 0;
+ }
+
+ int i;
+ int src_width_uv = (Abs(src_width) + 1) >> 1;
+ int src_height_uv = (Abs(src_height) + 1) >> 1;
+
+ int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
+
+ int src_stride_y = Abs(src_width);
+ int src_stride_uv = src_width_uv;
+
+ align_buffer_page_end(src_y, src_y_plane_size);
+ align_buffer_page_end(src_u, src_uv_plane_size);
+ align_buffer_page_end(src_v, src_uv_plane_size);
+ align_buffer_page_end(src_y_12, src_y_plane_size * 2);
+ align_buffer_page_end(src_u_12, src_uv_plane_size * 2);
+ align_buffer_page_end(src_v_12, src_uv_plane_size * 2);
+ if (!src_y || !src_u || !src_v || !src_y_12 || !src_u_12 || !src_v_12) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+ uint16_t* p_src_y_12 = reinterpret_cast<uint16_t*>(src_y_12);
+ uint16_t* p_src_u_12 = reinterpret_cast<uint16_t*>(src_u_12);
+ uint16_t* p_src_v_12 = reinterpret_cast<uint16_t*>(src_v_12);
+
+ MemRandomize(src_y, src_y_plane_size);
+ MemRandomize(src_u, src_uv_plane_size);
+ MemRandomize(src_v, src_uv_plane_size);
+
+ for (i = 0; i < src_y_plane_size; ++i) {
+ p_src_y_12[i] = src_y[i];
+ }
+ for (i = 0; i < src_uv_plane_size; ++i) {
+ p_src_u_12[i] = src_u[i];
+ p_src_v_12[i] = src_v[i];
+ }
+
+ int dst_width_uv = (dst_width + 1) >> 1;
+ int dst_height_uv = (dst_height + 1) >> 1;
+
+ int dst_y_plane_size = (dst_width) * (dst_height);
+ int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
+
+ int dst_stride_y = dst_width;
+ int dst_stride_uv = dst_width_uv;
+
+ align_buffer_page_end(dst_y_8, dst_y_plane_size);
+ align_buffer_page_end(dst_u_8, dst_uv_plane_size);
+ align_buffer_page_end(dst_v_8, dst_uv_plane_size);
+ align_buffer_page_end(dst_y_12, dst_y_plane_size * 2);
+ align_buffer_page_end(dst_u_12, dst_uv_plane_size * 2);
+ align_buffer_page_end(dst_v_12, dst_uv_plane_size * 2);
+
+ uint16_t* p_dst_y_12 = reinterpret_cast<uint16_t*>(dst_y_12);
+ uint16_t* p_dst_u_12 = reinterpret_cast<uint16_t*>(dst_u_12);
+ uint16_t* p_dst_v_12 = reinterpret_cast<uint16_t*>(dst_v_12);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+ src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
+ dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (i = 0; i < benchmark_iterations; ++i) {
+ I420Scale_12(p_src_y_12, src_stride_y, p_src_u_12, src_stride_uv,
+ p_src_v_12, src_stride_uv, src_width, src_height, p_dst_y_12,
+ dst_stride_y, p_dst_u_12, dst_stride_uv, p_dst_v_12,
+ dst_stride_uv, dst_width, dst_height, f);
+ }
+
+ // Expect an exact match.
+ int max_diff = 0;
+ for (i = 0; i < dst_y_plane_size; ++i) {
+ int abs_diff = Abs(dst_y_8[i] - p_dst_y_12[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ for (i = 0; i < dst_uv_plane_size; ++i) {
+ int abs_diff = Abs(dst_u_8[i] - p_dst_u_12[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ abs_diff = Abs(dst_v_8[i] - p_dst_v_12[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+
+ free_aligned_buffer_page_end(dst_y_8);
+ free_aligned_buffer_page_end(dst_u_8);
+ free_aligned_buffer_page_end(dst_v_8);
+ free_aligned_buffer_page_end(dst_y_12);
+ free_aligned_buffer_page_end(dst_u_12);
+ free_aligned_buffer_page_end(dst_v_12);
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_u);
+ free_aligned_buffer_page_end(src_v);
+ free_aligned_buffer_page_end(src_y_12);
+ free_aligned_buffer_page_end(src_u_12);
+ free_aligned_buffer_page_end(src_v_12);
+
+ return max_diff;
+}
+
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
// 0 = exact.
static int I420TestFilter_16(int src_width,
@@ -374,6 +500,123 @@ static int I444TestFilter(int src_width,
return max_diff;
}
+// Test scaling with 8 bit C vs 12 bit C and return maximum pixel difference.
+// 0 = exact.
+static int I444TestFilter_12(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+ return 0;
+ }
+
+ int i;
+ int src_width_uv = Abs(src_width);
+ int src_height_uv = Abs(src_height);
+
+ int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
+
+ int src_stride_y = Abs(src_width);
+ int src_stride_uv = src_width_uv;
+
+ align_buffer_page_end(src_y, src_y_plane_size);
+ align_buffer_page_end(src_u, src_uv_plane_size);
+ align_buffer_page_end(src_v, src_uv_plane_size);
+ align_buffer_page_end(src_y_12, src_y_plane_size * 2);
+ align_buffer_page_end(src_u_12, src_uv_plane_size * 2);
+ align_buffer_page_end(src_v_12, src_uv_plane_size * 2);
+ if (!src_y || !src_u || !src_v || !src_y_12 || !src_u_12 || !src_v_12) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+ uint16_t* p_src_y_12 = reinterpret_cast<uint16_t*>(src_y_12);
+ uint16_t* p_src_u_12 = reinterpret_cast<uint16_t*>(src_u_12);
+ uint16_t* p_src_v_12 = reinterpret_cast<uint16_t*>(src_v_12);
+
+ MemRandomize(src_y, src_y_plane_size);
+ MemRandomize(src_u, src_uv_plane_size);
+ MemRandomize(src_v, src_uv_plane_size);
+
+ for (i = 0; i < src_y_plane_size; ++i) {
+ p_src_y_12[i] = src_y[i];
+ }
+ for (i = 0; i < src_uv_plane_size; ++i) {
+ p_src_u_12[i] = src_u[i];
+ p_src_v_12[i] = src_v[i];
+ }
+
+ int dst_width_uv = dst_width;
+ int dst_height_uv = dst_height;
+
+ int dst_y_plane_size = (dst_width) * (dst_height);
+ int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
+
+ int dst_stride_y = dst_width;
+ int dst_stride_uv = dst_width_uv;
+
+ align_buffer_page_end(dst_y_8, dst_y_plane_size);
+ align_buffer_page_end(dst_u_8, dst_uv_plane_size);
+ align_buffer_page_end(dst_v_8, dst_uv_plane_size);
+ align_buffer_page_end(dst_y_12, dst_y_plane_size * 2);
+ align_buffer_page_end(dst_u_12, dst_uv_plane_size * 2);
+ align_buffer_page_end(dst_v_12, dst_uv_plane_size * 2);
+
+ uint16_t* p_dst_y_12 = reinterpret_cast<uint16_t*>(dst_y_12);
+ uint16_t* p_dst_u_12 = reinterpret_cast<uint16_t*>(dst_u_12);
+ uint16_t* p_dst_v_12 = reinterpret_cast<uint16_t*>(dst_v_12);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+ src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
+ dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (i = 0; i < benchmark_iterations; ++i) {
+ I444Scale_12(p_src_y_12, src_stride_y, p_src_u_12, src_stride_uv,
+ p_src_v_12, src_stride_uv, src_width, src_height, p_dst_y_12,
+ dst_stride_y, p_dst_u_12, dst_stride_uv, p_dst_v_12,
+ dst_stride_uv, dst_width, dst_height, f);
+ }
+
+ // Expect an exact match.
+ int max_diff = 0;
+ for (i = 0; i < dst_y_plane_size; ++i) {
+ int abs_diff = Abs(dst_y_8[i] - p_dst_y_12[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ for (i = 0; i < dst_uv_plane_size; ++i) {
+ int abs_diff = Abs(dst_u_8[i] - p_dst_u_12[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ abs_diff = Abs(dst_v_8[i] - p_dst_v_12[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+
+ free_aligned_buffer_page_end(dst_y_8);
+ free_aligned_buffer_page_end(dst_u_8);
+ free_aligned_buffer_page_end(dst_v_8);
+ free_aligned_buffer_page_end(dst_y_12);
+ free_aligned_buffer_page_end(dst_u_12);
+ free_aligned_buffer_page_end(dst_v_12);
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_u);
+ free_aligned_buffer_page_end(src_v);
+ free_aligned_buffer_page_end(src_y_12);
+ free_aligned_buffer_page_end(src_u_12);
+ free_aligned_buffer_page_end(src_v_12);
+
+ return max_diff;
+}
+
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
// 0 = exact.
static int I444TestFilter_16(int src_width,
@@ -491,57 +734,185 @@ static int I444TestFilter_16(int src_width,
return max_diff;
}
+// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
+static int NV12TestFilter(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+ return 0;
+ }
+
+ int i, j;
+ int src_width_uv = (Abs(src_width) + 1) >> 1;
+ int src_height_uv = (Abs(src_height) + 1) >> 1;
+
+ int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv)*2;
+
+ int src_stride_y = Abs(src_width);
+ int src_stride_uv = src_width_uv * 2;
+
+ align_buffer_page_end(src_y, src_y_plane_size);
+ align_buffer_page_end(src_uv, src_uv_plane_size);
+ if (!src_y || !src_uv) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+ MemRandomize(src_y, src_y_plane_size);
+ MemRandomize(src_uv, src_uv_plane_size);
+
+ int dst_width_uv = (dst_width + 1) >> 1;
+ int dst_height_uv = (dst_height + 1) >> 1;
+
+ int64_t dst_y_plane_size = (dst_width) * (dst_height);
+ int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv)*2;
+
+ int dst_stride_y = dst_width;
+ int dst_stride_uv = dst_width_uv * 2;
+
+ align_buffer_page_end(dst_y_c, dst_y_plane_size);
+ align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
+ align_buffer_page_end(dst_y_opt, dst_y_plane_size);
+ align_buffer_page_end(dst_uv_opt, dst_uv_plane_size);
+ if (!dst_y_c || !dst_uv_c || !dst_y_opt || !dst_uv_opt) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ double c_time = get_time();
+ NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
+ dst_y_c, dst_stride_y, dst_uv_c, dst_stride_uv, dst_width,
+ dst_height, f);
+ c_time = (get_time() - c_time);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ double opt_time = get_time();
+ for (i = 0; i < benchmark_iterations; ++i) {
+ NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
+ dst_y_opt, dst_stride_y, dst_uv_opt, dst_stride_uv, dst_width,
+ dst_height, f);
+ }
+ opt_time = (get_time() - opt_time) / benchmark_iterations;
+ // Report performance of C vs OPT.
+ printf("filter %d - %8d us C - %8d us OPT\n", f,
+ static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
+
+ // C version may be a little off from the optimized. Order of
+ // operations may introduce rounding somewhere. So do a difference
+ // of the buffers and look to see that the max difference is not
+ // over 3.
+ int max_diff = 0;
+ for (i = 0; i < (dst_height); ++i) {
+ for (j = 0; j < (dst_width); ++j) {
+ int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
+ dst_y_opt[(i * dst_stride_y) + j]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ }
+
+ for (i = 0; i < (dst_height_uv); ++i) {
+ for (j = 0; j < (dst_width_uv * 2); ++j) {
+ int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] -
+ dst_uv_opt[(i * dst_stride_uv) + j]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ }
+
+ free_aligned_buffer_page_end(dst_y_c);
+ free_aligned_buffer_page_end(dst_uv_c);
+ free_aligned_buffer_page_end(dst_y_opt);
+ free_aligned_buffer_page_end(dst_uv_opt);
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_uv);
+
+ return max_diff;
+}
+
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
-#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
- TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
- int diff = I420TestFilter( \
- SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
- DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
- kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
- benchmark_cpu_info_); \
- EXPECT_LE(diff, max_diff); \
- } \
- TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
- int diff = I444TestFilter( \
- SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
- DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
- kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
- benchmark_cpu_info_); \
- EXPECT_LE(diff, max_diff); \
- } \
- TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \
- int diff = I420TestFilter_16( \
- SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
- DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
- kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
- benchmark_cpu_info_); \
- EXPECT_LE(diff, max_diff); \
- } \
- TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \
- int diff = I444TestFilter_16( \
- SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
- DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
- kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
- benchmark_cpu_info_); \
- EXPECT_LE(diff, max_diff); \
+#define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff) \
+ TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
+ int diff = I420TestFilter( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
+ int diff = I444TestFilter( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_12) { \
+ int diff = I420TestFilter_12( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_12) { \
+ int diff = I444TestFilter_12( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, NV12ScaleDownBy##name##_##filter) { \
+ int diff = NV12TestFilter( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
}
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
// filtering is different fixed point implementations for SSSE3, Neon and C.
-#define TEST_FACTOR(name, nom, denom, boxdiff) \
- TEST_FACTOR1(name, None, nom, denom, 0) \
- TEST_FACTOR1(name, Linear, nom, denom, 3) \
- TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
- TEST_FACTOR1(name, Box, nom, denom, boxdiff)
+#ifndef DISABLE_SLOW_TESTS
+#define TEST_FACTOR(name, nom, denom, boxdiff) \
+ TEST_FACTOR1(, name, None, nom, denom, 0) \
+ TEST_FACTOR1(, name, Linear, nom, denom, 3) \
+ TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
+ TEST_FACTOR1(, name, Box, nom, denom, boxdiff)
+#else
+#if defined(ENABLE_FULL_TESTS)
+#define TEST_FACTOR(name, nom, denom, boxdiff) \
+ TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0) \
+ TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3) \
+ TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
+ TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
+#else
+#define TEST_FACTOR(name, nom, denom, boxdiff) \
+ TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
+ TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
+#endif
+#endif
TEST_FACTOR(2, 1, 2, 0)
TEST_FACTOR(4, 1, 4, 0)
+#ifndef DISABLE_SLOW_TESTS
TEST_FACTOR(8, 1, 8, 0)
+#endif
TEST_FACTOR(3by4, 3, 4, 1)
TEST_FACTOR(3by8, 3, 8, 1)
TEST_FACTOR(3, 1, 3, 0)
@@ -550,7 +921,7 @@ TEST_FACTOR(3, 1, 3, 0)
#undef SX
#undef DX
-#define TEST_SCALETO1(name, width, height, filter, max_diff) \
+#define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff) \
TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
@@ -563,18 +934,40 @@ TEST_FACTOR(3, 1, 3, 0)
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \
+ TEST_F(LibYUVScaleTest, \
+ DISABLED_##I420##name##To##width##x##height##_##filter##_12) { \
+ int diff = I420TestFilter_12( \
+ benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, \
+ DISABLED_##I444##name##To##width##x##height##_##filter##_12) { \
+ int diff = I444TestFilter_12( \
+ benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, \
+ DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \
int diff = I420TestFilter_16( \
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \
+ TEST_F(LibYUVScaleTest, \
+ DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \
int diff = I444TestFilter_16( \
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
+ TEST_F(LibYUVScaleTest, NV12##name##To##width##x##height##_##filter) { \
+ int diff = NV12TestFilter(benchmark_width_, benchmark_height_, width, \
+ height, kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \
int diff = I420TestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
@@ -590,7 +983,23 @@ TEST_FACTOR(3, 1, 3, 0)
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
- I420##name##From##width##x##height##_##filter##_16) { \
+ DISABLED_##I420##name##From##width##x##height##_##filter##_12) { \
+ int diff = I420TestFilter_12(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, \
+ DISABLED_##I444##name##From##width##x##height##_##filter##_12) { \
+ int diff = I444TestFilter_12(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, \
+ DISABLED_##I420##name##From##width##x##height##_##filter##_16) { \
int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
@@ -598,30 +1007,126 @@ TEST_FACTOR(3, 1, 3, 0)
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
- I444##name##From##width##x##height##_##filter##_16) { \
+ DISABLED_##I444##name##From##width##x##height##_##filter##_16) { \
int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, NV12##name##From##width##x##height##_##filter) { \
+ int diff = NV12TestFilter(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
}
+#ifndef DISABLE_SLOW_TESTS
// Test scale to a specified size with all 4 filters.
-#define TEST_SCALETO(name, width, height) \
- TEST_SCALETO1(name, width, height, None, 0) \
- TEST_SCALETO1(name, width, height, Linear, 3) \
- TEST_SCALETO1(name, width, height, Bilinear, 3) \
- TEST_SCALETO1(name, width, height, Box, 3)
+#define TEST_SCALETO(name, width, height) \
+ TEST_SCALETO1(, name, width, height, None, 0) \
+ TEST_SCALETO1(, name, width, height, Linear, 3) \
+ TEST_SCALETO1(, name, width, height, Bilinear, 3) \
+ TEST_SCALETO1(, name, width, height, Box, 3)
+#else
+#if defined(ENABLE_FULL_TESTS)
+#define TEST_SCALETO(name, width, height) \
+ TEST_SCALETO1(DISABLED_, name, width, height, None, 0) \
+ TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \
+ TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \
+ TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
+#else
+#define TEST_SCALETO(name, width, height) \
+ TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \
+ TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
+#endif
+#endif
TEST_SCALETO(Scale, 1, 1)
-TEST_SCALETO(Scale, 320, 240)
TEST_SCALETO(Scale, 569, 480)
TEST_SCALETO(Scale, 640, 360)
+#ifndef DISABLE_SLOW_TESTS
+TEST_SCALETO(Scale, 256, 144) /* 128x72 * 2 */
+TEST_SCALETO(Scale, 320, 240)
TEST_SCALETO(Scale, 1280, 720)
TEST_SCALETO(Scale, 1920, 1080)
+#endif // DISABLE_SLOW_TESTS
#undef TEST_SCALETO1
#undef TEST_SCALETO
+#define TEST_SCALESWAPXY1(DISABLED_, name, filter, max_diff) \
+ TEST_F(LibYUVScaleTest, I420##name##SwapXY_##filter) { \
+ int diff = I420TestFilter(benchmark_width_, benchmark_height_, \
+ benchmark_height_, benchmark_width_, \
+ kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I444##name##SwapXY_##filter) { \
+ int diff = I444TestFilter(benchmark_width_, benchmark_height_, \
+ benchmark_height_, benchmark_width_, \
+ kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_12) { \
+ int diff = I420TestFilter_12(benchmark_width_, benchmark_height_, \
+ benchmark_height_, benchmark_width_, \
+ kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_12) { \
+ int diff = I444TestFilter_12(benchmark_width_, benchmark_height_, \
+ benchmark_height_, benchmark_width_, \
+ kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_16) { \
+ int diff = I420TestFilter_16(benchmark_width_, benchmark_height_, \
+ benchmark_height_, benchmark_width_, \
+ kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_16) { \
+ int diff = I444TestFilter_16(benchmark_width_, benchmark_height_, \
+ benchmark_height_, benchmark_width_, \
+ kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, NV12##name##SwapXY_##filter) { \
+ int diff = NV12TestFilter(benchmark_width_, benchmark_height_, \
+ benchmark_height_, benchmark_width_, \
+ kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ }
+
+// Test scale to a specified size with all 4 filters.
+#ifndef DISABLE_SLOW_TESTS
+TEST_SCALESWAPXY1(, Scale, None, 0)
+TEST_SCALESWAPXY1(, Scale, Linear, 3)
+TEST_SCALESWAPXY1(, Scale, Bilinear, 3)
+TEST_SCALESWAPXY1(, Scale, Box, 3)
+#else
+#if defined(ENABLE_FULL_TESTS)
+TEST_SCALESWAPXY1(DISABLED_, Scale, None, 0)
+TEST_SCALESWAPXY1(DISABLED_, Scale, Linear, 3)
+TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3)
+TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3)
+#else
+TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3)
+TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3)
+#endif
+#endif
+
+#undef TEST_SCALESWAPXY1
+
+#ifdef ENABLE_ROW_TESTS
#ifdef HAS_SCALEROWDOWN2_SSSE3
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
@@ -716,10 +1221,6 @@ extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
-extern "C" void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width);
extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
@@ -746,13 +1247,6 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
} else {
ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
}
-#elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
- int has_mmi = TestCpuFlag(kCpuHasMMI);
- if (has_mmi) {
- ScaleRowUp2_16_MMI(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
- } else {
- ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
- }
#else
ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
#endif
@@ -803,8 +1297,9 @@ TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
EXPECT_EQ(dst_pixels_c[1279], 3839);
}
+#endif // ENABLE_ROW_TESTS
-// Test scaling plane with 8 bit C vs 16 bit C and return maximum pixel
+// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel
// difference.
// 0 = exact.
static int TestPlaneFilter_16(int src_width,
@@ -873,14 +1368,14 @@ static int TestPlaneFilter_16(int src_width,
#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
-#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
- TEST_F(LibYUVScaleTest, ScalePlaneDownBy##name##_##filter##_16) { \
- int diff = TestPlaneFilter_16( \
- SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
- DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
- kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
- benchmark_cpu_info_); \
- EXPECT_LE(diff, max_diff); \
+#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
+ TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
+ int diff = TestPlaneFilter_16( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
}
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
@@ -893,7 +1388,7 @@ static int TestPlaneFilter_16(int src_width,
TEST_FACTOR(2, 1, 2, 0)
TEST_FACTOR(4, 1, 4, 0)
-TEST_FACTOR(8, 1, 8, 0)
+// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
TEST_FACTOR(3by4, 3, 4, 1)
TEST_FACTOR(3by8, 3, 8, 1)
TEST_FACTOR(3, 1, 3, 0)
@@ -901,4 +1396,206 @@ TEST_FACTOR(3, 1, 3, 0)
#undef TEST_FACTOR
#undef SX
#undef DX
+
+TEST_F(LibYUVScaleTest, PlaneTest3x) {
+ const int kSrcStride = 480;
+ const int kDstStride = 160;
+ const int kSize = kSrcStride * 3;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < 480 * 3; ++i) {
+ orig_pixels[i] = i;
+ }
+ align_buffer_page_end(dest_pixels, kDstStride);
+
+ int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+ benchmark_iterations_;
+ for (int i = 0; i < iterations160; ++i) {
+ ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+ kFilterBilinear);
+ }
+
+ EXPECT_EQ(225, dest_pixels[0]);
+
+ ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+ kFilterNone);
+
+ EXPECT_EQ(225, dest_pixels[0]);
+
+ free_aligned_buffer_page_end(dest_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTest4x) {
+ const int kSrcStride = 640;
+ const int kDstStride = 160;
+ const int kSize = kSrcStride * 4;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < 640 * 4; ++i) {
+ orig_pixels[i] = i;
+ }
+ align_buffer_page_end(dest_pixels, kDstStride);
+
+ int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+ benchmark_iterations_;
+ for (int i = 0; i < iterations160; ++i) {
+ ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
+ kFilterBilinear);
+ }
+
+ EXPECT_EQ(66, dest_pixels[0]);
+
+ ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
+ kFilterNone);
+
+ EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
+
+ free_aligned_buffer_page_end(dest_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+// Intent is to test 200x50 to 50x200 but width and height can be parameters.
+TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
+ const int kSize = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < kSize; ++i) {
+ orig_pixels[i] = i;
+ }
+ align_buffer_page_end(dest_opt_pixels, kSize);
+ align_buffer_page_end(dest_c_pixels, kSize);
+
+ MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
+ ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
+ dest_c_pixels, benchmark_height_, benchmark_height_,
+ benchmark_width_, kFilterNone);
+ MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
+ benchmark_height_, dest_opt_pixels, benchmark_height_,
+ benchmark_height_, benchmark_width_, kFilterNone);
+ }
+
+ for (int i = 0; i < kSize; ++i) {
+ EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
+ }
+
+ free_aligned_buffer_page_end(dest_c_pixels);
+ free_aligned_buffer_page_end(dest_opt_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
+ const int kSize = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < kSize; ++i) {
+ orig_pixels[i] = i;
+ }
+ align_buffer_page_end(dest_opt_pixels, kSize);
+ align_buffer_page_end(dest_c_pixels, kSize);
+
+ MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
+ ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
+ dest_c_pixels, benchmark_height_, benchmark_height_,
+ benchmark_width_, kFilterBilinear);
+ MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
+ benchmark_height_, dest_opt_pixels, benchmark_height_,
+ benchmark_height_, benchmark_width_, kFilterBilinear);
+ }
+
+ for (int i = 0; i < kSize; ++i) {
+ EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
+ }
+
+ free_aligned_buffer_page_end(dest_c_pixels);
+ free_aligned_buffer_page_end(dest_opt_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+// Intent is to test 200x50 to 50x200 but width and height can be parameters.
+TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
+ const int kSize = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < kSize; ++i) {
+ orig_pixels[i] = i;
+ }
+ align_buffer_page_end(dest_opt_pixels, kSize);
+ align_buffer_page_end(dest_c_pixels, kSize);
+
+ MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
+ ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
+ dest_c_pixels, benchmark_height_, benchmark_height_,
+ benchmark_width_, kFilterBox);
+ MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
+ benchmark_height_, dest_opt_pixels, benchmark_height_,
+ benchmark_height_, benchmark_width_, kFilterBox);
+ }
+
+ for (int i = 0; i < kSize; ++i) {
+ EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
+ }
+
+ free_aligned_buffer_page_end(dest_c_pixels);
+ free_aligned_buffer_page_end(dest_opt_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTest1_Box) {
+ align_buffer_page_end(orig_pixels, 3);
+ align_buffer_page_end(dst_pixels, 3);
+
+ // Pad the 1x1 byte image with invalid values before and after in case libyuv
+ // reads outside the memory boundaries.
+ orig_pixels[0] = 0;
+ orig_pixels[1] = 1; // scale this pixel
+ orig_pixels[2] = 2;
+ dst_pixels[0] = 3;
+ dst_pixels[1] = 3;
+ dst_pixels[2] = 3;
+
+ libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
+ /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
+ /* dst_width= */ 1, /* dst_height= */ 2,
+ libyuv::kFilterBox);
+
+ EXPECT_EQ(dst_pixels[0], 1);
+ EXPECT_EQ(dst_pixels[1], 1);
+ EXPECT_EQ(dst_pixels[2], 3);
+
+ free_aligned_buffer_page_end(dst_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) {
+ align_buffer_page_end(orig_pixels_alloc, 3 * 2);
+ align_buffer_page_end(dst_pixels_alloc, 3 * 2);
+ uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc;
+ uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc;
+
+ // Pad the 1x1 byte image with invalid values before and after in case libyuv
+ // reads outside the memory boundaries.
+ orig_pixels[0] = 0;
+ orig_pixels[1] = 1; // scale this pixel
+ orig_pixels[2] = 2;
+ dst_pixels[0] = 3;
+ dst_pixels[1] = 3;
+ dst_pixels[2] = 3;
+
+ libyuv::ScalePlane_16(
+ orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
+ /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
+ /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone);
+
+ EXPECT_EQ(dst_pixels[0], 1);
+ EXPECT_EQ(dst_pixels[1], 1);
+ EXPECT_EQ(dst_pixels[2], 3);
+
+ free_aligned_buffer_page_end(dst_pixels_alloc);
+ free_aligned_buffer_page_end(orig_pixels_alloc);
+}
} // namespace libyuv
diff --git a/files/unit_test/scale_uv_test.cc b/files/unit_test/scale_uv_test.cc
new file mode 100644
index 00000000..3d524bef
--- /dev/null
+++ b/files/unit_test/scale_uv_test.cc
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <time.h>
+
+#include "../unit_test/unit_test.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/scale_uv.h"
+
+namespace libyuv {
+
+#define STRINGIZE(line) #line
+#define FILELINESTR(file, line) file ":" STRINGIZE(line)
+
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+// SLOW TESTS are those that are unoptimized C code.
+// FULL TESTS are optimized but test many variations of the same code.
+#define ENABLE_FULL_TESTS
+#endif
+
+// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
+static int UVTestFilter(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+ return 0;
+ }
+
+ int i, j;
+ const int b = 0; // 128 to test for padding/stride.
+ int64_t src_uv_plane_size =
+ (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 2LL;
+ int src_stride_uv = (b * 2 + Abs(src_width)) * 2;
+
+ align_buffer_page_end(src_uv, src_uv_plane_size);
+ if (!src_uv) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+ MemRandomize(src_uv, src_uv_plane_size);
+
+ int64_t dst_uv_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 2LL;
+ int dst_stride_uv = (b * 2 + dst_width) * 2;
+
+ align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
+ align_buffer_page_end(dst_uv_opt, dst_uv_plane_size);
+ if (!dst_uv_c || !dst_uv_opt) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+ memset(dst_uv_c, 2, dst_uv_plane_size);
+ memset(dst_uv_opt, 3, dst_uv_plane_size);
+
+ // Warm up both versions for consistent benchmarks.
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
+ src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv,
+ dst_width, dst_height, f);
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
+ src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
+ dst_width, dst_height, f);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ double c_time = get_time();
+ UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
+ src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv,
+ dst_width, dst_height, f);
+
+ c_time = (get_time() - c_time);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ double opt_time = get_time();
+ for (i = 0; i < benchmark_iterations; ++i) {
+ UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
+ src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
+ dst_width, dst_height, f);
+ }
+ opt_time = (get_time() - opt_time) / benchmark_iterations;
+
+ // Report performance of C vs OPT
+ printf("filter %d - %8d us C - %8d us OPT\n", f,
+ static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
+
+ // C version may be a little off from the optimized. Order of
+ // operations may introduce rounding somewhere. So do a difference
+ // of the buffers and look to see that the max difference isn't
+ // over 2.
+ int max_diff = 0;
+ for (i = b; i < (dst_height + b); ++i) {
+ for (j = b * 2; j < (dst_width + b) * 2; ++j) {
+ int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] -
+ dst_uv_opt[(i * dst_stride_uv) + j]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ }
+
+ free_aligned_buffer_page_end(dst_uv_c);
+ free_aligned_buffer_page_end(dst_uv_opt);
+ free_aligned_buffer_page_end(src_uv);
+ return max_diff;
+}
+
+// The following adjustments in dimensions ensure the scale factor will be
+// exactly achieved.
+#define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom)
+#define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
+
+#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
+ TEST_F(LibYUVScaleTest, UVScaleDownBy##name##_##filter) { \
+ int diff = UVTestFilter( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
+// filtering is different fixed point implementations for SSSE3, Neon and C.
+#define TEST_FACTOR(name, nom, denom) \
+ TEST_FACTOR1(name, None, nom, denom, 0) \
+ TEST_FACTOR1(name, Linear, nom, denom, 3) \
+ TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
+ TEST_FACTOR1(name, Box, nom, denom, 3)
+#else
+// Test a scale factor with Bilinear.
+#define TEST_FACTOR(name, nom, denom) \
+ TEST_FACTOR1(name, Bilinear, nom, denom, 3)
+#endif
+
+TEST_FACTOR(2, 1, 2)
+TEST_FACTOR(4, 1, 4)
+// TEST_FACTOR(8, 1, 8) Disable for benchmark performance.
+TEST_FACTOR(3by4, 3, 4)
+TEST_FACTOR(3by8, 3, 8)
+TEST_FACTOR(3, 1, 3)
+#undef TEST_FACTOR1
+#undef TEST_FACTOR
+#undef SX
+#undef DX
+
+#define TEST_SCALETO1(name, width, height, filter, max_diff) \
+ TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
+ int diff = UVTestFilter(benchmark_width_, benchmark_height_, width, \
+ height, kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
+ int diff = UVTestFilter(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+/// Test scale to a specified size with all 4 filters.
+#define TEST_SCALETO(name, width, height) \
+ TEST_SCALETO1(name, width, height, None, 0) \
+ TEST_SCALETO1(name, width, height, Linear, 3) \
+ TEST_SCALETO1(name, width, height, Bilinear, 3)
+#else
+#define TEST_SCALETO(name, width, height) \
+ TEST_SCALETO1(name, width, height, Bilinear, 3)
+#endif
+
+TEST_SCALETO(UVScale, 1, 1)
+TEST_SCALETO(UVScale, 569, 480)
+TEST_SCALETO(UVScale, 640, 360)
+#ifndef DISABLE_SLOW_TESTS
+TEST_SCALETO(UVScale, 256, 144) /* 128x72 * 2 */
+TEST_SCALETO(UVScale, 320, 240)
+TEST_SCALETO(UVScale, 1280, 720)
+TEST_SCALETO(UVScale, 1920, 1080)
+#endif // DISABLE_SLOW_TESTS
+#undef TEST_SCALETO1
+#undef TEST_SCALETO
+
+#define TEST_SCALESWAPXY1(name, filter, max_diff) \
+ TEST_F(LibYUVScaleTest, name##SwapXY_##filter) { \
+ int diff = \
+ UVTestFilter(benchmark_width_, benchmark_height_, benchmark_height_, \
+ benchmark_width_, kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+// Test scale with swapped width and height with all 3 filters.
+TEST_SCALESWAPXY1(UVScale, None, 0)
+TEST_SCALESWAPXY1(UVScale, Linear, 0)
+TEST_SCALESWAPXY1(UVScale, Bilinear, 0)
+#else
+TEST_SCALESWAPXY1(UVScale, Bilinear, 0)
+#endif
+#undef TEST_SCALESWAPXY1
+
+TEST_F(LibYUVScaleTest, UVTest3x) {
+ const int kSrcStride = 480 * 2;
+ const int kDstStride = 160 * 2;
+ const int kSize = kSrcStride * 3;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < 480 * 3; ++i) {
+ orig_pixels[i * 2 + 0] = i;
+ orig_pixels[i * 2 + 1] = 255 - i;
+ }
+ align_buffer_page_end(dest_pixels, kDstStride);
+
+ int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+ benchmark_iterations_;
+ for (int i = 0; i < iterations160; ++i) {
+ UVScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+ kFilterBilinear);
+ }
+
+ EXPECT_EQ(225, dest_pixels[0]);
+ EXPECT_EQ(255 - 225, dest_pixels[1]);
+
+ UVScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+ kFilterNone);
+
+ EXPECT_EQ(225, dest_pixels[0]);
+ EXPECT_EQ(255 - 225, dest_pixels[1]);
+
+ free_aligned_buffer_page_end(dest_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, UVTest4x) {
+ const int kSrcStride = 640 * 2;
+ const int kDstStride = 160 * 2;
+ const int kSize = kSrcStride * 4;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < 640 * 4; ++i) {
+ orig_pixels[i * 2 + 0] = i;
+ orig_pixels[i * 2 + 1] = 255 - i;
+ }
+ align_buffer_page_end(dest_pixels, kDstStride);
+
+ int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+ benchmark_iterations_;
+ for (int i = 0; i < iterations160; ++i) {
+ UVScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
+ kFilterBilinear);
+ }
+
+ EXPECT_EQ(66, dest_pixels[0]);
+ EXPECT_EQ(190, dest_pixels[1]);
+
+ UVScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
+ kFilterNone);
+
+ EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
+ EXPECT_EQ(255 - 2, dest_pixels[1]);
+
+ free_aligned_buffer_page_end(dest_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+} // namespace libyuv
diff --git a/files/unit_test/testdata/mips.txt b/files/unit_test/testdata/mips.txt
new file mode 100644
index 00000000..d9f28cbf
--- /dev/null
+++ b/files/unit_test/testdata/mips.txt
@@ -0,0 +1,7 @@
+system type : generic-loongson-machine
+machine : loongson,generic
+processor : 0
+
+isa : mips1 mips2 mips3 mips4 mips5 mips32r1 mips32r2 mips64r1 mips64r2
+ASEs implemented : vz
+shadow register sets : 1
diff --git a/files/unit_test/testdata/mips_loongson2k.txt b/files/unit_test/testdata/mips_loongson2k.txt
new file mode 100644
index 00000000..8a88d38f
--- /dev/null
+++ b/files/unit_test/testdata/mips_loongson2k.txt
@@ -0,0 +1,5 @@
+system type : Loongson2K-SBC
+machine : loongson,LS2k1000-EVP
+processor : 0
+cpu model : Loongson-2K V0.3 FPU V0.1
+BogoMIPS : 1980.41
diff --git a/files/unit_test/testdata/mips_loongson3.txt b/files/unit_test/testdata/mips_loongson3.txt
new file mode 100644
index 00000000..1f540b12
--- /dev/null
+++ b/files/unit_test/testdata/mips_loongson3.txt
@@ -0,0 +1,10 @@
+system type : generic-loongson-machine
+machine : Unknown
+processor : 0
+cpu model : ICT Loongson-3 V0.9 FPU V0.1
+model name : ICT Loongson-3A R3 (Loongson-3A3000) @ 1500MHz
+BogoMIPS : 2990.15
+
+isa : mips1 mips2 mips3 mips4 mips5 mips32r1 mips32r2 mips64r1 mips64r2
+ASEs implemented : dsp dsp2 vz
+shadow register sets : 1
diff --git a/files/unit_test/testdata/mips_loongson_mmi.txt b/files/unit_test/testdata/mips_loongson_mmi.txt
new file mode 100644
index 00000000..0f10b8bb
--- /dev/null
+++ b/files/unit_test/testdata/mips_loongson_mmi.txt
@@ -0,0 +1,7 @@
+system type : generic-loongson-machine
+machine : loongson,generic
+processor : 0
+
+isa : mips1 mips2 mips3 mips4 mips5 mips32r1 mips32r2 mips64r1 mips64r2
+ASEs implemented : vz loongson-mmi loongson-ext
+shadow register sets : 1
diff --git a/files/unit_test/testdata/mips_msa.txt b/files/unit_test/testdata/mips_msa.txt
new file mode 100644
index 00000000..ac930615
--- /dev/null
+++ b/files/unit_test/testdata/mips_msa.txt
@@ -0,0 +1,7 @@
+system type : generic-loongson-machine
+machine : loongson,generic
+processor : 0
+
+isa : mips1 mips2 mips3 mips4 mips5 mips32r1 mips32r2 mips64r1 mips64r2
+ASEs implemented : vz msa
+shadow register sets : 1
diff --git a/files/unit_test/unit_test.cc b/files/unit_test/unit_test.cc
index a1ae7ea3..61145a46 100644
--- a/files/unit_test/unit_test.cc
+++ b/files/unit_test/unit_test.cc
@@ -14,23 +14,28 @@
#include <cstring>
-#ifdef LIBYUV_USE_GFLAGS
-#include "gflags/gflags.h"
+#ifdef LIBYUV_USE_ABSL_FLAGS
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
#endif
#include "libyuv/cpu_id.h"
unsigned int fastrand_seed = 0xfb;
-#ifdef LIBYUV_USE_GFLAGS
-DEFINE_int32(libyuv_width, 0, "width of test image.");
-DEFINE_int32(libyuv_height, 0, "height of test image.");
-DEFINE_int32(libyuv_repeat, 0, "number of times to repeat test.");
-DEFINE_int32(libyuv_flags, 0, "cpu flags for reference code. 1 = C, -1 = SIMD");
-DEFINE_int32(libyuv_cpu_info,
- 0,
- "cpu flags for benchmark code. 1 = C, -1 = SIMD");
+#ifdef LIBYUV_USE_ABSL_FLAGS
+ABSL_FLAG(int32_t, libyuv_width, 0, "width of test image.");
+ABSL_FLAG(int32_t, libyuv_height, 0, "height of test image.");
+ABSL_FLAG(int32_t, libyuv_repeat, 0, "number of times to repeat test.");
+ABSL_FLAG(int32_t,
+ libyuv_flags,
+ 0,
+ "cpu flags for reference code. 1 = C, -1 = SIMD");
+ABSL_FLAG(int32_t,
+ libyuv_cpu_info,
+ 0,
+ "cpu flags for benchmark code. 1 = C, -1 = SIMD");
#else
-// Disable command line parameters if gflags disabled.
+// Disable command line parameters if absl/flags disabled.
static const int32_t FLAGS_libyuv_width = 0;
static const int32_t FLAGS_libyuv_height = 0;
static const int32_t FLAGS_libyuv_repeat = 0;
@@ -38,6 +43,12 @@ static const int32_t FLAGS_libyuv_flags = 0;
static const int32_t FLAGS_libyuv_cpu_info = 0;
#endif
+#ifdef LIBYUV_USE_ABSL_FLAGS
+#define LIBYUV_GET_FLAG(f) absl::GetFlag(f)
+#else
+#define LIBYUV_GET_FLAG(f) f
+#endif
+
// Test environment variable for disabling CPU features. Any non-zero value
// to disable. Zero ignored to make it easy to set the variable on/off.
#if !defined(__native_client__) && !defined(_M_ARM)
@@ -66,8 +77,15 @@ int TestCpuEnv(int cpu_info) {
if (TestEnv("LIBYUV_DISABLE_MSA")) {
cpu_info &= ~libyuv::kCpuHasMSA;
}
- if (TestEnv("LIBYUV_DISABLE_MMI")) {
- cpu_info &= ~libyuv::kCpuHasMMI;
+#endif
+#if defined(__longarch__) && defined(__linux__)
+ if (TestEnv("LIBYUV_DISABLE_LSX")) {
+ cpu_info &= ~libyuv::kCpuHasLSX;
+ }
+#endif
+#if defined(__longarch__) && defined(__linux__)
+ if (TestEnv("LIBYUV_DISABLE_LASX")) {
+ cpu_info &= ~libyuv::kCpuHasLASX;
}
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
@@ -109,6 +127,9 @@ int TestCpuEnv(int cpu_info) {
if (TestEnv("LIBYUV_DISABLE_AVX512VL")) {
cpu_info &= ~libyuv::kCpuHasAVX512VL;
}
+ if (TestEnv("LIBYUV_DISABLE_AVX512VNNI")) {
+ cpu_info &= ~libyuv::kCpuHasAVX512VNNI;
+ }
if (TestEnv("LIBYUV_DISABLE_AVX512VBMI")) {
cpu_info &= ~libyuv::kCpuHasAVX512VBMI;
}
@@ -145,8 +166,8 @@ LibYUVConvertTest::LibYUVConvertTest()
if (repeat) {
benchmark_iterations_ = atoi(repeat); // NOLINT
}
- if (FLAGS_libyuv_repeat) {
- benchmark_iterations_ = FLAGS_libyuv_repeat;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) {
+ benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat);
}
if (benchmark_iterations_ > 1) {
benchmark_width_ = 1280;
@@ -156,29 +177,29 @@ LibYUVConvertTest::LibYUVConvertTest()
if (width) {
benchmark_width_ = atoi(width); // NOLINT
}
- if (FLAGS_libyuv_width) {
- benchmark_width_ = FLAGS_libyuv_width;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) {
+ benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width);
}
const char* height = getenv("LIBYUV_HEIGHT");
if (height) {
benchmark_height_ = atoi(height); // NOLINT
}
- if (FLAGS_libyuv_height) {
- benchmark_height_ = FLAGS_libyuv_height;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) {
+ benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height);
}
const char* cpu_flags = getenv("LIBYUV_FLAGS");
if (cpu_flags) {
disable_cpu_flags_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_flags) {
- disable_cpu_flags_ = FLAGS_libyuv_flags;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) {
+ disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags);
}
const char* cpu_info = getenv("LIBYUV_CPU_INFO");
if (cpu_info) {
benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_cpu_info) {
- benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) {
+ benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info);
}
disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_);
benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_);
@@ -201,8 +222,8 @@ LibYUVColorTest::LibYUVColorTest()
if (repeat) {
benchmark_iterations_ = atoi(repeat); // NOLINT
}
- if (FLAGS_libyuv_repeat) {
- benchmark_iterations_ = FLAGS_libyuv_repeat;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) {
+ benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat);
}
if (benchmark_iterations_ > 1) {
benchmark_width_ = 1280;
@@ -212,29 +233,29 @@ LibYUVColorTest::LibYUVColorTest()
if (width) {
benchmark_width_ = atoi(width); // NOLINT
}
- if (FLAGS_libyuv_width) {
- benchmark_width_ = FLAGS_libyuv_width;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) {
+ benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width);
}
const char* height = getenv("LIBYUV_HEIGHT");
if (height) {
benchmark_height_ = atoi(height); // NOLINT
}
- if (FLAGS_libyuv_height) {
- benchmark_height_ = FLAGS_libyuv_height;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) {
+ benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height);
}
const char* cpu_flags = getenv("LIBYUV_FLAGS");
if (cpu_flags) {
disable_cpu_flags_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_flags) {
- disable_cpu_flags_ = FLAGS_libyuv_flags;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) {
+ disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags);
}
const char* cpu_info = getenv("LIBYUV_CPU_INFO");
if (cpu_info) {
benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_cpu_info) {
- benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) {
+ benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info);
}
disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_);
benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_);
@@ -257,8 +278,8 @@ LibYUVScaleTest::LibYUVScaleTest()
if (repeat) {
benchmark_iterations_ = atoi(repeat); // NOLINT
}
- if (FLAGS_libyuv_repeat) {
- benchmark_iterations_ = FLAGS_libyuv_repeat;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) {
+ benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat);
}
if (benchmark_iterations_ > 1) {
benchmark_width_ = 1280;
@@ -268,29 +289,29 @@ LibYUVScaleTest::LibYUVScaleTest()
if (width) {
benchmark_width_ = atoi(width); // NOLINT
}
- if (FLAGS_libyuv_width) {
- benchmark_width_ = FLAGS_libyuv_width;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) {
+ benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width);
}
const char* height = getenv("LIBYUV_HEIGHT");
if (height) {
benchmark_height_ = atoi(height); // NOLINT
}
- if (FLAGS_libyuv_height) {
- benchmark_height_ = FLAGS_libyuv_height;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) {
+ benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height);
}
const char* cpu_flags = getenv("LIBYUV_FLAGS");
if (cpu_flags) {
disable_cpu_flags_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_flags) {
- disable_cpu_flags_ = FLAGS_libyuv_flags;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) {
+ disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags);
}
const char* cpu_info = getenv("LIBYUV_CPU_INFO");
if (cpu_info) {
benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_cpu_info) {
- benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) {
+ benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info);
}
disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_);
benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_);
@@ -313,8 +334,8 @@ LibYUVRotateTest::LibYUVRotateTest()
if (repeat) {
benchmark_iterations_ = atoi(repeat); // NOLINT
}
- if (FLAGS_libyuv_repeat) {
- benchmark_iterations_ = FLAGS_libyuv_repeat;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) {
+ benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat);
}
if (benchmark_iterations_ > 1) {
benchmark_width_ = 1280;
@@ -324,29 +345,29 @@ LibYUVRotateTest::LibYUVRotateTest()
if (width) {
benchmark_width_ = atoi(width); // NOLINT
}
- if (FLAGS_libyuv_width) {
- benchmark_width_ = FLAGS_libyuv_width;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) {
+ benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width);
}
const char* height = getenv("LIBYUV_HEIGHT");
if (height) {
benchmark_height_ = atoi(height); // NOLINT
}
- if (FLAGS_libyuv_height) {
- benchmark_height_ = FLAGS_libyuv_height;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) {
+ benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height);
}
const char* cpu_flags = getenv("LIBYUV_FLAGS");
if (cpu_flags) {
disable_cpu_flags_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_flags) {
- disable_cpu_flags_ = FLAGS_libyuv_flags;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) {
+ disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags);
}
const char* cpu_info = getenv("LIBYUV_CPU_INFO");
if (cpu_info) {
benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_cpu_info) {
- benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) {
+ benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info);
}
disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_);
benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_);
@@ -369,8 +390,8 @@ LibYUVPlanarTest::LibYUVPlanarTest()
if (repeat) {
benchmark_iterations_ = atoi(repeat); // NOLINT
}
- if (FLAGS_libyuv_repeat) {
- benchmark_iterations_ = FLAGS_libyuv_repeat;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) {
+ benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat);
}
if (benchmark_iterations_ > 1) {
benchmark_width_ = 1280;
@@ -380,29 +401,29 @@ LibYUVPlanarTest::LibYUVPlanarTest()
if (width) {
benchmark_width_ = atoi(width); // NOLINT
}
- if (FLAGS_libyuv_width) {
- benchmark_width_ = FLAGS_libyuv_width;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) {
+ benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width);
}
const char* height = getenv("LIBYUV_HEIGHT");
if (height) {
benchmark_height_ = atoi(height); // NOLINT
}
- if (FLAGS_libyuv_height) {
- benchmark_height_ = FLAGS_libyuv_height;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) {
+ benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height);
}
const char* cpu_flags = getenv("LIBYUV_FLAGS");
if (cpu_flags) {
disable_cpu_flags_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_flags) {
- disable_cpu_flags_ = FLAGS_libyuv_flags;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) {
+ disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags);
}
const char* cpu_info = getenv("LIBYUV_CPU_INFO");
if (cpu_info) {
benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_cpu_info) {
- benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) {
+ benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info);
}
disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_);
benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_);
@@ -425,8 +446,8 @@ LibYUVBaseTest::LibYUVBaseTest()
if (repeat) {
benchmark_iterations_ = atoi(repeat); // NOLINT
}
- if (FLAGS_libyuv_repeat) {
- benchmark_iterations_ = FLAGS_libyuv_repeat;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) {
+ benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat);
}
if (benchmark_iterations_ > 1) {
benchmark_width_ = 1280;
@@ -436,29 +457,29 @@ LibYUVBaseTest::LibYUVBaseTest()
if (width) {
benchmark_width_ = atoi(width); // NOLINT
}
- if (FLAGS_libyuv_width) {
- benchmark_width_ = FLAGS_libyuv_width;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) {
+ benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width);
}
const char* height = getenv("LIBYUV_HEIGHT");
if (height) {
benchmark_height_ = atoi(height); // NOLINT
}
- if (FLAGS_libyuv_height) {
- benchmark_height_ = FLAGS_libyuv_height;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) {
+ benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height);
}
const char* cpu_flags = getenv("LIBYUV_FLAGS");
if (cpu_flags) {
disable_cpu_flags_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_flags) {
- disable_cpu_flags_ = FLAGS_libyuv_flags;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) {
+ disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags);
}
const char* cpu_info = getenv("LIBYUV_CPU_INFO");
if (cpu_info) {
benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_cpu_info) {
- benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) {
+ benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info);
}
disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_);
benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_);
@@ -481,8 +502,8 @@ LibYUVCompareTest::LibYUVCompareTest()
if (repeat) {
benchmark_iterations_ = atoi(repeat); // NOLINT
}
- if (FLAGS_libyuv_repeat) {
- benchmark_iterations_ = FLAGS_libyuv_repeat;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) {
+ benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat);
}
if (benchmark_iterations_ > 1) {
benchmark_width_ = 1280;
@@ -492,29 +513,29 @@ LibYUVCompareTest::LibYUVCompareTest()
if (width) {
benchmark_width_ = atoi(width); // NOLINT
}
- if (FLAGS_libyuv_width) {
- benchmark_width_ = FLAGS_libyuv_width;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) {
+ benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width);
}
const char* height = getenv("LIBYUV_HEIGHT");
if (height) {
benchmark_height_ = atoi(height); // NOLINT
}
- if (FLAGS_libyuv_height) {
- benchmark_height_ = FLAGS_libyuv_height;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) {
+ benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height);
}
const char* cpu_flags = getenv("LIBYUV_FLAGS");
if (cpu_flags) {
disable_cpu_flags_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_flags) {
- disable_cpu_flags_ = FLAGS_libyuv_flags;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) {
+ disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags);
}
const char* cpu_info = getenv("LIBYUV_CPU_INFO");
if (cpu_info) {
benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT
}
- if (FLAGS_libyuv_cpu_info) {
- benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
+ if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) {
+ benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info);
}
disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_);
benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_);
@@ -529,11 +550,8 @@ LibYUVCompareTest::LibYUVCompareTest()
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
-#ifdef LIBYUV_USE_GFLAGS
- // AllowCommandLineParsing allows us to ignore flags passed on to us by
- // Chromium build bots without having to explicitly disable them.
- google::AllowCommandLineReparsing();
- google::ParseCommandLineFlags(&argc, &argv, true);
+#ifdef LIBYUV_USE_ABSL_FLAGS
+ absl::ParseCommandLine(argc, argv);
#endif
return RUN_ALL_TESTS();
}
diff --git a/files/unit_test/unit_test.h b/files/unit_test/unit_test.h
index 87907fa1..0a8df4d2 100644
--- a/files/unit_test/unit_test.h
+++ b/files/unit_test/unit_test.h
@@ -11,7 +11,7 @@
#ifndef UNIT_TEST_UNIT_TEST_H_ // NOLINT
#define UNIT_TEST_UNIT_TEST_H_
-#ifdef WIN32
+#ifdef _WIN32
#include <windows.h>
#else
#include <sys/resource.h>
@@ -111,10 +111,13 @@ inline int fastrand() {
return static_cast<int>((fastrand_seed >> 16) & 0xffff);
}
+// ubsan fails if dst is unaligned unless we use uint8
static inline void MemRandomize(uint8_t* dst, int64_t len) {
int64_t i;
for (i = 0; i < len - 1; i += 2) {
- *reinterpret_cast<uint16_t*>(dst) = fastrand();
+ int r = fastrand();
+ dst[0] = static_cast<uint8_t>(r);
+ dst[1] = static_cast<uint8_t>(r >> 8);
dst += 2;
}
for (; i < len; ++i) {
diff --git a/files/unit_test/video_common_test.cc b/files/unit_test/video_common_test.cc
index a84206a2..36728ea9 100644
--- a/files/unit_test/video_common_test.cc
+++ b/files/unit_test/video_common_test.cc
@@ -29,7 +29,7 @@ static bool TestValidFourCC(uint32_t fourcc, int bpp) {
!TestValidChar((fourcc >> 24) & 0xff)) {
return false;
}
- if (bpp < 0 || bpp > 32) {
+ if (bpp < 0 || bpp > 64) {
return false;
}
return true;
@@ -65,13 +65,15 @@ TEST_F(LibYUVBaseTest, TestFourCC) {
EXPECT_TRUE(TestValidFourCC(FOURCC_NV12, FOURCC_BPP_NV12));
EXPECT_TRUE(TestValidFourCC(FOURCC_YUY2, FOURCC_BPP_YUY2));
EXPECT_TRUE(TestValidFourCC(FOURCC_UYVY, FOURCC_BPP_UYVY));
- EXPECT_TRUE(TestValidFourCC(FOURCC_M420, FOURCC_BPP_M420));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_M420, FOURCC_BPP_M420)); // deprecated.
EXPECT_TRUE(TestValidFourCC(FOURCC_Q420, FOURCC_BPP_Q420)); // deprecated.
EXPECT_TRUE(TestValidFourCC(FOURCC_ARGB, FOURCC_BPP_ARGB));
EXPECT_TRUE(TestValidFourCC(FOURCC_BGRA, FOURCC_BPP_BGRA));
EXPECT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR));
EXPECT_TRUE(TestValidFourCC(FOURCC_AR30, FOURCC_BPP_AR30));
EXPECT_TRUE(TestValidFourCC(FOURCC_AB30, FOURCC_BPP_AB30));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_AR64, FOURCC_BPP_AR64));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_AB64, FOURCC_BPP_AB64));
EXPECT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG));
EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW));
EXPECT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA));
@@ -81,6 +83,11 @@ TEST_F(LibYUVBaseTest, TestFourCC) {
EXPECT_TRUE(TestValidFourCC(FOURCC_H420, FOURCC_BPP_H420));
EXPECT_TRUE(TestValidFourCC(FOURCC_H422, FOURCC_BPP_H422));
EXPECT_TRUE(TestValidFourCC(FOURCC_H010, FOURCC_BPP_H010));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_H210, FOURCC_BPP_H210));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_I010, FOURCC_BPP_I010));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_I210, FOURCC_BPP_I210));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_P010, FOURCC_BPP_P010));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_P210, FOURCC_BPP_P210));
EXPECT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG));
EXPECT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12));
EXPECT_TRUE(TestValidFourCC(FOURCC_YV16, FOURCC_BPP_YV16));