From 678702573531f19ae36847a6a07257aaae623fbe Mon Sep 17 00:00:00 2001 From: Sadaf Ebrahimi Date: Fri, 25 Aug 2023 16:27:50 +0000 Subject: Move libyuv/files/ directly under libyuv Test: TreeHugger Merged-In: I773d1ae01539cc5d200768b526f10b2922567f72 Change-Id: I4ba1f1e781d7fd3ad96639dfdc08f654e45ae3d3 --- unit_test/basictypes_test.cc | 43 + unit_test/color_test.cc | 848 ++++++ unit_test/compare_test.cc | 739 +++++ unit_test/convert_test.cc | 4647 ++++++++++++++++++++++++++++++ unit_test/cpu_test.cc | 337 +++ unit_test/cpu_thread_test.cc | 63 + unit_test/math_test.cc | 160 + unit_test/planar_test.cc | 4471 ++++++++++++++++++++++++++++ unit_test/rotate_argb_test.cc | 334 +++ unit_test/rotate_test.cc | 962 +++++++ unit_test/scale_argb_test.cc | 588 ++++ unit_test/scale_rgb_test.cc | 280 ++ unit_test/scale_test.cc | 1601 ++++++++++ unit_test/scale_uv_test.cc | 249 ++ unit_test/testdata/arm_v7.txt | 12 + unit_test/testdata/juno.txt | 15 + unit_test/testdata/mips.txt | 7 + unit_test/testdata/mips_loongson2k.txt | 5 + unit_test/testdata/mips_loongson3.txt | 10 + unit_test/testdata/mips_loongson_mmi.txt | 7 + unit_test/testdata/mips_msa.txt | 7 + unit_test/testdata/riscv64.txt | 4 + unit_test/testdata/riscv64_rvv.txt | 4 + unit_test/testdata/riscv64_rvv_zvfh.txt | 4 + unit_test/testdata/tegra3.txt | 23 + unit_test/testdata/test0.jpg | Bin 0 -> 421 bytes unit_test/testdata/test1.jpg | Bin 0 -> 735 bytes unit_test/testdata/test2.jpg | Bin 0 -> 685 bytes unit_test/testdata/test3.jpg | Bin 0 -> 704 bytes unit_test/testdata/test4.jpg | Bin 0 -> 701 bytes unit_test/unit_test.cc | 562 ++++ unit_test/unit_test.h | 223 ++ unit_test/video_common_test.cc | 112 + 33 files changed, 16317 insertions(+) create mode 100644 unit_test/basictypes_test.cc create mode 100644 unit_test/color_test.cc create mode 100644 unit_test/compare_test.cc create mode 100644 unit_test/convert_test.cc create mode 100644 unit_test/cpu_test.cc create mode 100644 unit_test/cpu_thread_test.cc create mode 100644 
unit_test/math_test.cc create mode 100644 unit_test/planar_test.cc create mode 100644 unit_test/rotate_argb_test.cc create mode 100644 unit_test/rotate_test.cc create mode 100644 unit_test/scale_argb_test.cc create mode 100644 unit_test/scale_rgb_test.cc create mode 100644 unit_test/scale_test.cc create mode 100644 unit_test/scale_uv_test.cc create mode 100644 unit_test/testdata/arm_v7.txt create mode 100644 unit_test/testdata/juno.txt create mode 100644 unit_test/testdata/mips.txt create mode 100644 unit_test/testdata/mips_loongson2k.txt create mode 100644 unit_test/testdata/mips_loongson3.txt create mode 100644 unit_test/testdata/mips_loongson_mmi.txt create mode 100644 unit_test/testdata/mips_msa.txt create mode 100644 unit_test/testdata/riscv64.txt create mode 100644 unit_test/testdata/riscv64_rvv.txt create mode 100644 unit_test/testdata/riscv64_rvv_zvfh.txt create mode 100644 unit_test/testdata/tegra3.txt create mode 100644 unit_test/testdata/test0.jpg create mode 100644 unit_test/testdata/test1.jpg create mode 100644 unit_test/testdata/test2.jpg create mode 100644 unit_test/testdata/test3.jpg create mode 100644 unit_test/testdata/test4.jpg create mode 100644 unit_test/unit_test.cc create mode 100644 unit_test/unit_test.h create mode 100644 unit_test/video_common_test.cc (limited to 'unit_test') diff --git a/unit_test/basictypes_test.cc b/unit_test/basictypes_test.cc new file mode 100644 index 00000000..9aaa2dcd --- /dev/null +++ b/unit_test/basictypes_test.cc @@ -0,0 +1,43 @@ +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "../unit_test/unit_test.h" +#include "libyuv/basic_types.h" + +namespace libyuv { + +TEST_F(LibYUVBaseTest, SizeOfTypes) { + int8_t i8 = -1; + uint8_t u8 = 1u; + int16_t i16 = -1; + uint16_t u16 = 1u; + int32_t i32 = -1; + uint32_t u32 = 1u; + int64_t i64 = -1; + uint64_t u64 = 1u; + EXPECT_EQ(1u, sizeof(i8)); + EXPECT_EQ(1u, sizeof(u8)); + EXPECT_EQ(2u, sizeof(i16)); + EXPECT_EQ(2u, sizeof(u16)); + EXPECT_EQ(4u, sizeof(i32)); + EXPECT_EQ(4u, sizeof(u32)); + EXPECT_EQ(8u, sizeof(i64)); + EXPECT_EQ(8u, sizeof(u64)); + EXPECT_GT(0, i8); + EXPECT_LT(0u, u8); + EXPECT_GT(0, i16); + EXPECT_LT(0u, u16); + EXPECT_GT(0, i32); + EXPECT_LT(0u, u32); + EXPECT_GT(0, i64); + EXPECT_LT(0u, u64); +} + +} // namespace libyuv diff --git a/unit_test/color_test.cc b/unit_test/color_test.cc new file mode 100644 index 00000000..01267ff1 --- /dev/null +++ b/unit_test/color_test.cc @@ -0,0 +1,848 @@ +/* + * Copyright 2015 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "../unit_test/unit_test.h" +#include "libyuv/basic_types.h" +#include "libyuv/convert.h" +#include "libyuv/convert_argb.h" +#include "libyuv/convert_from.h" +#include "libyuv/convert_from_argb.h" +#include "libyuv/cpu_id.h" + +namespace libyuv { + +// TODO(fbarchard): clang x86 has a higher accuracy YUV to RGB. 
+// Port to Visual C and other CPUs +#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || defined(__i386__)) +#define ERROR_FULL 5 +#define ERROR_J420 4 +#else +#define ERROR_FULL 6 +#define ERROR_J420 6 +#endif +#define ERROR_R 1 +#define ERROR_G 1 +#ifdef LIBYUV_UNLIMITED_DATA +#define ERROR_B 1 +#else +#define ERROR_B 18 +#endif + +#define TESTCS(TESTNAME, YUVTOARGB, ARGBTOYUV, HS1, HS, HN, DIFF) \ + TEST_F(LibYUVColorTest, TESTNAME) { \ + const int kPixels = benchmark_width_ * benchmark_height_; \ + const int kHalfPixels = \ + ((benchmark_width_ + 1) / 2) * ((benchmark_height_ + HS1) / HS); \ + align_buffer_page_end(orig_y, kPixels); \ + align_buffer_page_end(orig_u, kHalfPixels); \ + align_buffer_page_end(orig_v, kHalfPixels); \ + align_buffer_page_end(orig_pixels, kPixels * 4); \ + align_buffer_page_end(temp_y, kPixels); \ + align_buffer_page_end(temp_u, kHalfPixels); \ + align_buffer_page_end(temp_v, kHalfPixels); \ + align_buffer_page_end(dst_pixels_opt, kPixels * 4); \ + align_buffer_page_end(dst_pixels_c, kPixels * 4); \ + \ + MemRandomize(orig_pixels, kPixels * 4); \ + MemRandomize(orig_y, kPixels); \ + MemRandomize(orig_u, kHalfPixels); \ + MemRandomize(orig_v, kHalfPixels); \ + MemRandomize(temp_y, kPixels); \ + MemRandomize(temp_u, kHalfPixels); \ + MemRandomize(temp_v, kHalfPixels); \ + MemRandomize(dst_pixels_opt, kPixels * 4); \ + MemRandomize(dst_pixels_c, kPixels * 4); \ + \ + /* The test is overall for color conversion matrix being reversible, so */ \ + /* this initializes the pixel with 2x2 blocks to eliminate subsampling. 
*/ \ + uint8_t* p = orig_y; \ + for (int y = 0; y < benchmark_height_ - HS1; y += HS) { \ + for (int x = 0; x < benchmark_width_ - 1; x += 2) { \ + uint8_t r = static_cast(fastrand()); \ + p[0] = r; \ + p[1] = r; \ + p[HN] = r; \ + p[HN + 1] = r; \ + p += 2; \ + } \ + if (benchmark_width_ & 1) { \ + uint8_t r = static_cast(fastrand()); \ + p[0] = r; \ + p[HN] = r; \ + p += 1; \ + } \ + p += HN; \ + } \ + if ((benchmark_height_ & 1) && HS == 2) { \ + for (int x = 0; x < benchmark_width_ - 1; x += 2) { \ + uint8_t r = static_cast(fastrand()); \ + p[0] = r; \ + p[1] = r; \ + p += 2; \ + } \ + if (benchmark_width_ & 1) { \ + uint8_t r = static_cast(fastrand()); \ + p[0] = r; \ + p += 1; \ + } \ + } \ + /* Start with YUV converted to ARGB. */ \ + YUVTOARGB(orig_y, benchmark_width_, orig_u, (benchmark_width_ + 1) / 2, \ + orig_v, (benchmark_width_ + 1) / 2, orig_pixels, \ + benchmark_width_ * 4, benchmark_width_, benchmark_height_); \ + \ + ARGBTOYUV(orig_pixels, benchmark_width_ * 4, temp_y, benchmark_width_, \ + temp_u, (benchmark_width_ + 1) / 2, temp_v, \ + (benchmark_width_ + 1) / 2, benchmark_width_, \ + benchmark_height_); \ + \ + MaskCpuFlags(disable_cpu_flags_); \ + YUVTOARGB(temp_y, benchmark_width_, temp_u, (benchmark_width_ + 1) / 2, \ + temp_v, (benchmark_width_ + 1) / 2, dst_pixels_c, \ + benchmark_width_ * 4, benchmark_width_, benchmark_height_); \ + MaskCpuFlags(benchmark_cpu_info_); \ + \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + YUVTOARGB(temp_y, benchmark_width_, temp_u, (benchmark_width_ + 1) / 2, \ + temp_v, (benchmark_width_ + 1) / 2, dst_pixels_opt, \ + benchmark_width_ * 4, benchmark_width_, benchmark_height_); \ + } \ + /* Test C and SIMD match. */ \ + for (int i = 0; i < kPixels * 4; ++i) { \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ + } \ + /* Test SIMD is close to original. 
*/ \ + for (int i = 0; i < kPixels * 4; ++i) { \ + EXPECT_NEAR(static_cast(orig_pixels[i]), \ + static_cast(dst_pixels_opt[i]), DIFF); \ + } \ + \ + free_aligned_buffer_page_end(orig_pixels); \ + free_aligned_buffer_page_end(orig_y); \ + free_aligned_buffer_page_end(orig_u); \ + free_aligned_buffer_page_end(orig_v); \ + free_aligned_buffer_page_end(temp_y); \ + free_aligned_buffer_page_end(temp_u); \ + free_aligned_buffer_page_end(temp_v); \ + free_aligned_buffer_page_end(dst_pixels_opt); \ + free_aligned_buffer_page_end(dst_pixels_c); \ + } + +TESTCS(TestI420, I420ToARGB, ARGBToI420, 1, 2, benchmark_width_, ERROR_FULL) +TESTCS(TestI422, I422ToARGB, ARGBToI422, 0, 1, 0, ERROR_FULL) +TESTCS(TestJ420, J420ToARGB, ARGBToJ420, 1, 2, benchmark_width_, ERROR_J420) +TESTCS(TestJ422, J422ToARGB, ARGBToJ422, 0, 1, 0, ERROR_J420) + +static void YUVToRGB(int y, int u, int v, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); + + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_u[8]); + SIMD_ALIGNED(uint8_t orig_v[8]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + memset(orig_u, u, kHalfPixels); + memset(orig_v, v, kHalfPixels); + + /* YUV converted to ARGB. 
*/ + I422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2, + orig_pixels, kWidth * 4, kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +static void YUVJToRGB(int y, int u, int v, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); + + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_u[8]); + SIMD_ALIGNED(uint8_t orig_v[8]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + memset(orig_u, u, kHalfPixels); + memset(orig_v, v, kHalfPixels); + + /* YUV converted to ARGB. */ + J422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2, + orig_pixels, kWidth * 4, kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +static void YUVHToRGB(int y, int u, int v, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); + + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_u[8]); + SIMD_ALIGNED(uint8_t orig_v[8]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + memset(orig_u, u, kHalfPixels); + memset(orig_v, v, kHalfPixels); + + /* YUV converted to ARGB. 
*/ + H422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2, + orig_pixels, kWidth * 4, kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +#define F422ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) + +static void YUVFToRGB(int y, int u, int v, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); + + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_u[8]); + SIMD_ALIGNED(uint8_t orig_v[8]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + memset(orig_u, u, kHalfPixels); + memset(orig_v, v, kHalfPixels); + + /* YUV converted to ARGB. */ + F422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2, + orig_pixels, kWidth * 4, kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +static void YUVUToRGB(int y, int u, int v, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); + + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_u[8]); + SIMD_ALIGNED(uint8_t orig_v[8]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + memset(orig_u, u, kHalfPixels); + memset(orig_v, v, kHalfPixels); + + /* YUV converted to ARGB. 
*/ + U422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2, + orig_pixels, kWidth * 4, kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +#define V422ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j) + +static void YUVVToRGB(int y, int u, int v, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); + + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_u[8]); + SIMD_ALIGNED(uint8_t orig_v[8]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + memset(orig_u, u, kHalfPixels); + memset(orig_v, v, kHalfPixels); + + /* YUV converted to ARGB. */ + V422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2, + orig_pixels, kWidth * 4, kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +static void YToRGB(int y, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + + /* YUV converted to ARGB. */ + I400ToARGB(orig_y, kWidth, orig_pixels, kWidth * 4, kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +static void YJToRGB(int y, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + + /* YUV converted to ARGB. */ + J400ToARGB(orig_y, kWidth, orig_pixels, kWidth * 4, kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +// Pick a method for clamping. 
+// #define CLAMPMETHOD_IF 1 +// #define CLAMPMETHOD_TABLE 1 +#define CLAMPMETHOD_TERNARY 1 +// #define CLAMPMETHOD_MASK 1 + +// Pick a method for rounding. +#define ROUND(f) static_cast(f + 0.5f) +// #define ROUND(f) lrintf(f) +// #define ROUND(f) static_cast(round(f)) +// #define ROUND(f) _mm_cvt_ss2si(_mm_load_ss(&f)) + +#if defined(CLAMPMETHOD_IF) +static int RoundToByte(float f) { + int i = ROUND(f); + if (i < 0) { + i = 0; + } + if (i > 255) { + i = 255; + } + return i; +} +#elif defined(CLAMPMETHOD_TABLE) +static const unsigned char clamptable[811] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, + 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, + 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, + 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, + 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, + 129, 130, 131, 
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, + 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, + 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, + 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, + 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, + 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, + 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, + 249, 250, 251, 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255}; + +static int RoundToByte(float f) { + return clamptable[ROUND(f) + 276]; +} +#elif defined(CLAMPMETHOD_TERNARY) +static int RoundToByte(float f) { + int i = ROUND(f); + return (i < 0) ? 0 : ((i > 255) ? 255 : i); +} +#elif defined(CLAMPMETHOD_MASK) +static int RoundToByte(float f) { + int i = ROUND(f); + i = ((-(i) >> 31) & (i)); // clamp to 0. + return (((255 - (i)) >> 31) | (i)) & 255; // clamp to 255. +} +#endif + +#define RANDOM256(s) ((s & 1) ? ((s >> 1) ^ 0xb8) : (s >> 1)) + +TEST_F(LibYUVColorTest, TestRoundToByte) { + int allb = 0; + int count = benchmark_width_ * benchmark_height_; + for (int i = 0; i < benchmark_iterations_; ++i) { + float f = (fastrand() & 255) * 3.14f - 260.f; + for (int j = 0; j < count; ++j) { + int b = RoundToByte(f); + f += 0.91f; + allb |= b; + } + } + EXPECT_GE(allb, 0); + EXPECT_LE(allb, 255); +} + +// BT.601 limited range YUV to RGB reference +static void YUVToRGBReference(int y, int u, int v, int* r, int* g, int* b) { + *r = RoundToByte((y - 16) * 1.164 - (v - 128) * -1.596); + *g = RoundToByte((y - 16) * 1.164 - (u - 128) * 0.391 - (v - 128) * 0.813); + *b = RoundToByte((y - 16) * 1.164 - (u - 128) * -2.018); +} + +// BT.601 full range YUV to RGB reference (aka JPEG) +static void YUVJToRGBReference(int y, int u, int v, int* r, int* g, int* b) { + *r = RoundToByte(y - (v - 128) * -1.40200); + *g = RoundToByte(y - (u - 128) * 0.34414 - (v - 128) * 0.71414); + *b = RoundToByte(y - (u - 128) * -1.77200); +} + +// BT.709 limited range YUV to RGB reference +// See also http://www.equasys.de/colorconversion.html +static void YUVHToRGBReference(int y, int u, int v, int* r, int* g, int* b) { + *r = RoundToByte((y - 16) * 1.164 - (v - 128) * -1.793); + *g = RoundToByte((y - 16) * 1.164 - (u - 128) * 0.213 - (v - 128) * 0.533); + *b = RoundToByte((y - 16) * 1.164 - (u - 128) * -2.112); +} + +// BT.709 full range YUV to RGB reference +static void 
YUVFToRGBReference(int y, int u, int v, int* r, int* g, int* b) { + *r = RoundToByte(y - (v - 128) * -1.5748); + *g = RoundToByte(y - (u - 128) * 0.18732 - (v - 128) * 0.46812); + *b = RoundToByte(y - (u - 128) * -1.8556); +} + +// BT.2020 limited range YUV to RGB reference +static void YUVUToRGBReference(int y, int u, int v, int* r, int* g, int* b) { + *r = RoundToByte((y - 16) * 1.164384 - (v - 128) * -1.67867); + *g = RoundToByte((y - 16) * 1.164384 - (u - 128) * 0.187326 - + (v - 128) * 0.65042); + *b = RoundToByte((y - 16) * 1.164384 - (u - 128) * -2.14177); +} + +// BT.2020 full range YUV to RGB reference +static void YUVVToRGBReference(int y, int u, int v, int* r, int* g, int* b) { + *r = RoundToByte(y + (v - 128) * 1.474600); + *g = RoundToByte(y - (u - 128) * 0.164553 - (v - 128) * 0.571353); + *b = RoundToByte(y + (u - 128) * 1.881400); +} + +TEST_F(LibYUVColorTest, TestYUV) { + int r0, g0, b0, r1, g1, b1; + + // cyan (less red) + YUVToRGBReference(240, 255, 0, &r0, &g0, &b0); + EXPECT_EQ(56, r0); + EXPECT_EQ(255, g0); + EXPECT_EQ(255, b0); + + YUVToRGB(240, 255, 0, &r1, &g1, &b1); + EXPECT_EQ(57, r1); + EXPECT_EQ(255, g1); + EXPECT_EQ(255, b1); + + // green (less red and blue) + YUVToRGBReference(240, 0, 0, &r0, &g0, &b0); + EXPECT_EQ(56, r0); + EXPECT_EQ(255, g0); + EXPECT_EQ(2, b0); + + YUVToRGB(240, 0, 0, &r1, &g1, &b1); + EXPECT_EQ(57, r1); + EXPECT_EQ(255, g1); +#ifdef LIBYUV_UNLIMITED_DATA + EXPECT_EQ(3, b1); +#else + EXPECT_EQ(5, b1); +#endif + + for (int i = 0; i < 256; ++i) { + YUVToRGBReference(i, 128, 128, &r0, &g0, &b0); + YUVToRGB(i, 128, 128, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); + + YUVToRGBReference(i, 0, 0, &r0, &g0, &b0); + YUVToRGB(i, 0, 0, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); + + YUVToRGBReference(i, 0, 255, &r0, &g0, &b0); + YUVToRGB(i, 0, 255, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, 
ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); + } +} + +TEST_F(LibYUVColorTest, TestGreyYUV) { + int r0, g0, b0, r1, g1, b1, r2, g2, b2; + + // black + YUVToRGBReference(16, 128, 128, &r0, &g0, &b0); + EXPECT_EQ(0, r0); + EXPECT_EQ(0, g0); + EXPECT_EQ(0, b0); + + YUVToRGB(16, 128, 128, &r1, &g1, &b1); + EXPECT_EQ(0, r1); + EXPECT_EQ(0, g1); + EXPECT_EQ(0, b1); + + // white + YUVToRGBReference(240, 128, 128, &r0, &g0, &b0); + EXPECT_EQ(255, r0); + EXPECT_EQ(255, g0); + EXPECT_EQ(255, b0); + + YUVToRGB(240, 128, 128, &r1, &g1, &b1); + EXPECT_EQ(255, r1); + EXPECT_EQ(255, g1); + EXPECT_EQ(255, b1); + + // grey + YUVToRGBReference(128, 128, 128, &r0, &g0, &b0); + EXPECT_EQ(130, r0); + EXPECT_EQ(130, g0); + EXPECT_EQ(130, b0); + + YUVToRGB(128, 128, 128, &r1, &g1, &b1); + EXPECT_EQ(130, r1); + EXPECT_EQ(130, g1); + EXPECT_EQ(130, b1); + + for (int y = 0; y < 256; ++y) { + YUVToRGBReference(y, 128, 128, &r0, &g0, &b0); + YUVToRGB(y, 128, 128, &r1, &g1, &b1); + YToRGB(y, &r2, &g2, &b2); + EXPECT_EQ(r0, r1); + EXPECT_EQ(g0, g1); + EXPECT_EQ(b0, b1); + EXPECT_EQ(r0, r2); + EXPECT_EQ(g0, g2); + EXPECT_EQ(b0, b2); + } +} + +static void PrintHistogram(int rh[256], int gh[256], int bh[256]) { + int i; + printf("hist "); + for (i = 0; i < 256; ++i) { + if (rh[i] || gh[i] || bh[i]) { + printf(" %8d", i - 128); + } + } + printf("\nred "); + for (i = 0; i < 256; ++i) { + if (rh[i] || gh[i] || bh[i]) { + printf(" %8d", rh[i]); + } + } + printf("\ngreen"); + for (i = 0; i < 256; ++i) { + if (rh[i] || gh[i] || bh[i]) { + printf(" %8d", gh[i]); + } + } + printf("\nblue "); + for (i = 0; i < 256; ++i) { + if (rh[i] || gh[i] || bh[i]) { + printf(" %8d", bh[i]); + } + } + printf("\n"); +} + +// Step by 5 on inner loop goes from 0 to 255 inclusive. +// Set to 1 for better converage. 3, 5 or 17 for faster testing. +#ifdef DISABLE_SLOW_TESTS +#define FASTSTEP 5 +#else +#define FASTSTEP 1 +#endif + +// BT.601 limited range. 
+TEST_F(LibYUVColorTest, TestFullYUV) { + int rh[256] = { + 0, + }; + int gh[256] = { + 0, + }; + int bh[256] = { + 0, + }; + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { + for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { + int r0, g0, b0, r1, g1, b1; + int y = RANDOM256(y2); + YUVToRGBReference(y, u, v, &r0, &g0, &b0); + YUVToRGB(y, u, v, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); + ++rh[r1 - r0 + 128]; + ++gh[g1 - g0 + 128]; + ++bh[b1 - b0 + 128]; + } + } + } + PrintHistogram(rh, gh, bh); +} + +// BT.601 full range. +TEST_F(LibYUVColorTest, TestFullYUVJ) { + int rh[256] = { + 0, + }; + int gh[256] = { + 0, + }; + int bh[256] = { + 0, + }; + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { + for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { + int r0, g0, b0, r1, g1, b1; + int y = RANDOM256(y2); + YUVJToRGBReference(y, u, v, &r0, &g0, &b0); + YUVJToRGB(y, u, v, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); + ++rh[r1 - r0 + 128]; + ++gh[g1 - g0 + 128]; + ++bh[b1 - b0 + 128]; + } + } + } + PrintHistogram(rh, gh, bh); +} + +// BT.709 limited range. +TEST_F(LibYUVColorTest, TestFullYUVH) { + int rh[256] = { + 0, + }; + int gh[256] = { + 0, + }; + int bh[256] = { + 0, + }; + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { + for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { + int r0, g0, b0, r1, g1, b1; + int y = RANDOM256(y2); + YUVHToRGBReference(y, u, v, &r0, &g0, &b0); + YUVHToRGB(y, u, v, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); + ++rh[r1 - r0 + 128]; + ++gh[g1 - g0 + 128]; + ++bh[b1 - b0 + 128]; + } + } + } + PrintHistogram(rh, gh, bh); +} + +// BT.709 full range. 
+TEST_F(LibYUVColorTest, TestFullYUVF) { + int rh[256] = { + 0, + }; + int gh[256] = { + 0, + }; + int bh[256] = { + 0, + }; + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { + for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { + int r0, g0, b0, r1, g1, b1; + int y = RANDOM256(y2); + YUVFToRGBReference(y, u, v, &r0, &g0, &b0); + YUVFToRGB(y, u, v, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); + ++rh[r1 - r0 + 128]; + ++gh[g1 - g0 + 128]; + ++bh[b1 - b0 + 128]; + } + } + } + PrintHistogram(rh, gh, bh); +} + +// BT.2020 limited range. +TEST_F(LibYUVColorTest, TestFullYUVU) { + int rh[256] = { + 0, + }; + int gh[256] = { + 0, + }; + int bh[256] = { + 0, + }; + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { + for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { + int r0, g0, b0, r1, g1, b1; + int y = RANDOM256(y2); + YUVUToRGBReference(y, u, v, &r0, &g0, &b0); + YUVUToRGB(y, u, v, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); + ++rh[r1 - r0 + 128]; + ++gh[g1 - g0 + 128]; + ++bh[b1 - b0 + 128]; + } + } + } + PrintHistogram(rh, gh, bh); +} + +// BT.2020 full range. 
+TEST_F(LibYUVColorTest, TestFullYUVV) { + int rh[256] = { + 0, + }; + int gh[256] = { + 0, + }; + int bh[256] = { + 0, + }; + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { + for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { + int r0, g0, b0, r1, g1, b1; + int y = RANDOM256(y2); + YUVVToRGBReference(y, u, v, &r0, &g0, &b0); + YUVVToRGB(y, u, v, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, 2); + EXPECT_NEAR(b0, b1, ERROR_B); + ++rh[r1 - r0 + 128]; + ++gh[g1 - g0 + 128]; + ++bh[b1 - b0 + 128]; + } + } + } + PrintHistogram(rh, gh, bh); +} +#undef FASTSTEP + +TEST_F(LibYUVColorTest, TestGreyYUVJ) { + int r0, g0, b0, r1, g1, b1, r2, g2, b2; + + // black + YUVJToRGBReference(0, 128, 128, &r0, &g0, &b0); + EXPECT_EQ(0, r0); + EXPECT_EQ(0, g0); + EXPECT_EQ(0, b0); + + YUVJToRGB(0, 128, 128, &r1, &g1, &b1); + EXPECT_EQ(0, r1); + EXPECT_EQ(0, g1); + EXPECT_EQ(0, b1); + + // white + YUVJToRGBReference(255, 128, 128, &r0, &g0, &b0); + EXPECT_EQ(255, r0); + EXPECT_EQ(255, g0); + EXPECT_EQ(255, b0); + + YUVJToRGB(255, 128, 128, &r1, &g1, &b1); + EXPECT_EQ(255, r1); + EXPECT_EQ(255, g1); + EXPECT_EQ(255, b1); + + // grey + YUVJToRGBReference(128, 128, 128, &r0, &g0, &b0); + EXPECT_EQ(128, r0); + EXPECT_EQ(128, g0); + EXPECT_EQ(128, b0); + + YUVJToRGB(128, 128, 128, &r1, &g1, &b1); + EXPECT_EQ(128, r1); + EXPECT_EQ(128, g1); + EXPECT_EQ(128, b1); + + for (int y = 0; y < 256; ++y) { + YUVJToRGBReference(y, 128, 128, &r0, &g0, &b0); + YUVJToRGB(y, 128, 128, &r1, &g1, &b1); + YJToRGB(y, &r2, &g2, &b2); + EXPECT_EQ(r0, r1); + EXPECT_EQ(g0, g1); + EXPECT_EQ(b0, b1); + EXPECT_EQ(r0, r2); + EXPECT_EQ(g0, g2); + EXPECT_EQ(b0, b2); + } +} + +} // namespace libyuv diff --git a/unit_test/compare_test.cc b/unit_test/compare_test.cc new file mode 100644 index 00000000..c29562cb --- /dev/null +++ b/unit_test/compare_test.cc @@ -0,0 +1,739 @@ +/* + * Copyright 2011 The LibYuv Project Authors. All rights reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include + +#include "../unit_test/unit_test.h" +#include "libyuv/basic_types.h" +#include "libyuv/compare.h" +#include "libyuv/cpu_id.h" +#include "libyuv/video_common.h" + +#ifdef ENABLE_ROW_TESTS +#include "libyuv/compare_row.h" /* For HammingDistance_C */ +#endif + +namespace libyuv { + +// hash seed of 5381 recommended. +static uint32_t ReferenceHashDjb2(const uint8_t* src, + uint64_t count, + uint32_t seed) { + uint32_t hash = seed; + if (count > 0) { + do { + hash = hash * 33 + *src++; + } while (--count); + } + return hash; +} + +TEST_F(LibYUVCompareTest, Djb2_Test) { + const int kMaxTest = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_a, kMaxTest); + align_buffer_page_end(src_b, kMaxTest); + + const char* fox = + "The quick brown fox jumps over the lazy dog" + " and feels as if he were in the seventh heaven of typography" + " together with Hermann Zapf"; + uint32_t foxhash = HashDjb2(reinterpret_cast(fox), 131, 5381); + const uint32_t kExpectedFoxHash = 2611006483u; + EXPECT_EQ(kExpectedFoxHash, foxhash); + + for (int i = 0; i < kMaxTest; ++i) { + src_a[i] = (fastrand() & 0xff); + src_b[i] = (fastrand() & 0xff); + } + // Compare different buffers. Expect hash is different. + uint32_t h1 = HashDjb2(src_a, kMaxTest, 5381); + uint32_t h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_NE(h1, h2); + + // Make last half same. Expect hash is different. + memcpy(src_a + kMaxTest / 2, src_b + kMaxTest / 2, kMaxTest / 2); + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_NE(h1, h2); + + // Make first half same. Expect hash is different. 
+ memcpy(src_a + kMaxTest / 2, src_a, kMaxTest / 2); + memcpy(src_b + kMaxTest / 2, src_b, kMaxTest / 2); + memcpy(src_a, src_b, kMaxTest / 2); + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_NE(h1, h2); + + // Make same. Expect hash is same. + memcpy(src_a, src_b, kMaxTest); + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_EQ(h1, h2); + + // Mask seed different. Expect hash is different. + memcpy(src_a, src_b, kMaxTest); + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 1234); + EXPECT_NE(h1, h2); + + // Make one byte different in middle. Expect hash is different. + memcpy(src_a, src_b, kMaxTest); + ++src_b[kMaxTest / 2]; + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_NE(h1, h2); + + // Make first byte different. Expect hash is different. + memcpy(src_a, src_b, kMaxTest); + ++src_b[0]; + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_NE(h1, h2); + + // Make last byte different. Expect hash is different. + memcpy(src_a, src_b, kMaxTest); + ++src_b[kMaxTest - 1]; + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_NE(h1, h2); + + // Make a zeros. Test different lengths. Expect hash is different. + memset(src_a, 0, kMaxTest); + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_a, kMaxTest / 2, 5381); + EXPECT_NE(h1, h2); + + // Make a zeros and seed of zero. Test different lengths. Expect hash is same. 
+ memset(src_a, 0, kMaxTest); + h1 = HashDjb2(src_a, kMaxTest, 0); + h2 = HashDjb2(src_a, kMaxTest / 2, 0); + EXPECT_EQ(h1, h2); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, BenchmarkDjb2_Opt) { + const int kMaxTest = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_a, kMaxTest); + + for (int i = 0; i < kMaxTest; ++i) { + src_a[i] = i; + } + uint32_t h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381); + uint32_t h1; + for (int i = 0; i < benchmark_iterations_; ++i) { + h1 = HashDjb2(src_a, kMaxTest, 5381); + } + EXPECT_EQ(h1, h2); + free_aligned_buffer_page_end(src_a); +} + +TEST_F(LibYUVCompareTest, BenchmarkDjb2_Unaligned) { + const int kMaxTest = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_a, kMaxTest + 1); + for (int i = 0; i < kMaxTest; ++i) { + src_a[i + 1] = i; + } + uint32_t h2 = ReferenceHashDjb2(src_a + 1, kMaxTest, 5381); + uint32_t h1; + for (int i = 0; i < benchmark_iterations_; ++i) { + h1 = HashDjb2(src_a + 1, kMaxTest, 5381); + } + EXPECT_EQ(h1, h2); + free_aligned_buffer_page_end(src_a); +} + +TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Opt) { + uint32_t fourcc; + const int kMaxTest = benchmark_width_ * benchmark_height_ * 4; + align_buffer_page_end(src_a, kMaxTest); + for (int i = 0; i < kMaxTest; ++i) { + src_a[i] = 255; + } + + src_a[0] = 0; + fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + EXPECT_EQ(static_cast(libyuv::FOURCC_BGRA), fourcc); + src_a[0] = 255; + src_a[3] = 0; + fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + EXPECT_EQ(static_cast(libyuv::FOURCC_ARGB), fourcc); + src_a[3] = 255; + + for (int i = 0; i < benchmark_iterations_; ++i) { + fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + } + EXPECT_EQ(0u, fourcc); + + free_aligned_buffer_page_end(src_a); +} + +TEST_F(LibYUVCompareTest, 
BenchmarkARGBDetect_Unaligned) { + uint32_t fourcc; + const int kMaxTest = benchmark_width_ * benchmark_height_ * 4 + 1; + align_buffer_page_end(src_a, kMaxTest); + for (int i = 1; i < kMaxTest; ++i) { + src_a[i] = 255; + } + + src_a[0 + 1] = 0; + fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + EXPECT_EQ(static_cast(libyuv::FOURCC_BGRA), fourcc); + src_a[0 + 1] = 255; + src_a[3 + 1] = 0; + fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + EXPECT_EQ(static_cast(libyuv::FOURCC_ARGB), fourcc); + src_a[3 + 1] = 255; + + for (int i = 0; i < benchmark_iterations_; ++i) { + fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + } + EXPECT_EQ(0u, fourcc); + + free_aligned_buffer_page_end(src_a); +} + +#ifdef ENABLE_ROW_TESTS +TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) { + const int kMaxWidth = 4096 * 3; + align_buffer_page_end(src_a, kMaxWidth); + align_buffer_page_end(src_b, kMaxWidth); + memset(src_a, 0, kMaxWidth); + memset(src_b, 0, kMaxWidth); + + // Test known value + memcpy(src_a, "test0123test4567", 16); + memcpy(src_b, "tick0123tock4567", 16); + uint32_t h1 = HammingDistance_C(src_a, src_b, 16); + EXPECT_EQ(16u, h1); + + // Test C vs OPT on random buffer + MemRandomize(src_a, kMaxWidth); + MemRandomize(src_b, kMaxWidth); + + uint32_t h0 = HammingDistance_C(src_a, src_b, kMaxWidth); + + int count = + benchmark_iterations_ * + ((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth); + for (int i = 0; i < count; ++i) { +#if defined(HAS_HAMMINGDISTANCE_NEON) + h1 = HammingDistance_NEON(src_a, src_b, kMaxWidth); +#elif defined(HAS_HAMMINGDISTANCE_AVX2) + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + if (has_avx2) { + h1 = HammingDistance_AVX2(src_a, src_b, kMaxWidth); + } else { + int has_sse42 = TestCpuFlag(kCpuHasSSE42); + if (has_sse42) { + h1 = HammingDistance_SSE42(src_a, src_b, kMaxWidth); + } else { + int 
has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + if (has_ssse3) { + h1 = HammingDistance_SSSE3(src_a, src_b, kMaxWidth); + } else { + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); + } + } + } +#elif defined(HAS_HAMMINGDISTANCE_SSE42) + int has_sse42 = TestCpuFlag(kCpuHasSSE42); + if (has_sse42) { + h1 = HammingDistance_SSE42(src_a, src_b, kMaxWidth); + } else { + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); + } +#else + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); +#endif + } + EXPECT_EQ(h0, h1); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_C) { + const int kMaxWidth = 4096 * 3; + align_buffer_page_end(src_a, kMaxWidth); + align_buffer_page_end(src_b, kMaxWidth); + memset(src_a, 0, kMaxWidth); + memset(src_b, 0, kMaxWidth); + + // Test known value + memcpy(src_a, "test0123test4567", 16); + memcpy(src_b, "tick0123tock4567", 16); + uint32_t h1 = HammingDistance_C(src_a, src_b, 16); + EXPECT_EQ(16u, h1); + + // Test C vs OPT on random buffer + MemRandomize(src_a, kMaxWidth); + MemRandomize(src_b, kMaxWidth); + + uint32_t h0 = HammingDistance_C(src_a, src_b, kMaxWidth); + + int count = + benchmark_iterations_ * + ((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth); + for (int i = 0; i < count; ++i) { + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); + } + + EXPECT_EQ(h0, h1); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, BenchmarkHammingDistance) { + const int kMaxWidth = 4096 * 3; + align_buffer_page_end(src_a, kMaxWidth); + align_buffer_page_end(src_b, kMaxWidth); + memset(src_a, 0, kMaxWidth); + memset(src_b, 0, kMaxWidth); + + memcpy(src_a, "test0123test4567", 16); + memcpy(src_b, "tick0123tock4567", 16); + uint64_t h1 = ComputeHammingDistance(src_a, src_b, 16); + EXPECT_EQ(16u, h1); + + // Test C vs OPT on random buffer + MemRandomize(src_a, kMaxWidth); + MemRandomize(src_b, kMaxWidth); + 
+ uint32_t h0 = HammingDistance_C(src_a, src_b, kMaxWidth); + + int count = + benchmark_iterations_ * + ((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth); + for (int i = 0; i < count; ++i) { + h1 = ComputeHammingDistance(src_a, src_b, kMaxWidth); + } + + EXPECT_EQ(h0, h1); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +// Tests low levels match reference C for specified size. +// The opt implementations have size limitations +// For NEON the counters are 16 bit so the shorts overflow after 65536 bytes. +// So doing one less iteration of the loop is the maximum. +#if defined(HAS_HAMMINGDISTANCE_NEON) +static const int kMaxOptCount = 65536 - 32; // 65504 +#else +static const int kMaxOptCount = (1 << (32 - 3)) - 64; // 536870848 +#endif + +TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) { + uint32_t h1 = 0; + const int kMaxWidth = (benchmark_width_ * benchmark_height_ + 63) & ~63; + align_buffer_page_end(src_a, kMaxWidth); + align_buffer_page_end(src_b, kMaxWidth); + memset(src_a, 255u, kMaxWidth); + memset(src_b, 0u, kMaxWidth); + + uint64_t h0 = ComputeHammingDistance(src_a, src_b, kMaxWidth); + EXPECT_EQ(kMaxWidth * 8ULL, h0); + + for (int i = 0; i < benchmark_iterations_; ++i) { +#if defined(HAS_HAMMINGDISTANCE_NEON) + h1 = HammingDistance_NEON(src_a, src_b, kMaxWidth); +#elif defined(HAS_HAMMINGDISTANCE_AVX2) + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + if (has_avx2) { + h1 = HammingDistance_AVX2(src_a, src_b, kMaxWidth); + } else { + int has_sse42 = TestCpuFlag(kCpuHasSSE42); + if (has_sse42) { + h1 = HammingDistance_SSE42(src_a, src_b, kMaxWidth); + } else { + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + if (has_ssse3) { + h1 = HammingDistance_SSSE3(src_a, src_b, kMaxWidth); + } else { + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); + } + } + } +#elif defined(HAS_HAMMINGDISTANCE_SSE42) + int has_sse42 = TestCpuFlag(kCpuHasSSE42); + if (has_sse42) { + h1 = HammingDistance_SSE42(src_a, src_b, 
kMaxWidth); + } else { + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); + } +#else + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); +#endif + } + + // A large count will cause the low level to potentially overflow so the + // result can not be expected to be correct. + // TODO(fbarchard): Consider expecting the low 16 bits to match. + if (kMaxWidth <= kMaxOptCount) { + EXPECT_EQ(kMaxWidth * 8U, h1); + } else { + if (kMaxWidth * 8ULL != static_cast(h1)) { + printf( + "warning - HammingDistance_Opt %u does not match %llu " + "but length of %u is longer than guaranteed.\n", + h1, kMaxWidth * 8ULL, kMaxWidth); + } else { + printf( + "warning - HammingDistance_Opt %u matches but length of %u " + "is longer than guaranteed.\n", + h1, kMaxWidth); + } + } + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} +#endif // ENABLE_ROW_TESTS + +TEST_F(LibYUVCompareTest, TestHammingDistance) { + align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_); + align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_); + memset(src_a, 255u, benchmark_width_ * benchmark_height_); + memset(src_b, 0, benchmark_width_ * benchmark_height_); + + uint64_t h1 = 0; + for (int i = 0; i < benchmark_iterations_; ++i) { + h1 = ComputeHammingDistance(src_a, src_b, + benchmark_width_ * benchmark_height_); + } + EXPECT_EQ(benchmark_width_ * benchmark_height_ * 8ULL, h1); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, BenchmarkSumSquareError_Opt) { + const int kMaxWidth = 4096 * 3; + align_buffer_page_end(src_a, kMaxWidth); + align_buffer_page_end(src_b, kMaxWidth); + memset(src_a, 0, kMaxWidth); + memset(src_b, 0, kMaxWidth); + + memcpy(src_a, "test0123test4567", 16); + memcpy(src_b, "tick0123tock4567", 16); + uint64_t h1 = ComputeSumSquareError(src_a, src_b, 16); + EXPECT_EQ(790u, h1); + + for (int i = 0; i < kMaxWidth; ++i) { + src_a[i] = i; + src_b[i] = i; + } + memset(src_a, 0, 
kMaxWidth); + memset(src_b, 0, kMaxWidth); + + int count = + benchmark_iterations_ * + ((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth); + for (int i = 0; i < count; ++i) { + h1 = ComputeSumSquareError(src_a, src_b, kMaxWidth); + } + + EXPECT_EQ(0u, h1); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, SumSquareError) { + const int kMaxWidth = 4096 * 3; + align_buffer_page_end(src_a, kMaxWidth); + align_buffer_page_end(src_b, kMaxWidth); + memset(src_a, 0, kMaxWidth); + memset(src_b, 0, kMaxWidth); + + uint64_t err; + err = ComputeSumSquareError(src_a, src_b, kMaxWidth); + + EXPECT_EQ(0u, err); + + memset(src_a, 1, kMaxWidth); + err = ComputeSumSquareError(src_a, src_b, kMaxWidth); + + EXPECT_EQ(static_cast(err), kMaxWidth); + + memset(src_a, 190, kMaxWidth); + memset(src_b, 193, kMaxWidth); + err = ComputeSumSquareError(src_a, src_b, kMaxWidth); + + EXPECT_EQ(static_cast(err), kMaxWidth * 3 * 3); + + for (int i = 0; i < kMaxWidth; ++i) { + src_a[i] = (fastrand() & 0xff); + src_b[i] = (fastrand() & 0xff); + } + + MaskCpuFlags(disable_cpu_flags_); + uint64_t c_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); + + MaskCpuFlags(benchmark_cpu_info_); + uint64_t opt_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); + + EXPECT_EQ(c_err, opt_err); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, BenchmarkPsnr_Opt) { + align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_); + align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_); + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + src_a[i] = i; + src_b[i] = i; + } + + MaskCpuFlags(benchmark_cpu_info_); + + double opt_time = get_time(); + for (int i = 0; i < benchmark_iterations_; ++i) { + CalcFramePsnr(src_a, benchmark_width_, src_b, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + opt_time = (get_time() - 
opt_time) / benchmark_iterations_; + printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6); + + EXPECT_EQ(0, 0); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, BenchmarkPsnr_Unaligned) { + align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_ + 1); + align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_); + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + src_a[i + 1] = i; + src_b[i] = i; + } + + MaskCpuFlags(benchmark_cpu_info_); + + double opt_time = get_time(); + for (int i = 0; i < benchmark_iterations_; ++i) { + CalcFramePsnr(src_a + 1, benchmark_width_, src_b, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + opt_time = (get_time() - opt_time) / benchmark_iterations_; + printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6); + + EXPECT_EQ(0, 0); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, Psnr) { + const int kSrcWidth = benchmark_width_; + const int kSrcHeight = benchmark_height_; + const int b = 128; + const int kSrcPlaneSize = (kSrcWidth + b * 2) * (kSrcHeight + b * 2); + const int kSrcStride = 2 * b + kSrcWidth; + align_buffer_page_end(src_a, kSrcPlaneSize); + align_buffer_page_end(src_b, kSrcPlaneSize); + memset(src_a, 0, kSrcPlaneSize); + memset(src_b, 0, kSrcPlaneSize); + + double err; + err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride, + src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, + kSrcHeight); + + EXPECT_EQ(err, kMaxPsnr); + + memset(src_a, 255, kSrcPlaneSize); + + err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride, + src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, + kSrcHeight); + + EXPECT_EQ(err, 0.0); + + memset(src_a, 1, kSrcPlaneSize); + + err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride, + src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, + kSrcHeight); + + EXPECT_GT(err, 48.0); + EXPECT_LT(err, 49.0); + + 
for (int i = 0; i < kSrcPlaneSize; ++i) { + src_a[i] = i; + } + + err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride, + src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, + kSrcHeight); + + EXPECT_GT(err, 2.0); + if (kSrcWidth * kSrcHeight >= 256) { + EXPECT_LT(err, 6.0); + } + + memset(src_a, 0, kSrcPlaneSize); + memset(src_b, 0, kSrcPlaneSize); + + for (int i = b; i < (kSrcHeight + b); ++i) { + for (int j = b; j < (kSrcWidth + b); ++j) { + src_a[(i * kSrcStride) + j] = (fastrand() & 0xff); + src_b[(i * kSrcStride) + j] = (fastrand() & 0xff); + } + } + + MaskCpuFlags(disable_cpu_flags_); + double c_err, opt_err; + + c_err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride, + src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, + kSrcHeight); + + MaskCpuFlags(benchmark_cpu_info_); + + opt_err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride, + src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, + kSrcHeight); + + EXPECT_EQ(opt_err, c_err); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, DISABLED_BenchmarkSsim_Opt) { + align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_); + align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_); + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + src_a[i] = i; + src_b[i] = i; + } + + MaskCpuFlags(benchmark_cpu_info_); + + double opt_time = get_time(); + for (int i = 0; i < benchmark_iterations_; ++i) { + CalcFrameSsim(src_a, benchmark_width_, src_b, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + opt_time = (get_time() - opt_time) / benchmark_iterations_; + printf("BenchmarkSsim_Opt - %8.2f us opt\n", opt_time * 1e6); + + EXPECT_EQ(0, 0); // Pass if we get this far. 
+ + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, Ssim) { + const int kSrcWidth = benchmark_width_; + const int kSrcHeight = benchmark_height_; + const int b = 128; + const int kSrcPlaneSize = (kSrcWidth + b * 2) * (kSrcHeight + b * 2); + const int kSrcStride = 2 * b + kSrcWidth; + align_buffer_page_end(src_a, kSrcPlaneSize); + align_buffer_page_end(src_b, kSrcPlaneSize); + memset(src_a, 0, kSrcPlaneSize); + memset(src_b, 0, kSrcPlaneSize); + + if (kSrcWidth <= 8 || kSrcHeight <= 8) { + printf("warning - Ssim size too small. Testing function executes.\n"); + } + + double err; + err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride, + src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, + kSrcHeight); + + if (kSrcWidth > 8 && kSrcHeight > 8) { + EXPECT_EQ(err, 1.0); + } + + memset(src_a, 255, kSrcPlaneSize); + + err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride, + src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, + kSrcHeight); + + if (kSrcWidth > 8 && kSrcHeight > 8) { + EXPECT_LT(err, 0.0001); + } + + memset(src_a, 1, kSrcPlaneSize); + + err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride, + src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, + kSrcHeight); + + if (kSrcWidth > 8 && kSrcHeight > 8) { + EXPECT_GT(err, 0.0001); + EXPECT_LT(err, 0.9); + } + + for (int i = 0; i < kSrcPlaneSize; ++i) { + src_a[i] = i; + } + + err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride, + src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, + kSrcHeight); + + if (kSrcWidth > 8 && kSrcHeight > 8) { + EXPECT_GT(err, 0.0); + EXPECT_LT(err, 0.01); + } + + for (int i = b; i < (kSrcHeight + b); ++i) { + for (int j = b; j < (kSrcWidth + b); ++j) { + src_a[(i * kSrcStride) + j] = (fastrand() & 0xff); + src_b[(i * kSrcStride) + j] = (fastrand() & 0xff); + } + } + + MaskCpuFlags(disable_cpu_flags_); + double c_err, opt_err; + + c_err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride, + src_b + 
kSrcStride * b + b, kSrcStride, kSrcWidth, + kSrcHeight); + + MaskCpuFlags(benchmark_cpu_info_); + + opt_err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride, + src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, + kSrcHeight); + + if (kSrcWidth > 8 && kSrcHeight > 8) { + EXPECT_EQ(opt_err, c_err); + } + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +} // namespace libyuv diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc new file mode 100644 index 00000000..1f1896b0 --- /dev/null +++ b/unit_test/convert_test.cc @@ -0,0 +1,4647 @@ +/* + * Copyright 2011 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include + +#include "libyuv/basic_types.h" +#include "libyuv/compare.h" +#include "libyuv/convert.h" +#include "libyuv/convert_argb.h" +#include "libyuv/convert_from.h" +#include "libyuv/convert_from_argb.h" +#include "libyuv/cpu_id.h" +#ifdef HAVE_JPEG +#include "libyuv/mjpeg_decoder.h" +#endif +#include "../unit_test/unit_test.h" +#include "libyuv/planar_functions.h" +#include "libyuv/rotate.h" +#include "libyuv/video_common.h" + +#ifdef ENABLE_ROW_TESTS +#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */ +#endif + +// Some functions fail on big endian. Enable these tests on all cpus except +// PowerPC, but they are not optimized so disabled by default. +#if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__) +#define LITTLE_ENDIAN_ONLY_TEST 1 +#endif +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +// SLOW TESTS are those that are unoptimized C code. 
+// FULL TESTS are optimized but test many variations of the same code. +#define ENABLE_FULL_TESTS +#endif + +namespace libyuv { + +// Alias to copy pixels as is +#define AR30ToAR30 ARGBCopy +#define ABGRToABGR ARGBCopy + +// subsample amount uses a divide. +#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) + +// Planar test + +#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ + SRC_DEPTH) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_u, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_v, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_opt, kDstHalfWidth* 
kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ + MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + SRC_T* src_y_p = reinterpret_cast(src_y + OFF); \ + SRC_T* src_u_p = reinterpret_cast(src_u + OFF); \ + SRC_T* src_v_p = reinterpret_cast(src_v + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \ + } \ + for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \ + src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \ + src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ + reinterpret_cast(dst_y_c), kWidth, \ + reinterpret_cast(dst_u_c), kDstHalfWidth, \ + reinterpret_cast(dst_v_c), kDstHalfWidth, kWidth, \ + NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ + reinterpret_cast(dst_y_opt), kWidth, \ + reinterpret_cast(dst_u_opt), kDstHalfWidth, \ + reinterpret_cast(dst_v_opt), kDstHalfWidth, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ + } \ + for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ + EXPECT_EQ(dst_u_c[i], 
dst_u_opt[i]); \ + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_u_c); \ + free_aligned_buffer_page_end(dst_v_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_u_opt); \ + free_aligned_buffer_page_end(dst_v_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + } + +#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH) + +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2, 8) +TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2, 8) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1, 8) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1, 8) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2, 8) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1, 8) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I444, uint8_t, 1, 1, 1, 8) +TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1, 8) 
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2, 8) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I012, uint16_t, 2, 2, 2, 8) +TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2, 8) +TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H012, uint16_t, 2, 2, 2, 8) +TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I410, uint16_t, 2, 1, 1, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I410, uint16_t, 2, 1, 1, 10) +TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I412, uint16_t, 2, 1, 1, 12) +TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I412, uint16_t, 2, 1, 1, 12) +TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12) +TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12) +TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10) +TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10) +TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12) +TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 12) +TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12) +TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 12) +TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12) + +// Test Android 420 to I420 +#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF, PN, OFF_U, OFF_V) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##To##PN##N) { \ + const int kWidth = 
W1280; \ + const int kHeight = benchmark_height_; \ + const int kSizeUV = \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_uv, \ + kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + uint8_t* src_u = src_uv + OFF_U; \ + uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \ + int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ + src_u[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \ + (fastrand() & 0xff); \ + src_v[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \ + (fastrand() & 0xff); \ + } \ + } \ + memset(dst_y_c, 1, kWidth* kHeight); \ + memset(dst_u_c, 2, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_c, 3, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_u_opt, 102, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_opt, 103, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y + OFF, kWidth, src_u + OFF, 
SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, dst_y_c, \ + kWidth, dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \ + SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, \ + dst_y_opt, kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ + dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ + EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ + dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ + EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ + dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_u_c); \ + free_aligned_buffer_page_end(dst_v_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_u_opt); \ + free_aligned_buffer_page_end(dst_v_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + } + +#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \ + SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \ + SUBSAMP_Y) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1, \ + _Any, +, 0, PN, OFF_U, OFF_V) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, 
PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, \ + _Unaligned, +, 2, PN, OFF_U, OFF_V) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \ + -, 0, PN, OFF_U, OFF_V) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \ + 0, PN, OFF_U, OFF_V) + +TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2) +TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2) +TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) +#undef TESTAPLANARTOP +#undef TESTAPLANARTOPI + +// wrapper to keep API the same +int I400ToNV21(const uint8_t* src_y, + int src_stride_y, + const uint8_t* /* src_u */, + int /* src_stride_u */, + const uint8_t* /* src_v */, + int /* src_stride_v */, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + return I400ToNV21(src_y, src_stride_y, dst_y, dst_stride_y, dst_vu, + dst_stride_vu, width, height); +} + +#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ + SRC_DEPTH) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int 
kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_u, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_v, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_c, \ + kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_opt, \ + kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ + MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + SRC_T* src_y_p = reinterpret_cast(src_y + OFF); \ + SRC_T* src_u_p = reinterpret_cast(src_u + OFF); \ + SRC_T* src_v_p = reinterpret_cast(src_v + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \ + } \ + for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \ + src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \ + src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth, \ + src_v_p, kSrcHalfWidth, \ + reinterpret_cast(dst_y_c), kWidth, \ + reinterpret_cast(dst_uv_c), \ + kDstHalfWidth * 2, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < 
benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ + reinterpret_cast(dst_y_opt), kWidth, \ + reinterpret_cast(dst_uv_opt), kDstHalfWidth * 2, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ + } \ + for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC * 2; ++i) { \ + EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]); \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + } + +#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \ + SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH) + +TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I422, uint8_t, 1, 2, 1, NV21, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV12, uint8_t, 1, 
2, 2, 8) +TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV21, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I400, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I010, uint16_t, 2, 2, 2, P010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10) +TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12) +TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) + +#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH, \ + TILE_WIDTH, TILE_HEIGHT) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \ + const int kPaddedHeight = \ + (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ + const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ + align_buffer_page_end( \ + src_uv, \ + 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + 
align_buffer_page_end(dst_uv_c, \ + 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_opt, \ + 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + SRC_T* src_y_p = reinterpret_cast(src_y + OFF); \ + SRC_T* src_uv_p = reinterpret_cast(src_uv + OFF); \ + for (int i = 0; \ + i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T); \ + ++i) { \ + src_y_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 * \ + SRC_BPC / (int)sizeof(SRC_T); \ + ++i) { \ + src_uv_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ + 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ + DOY ? reinterpret_cast(dst_y_c) : NULL, kWidth, \ + reinterpret_cast(dst_uv_c), 2 * kDstHalfWidth, kWidth, \ + NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ + 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ + DOY ? 
reinterpret_cast(dst_y_opt) : NULL, kWidth, \ + reinterpret_cast(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \ + NEG kHeight); \ + } \ + if (DOY) { \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + } \ + for (int i = 0; i < kDstHalfHeight; ++i) { \ + for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \ + EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \ + dst_uv_opt[i * 2 * kDstHalfWidth + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + } + +#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + 
benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) + +TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1) +TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1) +TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32) +TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32) + +#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, 
DST_SUBSAMP_Y); \ + const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \ + const int kPaddedHeight = \ + (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ + const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ + align_buffer_page_end( \ + src_uv, kSrcHalfPaddedWidth* kSrcHalfPaddedHeight* SRC_BPC * 2 + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + SRC_T* src_y_p = reinterpret_cast(src_y + OFF); \ + SRC_T* src_uv_p = reinterpret_cast(src_uv + OFF); \ + for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \ + src_y_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \ + src_uv_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \ + reinterpret_cast(dst_y_c), kWidth, \ + reinterpret_cast(dst_u_c), kDstHalfWidth, \ + reinterpret_cast(dst_v_c), kDstHalfWidth, 
kWidth, \ + NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \ + reinterpret_cast(dst_y_opt), kWidth, \ + reinterpret_cast(dst_u_opt), kDstHalfWidth, \ + reinterpret_cast(dst_v_opt), kDstHalfWidth, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ + } \ + for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ + EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_u_c); \ + free_aligned_buffer_page_end(dst_v_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_u_opt); \ + free_aligned_buffer_page_end(dst_v_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + } + +#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, 
+, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) + +TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) +TESTBPTOP(P010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10, 1, 1) +TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1) + +// Provide matrix wrappers for full range bt.709 +#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j) +#define F420ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) +#define F422ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j) +#define F422ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) +#define F444ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j) +#define F444ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) + +// Provide matrix wrappers for full range bt.2020 +#define V420ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j) +#define V420ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j) +#define V422ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j) +#define V422ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j) +#define V444ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j) +#define V444ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j) + +#define I420ToARGBFilter(a, b, c, d, e, 
f, g, h, i, j) \ + I420ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ + I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I422ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ + I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) + +#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) + +#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + double time0 = get_time(); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, dst_argb_c + OFF, kStrideB, \ + kWidth, NEG kHeight); \ + double time1 = get_time(); \ + 
MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, dst_argb_opt + OFF, \ + kStrideB, kWidth, NEG kHeight); \ + } \ + double time2 = get_time(); \ + printf(" %8d us C - %8d us OPT\n", \ + static_cast((time1 - time0) * 1e6), \ + static_cast((time2 - time1) * 1e6 / benchmark_iterations_)); \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ + 1, _Any, +, 0) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Unaligned, +, 4) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Invert, -, 0) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0) +#else +#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ + 1, _Any, +, 0) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0) +#endif + +#if defined(ENABLE_FULL_TESTS) +TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(J420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(J420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(F420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(F420, 2, 2, ABGR, 4, 4, 1) 
+TESTPLANARTOB(H420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(H420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(U420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(U420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(V420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(V420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1) +TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1) +TESTPLANARTOB(J420, 2, 2, RAW, 3, 3, 1) +TESTPLANARTOB(J420, 2, 2, RGB24, 3, 3, 1) +TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1) +TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1) +TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1) +TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1) +TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1) +TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1) +TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1) +#endif +TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(J422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(J422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(U422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(U422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) +TESTPLANARTOB(I422, 1, 1, RGB24, 3, 3, 1) +TESTPLANARTOB(I422, 1, 1, RAW, 3, 3, 1) +TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I444, 1, 1, RGB24, 3, 3, 1) +TESTPLANARTOB(I444, 1, 1, RAW, 3, 3, 1) +TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(H444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(U444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(U444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(V444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(V444, 1, 1, ABGR, 4, 4, 1) 
+TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) +TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) +TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) +TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1) +TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, AB30, 4, 4, 1) +TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1) +#endif +TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) +TESTPLANARTOB(I422, 2, 2, RGB24Filter, 3, 3, 1) +#else +TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1) +TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1) +TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1) +TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1) +TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1) +TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1) +#endif +TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) +TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) +TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) +TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) +TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) +#endif + +#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, OFF, ATTEN) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = 
ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(src_a, kWidth* kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + src_a[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ + dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \ + ATTEN); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ + dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \ + ATTEN); \ + } \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(src_a); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTQPLANARTOBI(FMT_PLANAR, 
SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Unaligned, +, 2, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Invert, -, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Premult, +, 0, 1) +#else +#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, 0) +#endif + +#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, 
i, j, &kYuv2020Constants, k, \ + l, m) +#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, 
&kYuvJPEGConstants, k, \ + l, m) +#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) + +#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) +#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) + +#if defined(ENABLE_FULL_TESTS) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 
4, 1) +TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) +#else +TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) +#endif + +#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + 
W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStrideB = kWidth * BPP_B; \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_uv, \ + kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < kStrideUV * 2; ++j) { \ + src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff); \ + } \ + } \ + memset(dst_argb_c, 1, kStrideB* kHeight); \ + memset(dst_argb_opt, 101, kStrideB* kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \ + dst_argb_c, kWidth * BPP_B, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \ + dst_argb_opt, kWidth * BPP_B, kWidth, \ + NEG kHeight); \ + } \ + /* Convert to ARGB so 565 is expanded to bytes that can be compared. 
*/ \ + align_buffer_page_end(dst_argb32_c, kWidth * 4 * kHeight); \ + align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \ + memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \ + memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \ + FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \ + kHeight); \ + FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \ + kHeight); \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth * 4; ++j) { \ + EXPECT_EQ(dst_argb32_c[i * kWidth * 4 + j], \ + dst_argb32_opt[i * kWidth * 4 + j]); \ + } \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + free_aligned_buffer_page_end(dst_argb32_c); \ + free_aligned_buffer_page_end(dst_argb32_opt); \ + } + +#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Invert, -, 0) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Opt, +, 0) + +#define JNV12ToARGB(a, b, c, d, e, f, g, h) \ + NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) +#define JNV21ToARGB(a, b, c, d, e, f, g, h) \ + NV21ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) +#define JNV12ToABGR(a, b, c, d, e, f, g, h) \ + NV21ToARGBMatrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h) +#define JNV21ToABGR(a, b, c, d, e, f, g, h) \ + NV12ToARGBMatrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h) +#define JNV12ToRGB24(a, b, c, d, e, f, g, h) \ + NV12ToRGB24Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) +#define JNV21ToRGB24(a, b, c, d, e, f, g, h) \ + 
NV21ToRGB24Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) +#define JNV12ToRAW(a, b, c, d, e, f, g, h) \ + NV21ToRGB24Matrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h) +#define JNV21ToRAW(a, b, c, d, e, f, g, h) \ + NV12ToRGB24Matrix(a, b, c, d, e, f, &kYvuJPEGConstants, g, h) +#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \ + NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) + +TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3) +TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2) +#endif + +TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(NV12, 2, 2, RAW, RAW, 3) +TESTBPTOB(NV21, 2, 2, RAW, RAW, 3) +TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2) +#endif + +#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_c, 1, kWidth* kHeight); \ + 
memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ + kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ + dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \ + kStrideUV * 2, kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ + for (int j = 0; j < kStrideUV; ++j) { \ + EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_argb); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#else +#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, 
SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#endif + +TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2) +TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2) +TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1) +TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1) +TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2) +TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1) +TESTATOPLANAR(ABGR, 4, 1, J420, 2, 2) +TESTATOPLANAR(ABGR, 4, 1, J422, 2, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2) +TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2) +TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2) +#endif +TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2) +TESTATOPLANAR(I400, 1, 1, I420, 2, 2) +TESTATOPLANAR(J400, 1, 1, J420, 2, 2) +TESTATOPLANAR(RAW, 3, 1, I420, 2, 2) +TESTATOPLANAR(RAW, 3, 1, J420, 2, 2) +TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2) +TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2) +TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2) +TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2) +TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1) +TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2) +TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1) + +#define TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, \ + SUBSAMP_Y, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_a_c, kWidth* kHeight); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_a_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, \ + kStrideUV 
* 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_a_c, 1, kWidth* kHeight); \ + memset(dst_y_c, 2, kWidth* kHeight); \ + memset(dst_uv_c, 3, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_a_opt, 101, kWidth* kHeight); \ + memset(dst_y_opt, 102, kWidth* kHeight); \ + memset(dst_uv_opt, 103, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ + kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \ + dst_a_c, kWidth, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ + dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \ + kStrideUV * 2, dst_a_opt, kWidth, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + EXPECT_EQ(dst_a_c[i * kWidth + j], dst_a_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ + for (int j = 0; j < kStrideUV; ++j) { \ + EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_a_c); \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_a_opt); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_argb); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOPLANARAI(FMT_A, BPP_A, 
YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#else +#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#endif + +TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2) + +#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ + memset(dst_y_c, 1, kWidth* kHeight); \ + memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ + kStrideUV * 2, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + 
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ + dst_uv_opt, kStrideUV * 2, kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < kStrideUV * 2; ++j) { \ + EXPECT_EQ(dst_uv_c[i * kStrideUV * 2 + j], \ + dst_uv_opt[i * kStrideUV * 2 + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_argb); \ + } + +#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) + +TESTATOBP(ARGB, 1, 4, NV12, 2, 2) +TESTATOBP(ARGB, 1, 4, NV21, 2, 2) +TESTATOBP(ABGR, 1, 4, NV12, 2, 2) +TESTATOBP(ABGR, 1, 4, NV21, 2, 2) +TESTATOBP(RAW, 1, 3, JNV21, 2, 2) +TESTATOBP(YUY2, 2, 4, NV12, 2, 2) +TESTATOBP(UYVY, 2, 4, NV12, 2, 2) +TESTATOBP(AYUV, 1, 4, NV12, 2, 2) +TESTATOBP(AYUV, 1, 4, NV21, 2, 2) + +#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = \ + (kWidth * EPP_A + STRIDE_A - 
1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = \ + (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ + align_buffer_page_end(dst_argb_opt, \ + kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + src_argb[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c, 1, kStrideB* kHeightB); \ + memset(dst_argb_opt, 101, kStrideB* kHeightB); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_B*)dst_argb_c, \ + kStrideB, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, \ + (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \ + TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \ + for (int times = 0; times < benchmark_iterations_; ++times) { \ + const int kWidth = (fastrand() & 63) + 1; \ + const int kHeight = (fastrand() & 31) + 1; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = \ + (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = \ + (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \ + align_buffer_page_end(dst_argb_c, \ + kStrideB* 
kHeightB*(int)sizeof(TYPE_B)); \ + align_buffer_page_end(dst_argb_opt, \ + kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + src_argb[i] = 0xfe; \ + } \ + memset(dst_argb_c, 123, kStrideB* kHeightB); \ + memset(dst_argb_opt, 123, kStrideB* kHeightB); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \ + kStrideB, kWidth, kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \ + kStrideB, kWidth, kHeight); \ + for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B) \ + TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_ + 1, _Any, +, 0) \ + TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_, _Unaligned, +, 4) \ + TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_, _Invert, -, 0) \ + TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0) \ + TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B) +#else +#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B) \ + TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0) +#endif + +TESTATOB(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOB(AB30, 
uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOB(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +#endif +TESTATOB(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOB(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1) +#endif +TESTATOB(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOB(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +TESTATOB(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#endif +TESTATOB(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOB(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +#endif +TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOB(ABGR, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) +TESTATOB(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) +TESTATOB(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1) +#endif +TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) +TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 
1, 1) +TESTATOB(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1) +TESTATOB(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1) +TESTATOB(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1) +TESTATOB(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1) +TESTATOB(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) +TESTATOB(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1) +TESTATOB(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOB(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +#endif +TESTATOB(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) +TESTATOB(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +TESTATOB(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) +TESTATOB(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +TESTATOB(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOB(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) + +// in place test +#define TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = \ + (kWidth * EPP_A + STRIDE_A - 1) 
/ STRIDE_A * STRIDE_A; \ + const int kStrideB = \ + (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_c, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_opt, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + src_argb[i + OFF] = (fastrand() & 0xff); \ + } \ + memcpy(dst_argb_c + OFF, src_argb, \ + kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + memcpy(dst_argb_opt + OFF, src_argb, \ + kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B((TYPE_A*)(dst_argb_c /* src */ + OFF), kStrideA, \ + (TYPE_B*)dst_argb_c, kStrideB, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \ + (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ + } \ + memcpy(dst_argb_opt + OFF, src_argb, \ + kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \ + (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ + for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#define TESTATOA(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B) \ + TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_, _Inplace, +, 0) + +TESTATOA(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOA(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(ABGR, uint8_t, 4, 4, 1, AR30, 
uint8_t, 4, 4, 1) +#endif +TESTATOA(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1) +#endif +TESTATOA(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +TESTATOA(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#endif +TESTATOA(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +#endif +TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1) +// TODO(fbarchard): Support in place for mirror. +// TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOA(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) +TESTATOA(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) +TESTATOA(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1) +#endif +TESTATOA(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) +// TODO(fbarchard): Support in place for conversions that increase bpp. 
// Invocations of the TESTATOA conversion-test macro (defined earlier in this
// file) for format pairs that are byte-exact on all platforms.  Entries that
// are commented out are conversions whose output is not bit-exact across
// implementations (or are covered elsewhere), so they are excluded here.
// TESTATOA(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
// TESTATOA(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOA(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
// TESTATOA(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOA(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1)
// TESTATOA(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1)
// TESTATOA(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOA(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1)
// TESTATOA(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
// TESTATOA(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1)
TESTATOA(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1)
// TESTATOA(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOA(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
// TESTATOA(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1)
TESTATOA(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST
// TESTATOA(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
#endif
TESTATOA(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
// TESTATOA(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
// TESTATOA(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOA(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1)
// TESTATOA(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
// TESTATOA(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
// TESTATOA(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
// TESTATOA(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
TESTATOA(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOA(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOA(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
TESTATOA(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
TESTATOA(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)

// Generates one TEST_F comparing the C reference path against the optimized
// (SIMD) path of a FMT_A##To##FMT_B##Dither conversion.  The source plane is
// filled with random bytes; both destinations are primed with different fill
// values (1 vs 101) so an untouched byte cannot accidentally compare equal.
// Parameters:
//   W1280 - test width expression (benchmark_width_, possibly +1 for _Any).
//   N     - suffix appended to the test name (_Any/_Unaligned/_Invert/_Opt).
//   NEG   - '+' or '-'; '-' negates the height to test inverted images.
//   OFF   - byte offset applied to the source to test unaligned reads.
#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
                   HEIGHT_B, W1280, N, NEG, OFF)                             \
  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) {                   \
    const int kWidth = W1280;                                                \
    const int kHeight = benchmark_height_;                                   \
    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;     \
    const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;     \
    const int kStrideA =                                                     \
        (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;               \
    const int kStrideB =                                                     \
        (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;               \
    align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF);               \
    align_buffer_page_end(dst_argb_c, kStrideB* kHeightB);                   \
    align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB);                 \
    for (int i = 0; i < kStrideA * kHeightA; ++i) {                          \
      src_argb[i + OFF] = (fastrand() & 0xff);                               \
    }                                                                        \
    memset(dst_argb_c, 1, kStrideB* kHeightB);                               \
    memset(dst_argb_opt, 101, kStrideB* kHeightB);                           \
    MaskCpuFlags(disable_cpu_flags_);                                        \
    FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_c, kStrideB, \
                             NULL, kWidth, NEG kHeight);                     \
    MaskCpuFlags(benchmark_cpu_info_);                                       \
    for (int i = 0; i < benchmark_iterations_; ++i) {                        \
      FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_opt,       \
                               kStrideB, NULL, kWidth, NEG kHeight);         \
    }                                                                        \
    for (int i = 0; i < kStrideB * kHeightB; ++i) {                          \
      EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                             \
    }                                                                        \
    free_aligned_buffer_page_end(src_argb);                                  \
    free_aligned_buffer_page_end(dst_argb_c);                                \
    free_aligned_buffer_page_end(dst_argb_opt);                              \
  }

// Generates one TEST_F that fuzzes the dithered conversion with many small
// random geometries (width 1..64, height 1..32), comparing the C path against
// the optimized path for each.  Both destinations use the same fill value
// (123) since only C-vs-optimized equality is checked here.
#define TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B,      \
                        STRIDE_B, HEIGHT_B)                                  \
  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither_Random) {               \
    for (int times = 0; times < benchmark_iterations_; ++times) {            \
      const int kWidth = (fastrand() & 63) + 1;                              \
      const int kHeight = (fastrand() & 31) + 1;                             \
      const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;   \
      const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;   \
      const int kStrideA =                                                   \
          (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;             \
      const int kStrideB =                                                   \
          (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;             \
      align_buffer_page_end(src_argb, kStrideA* kHeightA);                   \
      align_buffer_page_end(dst_argb_c, kStrideB* kHeightB);                 \
      align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB);               \
      for (int i = 0; i < kStrideA * kHeightA; ++i) {                        \
        src_argb[i] = (fastrand() & 0xff);                                   \
      }                                                                      \
      memset(dst_argb_c, 123, kStrideB* kHeightB);                           \
      memset(dst_argb_opt, 123, kStrideB* kHeightB);                         \
      MaskCpuFlags(disable_cpu_flags_);                                      \
      FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_c, kStrideB,     \
                               NULL, kWidth, kHeight);                       \
      MaskCpuFlags(benchmark_cpu_info_);                                     \
      FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_opt, kStrideB,   \
                               NULL, kWidth, kHeight);                       \
      for (int i = 0; i < kStrideB * kHeightB; ++i) {                        \
        EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                           \
      }                                                                      \
      free_aligned_buffer_page_end(src_argb);                                \
      free_aligned_buffer_page_end(dst_argb_c);                              \
      free_aligned_buffer_page_end(dst_argb_opt);                            \
    }                                                                        \
  }

// Expands to the full dither test suite for one format pair: _Any (odd
// width), _Unaligned (source offset 2), _Invert (negative height), _Opt
// (aligned benchmark size), plus the random-geometry fuzz test.
#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,  \
                  HEIGHT_B)                                                  \
  TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,       \
             HEIGHT_B, benchmark_width_ + 1, _Any, +, 0)                     \
  TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,       \
             HEIGHT_B, benchmark_width_, _Unaligned, +, 2)                   \
  TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,       \
             HEIGHT_B, benchmark_width_, _Invert, -, 0)                      \
  TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,       \
             HEIGHT_B, benchmark_width_, _Opt, +, 0)                         \
  TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,  \
                  HEIGHT_B)

// RGB565 is little-endian-packed; the dither tests only run on targets where
// that layout is the native byte order.
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
#endif

// These conversions called twice, produce the original result.
// e.g. endian swap twice.
+#define TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG, \ + OFF) \ + TEST_F(LibYUVConvertTest, FMT_ATOB##_Endswap##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kStrideA = \ + (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + align_buffer_page_end(src_argb, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \ + align_buffer_page_end(dst_argb_opt, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A)); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + src_argb[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c, 1, kStrideA* kHeightA); \ + memset(dst_argb_opt, 101, kStrideA* kHeightA); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_c, \ + kStrideA, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_opt, \ + kStrideA, kWidth, NEG kHeight); \ + } \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_ATOB((TYPE_A*)dst_argb_c, kStrideA, (TYPE_A*)dst_argb_c, kStrideA, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + FMT_ATOB((TYPE_A*)dst_argb_opt, kStrideA, (TYPE_A*)dst_argb_opt, kStrideA, \ + kWidth, NEG kHeight); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]); \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTEND(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A) \ + TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_ + 1, \ + _Any, +, 0) \ + 
TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \ + _Unaligned, +, 2) \ + TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \ + _Opt, +, 0) +#else +#define TESTEND(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A) \ + TESTENDI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \ + _Opt, +, 0) +#endif + +TESTEND(ARGBToBGRA, uint8_t, 4, 4, 1) +TESTEND(ARGBToABGR, uint8_t, 4, 4, 1) +TESTEND(BGRAToARGB, uint8_t, 4, 4, 1) +TESTEND(ABGRToARGB, uint8_t, 4, 4, 1) +TESTEND(AB64ToAR64, uint16_t, 4, 4, 1) + +#ifdef HAVE_JPEG +TEST_F(LibYUVConvertTest, ValidateJpeg) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg + ? benchmark_width_ * benchmark_height_ + : kMinJpeg; + const int kSize = kImageSize + kOff; + align_buffer_page_end(orig_pixels, kSize); + + // No SOI or EOI. Expect fail. + memset(orig_pixels, 0, kSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + // Test special value that matches marker start. + memset(orig_pixels, 0xff, kSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + // EOI, SOI. Expect pass. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + for (int times = 0; times < benchmark_iterations_; ++times) { + EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize)); + } + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, ValidateJpegLarge) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg + ? benchmark_width_ * benchmark_height_ + : kMinJpeg; + const int kSize = kImageSize + kOff; + const int kMultiple = 10; + const int kBufSize = kImageSize * kMultiple + kOff; + align_buffer_page_end(orig_pixels, kBufSize); + + // No SOI or EOI. Expect fail. 
+ memset(orig_pixels, 0, kBufSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kBufSize)); + + // EOI, SOI. Expect pass. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + for (int times = 0; times < benchmark_iterations_; ++times) { + EXPECT_TRUE(ValidateJpeg(orig_pixels, kBufSize)); + } + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, InvalidateJpeg) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg + ? benchmark_width_ * benchmark_height_ + : kMinJpeg; + const int kSize = kImageSize + kOff; + align_buffer_page_end(orig_pixels, kSize); + + // NULL pointer. Expect fail. + EXPECT_FALSE(ValidateJpeg(NULL, kSize)); + + // Negative size. Expect fail. + EXPECT_FALSE(ValidateJpeg(orig_pixels, -1)); + + // Too large size. Expect fail. + EXPECT_FALSE(ValidateJpeg(orig_pixels, 0xfb000000ull)); + + // No SOI or EOI. Expect fail. + memset(orig_pixels, 0, kSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + // SOI but no EOI. Expect fail. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; + for (int times = 0; times < benchmark_iterations_; ++times) { + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + } + + // EOI but no SOI. Expect fail. + orig_pixels[0] = 0; + orig_pixels[1] = 0; + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, FuzzJpeg) { + // SOI but no EOI. Expect fail. + for (int times = 0; times < benchmark_iterations_; ++times) { + const int kSize = fastrand() % 5000 + 3; + align_buffer_page_end(orig_pixels, kSize); + MemRandomize(orig_pixels, kSize); + + // Add SOI so frame will be scanned. 
+ orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; + orig_pixels[kSize - 1] = 0xff; + ValidateJpeg(orig_pixels, + kSize); // Failure normally expected. + free_aligned_buffer_page_end(orig_pixels); + } +} + +// Test data created in GIMP. In export jpeg, disable +// thumbnails etc, choose a subsampling, and use low quality +// (50) to keep size small. Generated with xxd -i test.jpg +// test 0 is J400 +static const uint8_t kTest0Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xc2, 0x00, 0x0b, 0x08, 0x00, 0x10, + 0x00, 0x20, 0x01, 0x01, 0x11, 0x00, 0xff, 0xc4, 0x00, 0x17, 0x00, 0x01, + 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xda, 0x00, 0x08, 0x01, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x43, 0x7e, 0xa7, 0x97, 0x57, 0xff, 0xc4, + 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, + 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, + 0x02, 0x3b, 0xc0, 0x6f, 0x66, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, + 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, + 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 
0x51, 0x73, 0x83, 0xff, 0xc4, + 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, + 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x21, 0x65, 0x6e, 0x31, 0x86, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, + 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, + 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x10, 0x35, 0xff, 0xc4, 0x00, 0x1f, 0x10, + 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, + 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x10, 0x0b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x88, 0xab, 0x8b, + 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, + 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, + 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, + 0xd9}; +static const size_t kTest0JpgLen = 421; + +// test 1 is J444 +static const uint8_t kTest1Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 
0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x11, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, + 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01, 0x03, 0xff, 0xda, + 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, 0x01, + 0x40, 0x8f, 0x26, 0xe8, 0xf4, 0xcc, 0xf9, 0x69, 0x2b, 0x1b, 0x2a, 0xcb, + 0xff, 0xc4, 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, + 0x00, 0x03, 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, + 0x01, 0x05, 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, + 0x0d, 0x26, 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x01, 0x00, + 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x10, 0x11, 0x02, 0x12, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x03, 0x01, 0x01, 0x3f, 0x01, 0xf1, 0x00, 0x27, 0x45, 0xbb, 0x31, + 0xaf, 0xff, 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x02, 0x03, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x02, 0x10, 0x11, 0x41, 0x12, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, + 0x01, 0x3f, 0x01, 0xf6, 0x4b, 0x5f, 0x48, 0xb3, 0x69, 0x63, 0x35, 0x72, + 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, + 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, + 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, + 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, + 0x10, 
0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, + 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, + 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, + 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, + 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, + 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x26, 0x61, 0xd4, 0xff, + 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, + 0x31, 0x41, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, + 0x10, 0x54, 0xa8, 0xbf, 0x50, 0x87, 0xb0, 0x9d, 0x8b, 0xc4, 0x6a, 0x26, + 0x6b, 0x2a, 0x9c, 0x1f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x11, 0x21, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, + 0x01, 0x01, 0x3f, 0x10, 0x70, 0xe1, 0x3e, 0xd1, 0x8e, 0x0d, 0xe1, 0xb5, + 0xd5, 0x91, 0x76, 0x43, 0x82, 0x45, 0x4c, 0x7b, 0x7f, 0xff, 0xc4, 0x00, + 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, + 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, + 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, + 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, + 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, + 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, + 0xd4, 0xff, 0xd9}; +static const size_t kTest1JpgLen = 735; + +// test 2 is J420 +static const uint8_t kTest2Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 
0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x01, 0x02, 0x04, 0xff, + 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x02, 0xff, + 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, + 0x01, 0x20, 0xe7, 0x28, 0xa3, 0x0b, 0x2e, 0x2d, 0xcf, 0xff, 0xc4, 0x00, + 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, 0x10, + 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, 0x02, + 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, 0x62, + 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x00, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, + 0x01, 0xc8, 0x53, 0xff, 0xc4, 0x00, 0x16, 0x11, 0x01, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x32, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, + 0x01, 0xd2, 0xc7, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, + 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, + 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, + 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, + 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, + 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, + 0x03, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x13, 0x5f, + 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, + 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x0e, + 0xa1, 0x3a, 0x76, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x21, 0x11, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, + 0x3f, 0x10, 0x57, 0x0b, 0x08, 0x70, 0xdb, 0xff, 0xc4, 0x00, 0x1f, 0x10, + 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, + 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, + 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, + 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, + 0x40, 0x74, 0x30, 0xff, 0x00, 
0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, + 0xd9}; +static const size_t kTest2JpgLen = 685; + +// test 3 is J422 +static const uint8_t kTest3Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x21, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, + 0x00, 0x17, 0x01, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x00, 0xff, + 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, + 0x01, 0x43, 0x8d, 0x1f, 0xa2, 0xb3, 0xca, 0x1b, 0x57, 0x0f, 0xff, 0xc4, + 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, + 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, + 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 
0x26, + 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x02, 0x10, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, + 0x01, 0x01, 0x3f, 0x01, 0x51, 0xce, 0x8c, 0x75, 0xff, 0xc4, 0x00, 0x18, + 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x61, 0x21, 0xff, 0xda, + 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xa6, 0xd9, 0x2f, 0x84, + 0xe8, 0xf0, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, + 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, + 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, + 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, + 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, + 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, + 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, + 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, + 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x2e, 0x45, 0xff, + 0xc4, 0x00, 0x18, 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, + 0x31, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x53, + 0x50, 0xba, 0x54, 0xc1, 0x67, 0x4f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, + 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, 0x00, 0x10, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x02, 0x01, 0x01, 0x3f, 0x10, 0x18, 0x81, 0x5c, 0x04, 0x1a, 0xca, + 0x91, 0xbf, 0xff, 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 
0x04, + 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, + 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, + 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, + 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, + 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; +static const size_t kTest3JpgLen = 704; + +// test 4 is J422 vertical - not supported +static const uint8_t kTest4Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x12, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x05, 0x01, 0x02, 0x03, 0xff, + 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x01, 0x03, 0xff, + 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, + 0x01, 0xd2, 0x98, 0xe9, 0x03, 0x0c, 0x00, 0x46, 0x21, 0xd9, 0xff, 0xc4, + 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, + 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, + 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, + 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x01, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, + 0x3f, 0x01, 0x98, 0xb1, 0xbd, 0x47, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, + 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xb6, 0x35, 0xa2, 0xe1, 0x47, 0xff, + 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x21, 0x02, + 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, 0x08, 0x01, + 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, 0xed, 0xf9, + 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, 0x10, 0x01, + 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, 0x81, 0xf0, + 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, 0x75, 0x6e, + 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, 0xf3, 0xde, + 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, 0x5d, 0x7a, + 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x24, 0xaf, 0xff, 0xc4, 0x00, 0x19, + 0x11, 0x00, 0x03, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x01, 0x11, 0x51, 0x21, 0x31, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x59, 0x11, 0xca, + 0x42, 0x60, 0x9f, 0x69, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x11, 0x21, 0x31, 0x61, 0xff, 0xda, 0x00, 0x08, 0x01, + 0x02, 0x01, 0x01, 0x3f, 0x10, 0xb0, 0xd7, 0x27, 0x51, 0xb6, 0x41, 0xff, + 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, + 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, + 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, + 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, + 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, + 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; +static const size_t kTest4JpgLen = 701; + +TEST_F(LibYUVConvertTest, TestMJPGSize) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + printf("test jpeg size %d x %d\n", width, height); +} + +TEST_F(LibYUVConvertTest, TestMJPGToI420) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_u, half_width * half_height); + align_buffer_page_end(dst_v, half_width * half_height); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_u, half_width, + dst_v, half_width, width, height, width, height); 
+ } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. + uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_u_hash = HashDjb2(dst_u, half_width * half_height, 5381); + uint32_t dst_v_hash = HashDjb2(dst_v, half_width * half_height, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_u_hash, 2501859930u); + EXPECT_EQ(dst_v_hash, 2126459123u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); +} + +TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + // Convert to NV21 + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_vu, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert to I420 + align_buffer_page_end(dst2_y, width * height); + align_buffer_page_end(dst2_u, half_width * half_height); + align_buffer_page_end(dst2_v, half_width * half_height); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width, + dst2_v, half_width, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert I420 to NV21 + align_buffer_page_end(dst3_y, width * height); + align_buffer_page_end(dst3_vu, half_width * half_height * 2); + + I420ToNV21(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y, + width, dst3_vu, half_width * 
2, width, height); + + for (int i = 0; i < width * height; ++i) { + EXPECT_EQ(dst_y[i], dst3_y[i]); + } + for (int i = 0; i < half_width * half_height * 2; ++i) { + EXPECT_EQ(dst_vu[i], dst3_vu[i]); + EXPECT_EQ(dst_vu[i], dst3_vu[i]); + } + + free_aligned_buffer_page_end(dst3_y); + free_aligned_buffer_page_end(dst3_vu); + + free_aligned_buffer_page_end(dst2_y); + free_aligned_buffer_page_end(dst2_u); + free_aligned_buffer_page_end(dst2_v); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + // Convert to NV12 + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert to I420 + align_buffer_page_end(dst2_y, width * height); + align_buffer_page_end(dst2_u, half_width * half_height); + align_buffer_page_end(dst2_v, half_width * half_height); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width, + dst2_v, half_width, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert I420 to NV12 + align_buffer_page_end(dst3_y, width * height); + align_buffer_page_end(dst3_uv, half_width * half_height * 2); + + I420ToNV12(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y, + width, dst3_uv, half_width * 2, 
width, height); + + for (int i = 0; i < width * height; ++i) { + EXPECT_EQ(dst_y[i], dst3_y[i]); + } + for (int i = 0; i < half_width * half_height * 2; ++i) { + EXPECT_EQ(dst_uv[i], dst3_uv[i]); + EXPECT_EQ(dst_uv[i], dst3_uv[i]); + } + + free_aligned_buffer_page_end(dst3_y); + free_aligned_buffer_page_end(dst3_uv); + + free_aligned_buffer_page_end(dst2_y); + free_aligned_buffer_page_end(dst2_u); + free_aligned_buffer_page_end(dst2_v); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 1069662856u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. Hashes are for VU so flip the plane. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, + half_height); + uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_vu_hash, 1069662856u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); + free_aligned_buffer_page_end(dst_vu); +} + +// TODO(fbarchard): Improve test to compare against I422, not checksum +TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 493520167u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. Hashes are for VU so flip the plane. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, + half_height); + uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_vu_hash, 493520167u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 330644005u); + EXPECT_EQ(dst_uv_hash, 135214341u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. Hashes are for VU so flip the plane. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, + half_height); + uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 330644005u); + EXPECT_EQ(dst_vu_hash, 135214341u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 506143297u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. Hashes are for VU so flip the plane. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, + half_height); + uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_vu_hash, 506143297u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToARGB) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_argb, width * height * 4); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToARGB(kTest3Jpg, kTest3JpgLen, dst_argb, width * 4, width, + height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381); +#ifdef LIBYUV_UNLIMITED_DATA + EXPECT_EQ(dst_argb_hash, 3900633302u); +#else + EXPECT_EQ(dst_argb_hash, 2355976473u); +#endif + + free_aligned_buffer_page_end(dst_argb); +} + +static int ShowJPegInfo(const uint8_t* sample, size_t sample_size) { + MJpegDecoder mjpeg_decoder; + LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); + + int width = mjpeg_decoder.GetWidth(); + int height = mjpeg_decoder.GetHeight(); + + // YUV420 + if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 2 && + mjpeg_decoder.GetHorizSampFactor(0) == 2 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + printf("JPeg is J420, %dx%d %d bytes\n", width, height, + static_cast(sample_size)); + // YUV422 + } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 2 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + printf("JPeg is J422, %dx%d %d bytes\n", width, height, + static_cast(sample_size)); + // YUV444 + } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 1 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + printf("JPeg is J444, %dx%d %d bytes\n", width, height, + static_cast(sample_size)); + // YUV400 + } else if 
(mjpeg_decoder.GetColorSpace() == + MJpegDecoder::kColorSpaceGrayscale && + mjpeg_decoder.GetNumComponents() == 1 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 1) { + printf("JPeg is J400, %dx%d %d bytes\n", width, height, + static_cast(sample_size)); + } else { + // Unknown colorspace. + printf("JPeg is Unknown colorspace.\n"); + } + mjpeg_decoder.UnloadFrame(); + return ret; +} + +TEST_F(LibYUVConvertTest, TestMJPGInfo) { + EXPECT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg, + kTest4JpgLen)); // Valid but unsupported. +} +#endif // HAVE_JPEG + +TEST_F(LibYUVConvertTest, NV12Crop) { + const int SUBSAMP_X = 2; + const int SUBSAMP_Y = 2; + const int kWidth = benchmark_width_; + const int kHeight = benchmark_height_; + const int crop_y = + ((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1; + const int kDestWidth = benchmark_width_; + const int kDestHeight = benchmark_height_ - crop_y * 2; + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); + const int sample_size = + kWidth * kHeight + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; + align_buffer_page_end(src_y, sample_size); + uint8_t* src_uv = src_y + kWidth * kHeight; + + align_buffer_page_end(dst_y, kDestWidth * kDestHeight); + align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight); + align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + for (int i = 0; i < kHeight * kWidth; ++i) { + 
src_y[i] = (fastrand() & 0xff); + } + for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideUV) * 2; ++i) { + src_uv[i] = (fastrand() & 0xff); + } + memset(dst_y, 1, kDestWidth * kDestHeight); + memset(dst_u, 2, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_v, 3, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_y_2, 1, kDestWidth * kDestHeight); + memset(dst_u_2, 2, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_v_2, 3, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + ConvertToI420(src_y, sample_size, dst_y_2, kDestWidth, dst_u_2, + SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v_2, + SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight, + kDestWidth, kDestHeight, libyuv::kRotate0, libyuv::FOURCC_NV12); + + NV12ToI420(src_y + crop_y * kWidth, kWidth, + src_uv + (crop_y / 2) * kStrideUV * 2, kStrideUV * 2, dst_y, + kDestWidth, dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, + SUBSAMPLE(kDestWidth, SUBSAMP_X), kDestWidth, kDestHeight); + + for (int i = 0; i < kDestHeight; ++i) { + for (int j = 0; j < kDestWidth; ++j) { + EXPECT_EQ(dst_y[i * kWidth + j], dst_y_2[i * kWidth + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], + dst_u_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], + dst_v_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); + free_aligned_buffer_page_end(dst_y_2); + free_aligned_buffer_page_end(dst_u_2); + free_aligned_buffer_page_end(dst_v_2); + 
free_aligned_buffer_page_end(src_y); +} + +TEST_F(LibYUVConvertTest, I420CropOddY) { + const int SUBSAMP_X = 2; + const int SUBSAMP_Y = 2; + const int kWidth = benchmark_width_; + const int kHeight = benchmark_height_; + const int crop_y = benchmark_height_ > 1 ? 1 : 0; + const int kDestWidth = benchmark_width_; + const int kDestHeight = benchmark_height_ - crop_y * 2; + const int kStrideU = SUBSAMPLE(kWidth, SUBSAMP_X); + const int kStrideV = SUBSAMPLE(kWidth, SUBSAMP_X); + const int sample_size = kWidth * kHeight + + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y) + + kStrideV * SUBSAMPLE(kHeight, SUBSAMP_Y); + align_buffer_page_end(src_y, sample_size); + uint8_t* src_u = src_y + kWidth * kHeight; + uint8_t* src_v = src_u + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y); + + align_buffer_page_end(dst_y, kDestWidth * kDestHeight); + align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + for (int i = 0; i < kHeight * kWidth; ++i) { + src_y[i] = (fastrand() & 0xff); + } + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideU; ++i) { + src_u[i] = (fastrand() & 0xff); + } + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideV; ++i) { + src_v[i] = (fastrand() & 0xff); + } + memset(dst_y, 1, kDestWidth * kDestHeight); + memset(dst_u, 2, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_v, 3, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + MaskCpuFlags(benchmark_cpu_info_); + for (int i = 0; i < benchmark_iterations_; ++i) { + ConvertToI420(src_y, sample_size, dst_y, kDestWidth, dst_u, + SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, + SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight, + kDestWidth, kDestHeight, libyuv::kRotate0, + libyuv::FOURCC_I420); + } + + for (int i = 0; i < kDestHeight; ++i) { + for (int j = 0; j < kDestWidth; ++j) { + 
EXPECT_EQ(src_y[crop_y * kWidth + i * kWidth + j], + dst_y[i * kDestWidth + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j], + dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j], + dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); + free_aligned_buffer_page_end(src_y); +} + +TEST_F(LibYUVConvertTest, TestYToARGB) { + uint8_t y[32]; + uint8_t expectedg[32]; + for (int i = 0; i < 32; ++i) { + y[i] = i * 5 + 17; + expectedg[i] = static_cast((y[i] - 16) * 1.164f + 0.5f); + } + uint8_t argb[32 * 4]; + YToARGB(y, 0, argb, 0, 32, 1); + + for (int i = 0; i < 32; ++i) { + printf("%2d %d: %d <-> %d,%d,%d,%d\n", i, y[i], expectedg[i], + argb[i * 4 + 0], argb[i * 4 + 1], argb[i * 4 + 2], argb[i * 4 + 3]); + } + for (int i = 0; i < 32; ++i) { + EXPECT_EQ(expectedg[i], argb[i * 4 + 0]); + } +} + +static const uint8_t kNoDither4x4[16] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +TEST_F(LibYUVConvertTest, TestNoDither) { + align_buffer_page_end(src_argb, benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_rgb565dither, + benchmark_width_ * benchmark_height_ * 2); + MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4); + MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2); + ARGBToRGB565(src_argb, benchmark_width_ * 4, dst_rgb565, benchmark_width_ * 2, + benchmark_width_, benchmark_height_); + ARGBToRGB565Dither(src_argb, 
benchmark_width_ * 4, dst_rgb565dither, + benchmark_width_ * 2, kNoDither4x4, benchmark_width_, + benchmark_height_); + for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) { + EXPECT_EQ(dst_rgb565[i], dst_rgb565dither[i]); + } + + free_aligned_buffer_page_end(src_argb); + free_aligned_buffer_page_end(dst_rgb565); + free_aligned_buffer_page_end(dst_rgb565dither); +} + +// Ordered 4x4 dither for 888 to 565. Values from 0 to 7. +static const uint8_t kDither565_4x4[16] = { + 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2, +}; + +TEST_F(LibYUVConvertTest, TestDither) { + align_buffer_page_end(src_argb, benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_rgb565dither, + benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_argb, benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_argbdither, + benchmark_width_ * benchmark_height_ * 4); + MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4); + MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2); + MemRandomize(dst_argb, benchmark_width_ * benchmark_height_ * 4); + MemRandomize(dst_argbdither, benchmark_width_ * benchmark_height_ * 4); + ARGBToRGB565(src_argb, benchmark_width_ * 4, dst_rgb565, benchmark_width_ * 2, + benchmark_width_, benchmark_height_); + ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, dst_rgb565dither, + benchmark_width_ * 2, kDither565_4x4, benchmark_width_, + benchmark_height_); + RGB565ToARGB(dst_rgb565, benchmark_width_ * 2, dst_argb, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + RGB565ToARGB(dst_rgb565dither, benchmark_width_ * 2, dst_argbdither, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + + for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) { + EXPECT_NEAR(dst_argb[i], dst_argbdither[i], 9); + } 
+ free_aligned_buffer_page_end(src_argb); + free_aligned_buffer_page_end(dst_rgb565); + free_aligned_buffer_page_end(dst_rgb565dither); + free_aligned_buffer_page_end(dst_argb); + free_aligned_buffer_page_end(dst_argbdither); +} + +#define TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, OFF, FMT_C, BPP_C) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, dst_argb_c + OFF, \ + kStrideB, NULL, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B##Dither( \ + src_y + OFF, kWidth, src_u + OFF, kStrideUV, src_v + OFF, kStrideUV, \ + dst_argb_opt + OFF, kStrideB, NULL, kWidth, NEG kHeight); \ + } \ + /* Convert to ARGB so 565 is expanded to bytes that can be compared. 
*/ \ + align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight); \ + align_buffer_page_end(dst_argb32_opt, kWidth* BPP_C* kHeight); \ + memset(dst_argb32_c, 2, kWidth* BPP_C* kHeight); \ + memset(dst_argb32_opt, 102, kWidth* BPP_C* kHeight); \ + FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, dst_argb32_c, kWidth * BPP_C, \ + kWidth, kHeight); \ + FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt, \ + kWidth * BPP_C, kWidth, kHeight); \ + for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \ + EXPECT_EQ(dst_argb32_c[i], dst_argb32_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + free_aligned_buffer_page_end(dst_argb32_c); \ + free_aligned_buffer_page_end(dst_argb32_opt); \ + } + +#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) + +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4) +#endif + +#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \ + TEST_F(LibYUVConvertTest, NAME) { \ + const int kWidth = benchmark_width_; \ + const int kHeight = benchmark_height_; \ + \ + align_buffer_page_end(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \ + align_buffer_page_end(orig_y, kWidth* kHeight); \ + align_buffer_page_end(orig_u, \ + SUBSAMPLE(kWidth, 2) * 
SUBSAMPLE(kHeight, 2)); \ + align_buffer_page_end(orig_v, \ + SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ + \ + align_buffer_page_end(dst_y_orig, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_orig, \ + 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ + \ + align_buffer_page_end(dst_y, kWidth* kHeight); \ + align_buffer_page_end(dst_uv, \ + 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ + \ + MemRandomize(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \ + \ + /* Convert UYVY to NV12 in 2 steps for reference */ \ + libyuv::UYVYTOI420(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), orig_y, kWidth, \ + orig_u, SUBSAMPLE(kWidth, 2), orig_v, \ + SUBSAMPLE(kWidth, 2), kWidth, kHeight); \ + libyuv::I420ToNV12(orig_y, kWidth, orig_u, SUBSAMPLE(kWidth, 2), orig_v, \ + SUBSAMPLE(kWidth, 2), dst_y_orig, kWidth, dst_uv_orig, \ + 2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); \ + \ + /* Convert to NV12 */ \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + libyuv::UYVYTONV12(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), dst_y, kWidth, \ + dst_uv, 2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); \ + } \ + \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + EXPECT_EQ(orig_y[i], dst_y[i]); \ + } \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + EXPECT_EQ(dst_y_orig[i], dst_y[i]); \ + } \ + for (int i = 0; i < 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2); \ + ++i) { \ + EXPECT_EQ(dst_uv_orig[i], dst_uv[i]); \ + } \ + \ + free_aligned_buffer_page_end(orig_uyvy); \ + free_aligned_buffer_page_end(orig_y); \ + free_aligned_buffer_page_end(orig_u); \ + free_aligned_buffer_page_end(orig_v); \ + free_aligned_buffer_page_end(dst_y_orig); \ + free_aligned_buffer_page_end(dst_uv_orig); \ + free_aligned_buffer_page_end(dst_y); \ + free_aligned_buffer_page_end(dst_uv); \ + } + +TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12) +TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12) + +TEST_F(LibYUVConvertTest, MM21ToYUY2) { + const int kWidth = (benchmark_width_ + 15) & (~15); + const int 
kHeight = (benchmark_height_ + 31) & (~31); + + align_buffer_page_end(orig_y, kWidth * kHeight); + align_buffer_page_end(orig_uv, + 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + + align_buffer_page_end(tmp_y, kWidth * kHeight); + align_buffer_page_end(tmp_u, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + align_buffer_page_end(tmp_v, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + + align_buffer_page_end(dst_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight); + align_buffer_page_end(golden_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight); + + MemRandomize(orig_y, kWidth * kHeight); + MemRandomize(orig_uv, 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + + /* Convert MM21 to YUY2 in 2 steps for reference */ + libyuv::MM21ToI420(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), tmp_y, + kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v, + SUBSAMPLE(kWidth, 2), kWidth, kHeight); + libyuv::I420ToYUY2(tmp_y, kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v, + SUBSAMPLE(kWidth, 2), golden_yuyv, + 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); + + /* Convert to NV12 */ + for (int i = 0; i < benchmark_iterations_; ++i) { + libyuv::MM21ToYUY2(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), + dst_yuyv, 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); + } + + for (int i = 0; i < 4 * SUBSAMPLE(kWidth, 2) * kHeight; ++i) { + EXPECT_EQ(dst_yuyv[i], golden_yuyv[i]); + } + + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(orig_uv); + free_aligned_buffer_page_end(tmp_y); + free_aligned_buffer_page_end(tmp_u); + free_aligned_buffer_page_end(tmp_v); + free_aligned_buffer_page_end(dst_yuyv); + free_aligned_buffer_page_end(golden_yuyv); +} + +// Transitive test. A to B to C is same as A to C. +// Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere. 
+// TESTPLANARTOEI generates one transitive-conversion test:
+// convert FMT_PLANAR directly to FMT_C (1 step) and via the packed
+// intermediate FMT_B (2 steps); the two results must match byte for byte.
+// W1280: test width. N: test-name suffix. NEG: sign applied to the height
+// argument (- yields an inverted image). OFF: byte offset added to each
+// buffer pointer to exercise unaligned access.
+#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+                       W1280, N, NEG, OFF, FMT_C, BPP_C)                      \
+  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##To##FMT_C##N) {            \
+    const int kWidth = W1280;                                                 \
+    const int kHeight = benchmark_height_;                                    \
+    const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B;                    \
+    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                       \
+    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);            \
+    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                      \
+    align_buffer_page_end(src_u, kSizeUV + OFF);                              \
+    align_buffer_page_end(src_v, kSizeUV + OFF);                              \
+    align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF);               \
+    for (int i = 0; i < kWidth * kHeight; ++i) {                              \
+      src_y[i + OFF] = (fastrand() & 0xff);                                   \
+    }                                                                         \
+    for (int i = 0; i < kSizeUV; ++i) {                                       \
+      src_u[i + OFF] = (fastrand() & 0xff);                                   \
+      src_v[i + OFF] = (fastrand() & 0xff);                                   \
+    }                                                                         \
+    memset(dst_argb_b + OFF, 1, kStrideB * kHeight);                          \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV,        \
+                          src_v + OFF, kStrideUV, dst_argb_b + OFF, kStrideB, \
+                          kWidth, NEG kHeight);                               \
+    /* Convert to a 3rd format in 1 step and 2 steps and compare  */          \
+    const int kStrideC = kWidth * BPP_C;                                      \
+    align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF);               \
+    align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF);              \
+    memset(dst_argb_c + OFF, 2, kStrideC * kHeight);                          \
+    memset(dst_argb_bc + OFF, 3, kStrideC * kHeight);                         \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
+      FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, src_u + OFF, kStrideUV,      \
+                            src_v + OFF, kStrideUV, dst_argb_c + OFF,         \
+                            kStrideC, kWidth, NEG kHeight);                   \
+      /* Convert B to C */                                                    \
+      FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF,         \
+                       kStrideC, kWidth, kHeight);                            \
+    }                                                                         \
+    for (int i = 0; i < kStrideC * kHeight; ++i) {                            \
+      EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]);                   \
+    }                                                                         \
+    free_aligned_buffer_page_end(src_y);                                      \
+    free_aligned_buffer_page_end(src_u);                                      \
+    free_aligned_buffer_page_end(src_v);                                      \
+    free_aligned_buffer_page_end(dst_argb_b);                                 \
+    free_aligned_buffer_page_end(dst_argb_c);                                 \
+    free_aligned_buffer_page_end(dst_argb_bc);                                \
+  }
+
+// Full builds generate the _Any (odd width), _Unaligned (offset pointers),
+// _Invert (negative height) and _Opt variants; otherwise only _Opt.
+#if defined(ENABLE_FULL_TESTS)
+#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+                      FMT_C, BPP_C)                                          \
+  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                 benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C)             \
+  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                 benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C)           \
+  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                 benchmark_width_, _Invert, -, 0, FMT_C, BPP_C)              \
+  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                 benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+#else
+#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
+                      FMT_C, BPP_C)                                          \
+  TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B,      \
+                 benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+#endif
+
+// Instantiations: (planar source, chroma subsampling, packed intermediate
+// with its subsample/bpp, final packed format with its bpp).
+#if defined(ENABLE_FULL_TESTS)
+TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3)
+TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB24, 3)
+TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3)
+TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4)
+TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3)
+TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3)
+TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RGB24, 3)
+TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3)
+TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, ARGB, 4)
+TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3)
+TESTPLANARTOE(J420, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(J420, 2, 2, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(U420, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(U420, 2, 2, ARGB, 1, 4, ARGB, 4) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2) +TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2) +#endif +TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(J422, 2, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(J422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(H422, 2, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(H422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(U422, 2, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(U422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(V422, 2, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(V422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4) +TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ABGR, 4) +TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(J444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(J444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(H444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(H444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(U444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(U444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(V444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(V444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4) +#else +TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB24, 3) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2) +TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4) 
+TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3) +TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3) +TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2) +TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4) +TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4) +#endif + +// Transitive test: Compare 1 step vs 2 step conversion for YUVA to ARGB. +// Benchmark 2 step conversion for comparison to 1 step conversion. +#define TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + W1280, N, NEG, OFF, FMT_C, BPP_C, ATTEN) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##To##FMT_C##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ + const int kSizeUV = \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(src_a, kWidth* kHeight + OFF); \ + align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ + const int kStrideC = kWidth * BPP_C; \ + align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \ + align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ + memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ + memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + src_a[i + OFF] = (fastrand() & 0xff); \ + } \ + for 
(int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + /* Convert A to B */ \ + FMT_PLANAR##To##FMT_B( \ + src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth, \ + dst_argb_b + OFF, kStrideB, kWidth, NEG kHeight, ATTEN); \ + /* Convert B to C */ \ + FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, \ + kStrideC, kWidth, kHeight); \ + } \ + /* Convert A to C */ \ + FMT_PLANAR##To##FMT_C( \ + src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth, \ + dst_argb_c + OFF, kStrideC, kWidth, NEG kHeight, ATTEN); \ + for (int i = 0; i < kStrideC * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(src_a); \ + free_aligned_buffer_page_end(dst_argb_b); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_bc); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + FMT_C, BPP_C) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C, 0) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Unaligned, +, 2, FMT_C, BPP_C, 0) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Invert, -, 0, FMT_C, BPP_C, 0) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Premult, +, 0, FMT_C, 
BPP_C, 1) +#else +#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + FMT_C, BPP_C) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0) +#endif + +#if defined(ENABLE_FULL_TESTS) +TESTQPLANARTOE(I420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(J420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(J420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(H420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(H420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(F420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(F420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(U420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(U420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(V420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(V420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(I422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(J422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(J422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(F422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(F422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(H422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(H422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(U422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(U422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(V422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(V422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(I444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(J444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(J444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(H444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(H444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(U444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(U444Alpha, 1, 1, ABGR, 1, 
4, ARGB, 4) +TESTQPLANARTOE(V444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(V444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) +#else +TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) +#endif + +#define TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, W1280, N, NEG, \ + OFF, FMT_C, BPP_C) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##To##FMT_C##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStrideA = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ + const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ + align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \ + align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ + MemRandomize(src_argb_a + OFF, kStrideA * kHeight); \ + memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ + FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB, \ + kWidth, NEG kHeight); \ + /* Convert to a 3rd format in 1 step and 2 steps and compare */ \ + const int kStrideC = kWidth * BPP_C; \ + align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \ + align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ + memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ + memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_C(src_argb_a + OFF, kStrideA, dst_argb_c + OFF, kStrideC, \ + kWidth, NEG kHeight); \ + /* Convert B to C */ \ + FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, \ + kStrideC, kWidth, kHeight); \ + } \ + for (int i = 0; i < kStrideC * kHeight; i += 4) { \ + EXPECT_EQ(dst_argb_c[i + OFF + 0], dst_argb_bc[i + OFF + 0]); \ + EXPECT_EQ(dst_argb_c[i + OFF + 1], dst_argb_bc[i + OFF + 1]); \ + EXPECT_EQ(dst_argb_c[i + OFF + 2], dst_argb_bc[i + OFF + 2]); \ + EXPECT_NEAR(dst_argb_c[i + OFF + 3], dst_argb_bc[i + OFF + 3], 64); \ + } \ + 
free_aligned_buffer_page_end(src_argb_a); \ + free_aligned_buffer_page_end(dst_argb_b); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_bc); \ + } + +#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, \ + benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Unaligned, +, 4, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Invert, -, 0, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Opt, +, 0, FMT_C, BPP_C) + +// Caveat: Destination needs to be 4 bytes +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4) +TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4) +TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4) +TESTPLANETOE(AR30, 1, 4, ABGR, 1, 4, ARGB, 4) +TESTPLANETOE(ARGB, 1, 4, AB30, 1, 4, ARGB, 4) +TESTPLANETOE(ABGR, 1, 4, AB30, 1, 4, ABGR, 4) +TESTPLANETOE(AB30, 1, 4, ARGB, 1, 4, ABGR, 4) +TESTPLANETOE(AB30, 1, 4, ABGR, 1, 4, ARGB, 4) +#endif + +TEST_F(LibYUVConvertTest, RotateWithARGBSource) { + // 2x2 frames + uint32_t src[4]; + uint32_t dst[4]; + // some random input + src[0] = 0x11000000; + src[1] = 0x00450000; + src[2] = 0x00009f00; + src[3] = 0x000000ff; + // zeros on destination + dst[0] = 0x00000000; + dst[1] = 0x00000000; + dst[2] = 0x00000000; + dst[3] = 0x00000000; + + int r = ConvertToARGB(reinterpret_cast(src), + 16, // input size + reinterpret_cast(dst), + 8, // destination stride + 0, // crop_x + 0, // crop_y + 2, // width + 2, // height + 2, // crop width + 2, // crop height + kRotate90, FOURCC_ARGB); + + EXPECT_EQ(r, 0); + // 90 degrees rotation, no conversion + EXPECT_EQ(dst[0], src[2]); + EXPECT_EQ(dst[1], src[0]); + EXPECT_EQ(dst[2], src[3]); + EXPECT_EQ(dst[3], src[1]); +} + +#ifdef HAS_ARGBTOAR30ROW_AVX2 +TEST_F(LibYUVConvertTest, 
ARGBToAR30Row_Opt) { + // ARGBToAR30Row_AVX2 expects a multiple of 8 pixels. + const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7; + align_buffer_page_end(src, kPixels * 4); + align_buffer_page_end(dst_opt, kPixels * 4); + align_buffer_page_end(dst_c, kPixels * 4); + MemRandomize(src, kPixels * 4); + memset(dst_opt, 0, kPixels * 4); + memset(dst_c, 1, kPixels * 4); + + ARGBToAR30Row_C(src, dst_c, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + ARGBToAR30Row_AVX2(src, dst_opt, kPixels); + } else if (has_ssse3) { + ARGBToAR30Row_SSSE3(src, dst_opt, kPixels); + } else { + ARGBToAR30Row_C(src, dst_opt, kPixels); + } + } + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_opt[i], dst_c[i]); + } + + free_aligned_buffer_page_end(src); + free_aligned_buffer_page_end(dst_opt); + free_aligned_buffer_page_end(dst_c); +} +#endif // HAS_ARGBTOAR30ROW_AVX2 + +#ifdef HAS_ABGRTOAR30ROW_AVX2 +TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { + // ABGRToAR30Row_AVX2 expects a multiple of 8 pixels. 
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7; + align_buffer_page_end(src, kPixels * 4); + align_buffer_page_end(dst_opt, kPixels * 4); + align_buffer_page_end(dst_c, kPixels * 4); + MemRandomize(src, kPixels * 4); + memset(dst_opt, 0, kPixels * 4); + memset(dst_c, 1, kPixels * 4); + + ABGRToAR30Row_C(src, dst_c, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + ABGRToAR30Row_AVX2(src, dst_opt, kPixels); + } else if (has_ssse3) { + ABGRToAR30Row_SSSE3(src, dst_opt, kPixels); + } else { + ABGRToAR30Row_C(src, dst_opt, kPixels); + } + } + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_opt[i], dst_c[i]); + } + + free_aligned_buffer_page_end(src); + free_aligned_buffer_page_end(dst_opt); + free_aligned_buffer_page_end(dst_c); +} +#endif // HAS_ABGRTOAR30ROW_AVX2 + +// Provide matrix wrappers for 12 bit YUV +#define I012ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \ + I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define I012ToAB30(a, b, c, d, e, f, g, h, i, j) \ + I012ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) + +#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define I410ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define H410ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j) +#define H410ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j) +#define U410ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) +#define U410ToABGR(a, b, c, d, e, f, g, h, i, j) \ + 
I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) +#define I410ToAR30(a, b, c, d, e, f, g, h, i, j) \ + I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define I410ToAB30(a, b, c, d, e, f, g, h, i, j) \ + I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define H410ToAR30(a, b, c, d, e, f, g, h, i, j) \ + I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j) +#define H410ToAB30(a, b, c, d, e, f, g, h, i, j) \ + I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j) +#define U410ToAR30(a, b, c, d, e, f, g, h, i, j) \ + I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) +#define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \ + I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) + +#define I010ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ + I010ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I010ToAR30Filter(a, b, c, d, e, f, g, h, i, j) \ + I010ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I210ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ + I210ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I210ToAR30Filter(a, b, c, d, e, f, g, h, i, j) \ + I210ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) + +// TODO(fbarchard): Fix clamping issue affected by U channel. 
+#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \ + BPP_B, ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + const int kBpc = 2; \ + align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \ + align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + reinterpret_cast(src_y + SOFF)[i] = (fastrand() & FMT_MASK); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + reinterpret_cast(src_u + SOFF)[i] = (fastrand() & FMT_MASK); \ + reinterpret_cast(src_v + SOFF)[i] = (fastrand() & FMT_MASK); \ + } \ + memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B( \ + reinterpret_cast(src_y + SOFF), kWidth, \ + reinterpret_cast(src_u + SOFF), kStrideUV, \ + reinterpret_cast(src_v + SOFF), kStrideUV, \ + dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B( \ + reinterpret_cast(src_y + SOFF), kWidth, \ + reinterpret_cast(src_u + SOFF), kStrideUV, \ + reinterpret_cast(src_v + SOFF), kStrideUV, \ + dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + 
free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \ + BPP_B, ALIGN, YALIGN) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ + ALIGN, YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ + ALIGN, YALIGN, benchmark_width_, _Unaligned, +, 4, 4) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ + ALIGN, YALIGN, benchmark_width_, _Invert, -, 0, 0) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ + ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0) + +// These conversions are only optimized for x86 +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ABGR, 4, 4, 1) +TESTPLANAR16TOB(I012, 2, 2, 0xfff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGBFilter, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGBFilter, 4, 4, 1) + +#ifdef 
LITTLE_ENDIAN_ONLY_TEST +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1) +TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1) +TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1) +TESTPLANAR16TOB(I012, 2, 2, 0xfff, AB30, 4, 4, 1) +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30Filter, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1) +#endif // LITTLE_ENDIAN_ONLY_TEST +#endif // DISABLE_SLOW_TESTS + +#define TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + ALIGN, YALIGN, W1280, N, NEG, OFF, ATTEN, S_DEPTH) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + const int kBpc = 2; \ + align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF); \ + align_buffer_page_end(src_u, kSizeUV* kBpc + OFF); \ + align_buffer_page_end(src_v, kSizeUV* kBpc + OFF); \ + align_buffer_page_end(src_a, kWidth* kHeight* kBpc + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* 
kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + reinterpret_cast(src_y + OFF)[i] = \ + (fastrand() & ((1 << S_DEPTH) - 1)); \ + reinterpret_cast(src_a + OFF)[i] = \ + (fastrand() & ((1 << S_DEPTH) - 1)); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + reinterpret_cast(src_u + OFF)[i] = \ + (fastrand() & ((1 << S_DEPTH) - 1)); \ + reinterpret_cast(src_v + OFF)[i] = \ + (fastrand() & ((1 << S_DEPTH) - 1)); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(reinterpret_cast(src_y + OFF), kWidth, \ + reinterpret_cast(src_u + OFF), kStrideUV, \ + reinterpret_cast(src_v + OFF), kStrideUV, \ + reinterpret_cast(src_a + OFF), kWidth, \ + dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \ + ATTEN); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B( \ + reinterpret_cast(src_y + OFF), kWidth, \ + reinterpret_cast(src_u + OFF), kStrideUV, \ + reinterpret_cast(src_v + OFF), kStrideUV, \ + reinterpret_cast(src_a + OFF), kWidth, \ + dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, ATTEN); \ + } \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(src_a); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + ALIGN, YALIGN, S_DEPTH) \ + TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \ + TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, 
FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Unaligned, +, 2, 0, S_DEPTH) \ + TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ + TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH) \ + TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Premult, +, 0, 1, S_DEPTH) +#else +#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + ALIGN, YALIGN, S_DEPTH) \ + TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH) +#endif + +#define I010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ + l, m) +#define I010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ + l, m) +#define J010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U010AlphaToARGB(a, b, c, d, e, f, 
g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define I210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ + l, m) +#define I210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ + l, m) +#define J210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U210AlphaToABGR(a, b, c, d, e, f, g, h, i, 
j, k, l, m) \ + I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define I410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ + l, m) +#define I410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \ + l, m) +#define J410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, 
m) \ + I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define I010AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) +#define I210AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I010AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) + +// These conversions are only optimized for x86 +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(I010Alpha, 2, 2, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(J010Alpha, 2, 2, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(J010Alpha, 2, 2, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(H010Alpha, 2, 2, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(H010Alpha, 2, 2, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(F010Alpha, 2, 2, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(F010Alpha, 2, 2, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(U010Alpha, 2, 2, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(U010Alpha, 2, 2, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(V010Alpha, 2, 2, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(V010Alpha, 2, 2, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(I210Alpha, 2, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(J210Alpha, 2, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(J210Alpha, 2, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(H210Alpha, 2, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(H210Alpha, 2, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(F210Alpha, 2, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(F210Alpha, 2, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(U210Alpha, 2, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(U210Alpha, 2, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(V210Alpha, 2, 1, ARGB, 4, 4, 1, 10) 
+TESTQPLANAR16TOB(V210Alpha, 2, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(I410Alpha, 1, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(I410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(J410Alpha, 1, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(J410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(H410Alpha, 1, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(H410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(F410Alpha, 1, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(F410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(U410Alpha, 1, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(U410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(V410Alpha, 1, 1, ARGB, 4, 4, 1, 10) +TESTQPLANAR16TOB(V410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10) +TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) +#endif // DISABLE_SLOW_TESTS + +#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; \ + const int kBpc = 2; \ + align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \ + align_buffer_page_end(src_uv, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + reinterpret_cast(src_y + SOFF)[i] = \ + (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + reinterpret_cast(src_uv + SOFF)[i] = \ + (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \ + } \ + memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + DOFF, 101, kStrideB * 
kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(reinterpret_cast(src_y + SOFF), kWidth, \ + reinterpret_cast(src_uv + SOFF), \ + kStrideUV, dst_argb_c + DOFF, kStrideB, kWidth, \ + NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(reinterpret_cast(src_y + SOFF), kWidth, \ + reinterpret_cast(src_uv + SOFF), \ + kStrideUV, dst_argb_opt + DOFF, kStrideB, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Opt, +, 0, 0, S_DEPTH) + +#define P010ToARGB(a, b, c, d, e, f, g, h) \ + P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P210ToARGB(a, b, c, d, e, f, g, h) \ + P210ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P010ToAR30(a, b, c, d, e, f, g, h) \ + P010ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P210ToAR30(a, b, c, d, e, f, g, h) \ + P210ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) + +#define P012ToARGB(a, b, c, d, e, f, g, h) \ + P012ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P212ToARGB(a, b, c, d, 
e, f, g, h) \ + P212ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P012ToAR30(a, b, c, d, e, f, g, h) \ + P012ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P212ToAR30(a, b, c, d, e, f, g, h) \ + P212ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) + +#define P016ToARGB(a, b, c, d, e, f, g, h) \ + P016ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P216ToARGB(a, b, c, d, e, f, g, h) \ + P216ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P016ToAR30(a, b, c, d, e, f, g, h) \ + P016ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P216ToAR30(a, b, c, d, e, f, g, h) \ + P216ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) + +#define P010ToARGBFilter(a, b, c, d, e, f, g, h) \ + P010ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \ + kFilterBilinear) +#define P210ToARGBFilter(a, b, c, d, e, f, g, h) \ + P210ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \ + kFilterBilinear) +#define P010ToAR30Filter(a, b, c, d, e, f, g, h) \ + P010ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \ + kFilterBilinear) +#define P210ToAR30Filter(a, b, c, d, e, f, g, h) \ + P210ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, \ + kFilterBilinear) + +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10) +TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12) +TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12) +TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16) +TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16) +TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10) +TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12) +TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12) +TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16) 
+TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16) +TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10) +#endif // LITTLE_ENDIAN_ONLY_TEST +#endif // DISABLE_SLOW_TESTS + +static int Clamp(int y) { + if (y < 0) { + y = 0; + } + if (y > 255) { + y = 255; + } + return y; +} + +static int Clamp10(int y) { + if (y < 0) { + y = 0; + } + if (y > 1023) { + y = 1023; + } + return y; +} + +// Test 8 bit YUV to 8 bit RGB +TEST_F(LibYUVConvertTest, TestH420ToARGB) { + const int kSize = 256; + int histogram_b[256]; + int histogram_g[256]; + int histogram_r[256]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2); + align_buffer_page_end(argb_pixels, kSize * 4); + uint8_t* orig_y = orig_yuv; + uint8_t* orig_u = orig_y + kSize; + uint8_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 128; // 128 is 0. 
+ orig_v[i] = 128; + } + + H420ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b = argb_pixels[i * 4 + 0]; + int g = argb_pixels[i * 4 + 1]; + int r = argb_pixels[i * 4 + 2]; + int a = argb_pixels[i * 4 + 3]; + ++histogram_b[b]; + ++histogram_g[g]; + ++histogram_r[r]; + // Reference formula for Y channel contribution in YUV to RGB conversions: + int expected_y = Clamp(static_cast((i - 16) * 1.164f + 0.5f)); + EXPECT_EQ(b, expected_y); + EXPECT_EQ(g, expected_y); + EXPECT_EQ(r, expected_y); + EXPECT_EQ(a, 255); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(argb_pixels); +} + +// Test 10 bit YUV to 8 bit RGB +TEST_F(LibYUVConvertTest, TestH010ToARGB) { + const int kSize = 1024; + int histogram_b[1024]; + int histogram_g[1024]; + int histogram_r[1024]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); + align_buffer_page_end(argb_pixels, kSize * 4); + uint16_t* orig_y = reinterpret_cast(orig_yuv); + uint16_t* orig_u = orig_y + kSize; + uint16_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 512; // 512 is 0. 
+ orig_v[i] = 512; + } + + H010ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b = argb_pixels[i * 4 + 0]; + int g = argb_pixels[i * 4 + 1]; + int r = argb_pixels[i * 4 + 2]; + int a = argb_pixels[i * 4 + 3]; + ++histogram_b[b]; + ++histogram_g[g]; + ++histogram_r[r]; + int expected_y = Clamp(static_cast((i - 64) * 1.164f / 4)); + EXPECT_NEAR(b, expected_y, 1); + EXPECT_NEAR(g, expected_y, 1); + EXPECT_NEAR(r, expected_y, 1); + EXPECT_EQ(a, 255); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(argb_pixels); +} + +// Test 10 bit YUV to 10 bit RGB +// Caveat: Result is near due to float rounding in expected +// result. +TEST_F(LibYUVConvertTest, TestH010ToAR30) { + const int kSize = 1024; + int histogram_b[1024]; + int histogram_g[1024]; + int histogram_r[1024]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + + align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); + align_buffer_page_end(ar30_pixels, kSize * 4); + uint16_t* orig_y = reinterpret_cast(orig_yuv); + uint16_t* orig_u = orig_y + kSize; + uint16_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 512; // 512 is 0. 
+ orig_v[i] = 512; + } + + H010ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b10 = reinterpret_cast(ar30_pixels)[i] & 1023; + int g10 = (reinterpret_cast(ar30_pixels)[i] >> 10) & 1023; + int r10 = (reinterpret_cast(ar30_pixels)[i] >> 20) & 1023; + int a2 = (reinterpret_cast(ar30_pixels)[i] >> 30) & 3; + ++histogram_b[b10]; + ++histogram_g[g10]; + ++histogram_r[r10]; + int expected_y = Clamp10(static_cast((i - 64) * 1.164f + 0.5)); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(ar30_pixels); +} + +// Test 10 bit YUV to 10 bit RGB +// Caveat: Result is near due to float rounding in expected +// result. +TEST_F(LibYUVConvertTest, TestH010ToAB30) { + const int kSize = 1024; + int histogram_b[1024]; + int histogram_g[1024]; + int histogram_r[1024]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + + align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); + align_buffer_page_end(ab30_pixels, kSize * 4); + uint16_t* orig_y = reinterpret_cast(orig_yuv); + uint16_t* orig_u = orig_y + kSize; + uint16_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 512; // 512 is 0. 
+ orig_v[i] = 512; + } + + H010ToAB30(orig_y, 0, orig_u, 0, orig_v, 0, ab30_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int r10 = reinterpret_cast(ab30_pixels)[i] & 1023; + int g10 = (reinterpret_cast(ab30_pixels)[i] >> 10) & 1023; + int b10 = (reinterpret_cast(ab30_pixels)[i] >> 20) & 1023; + int a2 = (reinterpret_cast(ab30_pixels)[i] >> 30) & 3; + ++histogram_b[b10]; + ++histogram_g[g10]; + ++histogram_r[r10]; + int expected_y = Clamp10(static_cast((i - 64) * 1.164f)); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(ab30_pixels); +} + +// Test 8 bit YUV to 10 bit RGB +TEST_F(LibYUVConvertTest, TestH420ToAR30) { + const int kSize = 256; + const int kHistSize = 1024; + int histogram_b[kHistSize]; + int histogram_g[kHistSize]; + int histogram_r[kHistSize]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2); + align_buffer_page_end(ar30_pixels, kSize * 4); + uint8_t* orig_y = orig_yuv; + uint8_t* orig_u = orig_y + kSize; + uint8_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 128; // 128 is 0. 
+ orig_v[i] = 128; + } + + H420ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b10 = reinterpret_cast(ar30_pixels)[i] & 1023; + int g10 = (reinterpret_cast(ar30_pixels)[i] >> 10) & 1023; + int r10 = (reinterpret_cast(ar30_pixels)[i] >> 20) & 1023; + int a2 = (reinterpret_cast(ar30_pixels)[i] >> 30) & 3; + ++histogram_b[b10]; + ++histogram_g[g10]; + ++histogram_r[r10]; + int expected_y = Clamp10(static_cast((i - 16) * 1.164f * 4.f)); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kHistSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(ar30_pixels); +} + +// Test I400 with jpeg matrix is same as J400 +TEST_F(LibYUVConvertTest, TestI400) { + const int kSize = 256; + align_buffer_page_end(orig_i400, kSize); + align_buffer_page_end(argb_pixels_i400, kSize * 4); + align_buffer_page_end(argb_pixels_j400, kSize * 4); + align_buffer_page_end(argb_pixels_jpeg_i400, kSize * 4); + align_buffer_page_end(argb_pixels_h709_i400, kSize * 4); + align_buffer_page_end(argb_pixels_2020_i400, kSize * 4); + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_i400[i] = i; + } + + J400ToARGB(orig_i400, 0, argb_pixels_j400, 0, kSize, 1); + I400ToARGB(orig_i400, 0, argb_pixels_i400, 0, kSize, 1); + I400ToARGBMatrix(orig_i400, 0, argb_pixels_jpeg_i400, 0, &kYuvJPEGConstants, + kSize, 1); + I400ToARGBMatrix(orig_i400, 0, argb_pixels_h709_i400, 0, &kYuvH709Constants, + kSize, 1); + I400ToARGBMatrix(orig_i400, 0, argb_pixels_2020_i400, 0, &kYuv2020Constants, + kSize, 1); + + EXPECT_EQ(0, argb_pixels_i400[0]); + EXPECT_EQ(0, 
argb_pixels_j400[0]); + EXPECT_EQ(0, argb_pixels_jpeg_i400[0]); + EXPECT_EQ(0, argb_pixels_h709_i400[0]); + EXPECT_EQ(0, argb_pixels_2020_i400[0]); + EXPECT_EQ(0, argb_pixels_i400[16 * 4]); + EXPECT_EQ(16, argb_pixels_j400[16 * 4]); + EXPECT_EQ(16, argb_pixels_jpeg_i400[16 * 4]); + EXPECT_EQ(0, argb_pixels_h709_i400[16 * 4]); + EXPECT_EQ(0, argb_pixels_2020_i400[16 * 4]); + EXPECT_EQ(130, argb_pixels_i400[128 * 4]); + EXPECT_EQ(128, argb_pixels_j400[128 * 4]); + EXPECT_EQ(128, argb_pixels_jpeg_i400[128 * 4]); + EXPECT_EQ(130, argb_pixels_h709_i400[128 * 4]); + EXPECT_EQ(130, argb_pixels_2020_i400[128 * 4]); + EXPECT_EQ(255, argb_pixels_i400[255 * 4]); + EXPECT_EQ(255, argb_pixels_j400[255 * 4]); + EXPECT_EQ(255, argb_pixels_jpeg_i400[255 * 4]); + EXPECT_EQ(255, argb_pixels_h709_i400[255 * 4]); + EXPECT_EQ(255, argb_pixels_2020_i400[255 * 4]); + + for (int i = 0; i < kSize * 4; ++i) { + if ((i & 3) == 3) { + EXPECT_EQ(255, argb_pixels_j400[i]); + } else { + EXPECT_EQ(i / 4, argb_pixels_j400[i]); + } + EXPECT_EQ(argb_pixels_jpeg_i400[i], argb_pixels_j400[i]); + } + + free_aligned_buffer_page_end(orig_i400); + free_aligned_buffer_page_end(argb_pixels_i400); + free_aligned_buffer_page_end(argb_pixels_j400); + free_aligned_buffer_page_end(argb_pixels_jpeg_i400); + free_aligned_buffer_page_end(argb_pixels_h709_i400); + free_aligned_buffer_page_end(argb_pixels_2020_i400); +} + +// Test RGB24 to ARGB and back to RGB24 +TEST_F(LibYUVConvertTest, TestARGBToRGB24) { + const int kSize = 256; + align_buffer_page_end(orig_rgb24, kSize * 3); + align_buffer_page_end(argb_pixels, kSize * 4); + align_buffer_page_end(dest_rgb24, kSize * 3); + + // Test grey scale + for (int i = 0; i < kSize * 3; ++i) { + orig_rgb24[i] = i; + } + + RGB24ToARGB(orig_rgb24, 0, argb_pixels, 0, kSize, 1); + ARGBToRGB24(argb_pixels, 0, dest_rgb24, 0, kSize, 1); + + for (int i = 0; i < kSize * 3; ++i) { + EXPECT_EQ(orig_rgb24[i], dest_rgb24[i]); + } + + free_aligned_buffer_page_end(orig_rgb24); + 
free_aligned_buffer_page_end(argb_pixels); + free_aligned_buffer_page_end(dest_rgb24); +} + +TEST_F(LibYUVConvertTest, Test565) { + SIMD_ALIGNED(uint8_t orig_pixels[256][4]); + SIMD_ALIGNED(uint8_t pixels565[256][2]); + + for (int i = 0; i < 256; ++i) { + for (int j = 0; j < 4; ++j) { + orig_pixels[i][j] = i; + } + } + ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1); + uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); + EXPECT_EQ(610919429u, checksum); +} + +// Test RGB24 to J420 is exact +#if defined(LIBYUV_BIT_EXACT) +TEST_F(LibYUVConvertTest, TestRGB24ToJ420) { + const int kSize = 256; + align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24 + align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2); + int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) / + (kSize * 2) * benchmark_iterations_; + + for (int i = 0; i < kSize * 3 * 2; ++i) { + orig_rgb24[i] = i; + } + + for (int i = 0; i < iterations256; ++i) { + RGB24ToJ420(orig_rgb24, kSize * 3, dest_j420, kSize, // Y plane + dest_j420 + kSize * 2, kSize / 2, // U plane + dest_j420 + kSize * 5 / 2, kSize / 2, // V plane + kSize, 2); + } + + uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381); + EXPECT_EQ(2755440272u, checksum); + + free_aligned_buffer_page_end(orig_rgb24); + free_aligned_buffer_page_end(dest_j420); +} +#endif + +// Test RGB24 to I420 is exact +#if defined(LIBYUV_BIT_EXACT) +TEST_F(LibYUVConvertTest, TestRGB24ToI420) { + const int kSize = 256; + align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24 + align_buffer_page_end(dest_i420, kSize * 3 / 2 * 2); + int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) / + (kSize * 2) * benchmark_iterations_; + + for (int i = 0; i < kSize * 3 * 2; ++i) { + orig_rgb24[i] = i; + } + + for (int i = 0; i < iterations256; ++i) { + RGB24ToI420(orig_rgb24, kSize * 3, dest_i420, kSize, // Y plane + dest_i420 + kSize * 2, kSize / 2, // U plane + 
dest_i420 + kSize * 5 / 2, kSize / 2, // V plane + kSize, 2); + } + + uint32_t checksum = HashDjb2(dest_i420, kSize * 3 / 2 * 2, 5381); + EXPECT_EQ(1526656597u, checksum); + + free_aligned_buffer_page_end(orig_rgb24); + free_aligned_buffer_page_end(dest_i420); +} +#endif + +} // namespace libyuv diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc new file mode 100644 index 00000000..93867fa7 --- /dev/null +++ b/unit_test/cpu_test.cc @@ -0,0 +1,337 @@ +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "../unit_test/unit_test.h" +#include "libyuv/basic_types.h" +#include "libyuv/cpu_id.h" +#include "libyuv/version.h" + +namespace libyuv { + +TEST_F(LibYUVBaseTest, TestCpuHas) { + int cpu_flags = TestCpuFlag(-1); + printf("Cpu Flags 0x%x\n", cpu_flags); +#if defined(__arm__) || defined(__aarch64__) + int has_arm = TestCpuFlag(kCpuHasARM); + printf("Has ARM 0x%x\n", has_arm); + int has_neon = TestCpuFlag(kCpuHasNEON); + printf("Has NEON 0x%x\n", has_neon); +#endif +#if defined(__riscv) && defined(__linux__) + int has_riscv = TestCpuFlag(kCpuHasRISCV); + printf("Has RISCV 0x%x\n", has_riscv); + int has_rvv = TestCpuFlag(kCpuHasRVV); + printf("Has RVV 0x%x\n", has_rvv); + int has_rvvzvfh = TestCpuFlag(kCpuHasRVVZVFH); + printf("Has RVVZVFH 0x%x\n", has_rvvzvfh); +#endif +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \ + defined(_M_X64) + int has_x86 = TestCpuFlag(kCpuHasX86); + int has_sse2 = TestCpuFlag(kCpuHasSSE2); + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + int has_sse41 = TestCpuFlag(kCpuHasSSE41); + int has_sse42 = TestCpuFlag(kCpuHasSSE42); + int 
has_avx = TestCpuFlag(kCpuHasAVX); + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + int has_erms = TestCpuFlag(kCpuHasERMS); + int has_fma3 = TestCpuFlag(kCpuHasFMA3); + int has_f16c = TestCpuFlag(kCpuHasF16C); + int has_gfni = TestCpuFlag(kCpuHasGFNI); + int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW); + int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL); + int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI); + int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI); + int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2); + int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG); + int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ); + printf("Has X86 0x%x\n", has_x86); + printf("Has SSE2 0x%x\n", has_sse2); + printf("Has SSSE3 0x%x\n", has_ssse3); + printf("Has SSE41 0x%x\n", has_sse41); + printf("Has SSE42 0x%x\n", has_sse42); + printf("Has AVX 0x%x\n", has_avx); + printf("Has AVX2 0x%x\n", has_avx2); + printf("Has ERMS 0x%x\n", has_erms); + printf("Has FMA3 0x%x\n", has_fma3); + printf("Has F16C 0x%x\n", has_f16c); + printf("Has GFNI 0x%x\n", has_gfni); + printf("Has AVX512BW 0x%x\n", has_avx512bw); + printf("Has AVX512VL 0x%x\n", has_avx512vl); + printf("Has AVX512VNNI 0x%x\n", has_avx512vnni); + printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi); + printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2); + printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg); + printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq); +#endif +#if defined(__mips__) + int has_mips = TestCpuFlag(kCpuHasMIPS); + printf("Has MIPS 0x%x\n", has_mips); + int has_msa = TestCpuFlag(kCpuHasMSA); + printf("Has MSA 0x%x\n", has_msa); +#endif +#if defined(__loongarch__) + int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH); + printf("Has LOONGARCH 0x%x\n", has_loongarch); + int has_lsx = TestCpuFlag(kCpuHasLSX); + printf("Has LSX 0x%x\n", has_lsx); + int has_lasx = TestCpuFlag(kCpuHasLASX); + printf("Has LASX 0x%x\n", has_lasx); +#endif +} + +TEST_F(LibYUVBaseTest, TestCompilerMacros) { + // 
Tests all macros used in public headers. +#ifdef __ATOMIC_RELAXED + printf("__ATOMIC_RELAXED %d\n", __ATOMIC_RELAXED); +#endif +#ifdef __cplusplus + printf("__cplusplus %ld\n", __cplusplus); +#endif +#ifdef __clang_major__ + printf("__clang_major__ %d\n", __clang_major__); +#endif +#ifdef __clang_minor__ + printf("__clang_minor__ %d\n", __clang_minor__); +#endif +#ifdef __GNUC__ + printf("__GNUC__ %d\n", __GNUC__); +#endif +#ifdef __GNUC_MINOR__ + printf("__GNUC_MINOR__ %d\n", __GNUC_MINOR__); +#endif +#ifdef __i386__ + printf("__i386__ %d\n", __i386__); +#endif +#ifdef __x86_64__ + printf("__x86_64__ %d\n", __x86_64__); +#endif +#ifdef _M_IX86 + printf("_M_IX86 %d\n", _M_IX86); +#endif +#ifdef _M_X64 + printf("_M_X64 %d\n", _M_X64); +#endif +#ifdef _MSC_VER + printf("_MSC_VER %d\n", _MSC_VER); +#endif +#ifdef __aarch64__ + printf("__aarch64__ %d\n", __aarch64__); +#endif +#ifdef __arm__ + printf("__arm__ %d\n", __arm__); +#endif +#ifdef __riscv + printf("__riscv %d\n", __riscv); +#endif +#ifdef __riscv_vector + printf("__riscv_vector %d\n", __riscv_vector); +#endif +#ifdef __APPLE__ + printf("__APPLE__ %d\n", __APPLE__); +#endif +#ifdef __clang__ + printf("__clang__ %d\n", __clang__); +#endif +#ifdef __CLR_VER + printf("__CLR_VER %d\n", __CLR_VER); +#endif +#ifdef __CYGWIN__ + printf("__CYGWIN__ %d\n", __CYGWIN__); +#endif +#ifdef __llvm__ + printf("__llvm__ %d\n", __llvm__); +#endif +#ifdef __mips_msa + printf("__mips_msa %d\n", __mips_msa); +#endif +#ifdef __mips + printf("__mips %d\n", __mips); +#endif +#ifdef __mips_isa_rev + printf("__mips_isa_rev %d\n", __mips_isa_rev); +#endif +#ifdef _MIPS_ARCH_LOONGSON3A + printf("_MIPS_ARCH_LOONGSON3A %d\n", _MIPS_ARCH_LOONGSON3A); +#endif +#ifdef __loongarch__ + printf("__loongarch__ %d\n", __loongarch__); +#endif +#ifdef _WIN32 + printf("_WIN32 %d\n", _WIN32); +#endif +#ifdef __native_client__ + printf("__native_client__ %d\n", __native_client__); +#endif +#ifdef __pic__ + printf("__pic__ %d\n", __pic__); +#endif 
+#ifdef __pnacl__ + printf("__pnacl__ %d\n", __pnacl__); +#endif +#ifdef GG_LONGLONG + printf("GG_LONGLONG %d\n", GG_LONGLONG); +#endif +#ifdef INT_TYPES_DEFINED + printf("INT_TYPES_DEFINED\n"); +#endif +#ifdef __has_feature + printf("__has_feature\n"); +#if __has_feature(memory_sanitizer) + printf("__has_feature(memory_sanitizer) %d\n", + __has_feature(memory_sanitizer)); +#endif +#endif +} + +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \ + defined(_M_X64) +TEST_F(LibYUVBaseTest, TestCpuId) { + int has_x86 = TestCpuFlag(kCpuHasX86); + if (has_x86) { + int cpu_info[4]; + // Vendor ID: + // AuthenticAMD AMD processor + // CentaurHauls Centaur processor + // CyrixInstead Cyrix processor + // GenuineIntel Intel processor + // GenuineTMx86 Transmeta processor + // Geode by NSC National Semiconductor processor + // NexGenDriven NexGen processor + // RiseRiseRise Rise Technology processor + // SiS SiS SiS SiS processor + // UMC UMC UMC UMC processor + CpuId(0, 0, cpu_info); + cpu_info[0] = cpu_info[1]; // Reorder output + cpu_info[1] = cpu_info[3]; + cpu_info[3] = 0; + printf("Cpu Vendor: %s 0x%x 0x%x 0x%x\n", + reinterpret_cast(&cpu_info[0]), cpu_info[0], cpu_info[1], + cpu_info[2]); + EXPECT_EQ(12u, strlen(reinterpret_cast(&cpu_info[0]))); + + // CPU Family and Model + // 3:0 - Stepping + // 7:4 - Model + // 11:8 - Family + // 13:12 - Processor Type + // 19:16 - Extended Model + // 27:20 - Extended Family + CpuId(1, 0, cpu_info); + int family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0); + int model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0); + printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family, model, + model); + } +} +#endif + +static int FileExists(const char* file_name) { + FILE* f = fopen(file_name, "r"); + if (!f) { + return 0; + } + fclose(f); + return 1; +} + +TEST_F(LibYUVBaseTest, TestLinuxNeon) { + if (FileExists("../../unit_test/testdata/arm_v7.txt")) { + printf("Note: testing to load 
\"../../unit_test/testdata/arm_v7.txt\"\n"); + + EXPECT_EQ(0, ArmCpuCaps("../../unit_test/testdata/arm_v7.txt")); + EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/tegra3.txt")); + EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/juno.txt")); + } else { + printf("WARNING: unable to load \"../../unit_test/testdata/arm_v7.txt\"\n"); + } +#if defined(__linux__) && defined(__ARM_NEON__) + if (FileExists("/proc/cpuinfo")) { + if (kCpuHasNEON != ArmCpuCaps("/proc/cpuinfo")) { + // This can happen on ARM emulator but /proc/cpuinfo is from host. + printf("WARNING: Neon build enabled but CPU does not have NEON\n"); + } + } else { + printf("WARNING: unable to load \"/proc/cpuinfo\"\n"); + } +#endif +} + +TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) { + if (FileExists("../../unit_test/testdata/mips.txt")) { + printf("Note: testing to load \"../../unit_test/testdata/mips.txt\"\n"); + + EXPECT_EQ(0, MipsCpuCaps("../../unit_test/testdata/mips.txt")); + EXPECT_EQ(kCpuHasMSA, MipsCpuCaps("../../unit_test/testdata/mips_msa.txt")); + EXPECT_EQ(kCpuHasMSA, + MipsCpuCaps("../../unit_test/testdata/mips_loongson2k.txt")); + } else { + printf("WARNING: unable to load \"../../unit_test/testdata/mips.txt\"\n"); + } +} + +TEST_F(LibYUVBaseTest, TestLinuxRVV) { + if (FileExists("../../unit_test/testdata/riscv64.txt")) { + printf("Note: testing to load \"../../unit_test/testdata/riscv64.txt\"\n"); + + EXPECT_EQ(0, RiscvCpuCaps("../../unit_test/testdata/riscv64.txt")); + EXPECT_EQ(kCpuHasRVV, + RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv.txt")); + EXPECT_EQ(kCpuHasRVV | kCpuHasRVVZVFH, + RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv_zvfh.txt")); + } else { + printf( + "WARNING: unable to load " + "\"../../unit_test/testdata/riscv64.txt\"\n"); + } +#if defined(__linux__) && defined(__riscv) + if (FileExists("/proc/cpuinfo")) { + if (!(kCpuHasRVV & RiscvCpuCaps("/proc/cpuinfo"))) { + // This can happen on RVV emulator but /proc/cpuinfo is from host. 
+ printf("WARNING: RVV build enabled but CPU does not have RVV\n"); + } + } else { + printf("WARNING: unable to load \"/proc/cpuinfo\"\n"); + } +#endif +} + +// TODO(fbarchard): Fix clangcl test of cpuflags. +#ifdef _MSC_VER +TEST_F(LibYUVBaseTest, DISABLED_TestSetCpuFlags) { +#else +TEST_F(LibYUVBaseTest, TestSetCpuFlags) { +#endif + // Reset any masked flags that may have been set so auto init is enabled. + MaskCpuFlags(0); + + int original_cpu_flags = TestCpuFlag(-1); + + // Test setting different CPU configurations. + int cpu_flags = kCpuHasARM | kCpuHasNEON | kCpuInitialized; + SetCpuFlags(cpu_flags); + EXPECT_EQ(cpu_flags, TestCpuFlag(-1)); + + cpu_flags = kCpuHasX86 | kCpuInitialized; + SetCpuFlags(cpu_flags); + EXPECT_EQ(cpu_flags, TestCpuFlag(-1)); + + // Test that setting 0 turns auto-init back on. + SetCpuFlags(0); + EXPECT_EQ(original_cpu_flags, TestCpuFlag(-1)); + + // Restore the CPU flag mask. + MaskCpuFlags(benchmark_cpu_info_); +} + +} // namespace libyuv diff --git a/unit_test/cpu_thread_test.cc b/unit_test/cpu_thread_test.cc new file mode 100644 index 00000000..69aab74e --- /dev/null +++ b/unit_test/cpu_thread_test.cc @@ -0,0 +1,63 @@ +/* + * Copyright 2017 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include + +#include "libyuv/cpu_id.h" + +#if defined(__clang__) && !defined(__wasm__) +#if __has_include() +#define LIBYUV_HAVE_PTHREAD 1 +#endif +#elif defined(__linux__) +#define LIBYUV_HAVE_PTHREAD 1 +#endif + +#ifdef LIBYUV_HAVE_PTHREAD +#include +#endif + +namespace libyuv { + +#ifdef LIBYUV_HAVE_PTHREAD +void* ThreadMain(void* arg) { + int* flags = static_cast(arg); + + *flags = TestCpuFlag(kCpuInitialized); + return nullptr; +} +#endif // LIBYUV_HAVE_PTHREAD + +// Call TestCpuFlag() from two threads. ThreadSanitizer should not report any +// data race. +TEST(LibYUVCpuThreadTest, TestCpuFlagMultipleThreads) { +#ifdef LIBYUV_HAVE_PTHREAD + int cpu_flags1; + int cpu_flags2; + int ret; + pthread_t thread1; + pthread_t thread2; + + MaskCpuFlags(0); // Reset to 0 to allow auto detect. + ret = pthread_create(&thread1, nullptr, ThreadMain, &cpu_flags1); + ASSERT_EQ(ret, 0); + ret = pthread_create(&thread2, nullptr, ThreadMain, &cpu_flags2); + ASSERT_EQ(ret, 0); + ret = pthread_join(thread1, nullptr); + EXPECT_EQ(ret, 0); + ret = pthread_join(thread2, nullptr); + EXPECT_EQ(ret, 0); + EXPECT_EQ(cpu_flags1, cpu_flags2); +#else + printf("pthread unavailable; Test skipped."); +#endif // LIBYUV_HAVE_PTHREAD +} + +} // namespace libyuv diff --git a/unit_test/math_test.cc b/unit_test/math_test.cc new file mode 100644 index 00000000..a1544c12 --- /dev/null +++ b/unit_test/math_test.cc @@ -0,0 +1,160 @@ +/* + * Copyright 2013 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include +#include + +#include "../unit_test/unit_test.h" +#include "libyuv/basic_types.h" +#include "libyuv/cpu_id.h" +#include "libyuv/scale.h" + +#ifdef ENABLE_ROW_TESTS +#include "libyuv/scale_row.h" +#endif + +namespace libyuv { + +#ifdef ENABLE_ROW_TESTS +TEST_F(LibYUVBaseTest, TestFixedDiv) { + int num[1280]; + int div[1280]; + int result_opt[1280]; + int result_c[1280]; + + EXPECT_EQ(0x10000, libyuv::FixedDiv(1, 1)); + EXPECT_EQ(0x7fff0000, libyuv::FixedDiv(0x7fff, 1)); + // TODO(fbarchard): Avoid the following that throw exceptions. + // EXPECT_EQ(0x100000000, libyuv::FixedDiv(0x10000, 1)); + // EXPECT_EQ(0x80000000, libyuv::FixedDiv(0x8000, 1)); + + EXPECT_EQ(0x20000, libyuv::FixedDiv(640 * 2, 640)); + EXPECT_EQ(0x30000, libyuv::FixedDiv(640 * 3, 640)); + EXPECT_EQ(0x40000, libyuv::FixedDiv(640 * 4, 640)); + EXPECT_EQ(0x50000, libyuv::FixedDiv(640 * 5, 640)); + EXPECT_EQ(0x60000, libyuv::FixedDiv(640 * 6, 640)); + EXPECT_EQ(0x70000, libyuv::FixedDiv(640 * 7, 640)); + EXPECT_EQ(0x80000, libyuv::FixedDiv(640 * 8, 640)); + EXPECT_EQ(0xa0000, libyuv::FixedDiv(640 * 10, 640)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(960 * 2, 960)); + EXPECT_EQ(0x08000, libyuv::FixedDiv(640 / 2, 640)); + EXPECT_EQ(0x04000, libyuv::FixedDiv(640 / 4, 640)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(1080 * 2, 1080)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(200000, 100000)); + EXPECT_EQ(0x18000, libyuv::FixedDiv(150000, 100000)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(40000, 20000)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(-40000, -20000)); + EXPECT_EQ(-0x20000, libyuv::FixedDiv(40000, -20000)); + EXPECT_EQ(-0x20000, libyuv::FixedDiv(-40000, 20000)); + EXPECT_EQ(0x10000, libyuv::FixedDiv(4095, 4095)); + EXPECT_EQ(0x10000, libyuv::FixedDiv(4096, 4096)); + EXPECT_EQ(0x10000, libyuv::FixedDiv(4097, 4097)); + EXPECT_EQ(123 * 65536, libyuv::FixedDiv(123, 1)); + + for (int i = 1; i < 4100; ++i) { + EXPECT_EQ(0x10000, libyuv::FixedDiv(i, i)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(i 
* 2, i)); + EXPECT_EQ(0x30000, libyuv::FixedDiv(i * 3, i)); + EXPECT_EQ(0x40000, libyuv::FixedDiv(i * 4, i)); + EXPECT_EQ(0x08000, libyuv::FixedDiv(i, i * 2)); + EXPECT_NEAR(16384 * 65536 / i, libyuv::FixedDiv(16384, i), 1); + } + EXPECT_EQ(123 * 65536, libyuv::FixedDiv(123, 1)); + + MemRandomize(reinterpret_cast(&num[0]), sizeof(num)); + MemRandomize(reinterpret_cast(&div[0]), sizeof(div)); + for (int j = 0; j < 1280; ++j) { + if (div[j] == 0) { + div[j] = 1280; + } + num[j] &= 0xffff; // Clamp to avoid divide overflow. + } + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + for (int j = 0; j < 1280; ++j) { + result_opt[j] = libyuv::FixedDiv(num[j], div[j]); + } + } + for (int j = 0; j < 1280; ++j) { + result_c[j] = libyuv::FixedDiv_C(num[j], div[j]); + EXPECT_NEAR(result_c[j], result_opt[j], 1); + } +} + +TEST_F(LibYUVBaseTest, TestFixedDiv_Opt) { + int num[1280]; + int div[1280]; + int result_opt[1280]; + int result_c[1280]; + + MemRandomize(reinterpret_cast(&num[0]), sizeof(num)); + MemRandomize(reinterpret_cast(&div[0]), sizeof(div)); + for (int j = 0; j < 1280; ++j) { + num[j] &= 4095; // Make numerator smaller. + div[j] &= 4095; // Make divisor smaller. 
+ if (div[j] == 0) { + div[j] = 1280; + } + } + + int has_x86 = TestCpuFlag(kCpuHasX86); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + if (has_x86) { + for (int j = 0; j < 1280; ++j) { + result_opt[j] = libyuv::FixedDiv(num[j], div[j]); + } + } else { + for (int j = 0; j < 1280; ++j) { + result_opt[j] = libyuv::FixedDiv_C(num[j], div[j]); + } + } + } + for (int j = 0; j < 1280; ++j) { + result_c[j] = libyuv::FixedDiv_C(num[j], div[j]); + EXPECT_NEAR(result_c[j], result_opt[j], 1); + } +} + +TEST_F(LibYUVBaseTest, TestFixedDiv1_Opt) { + int num[1280]; + int div[1280]; + int result_opt[1280]; + int result_c[1280]; + + MemRandomize(reinterpret_cast(&num[0]), sizeof(num)); + MemRandomize(reinterpret_cast(&div[0]), sizeof(div)); + for (int j = 0; j < 1280; ++j) { + num[j] &= 4095; // Make numerator smaller. + div[j] &= 4095; // Make divisor smaller. + if (div[j] <= 1) { + div[j] = 1280; + } + } + + int has_x86 = TestCpuFlag(kCpuHasX86); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + if (has_x86) { + for (int j = 0; j < 1280; ++j) { + result_opt[j] = libyuv::FixedDiv1(num[j], div[j]); + } + } else { + for (int j = 0; j < 1280; ++j) { + result_opt[j] = libyuv::FixedDiv1_C(num[j], div[j]); + } + } + } + for (int j = 0; j < 1280; ++j) { + result_c[j] = libyuv::FixedDiv1_C(num[j], div[j]); + EXPECT_NEAR(result_c[j], result_opt[j], 1); + } +} +#endif // ENABLE_ROW_TESTS + +} // namespace libyuv diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc new file mode 100644 index 00000000..ad97b87e --- /dev/null +++ b/unit_test/planar_test.cc @@ -0,0 +1,4471 @@ +/* + * Copyright 2011 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include + +#include "../unit_test/unit_test.h" +#include "libyuv/compare.h" +#include "libyuv/convert.h" +#include "libyuv/convert_argb.h" +#include "libyuv/convert_from.h" +#include "libyuv/convert_from_argb.h" +#include "libyuv/cpu_id.h" +#include "libyuv/planar_functions.h" +#include "libyuv/rotate.h" +#include "libyuv/scale.h" + +#ifdef ENABLE_ROW_TESTS +// row.h defines SIMD_ALIGNED, overriding unit_test.h +// TODO(fbarchard): Remove row.h from unittests. Test public functions. +#include "libyuv/row.h" /* For ScaleSumSamples_Neon */ +#endif + +#if defined(LIBYUV_BIT_EXACT) +#define EXPECTED_ATTENUATE_DIFF 0 +#else +#define EXPECTED_ATTENUATE_DIFF 2 +#endif + +namespace libyuv { + +TEST_F(LibYUVPlanarTest, TestAttenuate) { + const int kSize = 1280 * 4; + align_buffer_page_end(orig_pixels, kSize); + align_buffer_page_end(atten_pixels, kSize); + align_buffer_page_end(unatten_pixels, kSize); + align_buffer_page_end(atten2_pixels, kSize); + + // Test unattenuation clamps + orig_pixels[0 * 4 + 0] = 200u; + orig_pixels[0 * 4 + 1] = 129u; + orig_pixels[0 * 4 + 2] = 127u; + orig_pixels[0 * 4 + 3] = 128u; + // Test unattenuation transparent and opaque are unaffected + orig_pixels[1 * 4 + 0] = 16u; + orig_pixels[1 * 4 + 1] = 64u; + orig_pixels[1 * 4 + 2] = 192u; + orig_pixels[1 * 4 + 3] = 0u; + orig_pixels[2 * 4 + 0] = 16u; + orig_pixels[2 * 4 + 1] = 64u; + orig_pixels[2 * 4 + 2] = 192u; + orig_pixels[2 * 4 + 3] = 255u; + orig_pixels[3 * 4 + 0] = 16u; + orig_pixels[3 * 4 + 1] = 64u; + orig_pixels[3 * 4 + 2] = 192u; + orig_pixels[3 * 4 + 3] = 128u; + ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1); + EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]); + EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]); + EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]); + EXPECT_EQ(128u, unatten_pixels[0 * 4 + 3]); + EXPECT_EQ(0u, unatten_pixels[1 * 4 + 0]); + 
EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]); + EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]); + EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]); + EXPECT_EQ(16u, unatten_pixels[2 * 4 + 0]); + EXPECT_EQ(64u, unatten_pixels[2 * 4 + 1]); + EXPECT_EQ(192u, unatten_pixels[2 * 4 + 2]); + EXPECT_EQ(255u, unatten_pixels[2 * 4 + 3]); + EXPECT_EQ(32u, unatten_pixels[3 * 4 + 0]); + EXPECT_EQ(128u, unatten_pixels[3 * 4 + 1]); + EXPECT_EQ(255u, unatten_pixels[3 * 4 + 2]); + EXPECT_EQ(128u, unatten_pixels[3 * 4 + 3]); + + for (int i = 0; i < 1280; ++i) { + orig_pixels[i * 4 + 0] = i; + orig_pixels[i * 4 + 1] = i / 2; + orig_pixels[i * 4 + 2] = i / 3; + orig_pixels[i * 4 + 3] = i; + } + ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 1280, 1); + ARGBUnattenuate(atten_pixels, 0, unatten_pixels, 0, 1280, 1); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1); + } + for (int i = 0; i < 1280; ++i) { + EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 2); + EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 2); + EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 2); + EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 2); + } + // Make sure transparent, 50% and opaque are fully accurate. 
+ EXPECT_EQ(0, atten_pixels[0 * 4 + 0]); + EXPECT_EQ(0, atten_pixels[0 * 4 + 1]); + EXPECT_EQ(0, atten_pixels[0 * 4 + 2]); + EXPECT_EQ(0, atten_pixels[0 * 4 + 3]); + EXPECT_EQ(64, atten_pixels[128 * 4 + 0]); + EXPECT_EQ(32, atten_pixels[128 * 4 + 1]); + EXPECT_EQ(21, atten_pixels[128 * 4 + 2]); + EXPECT_EQ(128, atten_pixels[128 * 4 + 3]); + EXPECT_NEAR(254, atten_pixels[255 * 4 + 0], EXPECTED_ATTENUATE_DIFF); + EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], EXPECTED_ATTENUATE_DIFF); + EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], EXPECTED_ATTENUATE_DIFF); + EXPECT_EQ(255, atten_pixels[255 * 4 + 3]); + + free_aligned_buffer_page_end(atten2_pixels); + free_aligned_buffer_page_end(unatten_pixels); + free_aligned_buffer_page_end(atten_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +static int TestAttenuateI(int width, + int height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + int invert, + int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb[i + off] = (fastrand() & 0xff); + } + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBAttenuate(src_argb + off, kStride, dst_argb_c, kStride, width, + invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBAttenuate(src_argb + off, kStride, dst_argb_opt, kStride, width, + invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = abs(static_cast(dst_argb_c[i]) - + static_cast(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb); + 
free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) { + int max_diff = TestAttenuateI(benchmark_width_ + 1, benchmark_height_, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_, +1, 0); + + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); +} + +TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) { + int max_diff = + TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); +} + +TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) { + int max_diff = + TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); +} + +TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) { + int max_diff = + TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); +} + +static int TestUnattenuateI(int width, + int height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + int invert, + int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb[i + off] = (fastrand() & 0xff); + } + ARGBAttenuate(src_argb + off, kStride, src_argb + off, kStride, width, + height); + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBUnattenuate(src_argb + off, kStride, dst_argb_c, kStride, width, + invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < 
benchmark_iterations; ++i) { + ARGBUnattenuate(src_argb + off, kStride, dst_argb_opt, kStride, width, + invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = abs(static_cast(dst_argb_c[i]) - + static_cast(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) { + int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); +} + +TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) { + int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_, +1, 1); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); +} + +TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) { + int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_, -1, 0); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); +} + +TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) { + int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); +} + +TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { + SIMD_ALIGNED(uint8_t orig_pixels[16][16][4]); + SIMD_ALIGNED(int32_t added_pixels[16][16][4]); + + for (int y = 0; y < 16; ++y) { + for (int x = 0; x < 16; ++x) { + orig_pixels[y][x][0] = 1u; + orig_pixels[y][x][1] = 2u; + orig_pixels[y][x][2] = 3u; + orig_pixels[y][x][3] = 255u; + } + } + + ARGBComputeCumulativeSum(&orig_pixels[0][0][0], 16 * 4, + &added_pixels[0][0][0], 16 * 4, 16, 16); + + for (int y = 0; y < 16; ++y) { + for (int 
x = 0; x < 16; ++x) { + EXPECT_EQ((x + 1) * (y + 1), added_pixels[y][x][0]); + EXPECT_EQ((x + 1) * (y + 1) * 2, added_pixels[y][x][1]); + EXPECT_EQ((x + 1) * (y + 1) * 3, added_pixels[y][x][2]); + EXPECT_EQ((x + 1) * (y + 1) * 255, added_pixels[y][x][3]); + } + } +} + +// near is for legacy platforms. +TEST_F(LibYUVPlanarTest, TestARGBGray) { + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + + // Test blue + orig_pixels[0][0] = 255u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 128u; + // Test green + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 255u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 0u; + // Test red + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 255u; + orig_pixels[2][3] = 255u; + // Test black + orig_pixels[3][0] = 0u; + orig_pixels[3][1] = 0u; + orig_pixels[3][2] = 0u; + orig_pixels[3][3] = 255u; + // Test white + orig_pixels[4][0] = 255u; + orig_pixels[4][1] = 255u; + orig_pixels[4][2] = 255u; + orig_pixels[4][3] = 255u; + // Test color + orig_pixels[5][0] = 16u; + orig_pixels[5][1] = 64u; + orig_pixels[5][2] = 192u; + orig_pixels[5][3] = 224u; + // Do 16 to test asm version. 
+ ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1); + EXPECT_NEAR(29u, orig_pixels[0][0], 1); + EXPECT_NEAR(29u, orig_pixels[0][1], 1); + EXPECT_NEAR(29u, orig_pixels[0][2], 1); + EXPECT_EQ(128u, orig_pixels[0][3]); + EXPECT_EQ(149u, orig_pixels[1][0]); + EXPECT_EQ(149u, orig_pixels[1][1]); + EXPECT_EQ(149u, orig_pixels[1][2]); + EXPECT_EQ(0u, orig_pixels[1][3]); + EXPECT_NEAR(77u, orig_pixels[2][0], 1); + EXPECT_NEAR(77u, orig_pixels[2][1], 1); + EXPECT_NEAR(77u, orig_pixels[2][2], 1); + EXPECT_EQ(255u, orig_pixels[2][3]); + EXPECT_EQ(0u, orig_pixels[3][0]); + EXPECT_EQ(0u, orig_pixels[3][1]); + EXPECT_EQ(0u, orig_pixels[3][2]); + EXPECT_EQ(255u, orig_pixels[3][3]); + EXPECT_EQ(255u, orig_pixels[4][0]); + EXPECT_EQ(255u, orig_pixels[4][1]); + EXPECT_EQ(255u, orig_pixels[4][2]); + EXPECT_EQ(255u, orig_pixels[4][3]); + EXPECT_NEAR(97u, orig_pixels[5][0], 1); + EXPECT_NEAR(97u, orig_pixels[5][1], 1); + EXPECT_NEAR(97u, orig_pixels[5][2], 1); + EXPECT_EQ(224u, orig_pixels[5][3]); + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBGray(&orig_pixels[0][0], 0, 0, 0, 1280, 1); + } +} + +TEST_F(LibYUVPlanarTest, TestARGBGrayTo) { + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t gray_pixels[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + + // Test blue + orig_pixels[0][0] = 255u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 128u; + // Test green + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 255u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 0u; + // Test red + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 255u; + orig_pixels[2][3] = 255u; + // Test black + orig_pixels[3][0] = 0u; + orig_pixels[3][1] = 0u; + orig_pixels[3][2] = 0u; + orig_pixels[3][3] = 255u; + // Test white + orig_pixels[4][0] = 255u; + orig_pixels[4][1] = 255u; + 
orig_pixels[4][2] = 255u; + orig_pixels[4][3] = 255u; + // Test color + orig_pixels[5][0] = 16u; + orig_pixels[5][1] = 64u; + orig_pixels[5][2] = 192u; + orig_pixels[5][3] = 224u; + // Do 16 to test asm version. + ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1); + EXPECT_NEAR(30u, gray_pixels[0][0], 1); + EXPECT_NEAR(30u, gray_pixels[0][1], 1); + EXPECT_NEAR(30u, gray_pixels[0][2], 1); + EXPECT_NEAR(128u, gray_pixels[0][3], 1); + EXPECT_NEAR(149u, gray_pixels[1][0], 1); + EXPECT_NEAR(149u, gray_pixels[1][1], 1); + EXPECT_NEAR(149u, gray_pixels[1][2], 1); + EXPECT_NEAR(0u, gray_pixels[1][3], 1); + EXPECT_NEAR(76u, gray_pixels[2][0], 1); + EXPECT_NEAR(76u, gray_pixels[2][1], 1); + EXPECT_NEAR(76u, gray_pixels[2][2], 1); + EXPECT_NEAR(255u, gray_pixels[2][3], 1); + EXPECT_NEAR(0u, gray_pixels[3][0], 1); + EXPECT_NEAR(0u, gray_pixels[3][1], 1); + EXPECT_NEAR(0u, gray_pixels[3][2], 1); + EXPECT_NEAR(255u, gray_pixels[3][3], 1); + EXPECT_NEAR(255u, gray_pixels[4][0], 1); + EXPECT_NEAR(255u, gray_pixels[4][1], 1); + EXPECT_NEAR(255u, gray_pixels[4][2], 1); + EXPECT_NEAR(255u, gray_pixels[4][3], 1); + EXPECT_NEAR(96u, gray_pixels[5][0], 1); + EXPECT_NEAR(96u, gray_pixels[5][1], 1); + EXPECT_NEAR(96u, gray_pixels[5][2], 1); + EXPECT_NEAR(224u, gray_pixels[5][3], 1); + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1); + } + + for (int i = 0; i < 256; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i; + orig_pixels[i][2] = i; + orig_pixels[i][3] = i; + } + ARGBGray(&orig_pixels[0][0], 0, 0, 0, 256, 1); + for (int i = 0; i < 256; ++i) { + EXPECT_EQ(i, orig_pixels[i][0]); + EXPECT_EQ(i, orig_pixels[i][1]); + EXPECT_EQ(i, orig_pixels[i][2]); + EXPECT_EQ(i, orig_pixels[i][3]); + } +} + +TEST_F(LibYUVPlanarTest, TestARGBSepia) { + 
SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + + // Test blue + orig_pixels[0][0] = 255u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 128u; + // Test green + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 255u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 0u; + // Test red + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 255u; + orig_pixels[2][3] = 255u; + // Test black + orig_pixels[3][0] = 0u; + orig_pixels[3][1] = 0u; + orig_pixels[3][2] = 0u; + orig_pixels[3][3] = 255u; + // Test white + orig_pixels[4][0] = 255u; + orig_pixels[4][1] = 255u; + orig_pixels[4][2] = 255u; + orig_pixels[4][3] = 255u; + // Test color + orig_pixels[5][0] = 16u; + orig_pixels[5][1] = 64u; + orig_pixels[5][2] = 192u; + orig_pixels[5][3] = 224u; + // Do 16 to test asm version. + ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1); + EXPECT_EQ(33u, orig_pixels[0][0]); + EXPECT_EQ(43u, orig_pixels[0][1]); + EXPECT_EQ(47u, orig_pixels[0][2]); + EXPECT_EQ(128u, orig_pixels[0][3]); + EXPECT_EQ(135u, orig_pixels[1][0]); + EXPECT_EQ(175u, orig_pixels[1][1]); + EXPECT_EQ(195u, orig_pixels[1][2]); + EXPECT_EQ(0u, orig_pixels[1][3]); + EXPECT_EQ(69u, orig_pixels[2][0]); + EXPECT_EQ(89u, orig_pixels[2][1]); + EXPECT_EQ(99u, orig_pixels[2][2]); + EXPECT_EQ(255u, orig_pixels[2][3]); + EXPECT_EQ(0u, orig_pixels[3][0]); + EXPECT_EQ(0u, orig_pixels[3][1]); + EXPECT_EQ(0u, orig_pixels[3][2]); + EXPECT_EQ(255u, orig_pixels[3][3]); + EXPECT_EQ(239u, orig_pixels[4][0]); + EXPECT_EQ(255u, orig_pixels[4][1]); + EXPECT_EQ(255u, orig_pixels[4][2]); + EXPECT_EQ(255u, orig_pixels[4][3]); + EXPECT_EQ(88u, orig_pixels[5][0]); + EXPECT_EQ(114u, orig_pixels[5][1]); + EXPECT_EQ(127u, orig_pixels[5][2]); + EXPECT_EQ(224u, orig_pixels[5][3]); + + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + for (int i = 0; i < 
benchmark_pixels_div1280_; ++i) { + ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 1280, 1); + } +} + +TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) { + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]); + + // Matrix for Sepia. + SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = { + 17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0, + 24 / 2, 98 / 2, 50 / 2, 0, 0, 0, 0, 64, // Copy alpha. + }; + memset(orig_pixels, 0, sizeof(orig_pixels)); + + // Test blue + orig_pixels[0][0] = 255u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 128u; + // Test green + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 255u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 0u; + // Test red + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 255u; + orig_pixels[2][3] = 255u; + // Test color + orig_pixels[3][0] = 16u; + orig_pixels[3][1] = 64u; + orig_pixels[3][2] = 192u; + orig_pixels[3][3] = 224u; + // Do 16 to test asm version. 
+ ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, + &kRGBToSepia[0], 16, 1); + EXPECT_EQ(31u, dst_pixels_opt[0][0]); + EXPECT_EQ(43u, dst_pixels_opt[0][1]); + EXPECT_EQ(47u, dst_pixels_opt[0][2]); + EXPECT_EQ(128u, dst_pixels_opt[0][3]); + EXPECT_EQ(135u, dst_pixels_opt[1][0]); + EXPECT_EQ(175u, dst_pixels_opt[1][1]); + EXPECT_EQ(195u, dst_pixels_opt[1][2]); + EXPECT_EQ(0u, dst_pixels_opt[1][3]); + EXPECT_EQ(67u, dst_pixels_opt[2][0]); + EXPECT_EQ(87u, dst_pixels_opt[2][1]); + EXPECT_EQ(99u, dst_pixels_opt[2][2]); + EXPECT_EQ(255u, dst_pixels_opt[2][3]); + EXPECT_EQ(87u, dst_pixels_opt[3][0]); + EXPECT_EQ(112u, dst_pixels_opt[3][1]); + EXPECT_EQ(127u, dst_pixels_opt[3][2]); + EXPECT_EQ(224u, dst_pixels_opt[3][3]); + + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + MaskCpuFlags(disable_cpu_flags_); + ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, + &kRGBToSepia[0], 1280, 1); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, + &kRGBToSepia[0], 1280, 1); + } + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); + EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); + EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); + EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); + } +} + +TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) { + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + + // Matrix for Sepia. + SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = { + 17, 68, 35, 0, 22, 88, 45, 0, + 24, 98, 50, 0, 0, 0, 0, 0, // Unused but makes matrix 16 bytes. 
+ }; + memset(orig_pixels, 0, sizeof(orig_pixels)); + + // Test blue + orig_pixels[0][0] = 255u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 128u; + // Test green + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 255u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 0u; + // Test red + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 255u; + orig_pixels[2][3] = 255u; + // Test color + orig_pixels[3][0] = 16u; + orig_pixels[3][1] = 64u; + orig_pixels[3][2] = 192u; + orig_pixels[3][3] = 224u; + // Do 16 to test asm version. + RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1); + EXPECT_EQ(31u, orig_pixels[0][0]); + EXPECT_EQ(43u, orig_pixels[0][1]); + EXPECT_EQ(47u, orig_pixels[0][2]); + EXPECT_EQ(128u, orig_pixels[0][3]); + EXPECT_EQ(135u, orig_pixels[1][0]); + EXPECT_EQ(175u, orig_pixels[1][1]); + EXPECT_EQ(195u, orig_pixels[1][2]); + EXPECT_EQ(0u, orig_pixels[1][3]); + EXPECT_EQ(67u, orig_pixels[2][0]); + EXPECT_EQ(87u, orig_pixels[2][1]); + EXPECT_EQ(99u, orig_pixels[2][2]); + EXPECT_EQ(255u, orig_pixels[2][3]); + EXPECT_EQ(87u, orig_pixels[3][0]); + EXPECT_EQ(112u, orig_pixels[3][1]); + EXPECT_EQ(127u, orig_pixels[3][2]); + EXPECT_EQ(224u, orig_pixels[3][3]); + + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 1280, 1); + } +} + +TEST_F(LibYUVPlanarTest, TestARGBColorTable) { + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + + // Matrix for Sepia. 
+ static const uint8_t kARGBTable[256 * 4] = { + 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u, + }; + + orig_pixels[0][0] = 0u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 0u; + orig_pixels[1][0] = 1u; + orig_pixels[1][1] = 1u; + orig_pixels[1][2] = 1u; + orig_pixels[1][3] = 1u; + orig_pixels[2][0] = 2u; + orig_pixels[2][1] = 2u; + orig_pixels[2][2] = 2u; + orig_pixels[2][3] = 2u; + orig_pixels[3][0] = 0u; + orig_pixels[3][1] = 1u; + orig_pixels[3][2] = 2u; + orig_pixels[3][3] = 3u; + // Do 16 to test asm version. + ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1); + EXPECT_EQ(1u, orig_pixels[0][0]); + EXPECT_EQ(2u, orig_pixels[0][1]); + EXPECT_EQ(3u, orig_pixels[0][2]); + EXPECT_EQ(4u, orig_pixels[0][3]); + EXPECT_EQ(5u, orig_pixels[1][0]); + EXPECT_EQ(6u, orig_pixels[1][1]); + EXPECT_EQ(7u, orig_pixels[1][2]); + EXPECT_EQ(8u, orig_pixels[1][3]); + EXPECT_EQ(9u, orig_pixels[2][0]); + EXPECT_EQ(10u, orig_pixels[2][1]); + EXPECT_EQ(11u, orig_pixels[2][2]); + EXPECT_EQ(12u, orig_pixels[2][3]); + EXPECT_EQ(1u, orig_pixels[3][0]); + EXPECT_EQ(6u, orig_pixels[3][1]); + EXPECT_EQ(11u, orig_pixels[3][2]); + EXPECT_EQ(16u, orig_pixels[3][3]); + + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1); + } +} + +// Same as TestARGBColorTable except alpha does not change. +TEST_F(LibYUVPlanarTest, TestRGBColorTable) { + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + + // Matrix for Sepia. 
+ static const uint8_t kARGBTable[256 * 4] = { + 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u, + }; + + orig_pixels[0][0] = 0u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 0u; + orig_pixels[1][0] = 1u; + orig_pixels[1][1] = 1u; + orig_pixels[1][2] = 1u; + orig_pixels[1][3] = 1u; + orig_pixels[2][0] = 2u; + orig_pixels[2][1] = 2u; + orig_pixels[2][2] = 2u; + orig_pixels[2][3] = 2u; + orig_pixels[3][0] = 0u; + orig_pixels[3][1] = 1u; + orig_pixels[3][2] = 2u; + orig_pixels[3][3] = 3u; + // Do 16 to test asm version. + RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1); + EXPECT_EQ(1u, orig_pixels[0][0]); + EXPECT_EQ(2u, orig_pixels[0][1]); + EXPECT_EQ(3u, orig_pixels[0][2]); + EXPECT_EQ(0u, orig_pixels[0][3]); // Alpha unchanged. + EXPECT_EQ(5u, orig_pixels[1][0]); + EXPECT_EQ(6u, orig_pixels[1][1]); + EXPECT_EQ(7u, orig_pixels[1][2]); + EXPECT_EQ(1u, orig_pixels[1][3]); // Alpha unchanged. + EXPECT_EQ(9u, orig_pixels[2][0]); + EXPECT_EQ(10u, orig_pixels[2][1]); + EXPECT_EQ(11u, orig_pixels[2][2]); + EXPECT_EQ(2u, orig_pixels[2][3]); // Alpha unchanged. + EXPECT_EQ(1u, orig_pixels[3][0]); + EXPECT_EQ(6u, orig_pixels[3][1]); + EXPECT_EQ(11u, orig_pixels[3][2]); + EXPECT_EQ(3u, orig_pixels[3][3]); // Alpha unchanged. 
+ + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1); + } +} + +TEST_F(LibYUVPlanarTest, TestARGBQuantize) { + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + ARGBQuantize(&orig_pixels[0][0], 0, (65536 + (8 / 2)) / 8, 8, 8 / 2, 0, 0, + 1280, 1); + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ((i / 8 * 8 + 8 / 2) & 255, orig_pixels[i][0]); + EXPECT_EQ((i / 2 / 8 * 8 + 8 / 2) & 255, orig_pixels[i][1]); + EXPECT_EQ((i / 3 / 8 * 8 + 8 / 2) & 255, orig_pixels[i][2]); + EXPECT_EQ(i & 255, orig_pixels[i][3]); + } + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBQuantize(&orig_pixels[0][0], 0, (65536 + (8 / 2)) / 8, 8, 8 / 2, 0, 0, + 1280, 1); + } +} + +TEST_F(LibYUVPlanarTest, ARGBMirror_Opt) { + align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_pixels_opt, + benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_ * 4); + + MemRandomize(src_pixels, benchmark_width_ * benchmark_height_ * 4); + MaskCpuFlags(disable_cpu_flags_); + ARGBMirror(src_pixels, benchmark_width_ * 4, dst_pixels_c, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + ARGBMirror(src_pixels, benchmark_width_ * 4, dst_pixels_opt, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + } + for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_opt); + 
free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, MirrorPlane_Opt) { + align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_); + align_buffer_page_end(dst_pixels_opt, benchmark_width_ * benchmark_height_); + align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_); + + MemRandomize(src_pixels, benchmark_width_ * benchmark_height_); + MaskCpuFlags(disable_cpu_flags_); + MirrorPlane(src_pixels, benchmark_width_, dst_pixels_c, benchmark_width_, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MirrorPlane(src_pixels, benchmark_width_, dst_pixels_opt, benchmark_width_, + benchmark_width_, benchmark_height_); + } + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, MirrorUVPlane_Opt) { + align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_pixels_opt, + benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_ * 2); + + MemRandomize(src_pixels, benchmark_width_ * benchmark_height_ * 2); + MaskCpuFlags(disable_cpu_flags_); + MirrorUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c, + benchmark_width_ * 2, benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MirrorUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt, + benchmark_width_ * 2, benchmark_width_, benchmark_height_); + } + for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_opt); + 
free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, TestShade) { + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t shade_pixels[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + + orig_pixels[0][0] = 10u; + orig_pixels[0][1] = 20u; + orig_pixels[0][2] = 40u; + orig_pixels[0][3] = 80u; + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 0u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 255u; + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 0u; + orig_pixels[2][3] = 0u; + orig_pixels[3][0] = 0u; + orig_pixels[3][1] = 0u; + orig_pixels[3][2] = 0u; + orig_pixels[3][3] = 0u; + // Do 8 pixels to allow opt version to be used. + ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80ffffff); + EXPECT_EQ(10u, shade_pixels[0][0]); + EXPECT_EQ(20u, shade_pixels[0][1]); + EXPECT_EQ(40u, shade_pixels[0][2]); + EXPECT_EQ(40u, shade_pixels[0][3]); + EXPECT_EQ(0u, shade_pixels[1][0]); + EXPECT_EQ(0u, shade_pixels[1][1]); + EXPECT_EQ(0u, shade_pixels[1][2]); + EXPECT_EQ(128u, shade_pixels[1][3]); + EXPECT_EQ(0u, shade_pixels[2][0]); + EXPECT_EQ(0u, shade_pixels[2][1]); + EXPECT_EQ(0u, shade_pixels[2][2]); + EXPECT_EQ(0u, shade_pixels[2][3]); + EXPECT_EQ(0u, shade_pixels[3][0]); + EXPECT_EQ(0u, shade_pixels[3][1]); + EXPECT_EQ(0u, shade_pixels[3][2]); + EXPECT_EQ(0u, shade_pixels[3][3]); + + ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80808080); + EXPECT_EQ(5u, shade_pixels[0][0]); + EXPECT_EQ(10u, shade_pixels[0][1]); + EXPECT_EQ(20u, shade_pixels[0][2]); + EXPECT_EQ(40u, shade_pixels[0][3]); + + ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x10204080); + EXPECT_EQ(5u, shade_pixels[0][0]); + EXPECT_EQ(5u, shade_pixels[0][1]); + EXPECT_EQ(5u, shade_pixels[0][2]); + EXPECT_EQ(5u, shade_pixels[0][3]); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 1280, 1, + 0x80808080); + } +} + 
+TEST_F(LibYUVPlanarTest, TestARGBInterpolate) { + SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels_1[1280][4]); + SIMD_ALIGNED(uint8_t interpolate_pixels[1280][4]); + memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); + memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); + + orig_pixels_0[0][0] = 16u; + orig_pixels_0[0][1] = 32u; + orig_pixels_0[0][2] = 64u; + orig_pixels_0[0][3] = 128u; + orig_pixels_0[1][0] = 0u; + orig_pixels_0[1][1] = 0u; + orig_pixels_0[1][2] = 0u; + orig_pixels_0[1][3] = 255u; + orig_pixels_0[2][0] = 0u; + orig_pixels_0[2][1] = 0u; + orig_pixels_0[2][2] = 0u; + orig_pixels_0[2][3] = 0u; + orig_pixels_0[3][0] = 0u; + orig_pixels_0[3][1] = 0u; + orig_pixels_0[3][2] = 0u; + orig_pixels_0[3][3] = 0u; + + orig_pixels_1[0][0] = 0u; + orig_pixels_1[0][1] = 0u; + orig_pixels_1[0][2] = 0u; + orig_pixels_1[0][3] = 0u; + orig_pixels_1[1][0] = 0u; + orig_pixels_1[1][1] = 0u; + orig_pixels_1[1][2] = 0u; + orig_pixels_1[1][3] = 0u; + orig_pixels_1[2][0] = 0u; + orig_pixels_1[2][1] = 0u; + orig_pixels_1[2][2] = 0u; + orig_pixels_1[2][3] = 0u; + orig_pixels_1[3][0] = 255u; + orig_pixels_1[3][1] = 255u; + orig_pixels_1[3][2] = 255u; + orig_pixels_1[3][3] = 255u; + + ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, + &interpolate_pixels[0][0], 0, 4, 1, 128); + EXPECT_EQ(8u, interpolate_pixels[0][0]); + EXPECT_EQ(16u, interpolate_pixels[0][1]); + EXPECT_EQ(32u, interpolate_pixels[0][2]); + EXPECT_EQ(64u, interpolate_pixels[0][3]); + EXPECT_EQ(0u, interpolate_pixels[1][0]); + EXPECT_EQ(0u, interpolate_pixels[1][1]); + EXPECT_EQ(0u, interpolate_pixels[1][2]); + EXPECT_EQ(128u, interpolate_pixels[1][3]); + EXPECT_EQ(0u, interpolate_pixels[2][0]); + EXPECT_EQ(0u, interpolate_pixels[2][1]); + EXPECT_EQ(0u, interpolate_pixels[2][2]); + EXPECT_EQ(0u, interpolate_pixels[2][3]); + EXPECT_EQ(128u, interpolate_pixels[3][0]); + EXPECT_EQ(128u, interpolate_pixels[3][1]); + EXPECT_EQ(128u, interpolate_pixels[3][2]); + 
EXPECT_EQ(128u, interpolate_pixels[3][3]); + + ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, + &interpolate_pixels[0][0], 0, 4, 1, 0); + EXPECT_EQ(16u, interpolate_pixels[0][0]); + EXPECT_EQ(32u, interpolate_pixels[0][1]); + EXPECT_EQ(64u, interpolate_pixels[0][2]); + EXPECT_EQ(128u, interpolate_pixels[0][3]); + + ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, + &interpolate_pixels[0][0], 0, 4, 1, 192); + + EXPECT_EQ(4u, interpolate_pixels[0][0]); + EXPECT_EQ(8u, interpolate_pixels[0][1]); + EXPECT_EQ(16u, interpolate_pixels[0][2]); + EXPECT_EQ(32u, interpolate_pixels[0][3]); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, + &interpolate_pixels[0][0], 0, 1280, 1, 128); + } +} + +TEST_F(LibYUVPlanarTest, TestInterpolatePlane) { + SIMD_ALIGNED(uint8_t orig_pixels_0[1280]); + SIMD_ALIGNED(uint8_t orig_pixels_1[1280]); + SIMD_ALIGNED(uint8_t interpolate_pixels[1280]); + memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); + memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); + + orig_pixels_0[0] = 16u; + orig_pixels_0[1] = 32u; + orig_pixels_0[2] = 64u; + orig_pixels_0[3] = 128u; + orig_pixels_0[4] = 0u; + orig_pixels_0[5] = 0u; + orig_pixels_0[6] = 0u; + orig_pixels_0[7] = 255u; + orig_pixels_0[8] = 0u; + orig_pixels_0[9] = 0u; + orig_pixels_0[10] = 0u; + orig_pixels_0[11] = 0u; + orig_pixels_0[12] = 0u; + orig_pixels_0[13] = 0u; + orig_pixels_0[14] = 0u; + orig_pixels_0[15] = 0u; + + orig_pixels_1[0] = 0u; + orig_pixels_1[1] = 0u; + orig_pixels_1[2] = 0u; + orig_pixels_1[3] = 0u; + orig_pixels_1[4] = 0u; + orig_pixels_1[5] = 0u; + orig_pixels_1[6] = 0u; + orig_pixels_1[7] = 0u; + orig_pixels_1[8] = 0u; + orig_pixels_1[9] = 0u; + orig_pixels_1[10] = 0u; + orig_pixels_1[11] = 0u; + orig_pixels_1[12] = 255u; + orig_pixels_1[13] = 255u; + orig_pixels_1[14] = 255u; + orig_pixels_1[15] = 255u; + + InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + 
&interpolate_pixels[0], 0, 16, 1, 128); + EXPECT_EQ(8u, interpolate_pixels[0]); + EXPECT_EQ(16u, interpolate_pixels[1]); + EXPECT_EQ(32u, interpolate_pixels[2]); + EXPECT_EQ(64u, interpolate_pixels[3]); + EXPECT_EQ(0u, interpolate_pixels[4]); + EXPECT_EQ(0u, interpolate_pixels[5]); + EXPECT_EQ(0u, interpolate_pixels[6]); + EXPECT_EQ(128u, interpolate_pixels[7]); + EXPECT_EQ(0u, interpolate_pixels[8]); + EXPECT_EQ(0u, interpolate_pixels[9]); + EXPECT_EQ(0u, interpolate_pixels[10]); + EXPECT_EQ(0u, interpolate_pixels[11]); + EXPECT_EQ(128u, interpolate_pixels[12]); + EXPECT_EQ(128u, interpolate_pixels[13]); + EXPECT_EQ(128u, interpolate_pixels[14]); + EXPECT_EQ(128u, interpolate_pixels[15]); + + InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 0); + EXPECT_EQ(16u, interpolate_pixels[0]); + EXPECT_EQ(32u, interpolate_pixels[1]); + EXPECT_EQ(64u, interpolate_pixels[2]); + EXPECT_EQ(128u, interpolate_pixels[3]); + + InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 192); + + EXPECT_EQ(4u, interpolate_pixels[0]); + EXPECT_EQ(8u, interpolate_pixels[1]); + EXPECT_EQ(16u, interpolate_pixels[2]); + EXPECT_EQ(32u, interpolate_pixels[3]); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 1280, 1, 123); + } +} + +TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) { + SIMD_ALIGNED(uint16_t orig_pixels_0[1280]); + SIMD_ALIGNED(uint16_t orig_pixels_1[1280]); + SIMD_ALIGNED(uint16_t interpolate_pixels[1280]); + memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); + memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); + + orig_pixels_0[0] = 16u; + orig_pixels_0[1] = 32u; + orig_pixels_0[2] = 64u; + orig_pixels_0[3] = 128u; + orig_pixels_0[4] = 0u; + orig_pixels_0[5] = 0u; + orig_pixels_0[6] = 0u; + orig_pixels_0[7] = 255u; + orig_pixels_0[8] = 0u; + orig_pixels_0[9] = 0u; + orig_pixels_0[10] = 0u; + 
orig_pixels_0[11] = 0u; + orig_pixels_0[12] = 0u; + orig_pixels_0[13] = 0u; + orig_pixels_0[14] = 0u; + orig_pixels_0[15] = 0u; + + orig_pixels_1[0] = 0u; + orig_pixels_1[1] = 0u; + orig_pixels_1[2] = 0u; + orig_pixels_1[3] = 0u; + orig_pixels_1[4] = 0u; + orig_pixels_1[5] = 0u; + orig_pixels_1[6] = 0u; + orig_pixels_1[7] = 0u; + orig_pixels_1[8] = 0u; + orig_pixels_1[9] = 0u; + orig_pixels_1[10] = 0u; + orig_pixels_1[11] = 0u; + orig_pixels_1[12] = 255u; + orig_pixels_1[13] = 255u; + orig_pixels_1[14] = 255u; + orig_pixels_1[15] = 255u; + + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 128); + EXPECT_EQ(8u, interpolate_pixels[0]); + EXPECT_EQ(16u, interpolate_pixels[1]); + EXPECT_EQ(32u, interpolate_pixels[2]); + EXPECT_EQ(64u, interpolate_pixels[3]); + EXPECT_EQ(0u, interpolate_pixels[4]); + EXPECT_EQ(0u, interpolate_pixels[5]); + EXPECT_EQ(0u, interpolate_pixels[6]); + EXPECT_EQ(128u, interpolate_pixels[7]); + EXPECT_EQ(0u, interpolate_pixels[8]); + EXPECT_EQ(0u, interpolate_pixels[9]); + EXPECT_EQ(0u, interpolate_pixels[10]); + EXPECT_EQ(0u, interpolate_pixels[11]); + EXPECT_EQ(128u, interpolate_pixels[12]); + EXPECT_EQ(128u, interpolate_pixels[13]); + EXPECT_EQ(128u, interpolate_pixels[14]); + EXPECT_EQ(128u, interpolate_pixels[15]); + + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 0); + EXPECT_EQ(16u, interpolate_pixels[0]); + EXPECT_EQ(32u, interpolate_pixels[1]); + EXPECT_EQ(64u, interpolate_pixels[2]); + EXPECT_EQ(128u, interpolate_pixels[3]); + + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 192); + + EXPECT_EQ(4u, interpolate_pixels[0]); + EXPECT_EQ(8u, interpolate_pixels[1]); + EXPECT_EQ(16u, interpolate_pixels[2]); + EXPECT_EQ(32u, interpolate_pixels[3]); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + 
&interpolate_pixels[0], 0, 1280, 1, 123); + } +} + +#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \ + N, NEG, OFF) \ + TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStrideA = \ + (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = \ + (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \ + align_buffer_page_end(src_argb_b, kStrideA* kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight); \ + for (int i = 0; i < kStrideA * kHeight; ++i) { \ + src_argb_a[i + OFF] = (fastrand() & 0xff); \ + src_argb_b[i + OFF] = (fastrand() & 0xff); \ + } \ + MaskCpuFlags(disable_cpu_flags_); \ + ARGBInterpolate(src_argb_a + OFF, kStrideA, src_argb_b + OFF, kStrideA, \ + dst_argb_c, kStrideB, kWidth, NEG kHeight, TERP); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + ARGBInterpolate(src_argb_a + OFF, kStrideA, src_argb_b + OFF, kStrideA, \ + dst_argb_opt, kStrideB, kWidth, NEG kHeight, TERP); \ + } \ + for (int i = 0; i < kStrideB * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb_a); \ + free_aligned_buffer_page_end(src_argb_b); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#define TESTINTERPOLATE(TERP) \ + TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ + 1, TERP, _Any, +, 0) \ + TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \ + TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0) \ + TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0) + +TESTINTERPOLATE(0) +TESTINTERPOLATE(64) +TESTINTERPOLATE(128) +TESTINTERPOLATE(192) +TESTINTERPOLATE(255) + +static int 
TestBlend(int width, + int height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + int invert, + int off, + int attenuate) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(src_argb_b, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + src_argb_b[i + off] = (fastrand() & 0xff); + } + MemRandomize(src_argb_a, kStride * height + off); + MemRandomize(src_argb_b, kStride * height + off); + if (attenuate) { + ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width, + height); + } + memset(dst_argb_c, 255, kStride * height); + memset(dst_argb_opt, 255, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c, + kStride, width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, + dst_argb_opt, kStride, width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = abs(static_cast(dst_argb_c[i]) - + static_cast(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(src_argb_b); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBBlend_Any) { + int max_diff = + TestBlend(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) { + int max_diff = + 
TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 1); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) { + int max_diff = + TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 1); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlend_Unattenuated) { + int max_diff = + TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { + int max_diff = + TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1); + EXPECT_LE(max_diff, 1); +} + +static void TestBlendPlane(int width, + int height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + int invert, + int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 1; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(src_argb_b, kStride * height + off); + align_buffer_page_end(src_argb_alpha, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height + off); + align_buffer_page_end(dst_argb_opt, kStride * height + off); + memset(dst_argb_c, 255, kStride * height + off); + memset(dst_argb_opt, 255, kStride * height + off); + + // Test source is maintained exactly if alpha is 255. + for (int i = 0; i < width; ++i) { + src_argb_a[i + off] = i & 255; + src_argb_b[i + off] = 255 - (i & 255); + } + memset(src_argb_alpha + off, 255, width); + BlendPlane(src_argb_a + off, width, src_argb_b + off, width, + src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1); + for (int i = 0; i < width; ++i) { + EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]); + } + // Test destination is maintained exactly if alpha is 0. 
+ memset(src_argb_alpha + off, 0, width); + BlendPlane(src_argb_a + off, width, src_argb_b + off, width, + src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1); + for (int i = 0; i < width; ++i) { + EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]); + } + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + src_argb_b[i + off] = (fastrand() & 0xff); + src_argb_alpha[i + off] = (fastrand() & 0xff); + } + + MaskCpuFlags(disable_cpu_flags); + BlendPlane(src_argb_a + off, width, src_argb_b + off, width, + src_argb_alpha + off, width, dst_argb_c + off, width, width, + invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + BlendPlane(src_argb_a + off, width, src_argb_b + off, width, + src_argb_alpha + off, width, dst_argb_opt + off, width, width, + invert * height); + } + for (int i = 0; i < kStride * height; ++i) { + EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]); + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(src_argb_b); + free_aligned_buffer_page_end(src_argb_alpha); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); +} + +TEST_F(LibYUVPlanarTest, BlendPlane_Opt) { + TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); +} +TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) { + TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); +} +TEST_F(LibYUVPlanarTest, BlendPlane_Any) { + TestBlendPlane(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); +} +TEST_F(LibYUVPlanarTest, BlendPlane_Invert) { + TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 1); +} + +#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) + +static 
void TestI420Blend(int width, + int height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + int invert, + int off) { + width = ((width) > 0) ? (width) : 1; + const int kStrideUV = SUBSAMPLE(width, 2); + const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2); + align_buffer_page_end(src_y0, width * height + off); + align_buffer_page_end(src_u0, kSizeUV + off); + align_buffer_page_end(src_v0, kSizeUV + off); + align_buffer_page_end(src_y1, width * height + off); + align_buffer_page_end(src_u1, kSizeUV + off); + align_buffer_page_end(src_v1, kSizeUV + off); + align_buffer_page_end(src_a, width * height + off); + align_buffer_page_end(dst_y_c, width * height + off); + align_buffer_page_end(dst_u_c, kSizeUV + off); + align_buffer_page_end(dst_v_c, kSizeUV + off); + align_buffer_page_end(dst_y_opt, width * height + off); + align_buffer_page_end(dst_u_opt, kSizeUV + off); + align_buffer_page_end(dst_v_opt, kSizeUV + off); + + MemRandomize(src_y0, width * height + off); + MemRandomize(src_u0, kSizeUV + off); + MemRandomize(src_v0, kSizeUV + off); + MemRandomize(src_y1, width * height + off); + MemRandomize(src_u1, kSizeUV + off); + MemRandomize(src_v1, kSizeUV + off); + MemRandomize(src_a, width * height + off); + memset(dst_y_c, 255, width * height + off); + memset(dst_u_c, 255, kSizeUV + off); + memset(dst_v_c, 255, kSizeUV + off); + memset(dst_y_opt, 255, width * height + off); + memset(dst_u_opt, 255, kSizeUV + off); + memset(dst_v_opt, 255, kSizeUV + off); + + MaskCpuFlags(disable_cpu_flags); + I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off, + kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV, + src_v1 + off, kStrideUV, src_a + off, width, dst_y_c + off, width, + dst_u_c + off, kStrideUV, dst_v_c + off, kStrideUV, width, + invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off, + kStrideUV, 
src_y1 + off, width, src_u1 + off, kStrideUV, + src_v1 + off, kStrideUV, src_a + off, width, dst_y_opt + off, + width, dst_u_opt + off, kStrideUV, dst_v_opt + off, kStrideUV, + width, invert * height); + } + for (int i = 0; i < width * height; ++i) { + EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]); + } + for (int i = 0; i < kSizeUV; ++i) { + EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]); + EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]); + } + free_aligned_buffer_page_end(src_y0); + free_aligned_buffer_page_end(src_u0); + free_aligned_buffer_page_end(src_v0); + free_aligned_buffer_page_end(src_y1); + free_aligned_buffer_page_end(src_u1); + free_aligned_buffer_page_end(src_v1); + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(dst_y_c); + free_aligned_buffer_page_end(dst_u_c); + free_aligned_buffer_page_end(dst_v_c); + free_aligned_buffer_page_end(dst_y_opt); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_opt); +} + +TEST_F(LibYUVPlanarTest, I420Blend_Opt) { + TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); +} +TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) { + TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); +} + +// TODO(fbarchard): DISABLED because _Any uses C. Avoid C and re-enable. 
+TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) { + TestI420Blend(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); +} +TEST_F(LibYUVPlanarTest, I420Blend_Invert) { + TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); +} + +TEST_F(LibYUVPlanarTest, TestAffine) { + SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]); + SIMD_ALIGNED(uint8_t interpolate_pixels_C[1280][4]); + + for (int i = 0; i < 1280; ++i) { + for (int j = 0; j < 4; ++j) { + orig_pixels_0[i][j] = i; + } + } + + float uv_step[4] = {0.f, 0.f, 0.75f, 0.f}; + + ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], uv_step, + 1280); + EXPECT_EQ(0u, interpolate_pixels_C[0][0]); + EXPECT_EQ(96u, interpolate_pixels_C[128][0]); + EXPECT_EQ(191u, interpolate_pixels_C[255][3]); + +#if defined(HAS_ARGBAFFINEROW_SSE2) + SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]); + ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0], + uv_step, 1280); + EXPECT_EQ(0, memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4)); + + int has_sse2 = TestCpuFlag(kCpuHasSSE2); + if (has_sse2) { + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0], + uv_step, 1280); + } + } +#endif +} + +TEST_F(LibYUVPlanarTest, TestCopyPlane) { + int err = 0; + int yw = benchmark_width_; + int yh = benchmark_height_; + int b = 12; + int i, j; + + int y_plane_size = (yw + b * 2) * (yh + b * 2); + align_buffer_page_end(orig_y, y_plane_size); + align_buffer_page_end(dst_c, y_plane_size); + align_buffer_page_end(dst_opt, y_plane_size); + + memset(orig_y, 0, y_plane_size); + memset(dst_c, 0, y_plane_size); + memset(dst_opt, 0, y_plane_size); + + // Fill image buffers with random data. 
+ for (i = b; i < (yh + b); ++i) { + for (j = b; j < (yw + b); ++j) { + orig_y[i * (yw + b * 2) + j] = fastrand() & 0xff; + } + } + + // Fill destination buffers with random data. + for (i = 0; i < y_plane_size; ++i) { + uint8_t random_number = fastrand() & 0x7f; + dst_c[i] = random_number; + dst_opt[i] = dst_c[i]; + } + + int y_off = b * (yw + b * 2) + b; + + int y_st = yw + b * 2; + int stride = 8; + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + for (j = 0; j < benchmark_iterations_; j++) { + CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh); + } + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info_); + for (j = 0; j < benchmark_iterations_; j++) { + CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh); + } + + for (i = 0; i < y_plane_size; ++i) { + if (dst_c[i] != dst_opt[i]) { + ++err; + } + } + + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(dst_c); + free_aligned_buffer_page_end(dst_opt); + + EXPECT_EQ(0, err); +} + +TEST_F(LibYUVPlanarTest, CopyPlane_Opt) { + int i; + int y_plane_size = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_y, y_plane_size); + align_buffer_page_end(dst_c, y_plane_size); + align_buffer_page_end(dst_opt, y_plane_size); + + MemRandomize(orig_y, y_plane_size); + memset(dst_c, 1, y_plane_size); + memset(dst_opt, 2, y_plane_size); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + for (i = 0; i < benchmark_iterations_; i++) { + CopyPlane(orig_y, benchmark_width_, dst_c, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + // Enable optimizations. 
+ MaskCpuFlags(benchmark_cpu_info_); + for (i = 0; i < benchmark_iterations_; i++) { + CopyPlane(orig_y, benchmark_width_, dst_opt, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(dst_c); + free_aligned_buffer_page_end(dst_opt); +} + +TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) { + // Test to verify copying a rect with a zero height or width does + // not touch destination memory. + uint8_t src = 42; + uint8_t dst = 0; + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + CopyPlane(&src, 0, &dst, 0, 0, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); + + CopyPlane(&src, 1, &dst, 1, 1, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); + + CopyPlane(&src, 1, &dst, 1, 0, 1); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info_); + CopyPlane(&src, 0, &dst, 0, 0, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); + + CopyPlane(&src, 1, &dst, 1, 1, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); + + CopyPlane(&src, 1, &dst, 1, 0, 1); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); +} + +TEST_F(LibYUVPlanarTest, TestDetilePlane) { + int i, j; + + // orig is tiled. Allocate enough memory for tiles. + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height; + int y_plane_size = benchmark_width_ * benchmark_height_; + align_buffer_page_end(tile_y, tile_plane_size); + align_buffer_page_end(dst_c, y_plane_size); + align_buffer_page_end(dst_opt, y_plane_size); + + MemRandomize(tile_y, tile_plane_size); + memset(dst_c, 0, y_plane_size); + memset(dst_opt, 0, y_plane_size); + + // Disable all optimizations. 
+ MaskCpuFlags(disable_cpu_flags_); + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane(tile_y, tile_width, dst_c, benchmark_width_, benchmark_width_, + benchmark_height_, 16); + } + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info_); + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane(tile_y, tile_width, dst_opt, benchmark_width_, benchmark_width_, + benchmark_height_, 16); + } + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end(tile_y); + free_aligned_buffer_page_end(dst_c); + free_aligned_buffer_page_end(dst_opt); +} + +TEST_F(LibYUVPlanarTest, TestDetilePlane_16) { + int i, j; + + // orig is tiled. Allocate enough memory for tiles. + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height * 2; + int y_plane_size = benchmark_width_ * benchmark_height_ * 2; + align_buffer_page_end(tile_y, tile_plane_size); + align_buffer_page_end(dst_c, y_plane_size); + align_buffer_page_end(dst_opt, y_plane_size); + + MemRandomize(tile_y, tile_plane_size); + memset(dst_c, 0, y_plane_size); + memset(dst_opt, 0, y_plane_size); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_c, + benchmark_width_, benchmark_width_, benchmark_height_, 16); + } + + // Enable optimizations. 
+ MaskCpuFlags(benchmark_cpu_info_); + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_opt, + benchmark_width_, benchmark_width_, benchmark_height_, 16); + } + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end(tile_y); + free_aligned_buffer_page_end(dst_c); + free_aligned_buffer_page_end(dst_opt); +} + +// Compares DetileSplitUV to 2 step Detile + SplitUV +TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) { + int i, j; + + // orig is tiled. Allocate enough memory for tiles. + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height; + int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_; + align_buffer_page_end(tile_uv, tile_plane_size); + align_buffer_page_end(detiled_uv, tile_plane_size); + align_buffer_page_end(dst_u_two_stage, uv_plane_size); + align_buffer_page_end(dst_u_opt, uv_plane_size); + align_buffer_page_end(dst_v_two_stage, uv_plane_size); + align_buffer_page_end(dst_v_opt, uv_plane_size); + + MemRandomize(tile_uv, tile_plane_size); + memset(detiled_uv, 0, tile_plane_size); + memset(dst_u_two_stage, 0, uv_plane_size); + memset(dst_u_opt, 0, uv_plane_size); + memset(dst_v_two_stage, 0, uv_plane_size); + memset(dst_v_opt, 0, uv_plane_size); + + DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, + dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_, + benchmark_height_, 16); + + // Benchmark 2 step conversion for comparison. 
+ for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane(tile_uv, tile_width, detiled_uv, benchmark_width_, + benchmark_width_, benchmark_height_, 16); + SplitUVPlane(detiled_uv, tile_width, dst_u_two_stage, + (benchmark_width_ + 1) / 2, dst_v_two_stage, + (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2, + benchmark_height_); + } + + for (i = 0; i < uv_plane_size; ++i) { + EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]); + EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]); + } + + free_aligned_buffer_page_end(tile_uv); + free_aligned_buffer_page_end(detiled_uv); + free_aligned_buffer_page_end(dst_u_two_stage); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_two_stage); + free_aligned_buffer_page_end(dst_v_opt); +} + +TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { + int i, j; + + // orig is tiled. Allocate enough memory for tiles. + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height; + int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_; + align_buffer_page_end(tile_uv, tile_plane_size); + align_buffer_page_end(dst_u_c, uv_plane_size); + align_buffer_page_end(dst_u_opt, uv_plane_size); + align_buffer_page_end(dst_v_c, uv_plane_size); + align_buffer_page_end(dst_v_opt, uv_plane_size); + + MemRandomize(tile_uv, tile_plane_size); + memset(dst_u_c, 0, uv_plane_size); + memset(dst_u_opt, 0, uv_plane_size); + memset(dst_v_c, 0, uv_plane_size); + memset(dst_v_opt, 0, uv_plane_size); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + + DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, (benchmark_width_ + 1) / 2, + dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_, + benchmark_height_, 16); + + // Enable optimizations. 
+ MaskCpuFlags(benchmark_cpu_info_); + + for (j = 0; j < benchmark_iterations_; j++) { + DetileSplitUVPlane( + tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, + (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); + } + + for (i = 0; i < uv_plane_size; ++i) { + EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); + } + + free_aligned_buffer_page_end(tile_uv); + free_aligned_buffer_page_end(dst_u_c); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_c); + free_aligned_buffer_page_end(dst_v_opt); +} + +static int TestMultiply(int width, + int height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + int invert, + int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(src_argb_b, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + src_argb_b[i + off] = (fastrand() & 0xff); + } + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c, + kStride, width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, + dst_argb_opt, kStride, width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = abs(static_cast(dst_argb_c[i]) - + static_cast(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(src_argb_b); + 
free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) { + int max_diff = TestMultiply(benchmark_width_ + 1, benchmark_height_, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) { + int max_diff = + TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) { + int max_diff = + TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) { + int max_diff = + TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +static int TestAdd(int width, + int height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + int invert, + int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(src_argb_b, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + src_argb_b[i + off] = (fastrand() & 0xff); + } + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c, + kStride, width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBAdd(src_argb_a + off, kStride, src_argb_b + 
off, kStride, dst_argb_opt, + kStride, width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = abs(static_cast(dst_argb_c[i]) - + static_cast(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(src_argb_b); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBAdd_Any) { + int max_diff = + TestAdd(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) { + int max_diff = + TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) { + int max_diff = + TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) { + int max_diff = + TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +static int TestSubtract(int width, + int height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + int invert, + int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(src_argb_b, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + src_argb_b[i + off] = (fastrand() & 0xff); + } + memset(dst_argb_c, 0, kStride * 
height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c, + kStride, width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, + dst_argb_opt, kStride, width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = abs(static_cast(dst_argb_c[i]) - + static_cast(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(src_argb_b); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) { + int max_diff = TestSubtract(benchmark_width_ + 1, benchmark_height_, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) { + int max_diff = + TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) { + int max_diff = + TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) { + int max_diff = + TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +static int TestSobel(int width, + int height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + int invert, + int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + 
align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + memset(src_argb_a, 0, kStride * height + off); + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + } + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBSobel(src_argb_a + off, kStride, dst_argb_c, kStride, width, + invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBSobel(src_argb_a + off, kStride, dst_argb_opt, kStride, width, + invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = abs(static_cast(dst_argb_c[i]) - + static_cast(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBSobel_Any) { + int max_diff = + TestSobel(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) { + int max_diff = + TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) { + int max_diff = + TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) { + int max_diff = + TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_EQ(0, max_diff); +} + +static int TestSobelToPlane(int width, + int height, 
+                            int benchmark_iterations,
+                            int disable_cpu_flags,
+                            int benchmark_cpu_info,
+                            int invert,
+                            int off) {
+  if (width < 1) {
+    width = 1;
+  }
+  const int kSrcBpp = 4;
+  const int kDstBpp = 1;
+  const int kSrcStride = (width * kSrcBpp + 15) & ~15;
+  const int kDstStride = (width * kDstBpp + 15) & ~15;
+  align_buffer_page_end(src_argb_a, kSrcStride * height + off);
+  align_buffer_page_end(dst_argb_c, kDstStride * height);
+  align_buffer_page_end(dst_argb_opt, kDstStride * height);
+  memset(src_argb_a, 0, kSrcStride * height + off);
+  for (int i = 0; i < kSrcStride * height; ++i) {
+    src_argb_a[i + off] = (fastrand() & 0xff);
+  }
+  memset(dst_argb_c, 0, kDstStride * height);
+  memset(dst_argb_opt, 0, kDstStride * height);
+
+  MaskCpuFlags(disable_cpu_flags);
+  ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_c, kDstStride, width,
+                   invert * height);
+  MaskCpuFlags(benchmark_cpu_info);
+  for (int i = 0; i < benchmark_iterations; ++i) {
+    ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_opt, kDstStride,
+                     width, invert * height);
+  }
+  int max_diff = 0;
+  for (int i = 0; i < kDstStride * height; ++i) {
+    int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
+                       static_cast<int>(dst_argb_opt[i]));
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+  free_aligned_buffer_page_end(src_argb_a);
+  free_aligned_buffer_page_end(dst_argb_c);
+  free_aligned_buffer_page_end(dst_argb_opt);
+  return max_diff;
+}
+
+TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
+  int max_diff = TestSobelToPlane(benchmark_width_ + 1, benchmark_height_,
+                                  benchmark_iterations_, disable_cpu_flags_,
+                                  benchmark_cpu_info_, +1, 0);
+  EXPECT_EQ(0, max_diff);
+}
+
+TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) {
+  int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_,
+                                  benchmark_iterations_, disable_cpu_flags_,
+                                  benchmark_cpu_info_, +1, 1);
+  EXPECT_EQ(0, max_diff);
+}
+
+TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) {
+  int max_diff =
TestSobelToPlane(benchmark_width_, benchmark_height_,
+                       benchmark_iterations_, disable_cpu_flags_,
+                       benchmark_cpu_info_, -1, 0);
+  EXPECT_EQ(0, max_diff);
+}
+
+TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) {
+  int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_,
+                                  benchmark_iterations_, disable_cpu_flags_,
+                                  benchmark_cpu_info_, +1, 0);
+  EXPECT_EQ(0, max_diff);
+}
+
+// Compares the C and optimized ARGBSobelXY paths; returns the max per-byte
+// absolute difference between the two outputs.
+static int TestSobelXY(int width,
+                       int height,
+                       int benchmark_iterations,
+                       int disable_cpu_flags,
+                       int benchmark_cpu_info,
+                       int invert,
+                       int off) {
+  if (width < 1) {
+    width = 1;
+  }
+  const int kBpp = 4;
+  const int kStride = width * kBpp;
+  align_buffer_page_end(src_argb_a, kStride * height + off);
+  align_buffer_page_end(dst_argb_c, kStride * height);
+  align_buffer_page_end(dst_argb_opt, kStride * height);
+  memset(src_argb_a, 0, kStride * height + off);
+  for (int i = 0; i < kStride * height; ++i) {
+    src_argb_a[i + off] = (fastrand() & 0xff);
+  }
+  memset(dst_argb_c, 0, kStride * height);
+  memset(dst_argb_opt, 0, kStride * height);
+
+  MaskCpuFlags(disable_cpu_flags);
+  ARGBSobelXY(src_argb_a + off, kStride, dst_argb_c, kStride, width,
+              invert * height);
+  MaskCpuFlags(benchmark_cpu_info);
+  for (int i = 0; i < benchmark_iterations; ++i) {
+    ARGBSobelXY(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
+                invert * height);
+  }
+  int max_diff = 0;
+  for (int i = 0; i < kStride * height; ++i) {
+    int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
+                       static_cast<int>(dst_argb_opt[i]));
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+  free_aligned_buffer_page_end(src_argb_a);
+  free_aligned_buffer_page_end(dst_argb_c);
+  free_aligned_buffer_page_end(dst_argb_opt);
+  return max_diff;
+}
+
+TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
+  int max_diff = TestSobelXY(benchmark_width_ + 1, benchmark_height_,
+                             benchmark_iterations_, disable_cpu_flags_,
+                             benchmark_cpu_info_, +1, 0);
+  EXPECT_EQ(0, max_diff);
+}
+
+TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) {
+  int
max_diff =
+      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
+                  disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
+  EXPECT_EQ(0, max_diff);
+}
+
+TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) {
+  int max_diff =
+      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
+                  disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
+  EXPECT_EQ(0, max_diff);
+}
+
+TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) {
+  int max_diff =
+      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
+                  disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
+  EXPECT_EQ(0, max_diff);
+}
+
+// Compares the C and optimized ARGBBlur paths for a given radius; dst_cumsum
+// is the int32 cumulative-sum scratch buffer ARGBBlur requires. Returns the
+// max per-byte absolute difference between the two outputs.
+static int TestBlur(int width,
+                    int height,
+                    int benchmark_iterations,
+                    int disable_cpu_flags,
+                    int benchmark_cpu_info,
+                    int invert,
+                    int off,
+                    int radius) {
+  if (width < 1) {
+    width = 1;
+  }
+  const int kBpp = 4;
+  const int kStride = width * kBpp;
+  align_buffer_page_end(src_argb_a, kStride * height + off);
+  align_buffer_page_end(dst_cumsum, width * height * 16);
+  align_buffer_page_end(dst_argb_c, kStride * height);
+  align_buffer_page_end(dst_argb_opt, kStride * height);
+  for (int i = 0; i < kStride * height; ++i) {
+    src_argb_a[i + off] = (fastrand() & 0xff);
+  }
+  memset(dst_cumsum, 0, width * height * 16);
+  memset(dst_argb_c, 0, kStride * height);
+  memset(dst_argb_opt, 0, kStride * height);
+
+  MaskCpuFlags(disable_cpu_flags);
+  ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
+           reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
+           invert * height, radius);
+  MaskCpuFlags(benchmark_cpu_info);
+  for (int i = 0; i < benchmark_iterations; ++i) {
+    ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
+             reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
+             invert * height, radius);
+  }
+  int max_diff = 0;
+  for (int i = 0; i < kStride * height; ++i) {
+    int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
+                       static_cast<int>(dst_argb_opt[i]));
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+  free_aligned_buffer_page_end(src_argb_a);
+
free_aligned_buffer_page_end(dst_cumsum); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +#define DISABLED_ARM(name) name +#else +#define DISABLED_ARM(name) DISABLED_##name +#endif + +static const int kBlurSize = 55; +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Any)) { + int max_diff = + TestBlur(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Unaligned)) { + int max_diff = + TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Invert)) { + int max_diff = + TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Opt)) { + int max_diff = + TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize); + EXPECT_LE(max_diff, 1); +} + +static const int kBlurSmallSize = 5; +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Any)) { + int max_diff = + TestBlur(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Unaligned)) { + int max_diff = + TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSmallSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Invert)) { + int max_diff = + TestBlur(benchmark_width_, benchmark_height_, 
benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSmallSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Opt)) { + int max_diff = + TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, DISABLED_ARM(TestARGBPolynomial)) { + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + + SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = { + 0.94230f, -3.03300f, -2.92500f, 0.f, // C0 + 0.584500f, 1.112000f, 1.535000f, 1.f, // C1 x + 0.001313f, -0.002503f, -0.004496f, 0.f, // C2 x * x + 0.0f, 0.000006965f, 0.000008781f, 0.f, // C3 x * x * x + }; + + // Test blue + orig_pixels[0][0] = 255u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 128u; + // Test green + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 255u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 0u; + // Test red + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 255u; + orig_pixels[2][3] = 255u; + // Test white + orig_pixels[3][0] = 255u; + orig_pixels[3][1] = 255u; + orig_pixels[3][2] = 255u; + orig_pixels[3][3] = 255u; + // Test color + orig_pixels[4][0] = 16u; + orig_pixels[4][1] = 64u; + orig_pixels[4][2] = 192u; + orig_pixels[4][3] = 224u; + // Do 16 to test asm version. 
+ ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, + &kWarmifyPolynomial[0], 16, 1); + EXPECT_EQ(235u, dst_pixels_opt[0][0]); + EXPECT_EQ(0u, dst_pixels_opt[0][1]); + EXPECT_EQ(0u, dst_pixels_opt[0][2]); + EXPECT_EQ(128u, dst_pixels_opt[0][3]); + EXPECT_EQ(0u, dst_pixels_opt[1][0]); + EXPECT_EQ(233u, dst_pixels_opt[1][1]); + EXPECT_EQ(0u, dst_pixels_opt[1][2]); + EXPECT_EQ(0u, dst_pixels_opt[1][3]); + EXPECT_EQ(0u, dst_pixels_opt[2][0]); + EXPECT_EQ(0u, dst_pixels_opt[2][1]); + EXPECT_EQ(241u, dst_pixels_opt[2][2]); + EXPECT_EQ(255u, dst_pixels_opt[2][3]); + EXPECT_EQ(235u, dst_pixels_opt[3][0]); + EXPECT_EQ(233u, dst_pixels_opt[3][1]); + EXPECT_EQ(241u, dst_pixels_opt[3][2]); + EXPECT_EQ(255u, dst_pixels_opt[3][3]); + EXPECT_EQ(10u, dst_pixels_opt[4][0]); + EXPECT_EQ(59u, dst_pixels_opt[4][1]); + EXPECT_EQ(188u, dst_pixels_opt[4][2]); + EXPECT_EQ(224u, dst_pixels_opt[4][3]); + + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + + MaskCpuFlags(disable_cpu_flags_); + ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, + &kWarmifyPolynomial[0], 1280, 1); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, + &kWarmifyPolynomial[0], 1280, 1); + } + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); + EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); + EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); + EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); + } +} + +int TestHalfFloatPlane(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + float scale, + int mask) { + int i, j; + const int y_plane_size = benchmark_width * benchmark_height * 2; + + align_buffer_page_end(orig_y, y_plane_size * 3); + uint8_t* dst_opt = orig_y + 
y_plane_size; + uint8_t* dst_c = orig_y + y_plane_size * 2; + + MemRandomize(orig_y, y_plane_size); + memset(dst_c, 0, y_plane_size); + memset(dst_opt, 1, y_plane_size); + + for (i = 0; i < y_plane_size / 2; ++i) { + reinterpret_cast(orig_y)[i] &= mask; + } + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags); + for (j = 0; j < benchmark_iterations; j++) { + HalfFloatPlane(reinterpret_cast(orig_y), benchmark_width * 2, + reinterpret_cast(dst_c), benchmark_width * 2, + scale, benchmark_width, benchmark_height); + } + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info); + for (j = 0; j < benchmark_iterations; j++) { + HalfFloatPlane(reinterpret_cast(orig_y), benchmark_width * 2, + reinterpret_cast(dst_opt), benchmark_width * 2, + scale, benchmark_width, benchmark_height); + } + + int max_diff = 0; + for (i = 0; i < y_plane_size / 2; ++i) { + int abs_diff = + abs(static_cast(reinterpret_cast(dst_c)[i]) - + static_cast(reinterpret_cast(dst_opt)[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(orig_y); + return max_diff; +} + +#if defined(__arm__) +static void EnableFlushDenormalToZero(void) { + uint32_t cw; + __asm__ __volatile__( + "vmrs %0, fpscr \n" + "orr %0, %0, #0x1000000 \n" + "vmsr fpscr, %0 \n" + : "=r"(cw)::"memory"); +} +#endif + +// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes +// exponent to be less than 0. 15 - log2(65536) = -1/ This shouldnt normally +// happen since scale is 1/(1<(orig_y + y_plane_size); + float* dst_c = reinterpret_cast(orig_y + y_plane_size * 5); + + MemRandomize(orig_y, y_plane_size); + memset(dst_c, 0, y_plane_size * 4); + memset(dst_opt, 1, y_plane_size * 4); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags); + ByteToFloat(orig_y, dst_c, scale, y_plane_size); + + // Enable optimizations. 
+ MaskCpuFlags(benchmark_cpu_info); + for (j = 0; j < benchmark_iterations; j++) { + ByteToFloat(orig_y, dst_opt, scale, y_plane_size); + } + + float max_diff = 0; + for (i = 0; i < y_plane_size; ++i) { + float abs_diff = fabs(dst_c[i] - dst_opt[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(orig_y); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestByteToFloat) { + float diff = TestByteToFloat(benchmark_width_, benchmark_height_, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_, 1.0f); + EXPECT_EQ(0.f, diff); +} + +TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + + align_buffer_page_end(lumacolortable, 32768); + int v = 0; + for (int i = 0; i < 32768; ++i) { + lumacolortable[i] = v; + v += 3; + } + // Test blue + orig_pixels[0][0] = 255u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 128u; + // Test green + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 255u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 0u; + // Test red + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 255u; + orig_pixels[2][3] = 255u; + // Test color + orig_pixels[3][0] = 16u; + orig_pixels[3][1] = 64u; + orig_pixels[3][2] = 192u; + orig_pixels[3][3] = 224u; + // Do 16 to test asm version. 
+ ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, + &lumacolortable[0], 16, 1); + EXPECT_EQ(253u, dst_pixels_opt[0][0]); + EXPECT_EQ(0u, dst_pixels_opt[0][1]); + EXPECT_EQ(0u, dst_pixels_opt[0][2]); + EXPECT_EQ(128u, dst_pixels_opt[0][3]); + EXPECT_EQ(0u, dst_pixels_opt[1][0]); + EXPECT_EQ(253u, dst_pixels_opt[1][1]); + EXPECT_EQ(0u, dst_pixels_opt[1][2]); + EXPECT_EQ(0u, dst_pixels_opt[1][3]); + EXPECT_EQ(0u, dst_pixels_opt[2][0]); + EXPECT_EQ(0u, dst_pixels_opt[2][1]); + EXPECT_EQ(253u, dst_pixels_opt[2][2]); + EXPECT_EQ(255u, dst_pixels_opt[2][3]); + EXPECT_EQ(48u, dst_pixels_opt[3][0]); + EXPECT_EQ(192u, dst_pixels_opt[3][1]); + EXPECT_EQ(64u, dst_pixels_opt[3][2]); + EXPECT_EQ(224u, dst_pixels_opt[3][3]); + + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + + MaskCpuFlags(disable_cpu_flags_); + ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, + lumacolortable, 1280, 1); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, + lumacolortable, 1280, 1); + } + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); + EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); + EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); + EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); + } + + free_aligned_buffer_page_end(lumacolortable); +} + +TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) { + const int kSize = benchmark_width_ * benchmark_height_ * 4; + align_buffer_page_end(orig_pixels, kSize); + align_buffer_page_end(dst_pixels_opt, kSize); + align_buffer_page_end(dst_pixels_c, kSize); + + MemRandomize(orig_pixels, kSize); + MemRandomize(dst_pixels_opt, kSize); + memcpy(dst_pixels_c, dst_pixels_opt, kSize); + + MaskCpuFlags(disable_cpu_flags_); + ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, 
dst_pixels_c, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, dst_pixels_opt, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + } + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(dst_pixels_c); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 4); + align_buffer_page_end(dst_pixels_opt, kPixels); + align_buffer_page_end(dst_pixels_c, kPixels); + + MemRandomize(src_pixels, kPixels * 4); + MemRandomize(dst_pixels_opt, kPixels); + memcpy(dst_pixels_c, dst_pixels_opt, kPixels); + + MaskCpuFlags(disable_cpu_flags_); + ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c, + benchmark_width_, benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt, + benchmark_width_, benchmark_width_, benchmark_height_); + } + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(dst_pixels_c); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(src_pixels); +} + +TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 4); + align_buffer_page_end(dst_pixels_c, kPixels * 4); + + MemRandomize(orig_pixels, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 4); + memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4); + + MaskCpuFlags(disable_cpu_flags_); + 
ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + } + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(dst_pixels_c); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(orig_pixels); +} + +static int TestARGBRect(int width, + int height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + int invert, + int off, + int bpp) { + if (width < 1) { + width = 1; + } + const int kStride = width * bpp; + const int kSize = kStride * height; + const uint32_t v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff); + + align_buffer_page_end(dst_argb_c, kSize + off); + align_buffer_page_end(dst_argb_opt, kSize + off); + + MemRandomize(dst_argb_c + off, kSize); + memcpy(dst_argb_opt + off, dst_argb_c + off, kSize); + + MaskCpuFlags(disable_cpu_flags); + if (bpp == 4) { + ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32); + } else { + SetPlane(dst_argb_c + off, kStride, width, invert * height, v32); + } + + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + if (bpp == 4) { + ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32); + } else { + SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32); + } + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = abs(static_cast(dst_argb_c[i + off]) - + static_cast(dst_argb_opt[i + off])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBRect_Any) { + 
int max_diff = TestARGBRect(benchmark_width_ + 1, benchmark_height_, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_, +1, 0, 4); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) { + int max_diff = + TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 4); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBRect_Invert) { + int max_diff = + TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 4); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBRect_Opt) { + int max_diff = + TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 4); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, SetPlane_Any) { + int max_diff = TestARGBRect(benchmark_width_ + 1, benchmark_height_, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_, +1, 0, 1); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) { + int max_diff = + TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 1); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, SetPlane_Invert) { + int max_diff = + TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 1); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, SetPlane_Opt) { + int max_diff = + TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_u, kPixels); + align_buffer_page_end(src_pixels_v, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 2); + 
align_buffer_page_end(dst_pixels_c, kPixels * 2); + + MemRandomize(src_pixels_u, kPixels); + MemRandomize(src_pixels_v, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 2); + MemRandomize(dst_pixels_c, kPixels * 2); + + MaskCpuFlags(disable_cpu_flags_); + MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_, + dst_pixels_c, benchmark_width_ * 2, benchmark_width_, + benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_, + dst_pixels_opt, benchmark_width_ * 2, benchmark_width_, + benchmark_height_); + } + + for (int i = 0; i < kPixels * 2; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels_u); + free_aligned_buffer_page_end(src_pixels_v); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +// 16 bit channel split and merge +TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_u, kPixels * 2); + align_buffer_page_end(src_pixels_v, kPixels * 2); + align_buffer_page_end(dst_pixels_opt, kPixels * 2 * 2); + align_buffer_page_end(dst_pixels_c, kPixels * 2 * 2); + MemRandomize(src_pixels_u, kPixels * 2); + MemRandomize(src_pixels_v, kPixels * 2); + MemRandomize(dst_pixels_opt, kPixels * 2 * 2); + MemRandomize(dst_pixels_c, kPixels * 2 * 2); + + MaskCpuFlags(disable_cpu_flags_); + MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_, + (const uint16_t*)src_pixels_v, benchmark_width_, + (uint16_t*)dst_pixels_c, benchmark_width_ * 2, + benchmark_width_, benchmark_height_, 12); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_, + (const uint16_t*)src_pixels_v, benchmark_width_, + (uint16_t*)dst_pixels_opt, 
benchmark_width_ * 2, + benchmark_width_, benchmark_height_, 12); + } + + for (int i = 0; i < kPixels * 2 * 2; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + free_aligned_buffer_page_end(src_pixels_u); + free_aligned_buffer_page_end(src_pixels_v); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 2); + align_buffer_page_end(dst_pixels_u_c, kPixels); + align_buffer_page_end(dst_pixels_v_c, kPixels); + align_buffer_page_end(dst_pixels_u_opt, kPixels); + align_buffer_page_end(dst_pixels_v_opt, kPixels); + + MemRandomize(src_pixels, kPixels * 2); + MemRandomize(dst_pixels_u_c, kPixels); + MemRandomize(dst_pixels_v_c, kPixels); + MemRandomize(dst_pixels_u_opt, kPixels); + MemRandomize(dst_pixels_v_opt, kPixels); + + MaskCpuFlags(disable_cpu_flags_); + SplitUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_u_c, + benchmark_width_, dst_pixels_v_c, benchmark_width_, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + SplitUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_u_opt, + benchmark_width_, dst_pixels_v_opt, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]); + EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_u_c); + free_aligned_buffer_page_end(dst_pixels_v_c); + free_aligned_buffer_page_end(dst_pixels_u_opt); + free_aligned_buffer_page_end(dst_pixels_v_opt); +} + +// 16 bit channel split +TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 2 * 2); + 
align_buffer_page_end(dst_pixels_u_c, kPixels * 2); + align_buffer_page_end(dst_pixels_v_c, kPixels * 2); + align_buffer_page_end(dst_pixels_u_opt, kPixels * 2); + align_buffer_page_end(dst_pixels_v_opt, kPixels * 2); + MemRandomize(src_pixels, kPixels * 2 * 2); + MemRandomize(dst_pixels_u_c, kPixels * 2); + MemRandomize(dst_pixels_v_c, kPixels * 2); + MemRandomize(dst_pixels_u_opt, kPixels * 2); + MemRandomize(dst_pixels_v_opt, kPixels * 2); + + MaskCpuFlags(disable_cpu_flags_); + SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2, + (uint16_t*)dst_pixels_u_c, benchmark_width_, + (uint16_t*)dst_pixels_v_c, benchmark_width_, benchmark_width_, + benchmark_height_, 10); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2, + (uint16_t*)dst_pixels_u_opt, benchmark_width_, + (uint16_t*)dst_pixels_v_opt, benchmark_width_, + benchmark_width_, benchmark_height_, 10); + } + + for (int i = 0; i < kPixels * 2; ++i) { + EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]); + EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]); + } + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_u_c); + free_aligned_buffer_page_end(dst_pixels_v_c); + free_aligned_buffer_page_end(dst_pixels_u_opt); + free_aligned_buffer_page_end(dst_pixels_v_opt); +} + +TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) { + // Round count up to multiple of 16 + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 2); + align_buffer_page_end(dst_pixels_opt, kPixels * 2); + align_buffer_page_end(dst_pixels_c, kPixels * 2); + + MemRandomize(src_pixels, kPixels * 2); + MemRandomize(dst_pixels_opt, kPixels * 2); + MemRandomize(dst_pixels_c, kPixels * 2); + + MaskCpuFlags(disable_cpu_flags_); + SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c, + benchmark_width_ * 2, benchmark_width_, benchmark_height_); + 
MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt, + benchmark_width_ * 2, benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < kPixels * 2; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) { + // Round count up to multiple of 16 + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 3); + align_buffer_page_end(tmp_pixels_r, kPixels); + align_buffer_page_end(tmp_pixels_g, kPixels); + align_buffer_page_end(tmp_pixels_b, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 3); + align_buffer_page_end(dst_pixels_c, kPixels * 3); + + MemRandomize(src_pixels, kPixels * 3); + MemRandomize(tmp_pixels_r, kPixels); + MemRandomize(tmp_pixels_g, kPixels); + MemRandomize(tmp_pixels_b, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 3); + MemRandomize(dst_pixels_c, kPixels * 3); + + MaskCpuFlags(disable_cpu_flags_); + SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, benchmark_width_, benchmark_height_); + MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, dst_pixels_c, + benchmark_width_ * 3, benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, benchmark_width_, benchmark_height_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, + benchmark_width_, tmp_pixels_b, benchmark_width_, + dst_pixels_opt, benchmark_width_ 
* 3, benchmark_width_, + benchmark_height_); + } + + for (int i = 0; i < kPixels * 3; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(tmp_pixels_r); + free_aligned_buffer_page_end(tmp_pixels_g); + free_aligned_buffer_page_end(tmp_pixels_b); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) { + // Round count up to multiple of 16 + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 3); + align_buffer_page_end(tmp_pixels_r, kPixels); + align_buffer_page_end(tmp_pixels_g, kPixels); + align_buffer_page_end(tmp_pixels_b, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 3); + align_buffer_page_end(dst_pixels_c, kPixels * 3); + + MemRandomize(src_pixels, kPixels * 3); + MemRandomize(tmp_pixels_r, kPixels); + MemRandomize(tmp_pixels_g, kPixels); + MemRandomize(tmp_pixels_b, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 3); + MemRandomize(dst_pixels_c, kPixels * 3); + + MaskCpuFlags(disable_cpu_flags_); + SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, benchmark_width_, benchmark_height_); + MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, dst_pixels_c, + benchmark_width_ * 3, benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, benchmark_width_, + benchmark_height_); + } + MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, dst_pixels_opt, + benchmark_width_ * 3, benchmark_width_, 
benchmark_height_); + + for (int i = 0; i < kPixels * 3; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(tmp_pixels_r); + free_aligned_buffer_page_end(tmp_pixels_g); + free_aligned_buffer_page_end(tmp_pixels_b); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 4); + align_buffer_page_end(tmp_pixels_r, kPixels); + align_buffer_page_end(tmp_pixels_g, kPixels); + align_buffer_page_end(tmp_pixels_b, kPixels); + align_buffer_page_end(tmp_pixels_a, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 4); + align_buffer_page_end(dst_pixels_c, kPixels * 4); + + MemRandomize(src_pixels, kPixels * 4); + MemRandomize(tmp_pixels_r, kPixels); + MemRandomize(tmp_pixels_g, kPixels); + MemRandomize(tmp_pixels_b, kPixels); + MemRandomize(tmp_pixels_a, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 4); + MemRandomize(dst_pixels_c, kPixels * 4); + + MaskCpuFlags(disable_cpu_flags_); + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, tmp_pixels_a, benchmark_width_, + benchmark_width_, benchmark_height_); + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_, + dst_pixels_c, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + + MaskCpuFlags(benchmark_cpu_info_); + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, tmp_pixels_a, benchmark_width_, + benchmark_width_, benchmark_height_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MergeARGBPlane(tmp_pixels_r, benchmark_width_, 
tmp_pixels_g, + benchmark_width_, tmp_pixels_b, benchmark_width_, + tmp_pixels_a, benchmark_width_, dst_pixels_opt, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(tmp_pixels_r); + free_aligned_buffer_page_end(tmp_pixels_g); + free_aligned_buffer_page_end(tmp_pixels_b); + free_aligned_buffer_page_end(tmp_pixels_a); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 4); + align_buffer_page_end(tmp_pixels_r, kPixels); + align_buffer_page_end(tmp_pixels_g, kPixels); + align_buffer_page_end(tmp_pixels_b, kPixels); + align_buffer_page_end(tmp_pixels_a, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 4); + align_buffer_page_end(dst_pixels_c, kPixels * 4); + + MemRandomize(src_pixels, kPixels * 4); + MemRandomize(tmp_pixels_r, kPixels); + MemRandomize(tmp_pixels_g, kPixels); + MemRandomize(tmp_pixels_b, kPixels); + MemRandomize(tmp_pixels_a, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 4); + MemRandomize(dst_pixels_c, kPixels * 4); + + MaskCpuFlags(disable_cpu_flags_); + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, tmp_pixels_a, benchmark_width_, + benchmark_width_, benchmark_height_); + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_, + dst_pixels_c, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + + MaskCpuFlags(benchmark_cpu_info_); + for (int i = 0; i < benchmark_iterations_; ++i) { + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + 
benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, tmp_pixels_a, + benchmark_width_, benchmark_width_, benchmark_height_); + } + + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_, + dst_pixels_opt, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(tmp_pixels_r); + free_aligned_buffer_page_end(tmp_pixels_g); + free_aligned_buffer_page_end(tmp_pixels_b); + free_aligned_buffer_page_end(tmp_pixels_a); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 4); + align_buffer_page_end(tmp_pixels_r, kPixels); + align_buffer_page_end(tmp_pixels_g, kPixels); + align_buffer_page_end(tmp_pixels_b, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 4); + align_buffer_page_end(dst_pixels_c, kPixels * 4); + + MemRandomize(src_pixels, kPixels * 4); + MemRandomize(tmp_pixels_r, kPixels); + MemRandomize(tmp_pixels_g, kPixels); + MemRandomize(tmp_pixels_b, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 4); + MemRandomize(dst_pixels_c, kPixels * 4); + + MaskCpuFlags(disable_cpu_flags_); + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, NULL, 0, benchmark_width_, + benchmark_height_); + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_c, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + + MaskCpuFlags(benchmark_cpu_info_); + SplitARGBPlane(src_pixels, benchmark_width_ * 4, 
tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, NULL, 0, benchmark_width_, + benchmark_height_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, + benchmark_width_, tmp_pixels_b, benchmark_width_, NULL, 0, + dst_pixels_opt, benchmark_width_ * 4, benchmark_width_, + benchmark_height_); + } + + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(tmp_pixels_r); + free_aligned_buffer_page_end(tmp_pixels_g); + free_aligned_buffer_page_end(tmp_pixels_b); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 4); + align_buffer_page_end(tmp_pixels_r, kPixels); + align_buffer_page_end(tmp_pixels_g, kPixels); + align_buffer_page_end(tmp_pixels_b, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 4); + align_buffer_page_end(dst_pixels_c, kPixels * 4); + + MemRandomize(src_pixels, kPixels * 4); + MemRandomize(tmp_pixels_r, kPixels); + MemRandomize(tmp_pixels_g, kPixels); + MemRandomize(tmp_pixels_b, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 4); + MemRandomize(dst_pixels_c, kPixels * 4); + + MaskCpuFlags(disable_cpu_flags_); + SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, NULL, 0, benchmark_width_, + benchmark_height_); + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_c, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + + MaskCpuFlags(benchmark_cpu_info_); + for (int i = 0; i < benchmark_iterations_; ++i) { + 
SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, NULL, 0, benchmark_width_, + benchmark_height_); + } + + MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_opt, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(tmp_pixels_r); + free_aligned_buffer_page_end(tmp_pixels_g); + free_aligned_buffer_page_end(tmp_pixels_b); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +// Merge 4 channels +#define TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \ + TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \ + const int kWidth = W1280; \ + const int kPixels = kWidth * benchmark_height_; \ + align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_a, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \ + align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \ + MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_a, kPixels * sizeof(STYPE) + OFF); \ + memset(dst_memory_c, 0, kPixels * 4 * sizeof(DTYPE)); \ + memset(dst_memory_opt, 0, kPixels * 4 * sizeof(DTYPE)); \ + STYPE* src_pixels_r = reinterpret_cast(src_memory_r + OFF); \ + STYPE* src_pixels_g = reinterpret_cast(src_memory_g + OFF); \ + STYPE* src_pixels_b = reinterpret_cast(src_memory_b 
+ OFF); \ + STYPE* src_pixels_a = reinterpret_cast(src_memory_a + OFF); \ + DTYPE* dst_pixels_c = reinterpret_cast(dst_memory_c); \ + DTYPE* dst_pixels_opt = reinterpret_cast(dst_memory_opt); \ + MaskCpuFlags(disable_cpu_flags_); \ + FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \ + kWidth, src_pixels_a, kWidth, dst_pixels_c, kWidth * 4, \ + kWidth, NEG benchmark_height_, DEPTH); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \ + kWidth, src_pixels_a, kWidth, dst_pixels_opt, kWidth * 4, \ + kWidth, NEG benchmark_height_, DEPTH); \ + } \ + for (int i = 0; i < kPixels * 4; ++i) { \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_memory_r); \ + free_aligned_buffer_page_end(src_memory_g); \ + free_aligned_buffer_page_end(src_memory_b); \ + free_aligned_buffer_page_end(src_memory_a); \ + free_aligned_buffer_page_end(dst_memory_c); \ + free_aligned_buffer_page_end(dst_memory_opt); \ + } + +// Merge 3 channel RGB into 4 channel XRGB with opaque alpha +#define TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \ + TEST_F(LibYUVPlanarTest, FUNC##Plane_Opaque_##DEPTH##N) { \ + const int kWidth = W1280; \ + const int kPixels = kWidth * benchmark_height_; \ + align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \ + align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \ + MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \ + memset(dst_memory_c, 0, kPixels * 4 * sizeof(DTYPE)); \ + memset(dst_memory_opt, 0, 
kPixels * 4 * sizeof(DTYPE)); \ + STYPE* src_pixels_r = reinterpret_cast(src_memory_r + OFF); \ + STYPE* src_pixels_g = reinterpret_cast(src_memory_g + OFF); \ + STYPE* src_pixels_b = reinterpret_cast(src_memory_b + OFF); \ + DTYPE* dst_pixels_c = reinterpret_cast(dst_memory_c); \ + DTYPE* dst_pixels_opt = reinterpret_cast(dst_memory_opt); \ + MaskCpuFlags(disable_cpu_flags_); \ + FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \ + kWidth, NULL, 0, dst_pixels_c, kWidth * 4, kWidth, \ + NEG benchmark_height_, DEPTH); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \ + kWidth, NULL, 0, dst_pixels_opt, kWidth * 4, kWidth, \ + NEG benchmark_height_, DEPTH); \ + } \ + for (int i = 0; i < kPixels * 4; ++i) { \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_memory_r); \ + free_aligned_buffer_page_end(src_memory_g); \ + free_aligned_buffer_page_end(src_memory_b); \ + free_aligned_buffer_page_end(dst_memory_c); \ + free_aligned_buffer_page_end(dst_memory_opt); \ + } + +#define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \ + TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \ + TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \ + 2) \ + TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \ + TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0) \ + TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, \ + 0) \ + TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \ + 2) \ + TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \ + TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0) + +TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 10) +TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 12) +TESTQPLANARTOP(MergeAR64, 
uint16_t, uint16_t, 16) +TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 10) +TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 12) +TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16) + +#define TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \ + TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \ + const int kWidth = W1280; \ + const int kPixels = kWidth * benchmark_height_; \ + align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \ + align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \ + align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \ + MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \ + MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \ + STYPE* src_pixels_r = reinterpret_cast(src_memory_r + OFF); \ + STYPE* src_pixels_g = reinterpret_cast(src_memory_g + OFF); \ + STYPE* src_pixels_b = reinterpret_cast(src_memory_b + OFF); \ + DTYPE* dst_pixels_c = reinterpret_cast(dst_memory_c); \ + DTYPE* dst_pixels_opt = reinterpret_cast(dst_memory_opt); \ + memset(dst_pixels_c, 1, kPixels * 4 * sizeof(DTYPE)); \ + memset(dst_pixels_opt, 2, kPixels * 4 * sizeof(DTYPE)); \ + MaskCpuFlags(disable_cpu_flags_); \ + FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \ + kWidth, dst_pixels_c, kWidth * 4, kWidth, \ + NEG benchmark_height_, DEPTH); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \ + kWidth, dst_pixels_opt, kWidth * 4, kWidth, \ + NEG benchmark_height_, DEPTH); \ + } \ + for (int i = 0; i < kPixels * 4; ++i) { \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_memory_r); \ + 
free_aligned_buffer_page_end(src_memory_g); \ + free_aligned_buffer_page_end(src_memory_b); \ + free_aligned_buffer_page_end(dst_memory_c); \ + free_aligned_buffer_page_end(dst_memory_opt); \ + } + +#define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \ + TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \ + TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \ + 2) \ + TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \ + TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0) + +TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 10) +TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 12) +TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16) + +// TODO(fbarchard): improve test for platforms and cpu detect +#ifdef HAS_MERGEUVROW_16_AVX2 +TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) { + // Round count up to multiple of 8 + const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7; + + align_buffer_page_end(src_pixels_u, kPixels * 2); + align_buffer_page_end(src_pixels_v, kPixels * 2); + align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2); + align_buffer_page_end(dst_pixels_uv_c, kPixels * 2 * 2); + + MemRandomize(src_pixels_u, kPixels * 2); + MemRandomize(src_pixels_v, kPixels * 2); + memset(dst_pixels_uv_opt, 0, kPixels * 2 * 2); + memset(dst_pixels_uv_c, 1, kPixels * 2 * 2); + + MergeUVRow_16_C(reinterpret_cast(src_pixels_u), + reinterpret_cast(src_pixels_v), + reinterpret_cast(dst_pixels_uv_c), 16, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + MergeUVRow_16_AVX2(reinterpret_cast(src_pixels_u), + reinterpret_cast(src_pixels_v), + reinterpret_cast(dst_pixels_uv_opt), 16, + kPixels); + } else { + MergeUVRow_16_C(reinterpret_cast(src_pixels_u), + reinterpret_cast(src_pixels_v), + reinterpret_cast(dst_pixels_uv_opt), 16, + kPixels); + } + } + + for (int i = 0; i < kPixels * 2 * 2; ++i) { + 
EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_u); + free_aligned_buffer_page_end(src_pixels_v); + free_aligned_buffer_page_end(dst_pixels_uv_opt); + free_aligned_buffer_page_end(dst_pixels_uv_c); +} +#endif + +// TODO(fbarchard): Improve test for more platforms. +#ifdef HAS_MULTIPLYROW_16_AVX2 +TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) { + // Round count up to multiple of 32 + const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31; + + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels * 2); + align_buffer_page_end(dst_pixels_y_c, kPixels * 2); + + MemRandomize(src_pixels_y, kPixels * 2); + memset(dst_pixels_y_opt, 0, kPixels * 2); + memset(dst_pixels_y_c, 1, kPixels * 2); + + MultiplyRow_16_C(reinterpret_cast(src_pixels_y), + reinterpret_cast(dst_pixels_y_c), 64, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + MultiplyRow_16_AVX2(reinterpret_cast(src_pixels_y), + reinterpret_cast(dst_pixels_y_opt), 64, + kPixels); + } else { + MultiplyRow_16_C(reinterpret_cast(src_pixels_y), + reinterpret_cast(dst_pixels_y_opt), 64, + kPixels); + } + } + + for (int i = 0; i < kPixels * 2; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} +#endif // HAS_MULTIPLYROW_16_AVX2 + +TEST_F(LibYUVPlanarTest, Convert16To8Plane) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels); + align_buffer_page_end(dst_pixels_y_c, kPixels); + + MemRandomize(src_pixels_y, kPixels * 2); + memset(dst_pixels_y_opt, 0, kPixels); + memset(dst_pixels_y_c, 1, kPixels); + + MaskCpuFlags(disable_cpu_flags_); + 
Convert16To8Plane(reinterpret_cast(src_pixels_y), + benchmark_width_, dst_pixels_y_c, benchmark_width_, 16384, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + Convert16To8Plane(reinterpret_cast(src_pixels_y), + benchmark_width_, dst_pixels_y_opt, benchmark_width_, + 16384, benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} + +TEST_F(LibYUVPlanarTest, YUY2ToY) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels); + align_buffer_page_end(dst_pixels_y_c, kPixels); + + MemRandomize(src_pixels_y, kPixels * 2); + memset(dst_pixels_y_opt, 0, kPixels); + memset(dst_pixels_y_c, 1, kPixels); + + MaskCpuFlags(disable_cpu_flags_); + YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_opt, + benchmark_width_, benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} + +TEST_F(LibYUVPlanarTest, UYVYToY) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels); + align_buffer_page_end(dst_pixels_y_c, kPixels); + + MemRandomize(src_pixels_y, kPixels * 2); + memset(dst_pixels_y_opt, 0, kPixels); + 
memset(dst_pixels_y_c, 1, kPixels); + + MaskCpuFlags(disable_cpu_flags_); + UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_opt, + benchmark_width_, benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} + +#ifdef ENABLE_ROW_TESTS +// TODO(fbarchard): Improve test for more platforms. +#ifdef HAS_CONVERT16TO8ROW_AVX2 +TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) { + // AVX2 does multiple of 32, so round count up + const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels); + align_buffer_page_end(dst_pixels_y_c, kPixels); + + MemRandomize(src_pixels_y, kPixels * 2); + // clamp source range to 10 bits. 
+ for (int i = 0; i < kPixels; ++i) { + reinterpret_cast<uint16_t*>(src_pixels_y)[i] &= 1023; + } + + memset(dst_pixels_y_opt, 0, kPixels); + memset(dst_pixels_y_c, 1, kPixels); + + Convert16To8Row_C(reinterpret_cast<const uint16_t*>(src_pixels_y), + dst_pixels_y_c, 16384, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + Convert16To8Row_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_y), + dst_pixels_y_opt, 16384, kPixels); + } else if (has_ssse3) { + Convert16To8Row_SSSE3(reinterpret_cast<const uint16_t*>(src_pixels_y), + dst_pixels_y_opt, 16384, kPixels); + } else { + Convert16To8Row_C(reinterpret_cast<const uint16_t*>(src_pixels_y), + dst_pixels_y_opt, 16384, kPixels); + } + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} +#endif // HAS_CONVERT16TO8ROW_AVX2 + +#ifdef HAS_UYVYTOYROW_NEON +TEST_F(LibYUVPlanarTest, UYVYToYRow_Opt) { + // NEON does multiple of 16, so round count up + const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels); + align_buffer_page_end(dst_pixels_y_c, kPixels); + + MemRandomize(src_pixels_y, kPixels * 2); + memset(dst_pixels_y_opt, 0, kPixels); + memset(dst_pixels_y_c, 1, kPixels); + + UYVYToYRow_C(src_pixels_y, dst_pixels_y_c, kPixels); + + for (int i = 0; i < benchmark_iterations_; ++i) { + UYVYToYRow_NEON(src_pixels_y, dst_pixels_y_opt, kPixels); + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} +#endif // HAS_UYVYTOYROW_NEON + +#endif // ENABLE_ROW_TESTS + +TEST_F(LibYUVPlanarTest, 
Convert8To16Plane) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_y, kPixels); + align_buffer_page_end(dst_pixels_y_opt, kPixels * 2); + align_buffer_page_end(dst_pixels_y_c, kPixels * 2); + + MemRandomize(src_pixels_y, kPixels); + memset(dst_pixels_y_opt, 0, kPixels * 2); + memset(dst_pixels_y_c, 1, kPixels * 2); + + MaskCpuFlags(disable_cpu_flags_); + Convert8To16Plane(src_pixels_y, benchmark_width_, + reinterpret_cast<uint16_t*>(dst_pixels_y_c), + benchmark_width_, 1024, benchmark_width_, + benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + Convert8To16Plane(src_pixels_y, benchmark_width_, + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), + benchmark_width_, 1024, benchmark_width_, + benchmark_height_); + } + + for (int i = 0; i < kPixels * 2; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} + +#ifdef ENABLE_ROW_TESTS +// TODO(fbarchard): Improve test for more platforms. 
+#ifdef HAS_CONVERT8TO16ROW_AVX2 +TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) { + const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31; + align_buffer_page_end(src_pixels_y, kPixels); + align_buffer_page_end(dst_pixels_y_opt, kPixels * 2); + align_buffer_page_end(dst_pixels_y_c, kPixels * 2); + + MemRandomize(src_pixels_y, kPixels); + memset(dst_pixels_y_opt, 0, kPixels * 2); + memset(dst_pixels_y_c, 1, kPixels * 2); + + Convert8To16Row_C(src_pixels_y, reinterpret_cast<uint16_t*>(dst_pixels_y_c), + 1024, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + int has_sse2 = TestCpuFlag(kCpuHasSSE2); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + Convert8To16Row_AVX2(src_pixels_y, + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024, + kPixels); + } else if (has_sse2) { + Convert8To16Row_SSE2(src_pixels_y, + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024, + kPixels); + } else { + Convert8To16Row_C(src_pixels_y, + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024, + kPixels); + } + } + + for (int i = 0; i < kPixels * 2; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} +#endif // HAS_CONVERT8TO16ROW_AVX2 + +float TestScaleMaxSamples(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + float scale, + bool opt) { + int i, j; + float max_c, max_opt = 0.f; + // NEON does multiple of 8, so round count up + const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; + align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48); + uint8_t* dst_c = orig_y + kPixels * 4 + 16; + uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32; + + // Randomize works but may contain some denormals affecting performance. + // MemRandomize(orig_y, kPixels * 4); + // large values are problematic. audio is really -1 to 1. 
+ for (i = 0; i < kPixels; ++i) { + (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f); + } + memset(dst_c, 0, kPixels * 4); + memset(dst_opt, 1, kPixels * 4); + + max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_c), scale, kPixels); + + for (j = 0; j < benchmark_iterations; j++) { + if (opt) { +#ifdef HAS_SCALESUMSAMPLES_NEON + max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, + kPixels); +#else + max_opt = + ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); +#endif + } else { + max_opt = + ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); + } + } + + float max_diff = FAbs(max_opt - max_c); + for (i = 0; i < kPixels; ++i) { + float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) - + (reinterpret_cast<float*>(dst_opt)[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(orig_y); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) { + float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, false); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) { + float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, true); + EXPECT_EQ(0, diff); +} + +float TestScaleSumSamples(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + float scale, + bool opt) { + int i, j; + float sum_c, sum_opt = 0.f; + // NEON does multiple of 8, so round count up + const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; + align_buffer_page_end(orig_y, kPixels * 4 * 3); + uint8_t* dst_c = orig_y + kPixels * 4; + uint8_t* dst_opt = orig_y + kPixels * 4 * 2; + + // Randomize works but may contain some denormals affecting performance. + // MemRandomize(orig_y, kPixels * 4); + // large values are problematic. audio is really -1 to 1. 
+ for (i = 0; i < kPixels; ++i) { + (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f); + } + memset(dst_c, 0, kPixels * 4); + memset(dst_opt, 1, kPixels * 4); + + sum_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_c), scale, kPixels); + + for (j = 0; j < benchmark_iterations; j++) { + if (opt) { +#ifdef HAS_SCALESUMSAMPLES_NEON + sum_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, + kPixels); +#else + sum_opt = + ScaleSumSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); +#endif + } else { + sum_opt = + ScaleSumSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); + } + } + + float mse_opt = sum_opt / kPixels * 4; + float mse_c = sum_c / kPixels * 4; + float mse_error = FAbs(mse_opt - mse_c) / mse_c; + + // If the sum of a float is more than 4 million, small adds are round down on + // float and produce different results with vectorized sum vs scalar sum. + // Ignore the difference if the sum is large. 
+ float max_diff = 0.f; + if (mse_error > 0.0001 && sum_c < 4000000) { // allow .01% difference of mse + max_diff = mse_error; + } + + for (i = 0; i < kPixels; ++i) { + float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) - + (reinterpret_cast<float*>(dst_opt)[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(orig_y); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) { + float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, false); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) { + float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, true); + EXPECT_EQ(0, diff); +} + +float TestScaleSamples(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + float scale, + bool opt) { + int i, j; + // NEON does multiple of 8, so round count up + const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; + align_buffer_page_end(orig_y, kPixels * 4 * 3); + uint8_t* dst_c = orig_y + kPixels * 4; + uint8_t* dst_opt = orig_y + kPixels * 4 * 2; + + // Randomize works but may contain some denormals affecting performance. + // MemRandomize(orig_y, kPixels * 4); + // large values are problematic. audio is really -1 to 1. 
+ for (i = 0; i < kPixels; ++i) { + (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f); + } + memset(dst_c, 0, kPixels * 4); + memset(dst_opt, 1, kPixels * 4); + + ScaleSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_c), scale, kPixels); + + for (j = 0; j < benchmark_iterations; j++) { + if (opt) { +#ifdef HAS_SCALESUMSAMPLES_NEON + ScaleSamples_NEON(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); +#else + ScaleSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); +#endif + } else { + ScaleSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); + } + } + + float max_diff = 0.f; + for (i = 0; i < kPixels; ++i) { + float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) - + (reinterpret_cast<float*>(dst_opt)[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(orig_y); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestScaleSamples_C) { + float diff = TestScaleSamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, false); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) { + float diff = TestScaleSamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, true); + EXPECT_EQ(0, diff); +} + +float TestCopySamples(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + bool opt) { + int i, j; + // NEON does multiple of 16 floats, so round count up + const int kPixels = (benchmark_width * benchmark_height + 15) & ~15; + align_buffer_page_end(orig_y, kPixels * 4 * 3); + uint8_t* dst_c = orig_y + kPixels * 4; + uint8_t* dst_opt = orig_y + kPixels * 4 * 2; + + // Randomize works but may contain some denormals affecting performance. + // MemRandomize(orig_y, kPixels * 4); + // large values are problematic. audio is really -1 to 1. 
+ for (i = 0; i < kPixels; ++i) { + (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f); + } + memset(dst_c, 0, kPixels * 4); + memset(dst_opt, 1, kPixels * 4); + + memcpy(reinterpret_cast<float*>(dst_c), reinterpret_cast<float*>(orig_y), + kPixels * 4); + + for (j = 0; j < benchmark_iterations; j++) { + if (opt) { +#ifdef HAS_COPYROW_NEON + CopyRow_NEON(orig_y, dst_opt, kPixels * 4); +#else + CopyRow_C(orig_y, dst_opt, kPixels * 4); +#endif + } else { + CopyRow_C(orig_y, dst_opt, kPixels * 4); + } + } + + float max_diff = 0.f; + for (i = 0; i < kPixels; ++i) { + float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) - + (reinterpret_cast<float*>(dst_opt)[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(orig_y); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestCopySamples_C) { + float diff = TestCopySamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, false); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) { + float diff = TestCopySamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, true); + EXPECT_EQ(0, diff); +} + +extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width); +extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width); + +TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) { + SIMD_ALIGNED(uint32_t orig_pixels[1280 + 8]); + SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); + SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); + + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); + + for (int i = 0; i < 1280 + 8; ++i) { + orig_pixels[i] = i * 256; + } + GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && \ + (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + 
GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 1280); + } else { + GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280); + } +#else + GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280); +#endif + } + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + EXPECT_EQ(dst_pixels_c[0], + static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1)); + EXPECT_EQ(dst_pixels_c[639], static_cast<uint16_t>(10256)); +} + +extern "C" void GaussCol_NEON(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, + int width); + +extern "C" void GaussCol_C(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, + int width); + +TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) { + SIMD_ALIGNED(uint16_t orig_pixels[1280 * 5]); + SIMD_ALIGNED(uint32_t dst_pixels_c[1280]); + SIMD_ALIGNED(uint32_t dst_pixels_opt[1280]); + + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); + + for (int i = 0; i < 1280 * 5; ++i) { + orig_pixels[i] = static_cast<uint16_t>(i); + } + GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2], + &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], &dst_pixels_c[0], + 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && \ + (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + GaussCol_NEON(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2], + &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], + &dst_pixels_opt[0], 1280); + } else { + GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2], + &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], + &dst_pixels_opt[0], 1280); + } +#else + GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 
2], + &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], + &dst_pixels_opt[0], 1280); +#endif + } + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } +} + +TEST_F(LibYUVPlanarTest, TestGaussRow_F32_Opt) { + SIMD_ALIGNED(float orig_pixels[1280 + 4]); + SIMD_ALIGNED(float dst_pixels_c[1280]); + SIMD_ALIGNED(float dst_pixels_opt[1280]); + + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); + + for (int i = 0; i < 1280 + 4; ++i) { + orig_pixels[i] = static_cast<float>(i); + } + GaussRow_F32_C(&orig_pixels[0], &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + GaussRow_F32_NEON(&orig_pixels[0], &dst_pixels_opt[0], 1280); + } else { + GaussRow_F32_C(&orig_pixels[0], &dst_pixels_opt[0], 1280); + } +#else + GaussRow_F32_C(&orig_pixels[0], &dst_pixels_opt[0], 1280); +#endif + } + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } +} + +TEST_F(LibYUVPlanarTest, TestGaussCol_F32_Opt) { + SIMD_ALIGNED(float dst_pixels_c[1280]); + SIMD_ALIGNED(float dst_pixels_opt[1280]); + align_buffer_page_end(orig_pixels_buf, 1280 * 5 * 4); // 5 rows + float* orig_pixels = reinterpret_cast<float*>(orig_pixels_buf); + + memset(orig_pixels, 0, 1280 * 5 * 4); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); + + for (int i = 0; i < 1280 * 5; ++i) { + orig_pixels[i] = static_cast<float>(i); + } + GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2], + &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], + &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + 
GaussCol_F32_NEON(&orig_pixels[0], &orig_pixels[1280], + &orig_pixels[1280 * 2], &orig_pixels[1280 * 3], + &orig_pixels[1280 * 4], &dst_pixels_opt[0], 1280); + } else { + GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280], + &orig_pixels[1280 * 2], &orig_pixels[1280 * 3], + &orig_pixels[1280 * 4], &dst_pixels_opt[0], 1280); + } +#else + GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2], + &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], + &dst_pixels_opt[0], 1280); +#endif + } + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + free_aligned_buffer_page_end(orig_pixels_buf); +} + +TEST_F(LibYUVPlanarTest, SwapUVRow) { + const int kPixels = benchmark_width_ * benchmark_height_; + void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) = + SwapUVRow_C; + + align_buffer_page_end(src_pixels_vu, kPixels * 2); + align_buffer_page_end(dst_pixels_uv, kPixels * 2); + MemRandomize(src_pixels_vu, kPixels * 2); + memset(dst_pixels_uv, 1, kPixels * 2); + +#if defined(HAS_SWAPUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + SwapUVRow = SwapUVRow_Any_NEON; + if (IS_ALIGNED(kPixels, 16)) { + SwapUVRow = SwapUVRow_NEON; + } + } +#endif + + for (int j = 0; j < benchmark_iterations_; j++) { + SwapUVRow(src_pixels_vu, dst_pixels_uv, kPixels); + } + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]); + EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]); + } + + free_aligned_buffer_page_end(src_pixels_vu); + free_aligned_buffer_page_end(dst_pixels_uv); +} +#endif // ENABLE_ROW_TESTS + +TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) { + const int kSize = benchmark_width_ * benchmark_height_ * 4; + align_buffer_page_end(orig_pixels, kSize); + align_buffer_page_end(dst_pixels_opt, kSize); + align_buffer_page_end(dst_pixels_c, kSize); + + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + ((float*)(orig_pixels))[i] = (i & 1023) * 3.14f; + } + 
memset(dst_pixels_opt, 1, kSize); + memset(dst_pixels_c, 2, kSize); + + MaskCpuFlags(disable_cpu_flags_); + GaussPlane_F32((const float*)(orig_pixels), benchmark_width_, + (float*)(dst_pixels_c), benchmark_width_, benchmark_width_, + benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + GaussPlane_F32((const float*)(orig_pixels), benchmark_width_, + (float*)(dst_pixels_opt), benchmark_width_, benchmark_width_, + benchmark_height_); + } + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + EXPECT_NEAR(((float*)(dst_pixels_c))[i], ((float*)(dst_pixels_opt))[i], 1.f) + << i; + } + + free_aligned_buffer_page_end(dst_pixels_c); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) { + int dst_width = (benchmark_width_ + 1) / 2; + int dst_height = (benchmark_height_ + 1) / 2; + align_buffer_page_end(src_pixels_u, benchmark_width_ * benchmark_height_); + align_buffer_page_end(src_pixels_v, benchmark_width_ * benchmark_height_); + align_buffer_page_end(tmp_pixels_u, dst_width * dst_height); + align_buffer_page_end(tmp_pixels_v, dst_width * dst_height); + align_buffer_page_end(dst_pixels_uv_opt, dst_width * 2 * dst_height); + align_buffer_page_end(dst_pixels_uv_c, dst_width * 2 * dst_height); + + MemRandomize(src_pixels_u, benchmark_width_ * benchmark_height_); + MemRandomize(src_pixels_v, benchmark_width_ * benchmark_height_); + MemRandomize(tmp_pixels_u, dst_width * dst_height); + MemRandomize(tmp_pixels_v, dst_width * dst_height); + MemRandomize(dst_pixels_uv_opt, dst_width * 2 * dst_height); + MemRandomize(dst_pixels_uv_c, dst_width * 2 * dst_height); + + MaskCpuFlags(disable_cpu_flags_); + HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, + benchmark_width_, dst_pixels_uv_c, dst_width * 2, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < 
benchmark_iterations_; ++i) { + HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, + benchmark_width_, dst_pixels_uv_opt, dst_width * 2, + benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < dst_width * 2 * dst_height; ++i) { + EXPECT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels_u); + free_aligned_buffer_page_end(src_pixels_v); + free_aligned_buffer_page_end(tmp_pixels_u); + free_aligned_buffer_page_end(tmp_pixels_v); + free_aligned_buffer_page_end(dst_pixels_uv_opt); + free_aligned_buffer_page_end(dst_pixels_uv_c); +} + +TEST_F(LibYUVPlanarTest, NV12Copy) { + const int halfwidth = (benchmark_width_ + 1) >> 1; + const int halfheight = (benchmark_height_ + 1) >> 1; + align_buffer_page_end(src_y, benchmark_width_ * benchmark_height_); + align_buffer_page_end(src_uv, halfwidth * 2 * halfheight); + align_buffer_page_end(dst_y, benchmark_width_ * benchmark_height_); + align_buffer_page_end(dst_uv, halfwidth * 2 * halfheight); + + MemRandomize(src_y, benchmark_width_ * benchmark_height_); + MemRandomize(src_uv, halfwidth * 2 * halfheight); + MemRandomize(dst_y, benchmark_width_ * benchmark_height_); + MemRandomize(dst_uv, halfwidth * 2 * halfheight); + + for (int i = 0; i < benchmark_iterations_; ++i) { + NV12Copy(src_y, benchmark_width_, src_uv, halfwidth * 2, dst_y, + benchmark_width_, dst_uv, halfwidth * 2, benchmark_width_, + benchmark_height_); + } + + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + EXPECT_EQ(src_y[i], dst_y[i]); + } + for (int i = 0; i < halfwidth * 2 * halfheight; ++i) { + EXPECT_EQ(src_uv[i], dst_uv[i]); + } + + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_uv); + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVPlanarTest, NV21Copy) { + const int halfwidth = (benchmark_width_ + 1) >> 1; + const int halfheight = (benchmark_height_ + 1) >> 1; + align_buffer_page_end(src_y, 
benchmark_width_ * benchmark_height_); + align_buffer_page_end(src_vu, halfwidth * 2 * halfheight); + align_buffer_page_end(dst_y, benchmark_width_ * benchmark_height_); + align_buffer_page_end(dst_vu, halfwidth * 2 * halfheight); + + MemRandomize(src_y, benchmark_width_ * benchmark_height_); + MemRandomize(src_vu, halfwidth * 2 * halfheight); + MemRandomize(dst_y, benchmark_width_ * benchmark_height_); + MemRandomize(dst_vu, halfwidth * 2 * halfheight); + + for (int i = 0; i < benchmark_iterations_; ++i) { + NV21Copy(src_y, benchmark_width_, src_vu, halfwidth * 2, dst_y, + benchmark_width_, dst_vu, halfwidth * 2, benchmark_width_, + benchmark_height_); + } + + for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { + EXPECT_EQ(src_y[i], dst_y[i]); + } + for (int i = 0; i < halfwidth * 2 * halfheight; ++i) { + EXPECT_EQ(src_vu[i], dst_vu[i]); + } + + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_vu); + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_vu); +} + +} // namespace libyuv diff --git a/unit_test/rotate_argb_test.cc b/unit_test/rotate_argb_test.cc new file mode 100644 index 00000000..74952c4e --- /dev/null +++ b/unit_test/rotate_argb_test.cc @@ -0,0 +1,334 @@ +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stdlib.h> + +#include "../unit_test/unit_test.h" +#include "libyuv/cpu_id.h" +#include "libyuv/rotate_argb.h" + +namespace libyuv { + +void TestRotateBpp(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + const int kBpp) { + if (src_width < 1) { + src_width = 1; + } + if (src_height < 1) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_stride_argb = src_width * kBpp; + int src_argb_plane_size = src_stride_argb * abs(src_height); + align_buffer_page_end(src_argb, src_argb_plane_size); + for (int i = 0; i < src_argb_plane_size; ++i) { + src_argb[i] = fastrand() & 0xff; + } + + int dst_stride_argb = dst_width * kBpp; + int dst_argb_plane_size = dst_stride_argb * dst_height; + align_buffer_page_end(dst_argb_c, dst_argb_plane_size); + align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); + memset(dst_argb_c, 2, dst_argb_plane_size); + memset(dst_argb_opt, 3, dst_argb_plane_size); + + if (kBpp == 1) { + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + RotatePlane(src_argb, src_stride_argb, dst_argb_c, dst_stride_argb, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + RotatePlane(src_argb, src_stride_argb, dst_argb_opt, dst_stride_argb, + src_width, src_height, mode); + } + } else if (kBpp == 4) { + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + ARGBRotate(src_argb, src_stride_argb, dst_argb_c, dst_stride_argb, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBRotate(src_argb, src_stride_argb, dst_argb_opt, dst_stride_argb, + src_width, src_height, mode); + } + } + + // Rotation should be exact. 
+ for (int i = 0; i < dst_argb_plane_size; ++i) { + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); + } + + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + free_aligned_buffer_page_end(src_argb); +} + +static void ARGBTestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + TestRotateBpp(src_width, src_height, dst_width, dst_height, mode, + benchmark_iterations, disable_cpu_flags, benchmark_cpu_info, 4); +} + +TEST_F(LibYUVRotateTest, ARGBRotate0_Opt) { + ARGBTestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, ARGBRotate90_Opt) { + ARGBTestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, ARGBRotate180_Opt) { + ARGBTestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, ARGBRotate270_Opt) { + ARGBTestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +static void TestRotatePlane(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + TestRotateBpp(src_width, src_height, dst_width, dst_height, mode, + benchmark_iterations, disable_cpu_flags, benchmark_cpu_info, 1); +} + +TEST_F(LibYUVRotateTest, RotatePlane0_Opt) { + TestRotatePlane(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + 
disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane90_Opt) { + TestRotatePlane(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane180_Opt) { + TestRotatePlane(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane270_Opt) { + TestRotatePlane(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_RotatePlane0_Odd) { + TestRotatePlane(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate0, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_RotatePlane90_Odd) { + TestRotatePlane(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate90, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_RotatePlane180_Odd) { + TestRotatePlane(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate180, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_RotatePlane270_Odd) { + TestRotatePlane(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate270, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) { + int argb_plane_size = benchmark_width_ * 4 * abs(benchmark_height_); + + align_buffer_page_end(src_argb, argb_plane_size); + align_buffer_page_end(dst_argb, argb_plane_size); + + EXPECT_EQ(0, ARGBRotate(src_argb, 
benchmark_width_ * 4, dst_argb, + benchmark_width_ * 4, benchmark_width_, + benchmark_height_, kRotate0)); + + EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb, + benchmark_width_ * 4 - 1, benchmark_width_ - 1, + benchmark_height_, kRotate0)); + + EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb, + benchmark_width_ * 4, benchmark_width_, + benchmark_height_, kRotate180)); + + EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb, + benchmark_width_ * 4 - 1, benchmark_width_ - 1, + benchmark_height_, kRotate180)); + + EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb, + abs(benchmark_height_) * 4, benchmark_width_, + benchmark_height_, kRotate90)); + + EXPECT_EQ(-1, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb, + abs(benchmark_height_) * 4, benchmark_width_ - 1, + benchmark_height_, kRotate90)); + + EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb, + abs(benchmark_height_) * 4, benchmark_width_, + benchmark_height_, kRotate270)); + + EXPECT_EQ(-1, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb, + abs(benchmark_height_) * 4, benchmark_width_ - 1, + benchmark_height_, kRotate270)); + + free_aligned_buffer_page_end(dst_argb); + free_aligned_buffer_page_end(src_argb); +} + +static void TestRotatePlane_16(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height < 1) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_stride = src_width; + int src_plane_size = src_stride * abs(src_height); + align_buffer_page_end_16(src, src_plane_size); + for (int i = 0; i < src_plane_size; ++i) { + src[i] = fastrand() & 0xff; + } + + int dst_stride = dst_width; + int dst_plane_size = dst_stride * dst_height; + 
align_buffer_page_end_16(dst_c, dst_plane_size); + align_buffer_page_end_16(dst_opt, dst_plane_size); + memset(dst_c, 2, dst_plane_size); + memset(dst_opt, 3, dst_plane_size); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + RotatePlane_16(src, src_stride, dst_c, dst_stride, src_width, src_height, + mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + RotatePlane_16(src, src_stride, dst_opt, dst_stride, src_width, src_height, + mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_c); + free_aligned_buffer_page_end_16(dst_opt); + free_aligned_buffer_page_end_16(src); +} + +TEST_F(LibYUVRotateTest, RotatePlane0_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane90_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane180_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane270_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane0_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate0, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, 
RotatePlane90_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate90, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane180_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate180, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane270_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate270, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +} // namespace libyuv diff --git a/unit_test/rotate_test.cc b/unit_test/rotate_test.cc new file mode 100644 index 00000000..abc08efa --- /dev/null +++ b/unit_test/rotate_test.cc @@ -0,0 +1,962 @@ +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include + +#include "../unit_test/unit_test.h" +#include "libyuv/cpu_id.h" +#include "libyuv/rotate.h" + +#ifdef ENABLE_ROW_TESTS +#include "libyuv/rotate_row.h" +#endif + +namespace libyuv { + +#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) + +static void I420TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i420_y_size = src_width * Abs(src_height); + int src_i420_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2); + int src_i420_size = src_i420_y_size + src_i420_uv_size * 2; + align_buffer_page_end(src_i420, src_i420_size); + for (int i = 0; i < src_i420_size; ++i) { + src_i420[i] = fastrand() & 0xff; + } + + int dst_i420_y_size = dst_width * dst_height; + int dst_i420_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2); + int dst_i420_size = dst_i420_y_size + dst_i420_uv_size * 2; + align_buffer_page_end(dst_i420_c, dst_i420_size); + align_buffer_page_end(dst_i420_opt, dst_i420_size); + memset(dst_i420_c, 2, dst_i420_size); + memset(dst_i420_opt, 3, dst_i420_size); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I420Rotate(src_i420, src_width, src_i420 + src_i420_y_size, + (src_width + 1) / 2, src_i420 + src_i420_y_size + src_i420_uv_size, + (src_width + 1) / 2, dst_i420_c, dst_width, + dst_i420_c + dst_i420_y_size, (dst_width + 1) / 2, + dst_i420_c + dst_i420_y_size + dst_i420_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. 
+ for (int i = 0; i < benchmark_iterations; ++i) { + I420Rotate( + src_i420, src_width, src_i420 + src_i420_y_size, (src_width + 1) / 2, + src_i420 + src_i420_y_size + src_i420_uv_size, (src_width + 1) / 2, + dst_i420_opt, dst_width, dst_i420_opt + dst_i420_y_size, + (dst_width + 1) / 2, dst_i420_opt + dst_i420_y_size + dst_i420_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i420_size; ++i) { + EXPECT_EQ(dst_i420_c[i], dst_i420_opt[i]); + } + + free_aligned_buffer_page_end(dst_i420_c); + free_aligned_buffer_page_end(dst_i420_opt); + free_aligned_buffer_page_end(src_i420); +} + +TEST_F(LibYUVRotateTest, I420Rotate0_Opt) { + I420TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I420Rotate90_Opt) { + I420TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I420Rotate180_Opt) { + I420TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I420Rotate270_Opt) { + I420TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +// TODO(fbarchard): Remove odd width tests. +// Odd width tests work but disabled because they use C code and can be +// tested by passing an odd width command line or environment variable. 
+TEST_F(LibYUVRotateTest, DISABLED_I420Rotate0_Odd) { + I420TestRotate(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate0, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I420Rotate90_Odd) { + I420TestRotate(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate90, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I420Rotate180_Odd) { + I420TestRotate(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate180, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) { + I420TestRotate(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate270, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +static void I422TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i422_y_size = src_width * Abs(src_height); + int src_i422_uv_size = ((src_width + 1) / 2) * Abs(src_height); + int src_i422_size = src_i422_y_size + src_i422_uv_size * 2; + align_buffer_page_end(src_i422, src_i422_size); + for (int i = 0; i < src_i422_size; ++i) { + src_i422[i] = fastrand() & 0xff; + } + + int dst_i422_y_size = dst_width * dst_height; + int dst_i422_uv_size = ((dst_width + 1) / 2) * dst_height; + int dst_i422_size = dst_i422_y_size + dst_i422_uv_size * 2; + align_buffer_page_end(dst_i422_c, dst_i422_size); + align_buffer_page_end(dst_i422_opt, dst_i422_size); + memset(dst_i422_c, 
2, dst_i422_size); + memset(dst_i422_opt, 3, dst_i422_size); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I422Rotate(src_i422, src_width, src_i422 + src_i422_y_size, + (src_width + 1) / 2, src_i422 + src_i422_y_size + src_i422_uv_size, + (src_width + 1) / 2, dst_i422_c, dst_width, + dst_i422_c + dst_i422_y_size, (dst_width + 1) / 2, + dst_i422_c + dst_i422_y_size + dst_i422_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I422Rotate( + src_i422, src_width, src_i422 + src_i422_y_size, (src_width + 1) / 2, + src_i422 + src_i422_y_size + src_i422_uv_size, (src_width + 1) / 2, + dst_i422_opt, dst_width, dst_i422_opt + dst_i422_y_size, + (dst_width + 1) / 2, dst_i422_opt + dst_i422_y_size + dst_i422_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i422_size; ++i) { + EXPECT_EQ(dst_i422_c[i], dst_i422_opt[i]); + } + + free_aligned_buffer_page_end(dst_i422_c); + free_aligned_buffer_page_end(dst_i422_opt); + free_aligned_buffer_page_end(src_i422); +} + +TEST_F(LibYUVRotateTest, I422Rotate0_Opt) { + I422TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I422Rotate90_Opt) { + I422TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I422Rotate180_Opt) { + I422TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I422Rotate270_Opt) { + I422TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + 
benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +static void I444TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i444_y_size = src_width * Abs(src_height); + int src_i444_uv_size = src_width * Abs(src_height); + int src_i444_size = src_i444_y_size + src_i444_uv_size * 2; + align_buffer_page_end(src_i444, src_i444_size); + for (int i = 0; i < src_i444_size; ++i) { + src_i444[i] = fastrand() & 0xff; + } + + int dst_i444_y_size = dst_width * dst_height; + int dst_i444_uv_size = dst_width * dst_height; + int dst_i444_size = dst_i444_y_size + dst_i444_uv_size * 2; + align_buffer_page_end(dst_i444_c, dst_i444_size); + align_buffer_page_end(dst_i444_opt, dst_i444_size); + memset(dst_i444_c, 2, dst_i444_size); + memset(dst_i444_opt, 3, dst_i444_size); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width, + src_i444 + src_i444_y_size + src_i444_uv_size, src_width, + dst_i444_c, dst_width, dst_i444_c + dst_i444_y_size, dst_width, + dst_i444_c + dst_i444_y_size + dst_i444_uv_size, dst_width, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width, + src_i444 + src_i444_y_size + src_i444_uv_size, src_width, + dst_i444_opt, dst_width, dst_i444_opt + dst_i444_y_size, + dst_width, dst_i444_opt + dst_i444_y_size + dst_i444_uv_size, + dst_width, src_width, src_height, mode); + } + + // Rotation should be exact. 
+ for (int i = 0; i < dst_i444_size; ++i) { + EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]); + } + + free_aligned_buffer_page_end(dst_i444_c); + free_aligned_buffer_page_end(dst_i444_opt); + free_aligned_buffer_page_end(src_i444); +} + +TEST_F(LibYUVRotateTest, I444Rotate0_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I444Rotate90_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I444Rotate180_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I444Rotate270_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +// TODO(fbarchard): Remove odd width tests. +// Odd width tests work but disabled because they use C code and can be +// tested by passing an odd width command line or environment variable. 
+TEST_F(LibYUVRotateTest, DISABLED_I444Rotate0_Odd) { + I444TestRotate(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate0, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate90_Odd) { + I444TestRotate(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate90, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate180_Odd) { + I444TestRotate(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate180, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate270_Odd) { + I444TestRotate(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate270, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +static void NV12TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { // allow negative for inversion test. 
+ src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_nv12_y_size = src_width * Abs(src_height); + int src_nv12_uv_size = + ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2) * 2; + int src_nv12_size = src_nv12_y_size + src_nv12_uv_size; + align_buffer_page_end(src_nv12, src_nv12_size); + for (int i = 0; i < src_nv12_size; ++i) { + src_nv12[i] = fastrand() & 0xff; + } + + int dst_i420_y_size = dst_width * dst_height; + int dst_i420_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2); + int dst_i420_size = dst_i420_y_size + dst_i420_uv_size * 2; + align_buffer_page_end(dst_i420_c, dst_i420_size); + align_buffer_page_end(dst_i420_opt, dst_i420_size); + memset(dst_i420_c, 2, dst_i420_size); + memset(dst_i420_opt, 3, dst_i420_size); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + NV12ToI420Rotate(src_nv12, src_width, src_nv12 + src_nv12_y_size, + (src_width + 1) & ~1, dst_i420_c, dst_width, + dst_i420_c + dst_i420_y_size, (dst_width + 1) / 2, + dst_i420_c + dst_i420_y_size + dst_i420_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + NV12ToI420Rotate(src_nv12, src_width, src_nv12 + src_nv12_y_size, + (src_width + 1) & ~1, dst_i420_opt, dst_width, + dst_i420_opt + dst_i420_y_size, (dst_width + 1) / 2, + dst_i420_opt + dst_i420_y_size + dst_i420_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + } + + // Rotation should be exact. 
+ for (int i = 0; i < dst_i420_size; ++i) { + EXPECT_EQ(dst_i420_c[i], dst_i420_opt[i]); + } + + free_aligned_buffer_page_end(dst_i420_c); + free_aligned_buffer_page_end(dst_i420_opt); + free_aligned_buffer_page_end(src_nv12); +} + +TEST_F(LibYUVRotateTest, NV12Rotate0_Opt) { + NV12TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, NV12Rotate90_Opt) { + NV12TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, NV12Rotate180_Opt) { + NV12TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, NV12Rotate270_Opt) { + NV12TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate0_Odd) { + NV12TestRotate(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate0, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate90_Odd) { + NV12TestRotate(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate90, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate180_Odd) { + NV12TestRotate(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate180, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate270_Odd) { + NV12TestRotate(benchmark_width_ + 1, benchmark_height_ + 1, + 
benchmark_height_ + 1, benchmark_width_ + 1, kRotate270, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, NV12Rotate0_Invert) { + NV12TestRotate(benchmark_width_, -benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, NV12Rotate90_Invert) { + NV12TestRotate(benchmark_width_, -benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, NV12Rotate180_Invert) { + NV12TestRotate(benchmark_width_, -benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) { + NV12TestRotate(benchmark_width_, -benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +// Test Android 420 to I420 Rotate +#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF, PN, OFF_U, OFF_V, ROT) \ + TEST_F(LibYUVRotateTest, \ + SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate##ROT##To##PN##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSizeUV = \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_uv, \ + kSizeUV*((PIXEL_STRIDE == 3) ? 
3 : 2) + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + uint8_t* src_u = src_uv + OFF_U; \ + uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \ + int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ + src_u[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \ + (fastrand() & 0xff); \ + src_v[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \ + (fastrand() & 0xff); \ + } \ + } \ + memset(dst_y_c, 1, kWidth* kHeight); \ + memset(dst_u_c, 2, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_c, 3, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_u_opt, 102, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_opt, 103, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate( \ + src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, dst_y_c, \ + kWidth, dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \ + SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight, \ + (libyuv::RotationMode)ROT); \ + 
MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate( \ + src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, \ + dst_y_opt, kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ + dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight, \ + (libyuv::RotationMode)ROT); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ + EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ + dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ + EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ + dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_u_c); \ + free_aligned_buffer_page_end(dst_v_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_u_opt); \ + free_aligned_buffer_page_end(dst_v_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + } + +#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \ + SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \ + SUBSAMP_Y) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1, \ + _Any, +, 0, PN, OFF_U, OFF_V, 0) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, \ + _Unaligned, +, 2, PN, OFF_U, OFF_V, 0) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, 
PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \ + -, 0, PN, OFF_U, OFF_V, 0) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \ + 0, PN, OFF_U, OFF_V, 0) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \ + 0, PN, OFF_U, OFF_V, 180) + +TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2) +TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2) +TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) +#undef TESTAPLANARTOP +#undef TESTAPLANARTOPI + +static void I010TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i010_y_size = src_width * Abs(src_height); + int src_i010_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2); + int src_i010_size = src_i010_y_size + src_i010_uv_size * 2; + align_buffer_page_end_16(src_i010, src_i010_size); + for (int i = 0; i < src_i010_size; ++i) { + src_i010[i] = fastrand() & 0x3ff; + } + + int dst_i010_y_size = dst_width * dst_height; + int dst_i010_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2); + int dst_i010_size = dst_i010_y_size + dst_i010_uv_size * 2; + align_buffer_page_end_16(dst_i010_c, dst_i010_size); + align_buffer_page_end_16(dst_i010_opt, dst_i010_size); + memset(dst_i010_c, 2, dst_i010_size * 2); + memset(dst_i010_opt, 3, dst_i010_size * 2); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. 
+ I010Rotate(src_i010, src_width, src_i010 + src_i010_y_size, + (src_width + 1) / 2, src_i010 + src_i010_y_size + src_i010_uv_size, + (src_width + 1) / 2, dst_i010_c, dst_width, + dst_i010_c + dst_i010_y_size, (dst_width + 1) / 2, + dst_i010_c + dst_i010_y_size + dst_i010_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I010Rotate( + src_i010, src_width, src_i010 + src_i010_y_size, (src_width + 1) / 2, + src_i010 + src_i010_y_size + src_i010_uv_size, (src_width + 1) / 2, + dst_i010_opt, dst_width, dst_i010_opt + dst_i010_y_size, + (dst_width + 1) / 2, dst_i010_opt + dst_i010_y_size + dst_i010_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i010_size; ++i) { + EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_i010_c); + free_aligned_buffer_page_end_16(dst_i010_opt); + free_aligned_buffer_page_end_16(src_i010); +} + +TEST_F(LibYUVRotateTest, I010Rotate0_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I010Rotate90_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I010Rotate180_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I010Rotate270_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +static void I210TestRotate(int 
src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i210_y_size = src_width * Abs(src_height); + int src_i210_uv_size = ((src_width + 1) / 2) * Abs(src_height); + int src_i210_size = src_i210_y_size + src_i210_uv_size * 2; + align_buffer_page_end_16(src_i210, src_i210_size); + for (int i = 0; i < src_i210_size; ++i) { + src_i210[i] = fastrand() & 0x3ff; + } + + int dst_i210_y_size = dst_width * dst_height; + int dst_i210_uv_size = ((dst_width + 1) / 2) * dst_height; + int dst_i210_size = dst_i210_y_size + dst_i210_uv_size * 2; + align_buffer_page_end_16(dst_i210_c, dst_i210_size); + align_buffer_page_end_16(dst_i210_opt, dst_i210_size); + memset(dst_i210_c, 2, dst_i210_size * 2); + memset(dst_i210_opt, 3, dst_i210_size * 2); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I210Rotate(src_i210, src_width, src_i210 + src_i210_y_size, + (src_width + 1) / 2, src_i210 + src_i210_y_size + src_i210_uv_size, + (src_width + 1) / 2, dst_i210_c, dst_width, + dst_i210_c + dst_i210_y_size, (dst_width + 1) / 2, + dst_i210_c + dst_i210_y_size + dst_i210_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I210Rotate( + src_i210, src_width, src_i210 + src_i210_y_size, (src_width + 1) / 2, + src_i210 + src_i210_y_size + src_i210_uv_size, (src_width + 1) / 2, + dst_i210_opt, dst_width, dst_i210_opt + dst_i210_y_size, + (dst_width + 1) / 2, dst_i210_opt + dst_i210_y_size + dst_i210_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + } + + // Rotation should be exact. 
+ for (int i = 0; i < dst_i210_size; ++i) { + EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_i210_c); + free_aligned_buffer_page_end_16(dst_i210_opt); + free_aligned_buffer_page_end_16(src_i210); +} + +TEST_F(LibYUVRotateTest, I210Rotate0_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I210Rotate90_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I210Rotate180_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I210Rotate270_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +static void I410TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i410_y_size = src_width * Abs(src_height); + int src_i410_uv_size = src_width * Abs(src_height); + int src_i410_size = src_i410_y_size + src_i410_uv_size * 2; + align_buffer_page_end_16(src_i410, src_i410_size); + for (int i = 0; i < src_i410_size; ++i) { + src_i410[i] = fastrand() & 0x3ff; + } + + int dst_i410_y_size = dst_width * dst_height; + int dst_i410_uv_size = dst_width * dst_height; + int dst_i410_size = dst_i410_y_size + dst_i410_uv_size * 2; + 
align_buffer_page_end_16(dst_i410_c, dst_i410_size); + align_buffer_page_end_16(dst_i410_opt, dst_i410_size); + memset(dst_i410_c, 2, dst_i410_size * 2); + memset(dst_i410_opt, 3, dst_i410_size * 2); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width, + src_i410 + src_i410_y_size + src_i410_uv_size, src_width, + dst_i410_c, dst_width, dst_i410_c + dst_i410_y_size, dst_width, + dst_i410_c + dst_i410_y_size + dst_i410_uv_size, dst_width, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width, + src_i410 + src_i410_y_size + src_i410_uv_size, src_width, + dst_i410_opt, dst_width, dst_i410_opt + dst_i410_y_size, + dst_width, dst_i410_opt + dst_i410_y_size + dst_i410_uv_size, + dst_width, src_width, src_height, mode); + } + + // Rotation should be exact. 
+ for (int i = 0; i < dst_i410_size; ++i) { + EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_i410_c); + free_aligned_buffer_page_end_16(dst_i410_opt); + free_aligned_buffer_page_end_16(src_i410); +} + +TEST_F(LibYUVRotateTest, I410Rotate0_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I410Rotate90_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I410Rotate180_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I410Rotate270_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +#if defined(ENABLE_ROW_TESTS) + +TEST_F(LibYUVRotateTest, Transpose4x4_Test) { + // dst width and height + const int width = 4; + const int height = 4; + int src_pixels[4][4]; + int dst_pixels_c[4][4]; + int dst_pixels_opt[4][4]; + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + src_pixels[i][j] = i * 10 + j; + } + } + memset(dst_pixels_c, 1, width * height * 4); + memset(dst_pixels_opt, 2, width * height * 4); + + Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_c, width * 4, width); + + const int benchmark_iterations = + (benchmark_iterations_ * benchmark_width_ * benchmark_height_ + 15) / + (4 * 4); + for (int i = 0; i < benchmark_iterations; ++i) { +#if defined(HAS_TRANSPOSE4X4_32_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4, + 
(uint8_t*)dst_pixels_opt, width * 4, width); + } else +#elif defined(HAS_TRANSPOSE4X4_32_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else +#endif + { + Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } + } + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + EXPECT_EQ(dst_pixels_c[i][j], src_pixels[j][i]); + EXPECT_EQ(dst_pixels_c[i][j], dst_pixels_opt[i][j]); + } + } +} + +TEST_F(LibYUVRotateTest, Transpose4x4_Opt) { + // dst width and height + const int width = ((benchmark_width_ * benchmark_height_ + 3) / 4 + 3) & ~3; + const int height = 4; + align_buffer_page_end(src_pixels, height * width * 4); + align_buffer_page_end(dst_pixels_c, width * height * 4); + align_buffer_page_end(dst_pixels_opt, width * height * 4); + + MemRandomize(src_pixels, height * width * 4); + memset(dst_pixels_c, 1, width * height * 4); + memset(dst_pixels_opt, 2, width * height * 4); + + Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_c, width * 4, width); + + for (int i = 0; i < benchmark_iterations_; ++i) { +#if defined(HAS_TRANSPOSE4X4_32_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else +#elif defined(HAS_TRANSPOSE4X4_32_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Transpose4x4_32_AVX2((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else if (TestCpuFlag(kCpuHasSSE2)) { + Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else +#endif + { + Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } + } + + for (int i = 0; i < width * height; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + 
free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_c); + free_aligned_buffer_page_end(dst_pixels_opt); +} + +#endif // ENABLE_ROW_TESTS + +} // namespace libyuv diff --git a/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc new file mode 100644 index 00000000..f54a68f1 --- /dev/null +++ b/unit_test/scale_argb_test.cc @@ -0,0 +1,588 @@ +/* + * Copyright 2011 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "../unit_test/unit_test.h" +#include "libyuv/convert_argb.h" +#include "libyuv/cpu_id.h" +#include "libyuv/scale_argb.h" +#include "libyuv/video_common.h" + +namespace libyuv { + +#define STRINGIZE(line) #line +#define FILELINESTR(file, line) file ":" STRINGIZE(line) + +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +// SLOW TESTS are those that are unoptimized C code. +// FULL TESTS are optimized but test many variations of the same code. +#define ENABLE_FULL_TESTS +#endif + +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. +static int ARGBTestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i, j; + const int b = 0; // 128 to test for padding/stride. 
+ int64_t src_argb_plane_size = + (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4LL; + int src_stride_argb = (b * 2 + Abs(src_width)) * 4; + + align_buffer_page_end(src_argb, src_argb_plane_size); + if (!src_argb) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + MemRandomize(src_argb, src_argb_plane_size); + + int64_t dst_argb_plane_size = + (dst_width + b * 2) * (dst_height + b * 2) * 4LL; + int dst_stride_argb = (b * 2 + dst_width) * 4; + + align_buffer_page_end(dst_argb_c, dst_argb_plane_size); + align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); + if (!dst_argb_c || !dst_argb_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + memset(dst_argb_c, 2, dst_argb_plane_size); + memset(dst_argb_opt, 3, dst_argb_plane_size); + + // Warm up both versions for consistent benchmarks. + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, + src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4, + dst_stride_argb, dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, + src_width, src_height, dst_argb_opt + (dst_stride_argb * b) + b * 4, + dst_stride_argb, dst_width, dst_height, f); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + double c_time = get_time(); + ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, + src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4, + dst_stride_argb, dst_width, dst_height, f); + + c_time = (get_time() - c_time); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. 
+ double opt_time = get_time(); + for (i = 0; i < benchmark_iterations; ++i) { + ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, + src_width, src_height, + dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, + dst_width, dst_height, f); + } + opt_time = (get_time() - opt_time) / benchmark_iterations; + + // Report performance of C vs OPT + printf("filter %d - %8d us C - %8d us OPT\n", f, + static_cast(c_time * 1e6), static_cast(opt_time * 1e6)); + + // C version may be a little off from the optimized. Order of + // operations may introduce rounding somewhere. So do a difference + // of the buffers and look to see that the max difference isn't + // over 2. + int max_diff = 0; + for (i = b; i < (dst_height + b); ++i) { + for (j = b * 4; j < (dst_width + b) * 4; ++j) { + int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - + dst_argb_opt[(i * dst_stride_argb) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + free_aligned_buffer_page_end(src_argb); + return max_diff; +} + +static const int kTileX = 64; +static const int kTileY = 64; + +static int TileARGBScale(const uint8_t* src_argb, + int src_stride_argb, + int src_width, + int src_height, + uint8_t* dst_argb, + int dst_stride_argb, + int dst_width, + int dst_height, + FilterMode filtering) { + for (int y = 0; y < dst_height; y += kTileY) { + for (int x = 0; x < dst_width; x += kTileX) { + int clip_width = kTileX; + if (x + clip_width > dst_width) { + clip_width = dst_width - x; + } + int clip_height = kTileY; + if (y + clip_height > dst_height) { + clip_height = dst_height - y; + } + int r = ARGBScaleClip(src_argb, src_stride_argb, src_width, src_height, + dst_argb, dst_stride_argb, dst_width, dst_height, x, + y, clip_width, clip_height, filtering); + if (r) { + return r; + } + } + } + return 0; +} + +static int ARGBClipTestFilter(int src_width, + int src_height, + 
int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + const int b = 128; + int64_t src_argb_plane_size = + (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4; + int src_stride_argb = (b * 2 + Abs(src_width)) * 4; + + align_buffer_page_end(src_argb, src_argb_plane_size); + if (!src_argb) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + memset(src_argb, 1, src_argb_plane_size); + + int64_t dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; + int dst_stride_argb = (b * 2 + dst_width) * 4; + + int i, j; + for (i = b; i < (Abs(src_height) + b); ++i) { + for (j = b; j < (Abs(src_width) + b) * 4; ++j) { + src_argb[(i * src_stride_argb) + j] = (fastrand() & 0xff); + } + } + + align_buffer_page_end(dst_argb_c, dst_argb_plane_size); + align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); + if (!dst_argb_c || !dst_argb_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + memset(dst_argb_c, 2, dst_argb_plane_size); + memset(dst_argb_opt, 3, dst_argb_plane_size); + + // Do full image, no clipping. + double c_time = get_time(); + ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, + src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4, + dst_stride_argb, dst_width, dst_height, f); + c_time = (get_time() - c_time); + + // Do tiled image, clipping scale to a tile at a time. + double opt_time = get_time(); + for (i = 0; i < benchmark_iterations; ++i) { + TileARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, + src_width, src_height, + dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, + dst_width, dst_height, f); + } + opt_time = (get_time() - opt_time) / benchmark_iterations; + + // Report performance of Full vs Tiled. 
+ printf("filter %d - %8d us Full - %8d us Tiled\n", f, + static_cast(c_time * 1e6), static_cast(opt_time * 1e6)); + + // Compare full scaled image vs tiled image. + int max_diff = 0; + for (i = b; i < (dst_height + b); ++i) { + for (j = b * 4; j < (dst_width + b) * 4; ++j) { + int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - + dst_argb_opt[(i * dst_stride_argb) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + free_aligned_buffer_page_end(src_argb); + return max_diff; +} + +// The following adjustments in dimensions ensure the scale factor will be +// exactly achieved. +#define DX(x, nom, denom) static_cast((Abs(x) / nom) * nom) +#define SX(x, nom, denom) static_cast((x / nom) * denom) + +#define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff) \ + TEST_F(LibYUVScaleTest, ARGBScaleDownBy##name##_##filter) { \ + int diff = ARGBTestFilter( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, DISABLED_##ARGBScaleDownClipBy##name##_##filter) { \ + int diff = ARGBClipTestFilter( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_); \ + EXPECT_LE(diff, max_diff); \ + } + +// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but +// filtering is different fixed point implementations for SSSE3, Neon and C. 
// Instantiate scale-down-by-factor tests. Which filters are generated depends
// on the build configuration:
//  - default (slow tests enabled): None, Linear, Bilinear and Box;
//  - DISABLE_SLOW_TESTS with ENABLE_FULL_TESTS: the same four, but DISABLED_;
//  - minimal builds: a single DISABLED_ Bilinear test.
// Unfiltered (None) is expected to be exact (max_diff 0); the filtered
// variants allow a difference of up to 3 between the C and optimized paths.
#ifndef DISABLE_SLOW_TESTS
#define TEST_FACTOR(name, nom, denom)           \
  TEST_FACTOR1(, name, None, nom, denom, 0)     \
  TEST_FACTOR1(, name, Linear, nom, denom, 3)   \
  TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
  TEST_FACTOR1(, name, Box, nom, denom, 3)
#else
#if defined(ENABLE_FULL_TESTS)
#define TEST_FACTOR(name, nom, denom)                    \
  TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0)     \
  TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3)   \
  TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
  TEST_FACTOR1(DISABLED_, name, Box, nom, denom, 3)
#else
#define TEST_FACTOR(name, nom, denom) \
  TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3)
#endif
#endif

// Scale factors under test: 1/2, 1/4, (1/8 only in slow builds), 3/4, 3/8
// and 1/3.
TEST_FACTOR(2, 1, 2)
TEST_FACTOR(4, 1, 4)
#ifndef DISABLE_SLOW_TESTS
TEST_FACTOR(8, 1, 8)
#endif
TEST_FACTOR(3by4, 3, 4)
TEST_FACTOR(3by8, 3, 8)
TEST_FACTOR(3, 1, 3)
#undef TEST_FACTOR1
#undef TEST_FACTOR
#undef SX
#undef DX

// Generates four tests per (size, filter) pair: scale benchmark-size down/up
// To the given size, From the given size back to benchmark size, and the two
// clipped (tiled) variants, which are DISABLED_ by default since they are
// benchmark/diagnostic tests.
#define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff)       \
  TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) {            \
    int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, width,     \
                              height, kFilter##filter, benchmark_iterations_, \
                              disable_cpu_flags_, benchmark_cpu_info_);       \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
  TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) {          \
    int diff = ARGBTestFilter(width, height, Abs(benchmark_width_),           \
                              Abs(benchmark_height_), kFilter##filter,        \
                              benchmark_iterations_, disable_cpu_flags_,      \
                              benchmark_cpu_info_);                           \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
  TEST_F(LibYUVScaleTest,                                                     \
         DISABLED_##name##ClipTo##width##x##height##_##filter) {              \
    int diff =                                                                \
        ARGBClipTestFilter(benchmark_width_, benchmark_height_, width,        \
                           height, kFilter##filter, benchmark_iterations_);   \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
  TEST_F(LibYUVScaleTest,                                                     \
         DISABLED_##name##ClipFrom##width##x##height##_##filter) {            \
    int diff = ARGBClipTestFilter(width, height, Abs(benchmark_width_),       \
                                  Abs(benchmark_height_), kFilter##filter,    \
                                  benchmark_iterations_);                     \
    EXPECT_LE(diff, max_diff);                                                \
  }

#ifndef DISABLE_SLOW_TESTS
// Test scale to a specified size with the None, Linear and Bilinear filters.
#define TEST_SCALETO(name, width, height)         \
  TEST_SCALETO1(, name, width, height, None, 0)   \
  TEST_SCALETO1(, name, width, height, Linear, 3) \
  TEST_SCALETO1(, name, width, height, Bilinear, 3)
#else
#if defined(ENABLE_FULL_TESTS)
#define TEST_SCALETO(name, width, height)                  \
  TEST_SCALETO1(DISABLED_, name, width, height, None, 0)   \
  TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \
  TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3)
#else
#define TEST_SCALETO(name, width, height) \
  TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3)
#endif
#endif

TEST_SCALETO(ARGBScale, 1, 1)
TEST_SCALETO(ARGBScale, 569, 480)
TEST_SCALETO(ARGBScale, 640, 360)
#ifndef DISABLE_SLOW_TESTS
TEST_SCALETO(ARGBScale, 256, 144) /* 128x72 * 2 */
TEST_SCALETO(ARGBScale, 320, 240)
TEST_SCALETO(ARGBScale, 1280, 720)
TEST_SCALETO(ARGBScale, 1920, 1080)
#endif  // DISABLE_SLOW_TESTS
#undef TEST_SCALETO1
#undef TEST_SCALETO

// Scale with width and height swapped (e.g. rotation-like aspect change).
#define TEST_SCALESWAPXY1(name, filter, max_diff)                       \
  TEST_F(LibYUVScaleTest, name##SwapXY_##filter) {                      \
    int diff = ARGBTestFilter(benchmark_width_, benchmark_height_,      \
                              benchmark_height_, benchmark_width_,      \
                              kFilter##filter, benchmark_iterations_,   \
                              disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff);                                          \
  }

#if defined(ENABLE_FULL_TESTS)
// Test scale with swapped width and height with all 3 filters.
TEST_SCALESWAPXY1(ARGBScale, None, 0)
TEST_SCALESWAPXY1(ARGBScale, Linear, 0)
TEST_SCALESWAPXY1(ARGBScale, Bilinear, 0)
#else
TEST_SCALESWAPXY1(ARGBScale, Bilinear, 0)
#endif
#undef TEST_SCALESWAPXY1

// Scale with YUV conversion to ARGB and clipping.
// TODO(fbarchard): Add fourcc support. All 4 ARGB formats is easy to support.
+LIBYUV_API +int YUVToARGBScaleReference2(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint32_t /* src_fourcc */, + int src_width, + int src_height, + uint8_t* dst_argb, + int dst_stride_argb, + uint32_t /* dst_fourcc */, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, + enum FilterMode filtering) { + uint8_t* argb_buffer = + static_cast(malloc(src_width * src_height * 4)); + int r; + I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + argb_buffer, src_width * 4, src_width, src_height); + + r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb, + dst_stride_argb, dst_width, dst_height, clip_x, clip_y, + clip_width, clip_height, filtering); + free(argb_buffer); + return r; +} + +static void FillRamp(uint8_t* buf, + int width, + int height, + int v, + int dx, + int dy) { + int rv = v; + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + *buf++ = v; + v += dx; + if (v < 0 || v > 255) { + dx = -dx; + v += dx; + } + } + v = rv + dy; + if (v < 0 || v > 255) { + dy = -dy; + v += dy; + } + rv = v; + } +} + +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. 
+static int YUVToARGBTestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations) { + int64_t src_y_plane_size = Abs(src_width) * Abs(src_height); + int64_t src_uv_plane_size = + ((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2); + int src_stride_y = Abs(src_width); + int src_stride_uv = (Abs(src_width) + 1) / 2; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + + int64_t dst_argb_plane_size = (dst_width) * (dst_height)*4LL; + int dst_stride_argb = (dst_width)*4; + align_buffer_page_end(dst_argb_c, dst_argb_plane_size); + align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); + if (!dst_argb_c || !dst_argb_opt || !src_y || !src_u || !src_v) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + // Fill YUV image with continuous ramp, which is less sensitive to + // subsampling and filtering differences for test purposes. 
+ FillRamp(src_y, Abs(src_width), Abs(src_height), 128, 1, 1); + FillRamp(src_u, (Abs(src_width) + 1) / 2, (Abs(src_height) + 1) / 2, 3, 1, 1); + FillRamp(src_v, (Abs(src_width) + 1) / 2, (Abs(src_height) + 1) / 2, 4, 1, 1); + memset(dst_argb_c, 2, dst_argb_plane_size); + memset(dst_argb_opt, 3, dst_argb_plane_size); + + YUVToARGBScaleReference2(src_y, src_stride_y, src_u, src_stride_uv, src_v, + src_stride_uv, libyuv::FOURCC_I420, src_width, + src_height, dst_argb_c, dst_stride_argb, + libyuv::FOURCC_I420, dst_width, dst_height, 0, 0, + dst_width, dst_height, f); + + for (int i = 0; i < benchmark_iterations; ++i) { + YUVToARGBScaleClip(src_y, src_stride_y, src_u, src_stride_uv, src_v, + src_stride_uv, libyuv::FOURCC_I420, src_width, + src_height, dst_argb_opt, dst_stride_argb, + libyuv::FOURCC_I420, dst_width, dst_height, 0, 0, + dst_width, dst_height, f); + } + int max_diff = 0; + for (int i = 0; i < dst_height; ++i) { + for (int j = 0; j < dst_width * 4; ++j) { + int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - + dst_argb_opt[(i * dst_stride_argb) + j]); + if (abs_diff > max_diff) { + printf("error %d at %d,%d c %d opt %d", abs_diff, j, i, + dst_argb_c[(i * dst_stride_argb) + j], + dst_argb_opt[(i * dst_stride_argb) + j]); + EXPECT_LE(abs_diff, 40); + max_diff = abs_diff; + } + } + } + + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + return max_diff; +} + +TEST_F(LibYUVScaleTest, YUVToRGBScaleUp) { + int diff = + YUVToARGBTestFilter(benchmark_width_, benchmark_height_, + benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, + libyuv::kFilterBilinear, benchmark_iterations_); + EXPECT_LE(diff, 10); +} + +TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) { + int diff = YUVToARGBTestFilter( + benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, benchmark_width_, + benchmark_height_, libyuv::kFilterBilinear, 
benchmark_iterations_); + EXPECT_LE(diff, 10); +} + +TEST_F(LibYUVScaleTest, ARGBTest3x) { + const int kSrcStride = 480 * 4; + const int kDstStride = 160 * 4; + const int kSize = kSrcStride * 3; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 480 * 3; ++i) { + orig_pixels[i * 4 + 0] = i; + orig_pixels[i * 4 + 1] = 255 - i; + orig_pixels[i * 4 + 2] = i + 1; + orig_pixels[i * 4 + 3] = i + 10; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + ARGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(225, dest_pixels[0]); + EXPECT_EQ(255 - 225, dest_pixels[1]); + EXPECT_EQ(226, dest_pixels[2]); + EXPECT_EQ(235, dest_pixels[3]); + + ARGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterNone); + + EXPECT_EQ(225, dest_pixels[0]); + EXPECT_EQ(255 - 225, dest_pixels[1]); + EXPECT_EQ(226, dest_pixels[2]); + EXPECT_EQ(235, dest_pixels[3]); + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, ARGBTest4x) { + const int kSrcStride = 640 * 4; + const int kDstStride = 160 * 4; + const int kSize = kSrcStride * 4; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 640 * 4; ++i) { + orig_pixels[i * 4 + 0] = i; + orig_pixels[i * 4 + 1] = 255 - i; + orig_pixels[i * 4 + 2] = i + 1; + orig_pixels[i * 4 + 3] = i + 10; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + ARGBScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_NEAR(66, dest_pixels[0], 4); + EXPECT_NEAR(255 - 66, dest_pixels[1], 4); + EXPECT_NEAR(67, dest_pixels[2], 4); + 
EXPECT_NEAR(76, dest_pixels[3], 4); + + ARGBScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, + kFilterNone); + + EXPECT_EQ(2, dest_pixels[0]); + EXPECT_EQ(255 - 2, dest_pixels[1]); + EXPECT_EQ(3, dest_pixels[2]); + EXPECT_EQ(12, dest_pixels[3]); + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +} // namespace libyuv diff --git a/unit_test/scale_rgb_test.cc b/unit_test/scale_rgb_test.cc new file mode 100644 index 00000000..8296abe3 --- /dev/null +++ b/unit_test/scale_rgb_test.cc @@ -0,0 +1,280 @@ +/* + * Copyright 2022 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "../unit_test/unit_test.h" +#include "libyuv/cpu_id.h" +#include "libyuv/scale_rgb.h" + +namespace libyuv { + +#define STRINGIZE(line) #line +#define FILELINESTR(file, line) file ":" STRINGIZE(line) + +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +// SLOW TESTS are those that are unoptimized C code. +// FULL TESTS are optimized but test many variations of the same code. +#define ENABLE_FULL_TESTS +#endif + +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. +static int RGBTestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i, j; + const int b = 0; // 128 to test for padding/stride. 
+ int64_t src_rgb_plane_size = + (Abs(src_width) + b * 3) * (Abs(src_height) + b * 3) * 3LL; + int src_stride_rgb = (b * 3 + Abs(src_width)) * 3; + + align_buffer_page_end(src_rgb, src_rgb_plane_size); + if (!src_rgb) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + MemRandomize(src_rgb, src_rgb_plane_size); + + int64_t dst_rgb_plane_size = (dst_width + b * 3) * (dst_height + b * 3) * 3LL; + int dst_stride_rgb = (b * 3 + dst_width) * 3; + + align_buffer_page_end(dst_rgb_c, dst_rgb_plane_size); + align_buffer_page_end(dst_rgb_opt, dst_rgb_plane_size); + if (!dst_rgb_c || !dst_rgb_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + memset(dst_rgb_c, 2, dst_rgb_plane_size); + memset(dst_rgb_opt, 3, dst_rgb_plane_size); + + // Warm up both versions for consistent benchmarks. + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + RGBScale(src_rgb + (src_stride_rgb * b) + b * 3, src_stride_rgb, src_width, + src_height, dst_rgb_c + (dst_stride_rgb * b) + b * 3, dst_stride_rgb, + dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + RGBScale(src_rgb + (src_stride_rgb * b) + b * 3, src_stride_rgb, src_width, + src_height, dst_rgb_opt + (dst_stride_rgb * b) + b * 3, + dst_stride_rgb, dst_width, dst_height, f); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + double c_time = get_time(); + RGBScale(src_rgb + (src_stride_rgb * b) + b * 3, src_stride_rgb, src_width, + src_height, dst_rgb_c + (dst_stride_rgb * b) + b * 3, dst_stride_rgb, + dst_width, dst_height, f); + + c_time = (get_time() - c_time); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. 
+ double opt_time = get_time(); + for (i = 0; i < benchmark_iterations; ++i) { + RGBScale(src_rgb + (src_stride_rgb * b) + b * 3, src_stride_rgb, src_width, + src_height, dst_rgb_opt + (dst_stride_rgb * b) + b * 3, + dst_stride_rgb, dst_width, dst_height, f); + } + opt_time = (get_time() - opt_time) / benchmark_iterations; + + // Report performance of C vs OPT + printf("filter %d - %8d us C - %8d us OPT\n", f, + static_cast(c_time * 1e6), static_cast(opt_time * 1e6)); + + // C version may be a little off from the optimized. Order of + // operations may introduce rounding somewhere. So do a difference + // of the buffers and look to see that the max difference isn't + // over 2. + int max_diff = 0; + for (i = b; i < (dst_height + b); ++i) { + for (j = b * 3; j < (dst_width + b) * 3; ++j) { + int abs_diff = Abs(dst_rgb_c[(i * dst_stride_rgb) + j] - + dst_rgb_opt[(i * dst_stride_rgb) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + free_aligned_buffer_page_end(dst_rgb_c); + free_aligned_buffer_page_end(dst_rgb_opt); + free_aligned_buffer_page_end(src_rgb); + return max_diff; +} + +// The following adjustments in dimensions ensure the scale factor will be +// exactly achieved. +#define DX(x, nom, denom) static_cast((Abs(x) / nom) * nom) +#define SX(x, nom, denom) static_cast((x / nom) * denom) + +#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ + TEST_F(LibYUVScaleTest, RGBScaleDownBy##name##_##filter) { \ + int diff = RGBTestFilter( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +#if defined(ENABLE_FULL_TESTS) +// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but +// filtering is different fixed point implementations for SSSE3, Neon and C. 
+#define TEST_FACTOR(name, nom, denom) \ + TEST_FACTOR1(name, None, nom, denom, 0) \ + TEST_FACTOR1(name, Linear, nom, denom, 3) \ + TEST_FACTOR1(name, Bilinear, nom, denom, 3) \ + TEST_FACTOR1(name, Box, nom, denom, 3) +#else +// Test a scale factor with Bilinear. +#define TEST_FACTOR(name, nom, denom) \ + TEST_FACTOR1(name, Bilinear, nom, denom, 3) +#endif + +TEST_FACTOR(2, 1, 2) +#ifndef DISABLE_SLOW_TESTS +TEST_FACTOR(4, 1, 4) +// TEST_FACTOR(8, 1, 8) Disable for benchmark performance. +TEST_FACTOR(3by4, 3, 4) +TEST_FACTOR(3by8, 3, 8) +TEST_FACTOR(3, 1, 3) +#endif +#undef TEST_FACTOR1 +#undef TEST_FACTOR +#undef SX +#undef DX + +#define TEST_SCALETO1(name, width, height, filter, max_diff) \ + TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \ + int diff = RGBTestFilter(benchmark_width_, benchmark_height_, width, \ + height, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \ + int diff = RGBTestFilter(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +#if defined(ENABLE_FULL_TESTS) +/// Test scale to a specified size with all 4 filters. 
+#define TEST_SCALETO(name, width, height) \ + TEST_SCALETO1(name, width, height, None, 0) \ + TEST_SCALETO1(name, width, height, Linear, 3) \ + TEST_SCALETO1(name, width, height, Bilinear, 3) +#else +#define TEST_SCALETO(name, width, height) \ + TEST_SCALETO1(name, width, height, Bilinear, 3) +#endif + +TEST_SCALETO(RGBScale, 640, 360) +#ifndef DISABLE_SLOW_TESTS +TEST_SCALETO(RGBScale, 1, 1) +TEST_SCALETO(RGBScale, 256, 144) /* 128x72 * 3 */ +TEST_SCALETO(RGBScale, 320, 240) +TEST_SCALETO(RGBScale, 569, 480) +TEST_SCALETO(RGBScale, 1280, 720) +TEST_SCALETO(RGBScale, 1920, 1080) +#endif // DISABLE_SLOW_TESTS +#undef TEST_SCALETO1 +#undef TEST_SCALETO + +#define TEST_SCALESWAPXY1(name, filter, max_diff) \ + TEST_F(LibYUVScaleTest, name##SwapXY_##filter) { \ + int diff = RGBTestFilter(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +#if defined(ENABLE_FULL_TESTS) +// Test scale with swapped width and height with all 3 filters. 
+TEST_SCALESWAPXY1(RGBScale, None, 0) +TEST_SCALESWAPXY1(RGBScale, Linear, 0) +TEST_SCALESWAPXY1(RGBScale, Bilinear, 0) +#else +TEST_SCALESWAPXY1(RGBScale, Bilinear, 0) +#endif +#undef TEST_SCALESWAPXY1 + +TEST_F(LibYUVScaleTest, RGBTest3x) { + const int kSrcStride = 480 * 3; + const int kDstStride = 160 * 3; + const int kSize = kSrcStride * 3; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 480 * 3; ++i) { + orig_pixels[i * 3 + 0] = i; + orig_pixels[i * 3 + 1] = 255 - i; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + RGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(225, dest_pixels[0]); + EXPECT_EQ(255 - 225, dest_pixels[1]); + + RGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterNone); + + EXPECT_EQ(225, dest_pixels[0]); + EXPECT_EQ(255 - 225, dest_pixels[1]); + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, RGBTest4x) { + const int kSrcStride = 640 * 3; + const int kDstStride = 160 * 3; + const int kSize = kSrcStride * 4; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 640 * 4; ++i) { + orig_pixels[i * 3 + 0] = i; + orig_pixels[i * 3 + 1] = 255 - i; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + RGBScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(66, dest_pixels[0]); + EXPECT_EQ(190, dest_pixels[1]); + + RGBScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1, + kFilterNone); + + EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row + EXPECT_EQ(255 - 
2, dest_pixels[1]); + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +} // namespace libyuv diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc new file mode 100644 index 00000000..a8c95268 --- /dev/null +++ b/unit_test/scale_test.cc @@ -0,0 +1,1601 @@ +/* + * Copyright 2011 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "../unit_test/unit_test.h" +#include "libyuv/cpu_id.h" +#include "libyuv/scale.h" + +#ifdef ENABLE_ROW_TESTS +#include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C +#endif + +#define STRINGIZE(line) #line +#define FILELINESTR(file, line) file ":" STRINGIZE(line) + +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +// SLOW TESTS are those that are unoptimized C code. +// FULL TESTS are optimized but test many variations of the same code. +#define ENABLE_FULL_TESTS +#endif + +namespace libyuv { + +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. 
+static int I420TestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i, j; + int src_width_uv = (Abs(src_width) + 1) >> 1; + int src_height_uv = (Abs(src_height) + 1) >> 1; + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + if (!src_y || !src_u || !src_v) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + int dst_width_uv = (dst_width + 1) >> 1; + int dst_height_uv = (dst_height + 1) >> 1; + + int64_t dst_y_plane_size = (dst_width) * (dst_height); + int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + align_buffer_page_end(dst_y_c, dst_y_plane_size); + align_buffer_page_end(dst_u_c, dst_uv_plane_size); + align_buffer_page_end(dst_v_c, dst_uv_plane_size); + align_buffer_page_end(dst_y_opt, dst_y_plane_size); + align_buffer_page_end(dst_u_opt, dst_uv_plane_size); + align_buffer_page_end(dst_v_opt, dst_uv_plane_size); + if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt || + !dst_v_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. 
+ double c_time = get_time(); + I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_c, dst_stride_y, dst_u_c, + dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f); + c_time = (get_time() - c_time); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + double opt_time = get_time(); + for (i = 0; i < benchmark_iterations; ++i) { + I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt, + dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height, + f); + } + opt_time = (get_time() - opt_time) / benchmark_iterations; + // Report performance of C vs OPT. + printf("filter %d - %8d us C - %8d us OPT\n", f, + static_cast(c_time * 1e6), static_cast(opt_time * 1e6)); + + // C version may be a little off from the optimized. Order of + // operations may introduce rounding somewhere. So do a difference + // of the buffers and look to see that the max difference is not + // over 3. 
+ int max_diff = 0; + for (i = 0; i < (dst_height); ++i) { + for (j = 0; j < (dst_width); ++j) { + int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] - + dst_y_opt[(i * dst_stride_y) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + for (i = 0; i < (dst_height_uv); ++i) { + for (j = 0; j < (dst_width_uv); ++j) { + int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] - + dst_u_opt[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] - + dst_v_opt[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + free_aligned_buffer_page_end(dst_y_c); + free_aligned_buffer_page_end(dst_u_c); + free_aligned_buffer_page_end(dst_v_c); + free_aligned_buffer_page_end(dst_y_opt); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_opt); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + + return max_diff; +} + +// Test scaling with 8 bit C vs 12 bit C and return maximum pixel difference. +// 0 = exact. 
+static int I420TestFilter_12(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i; + int src_width_uv = (Abs(src_width) + 1) >> 1; + int src_height_uv = (Abs(src_height) + 1) >> 1; + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + align_buffer_page_end(src_y_12, src_y_plane_size * 2); + align_buffer_page_end(src_u_12, src_uv_plane_size * 2); + align_buffer_page_end(src_v_12, src_uv_plane_size * 2); + if (!src_y || !src_u || !src_v || !src_y_12 || !src_u_12 || !src_v_12) { + printf("Skipped. 
Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + uint16_t* p_src_y_12 = reinterpret_cast(src_y_12); + uint16_t* p_src_u_12 = reinterpret_cast(src_u_12); + uint16_t* p_src_v_12 = reinterpret_cast(src_v_12); + + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + for (i = 0; i < src_y_plane_size; ++i) { + p_src_y_12[i] = src_y[i]; + } + for (i = 0; i < src_uv_plane_size; ++i) { + p_src_u_12[i] = src_u[i]; + p_src_v_12[i] = src_v[i]; + } + + int dst_width_uv = (dst_width + 1) >> 1; + int dst_height_uv = (dst_height + 1) >> 1; + + int dst_y_plane_size = (dst_width) * (dst_height); + int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + align_buffer_page_end(dst_y_8, dst_y_plane_size); + align_buffer_page_end(dst_u_8, dst_uv_plane_size); + align_buffer_page_end(dst_v_8, dst_uv_plane_size); + align_buffer_page_end(dst_y_12, dst_y_plane_size * 2); + align_buffer_page_end(dst_u_12, dst_uv_plane_size * 2); + align_buffer_page_end(dst_v_12, dst_uv_plane_size * 2); + + uint16_t* p_dst_y_12 = reinterpret_cast(dst_y_12); + uint16_t* p_dst_u_12 = reinterpret_cast(dst_u_12); + uint16_t* p_dst_v_12 = reinterpret_cast(dst_v_12); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_8, dst_stride_y, dst_u_8, + dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (i = 0; i < benchmark_iterations; ++i) { + I420Scale_12(p_src_y_12, src_stride_y, p_src_u_12, src_stride_uv, + p_src_v_12, src_stride_uv, src_width, src_height, p_dst_y_12, + dst_stride_y, p_dst_u_12, dst_stride_uv, p_dst_v_12, + dst_stride_uv, dst_width, dst_height, f); + } + + // Expect an exact match. 
+ int max_diff = 0; + for (i = 0; i < dst_y_plane_size; ++i) { + int abs_diff = Abs(dst_y_8[i] - p_dst_y_12[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + for (i = 0; i < dst_uv_plane_size; ++i) { + int abs_diff = Abs(dst_u_8[i] - p_dst_u_12[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_8[i] - p_dst_v_12[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(dst_y_8); + free_aligned_buffer_page_end(dst_u_8); + free_aligned_buffer_page_end(dst_v_8); + free_aligned_buffer_page_end(dst_y_12); + free_aligned_buffer_page_end(dst_u_12); + free_aligned_buffer_page_end(dst_v_12); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + free_aligned_buffer_page_end(src_y_12); + free_aligned_buffer_page_end(src_u_12); + free_aligned_buffer_page_end(src_v_12); + + return max_diff; +} + +// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference. +// 0 = exact. 
+static int I420TestFilter_16(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i; + int src_width_uv = (Abs(src_width) + 1) >> 1; + int src_height_uv = (Abs(src_height) + 1) >> 1; + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + align_buffer_page_end(src_y_16, src_y_plane_size * 2); + align_buffer_page_end(src_u_16, src_uv_plane_size * 2); + align_buffer_page_end(src_v_16, src_uv_plane_size * 2); + if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) { + printf("Skipped. 
Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + uint16_t* p_src_y_16 = reinterpret_cast(src_y_16); + uint16_t* p_src_u_16 = reinterpret_cast(src_u_16); + uint16_t* p_src_v_16 = reinterpret_cast(src_v_16); + + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + for (i = 0; i < src_y_plane_size; ++i) { + p_src_y_16[i] = src_y[i]; + } + for (i = 0; i < src_uv_plane_size; ++i) { + p_src_u_16[i] = src_u[i]; + p_src_v_16[i] = src_v[i]; + } + + int dst_width_uv = (dst_width + 1) >> 1; + int dst_height_uv = (dst_height + 1) >> 1; + + int dst_y_plane_size = (dst_width) * (dst_height); + int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + align_buffer_page_end(dst_y_8, dst_y_plane_size); + align_buffer_page_end(dst_u_8, dst_uv_plane_size); + align_buffer_page_end(dst_v_8, dst_uv_plane_size); + align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); + align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2); + align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2); + + uint16_t* p_dst_y_16 = reinterpret_cast(dst_y_16); + uint16_t* p_dst_u_16 = reinterpret_cast(dst_u_16); + uint16_t* p_dst_v_16 = reinterpret_cast(dst_v_16); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_8, dst_stride_y, dst_u_8, + dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (i = 0; i < benchmark_iterations; ++i) { + I420Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv, + p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16, + dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16, + dst_stride_uv, dst_width, dst_height, f); + } + + // Expect an exact match. 
+ int max_diff = 0; + for (i = 0; i < dst_y_plane_size; ++i) { + int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + for (i = 0; i < dst_uv_plane_size; ++i) { + int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(dst_y_8); + free_aligned_buffer_page_end(dst_u_8); + free_aligned_buffer_page_end(dst_v_8); + free_aligned_buffer_page_end(dst_y_16); + free_aligned_buffer_page_end(dst_u_16); + free_aligned_buffer_page_end(dst_v_16); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + free_aligned_buffer_page_end(src_y_16); + free_aligned_buffer_page_end(src_u_16); + free_aligned_buffer_page_end(src_v_16); + + return max_diff; +} + +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. +static int I444TestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i, j; + int src_width_uv = Abs(src_width); + int src_height_uv = Abs(src_height); + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + if (!src_y || !src_u || !src_v) { + printf("Skipped. 
Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + int dst_width_uv = dst_width; + int dst_height_uv = dst_height; + + int64_t dst_y_plane_size = (dst_width) * (dst_height); + int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + align_buffer_page_end(dst_y_c, dst_y_plane_size); + align_buffer_page_end(dst_u_c, dst_uv_plane_size); + align_buffer_page_end(dst_v_c, dst_uv_plane_size); + align_buffer_page_end(dst_y_opt, dst_y_plane_size); + align_buffer_page_end(dst_u_opt, dst_uv_plane_size); + align_buffer_page_end(dst_v_opt, dst_uv_plane_size); + if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt || + !dst_v_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + double c_time = get_time(); + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_c, dst_stride_y, dst_u_c, + dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f); + c_time = (get_time() - c_time); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + double opt_time = get_time(); + for (i = 0; i < benchmark_iterations; ++i) { + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt, + dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height, + f); + } + opt_time = (get_time() - opt_time) / benchmark_iterations; + // Report performance of C vs OPT. + printf("filter %d - %8d us C - %8d us OPT\n", f, + static_cast(c_time * 1e6), static_cast(opt_time * 1e6)); + + // C version may be a little off from the optimized. Order of + // operations may introduce rounding somewhere. 
So do a difference + // of the buffers and look to see that the max difference is not + // over 3. + int max_diff = 0; + for (i = 0; i < (dst_height); ++i) { + for (j = 0; j < (dst_width); ++j) { + int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] - + dst_y_opt[(i * dst_stride_y) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + for (i = 0; i < (dst_height_uv); ++i) { + for (j = 0; j < (dst_width_uv); ++j) { + int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] - + dst_u_opt[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] - + dst_v_opt[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + free_aligned_buffer_page_end(dst_y_c); + free_aligned_buffer_page_end(dst_u_c); + free_aligned_buffer_page_end(dst_v_c); + free_aligned_buffer_page_end(dst_y_opt); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_opt); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + + return max_diff; +} + +// Test scaling with 8 bit C vs 12 bit C and return maximum pixel difference. +// 0 = exact. 
+static int I444TestFilter_12(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i; + int src_width_uv = Abs(src_width); + int src_height_uv = Abs(src_height); + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + align_buffer_page_end(src_y_12, src_y_plane_size * 2); + align_buffer_page_end(src_u_12, src_uv_plane_size * 2); + align_buffer_page_end(src_v_12, src_uv_plane_size * 2); + if (!src_y || !src_u || !src_v || !src_y_12 || !src_u_12 || !src_v_12) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + uint16_t* p_src_y_12 = reinterpret_cast(src_y_12); + uint16_t* p_src_u_12 = reinterpret_cast(src_u_12); + uint16_t* p_src_v_12 = reinterpret_cast(src_v_12); + + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + for (i = 0; i < src_y_plane_size; ++i) { + p_src_y_12[i] = src_y[i]; + } + for (i = 0; i < src_uv_plane_size; ++i) { + p_src_u_12[i] = src_u[i]; + p_src_v_12[i] = src_v[i]; + } + + int dst_width_uv = dst_width; + int dst_height_uv = dst_height; + + int dst_y_plane_size = (dst_width) * (dst_height); + int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + align_buffer_page_end(dst_y_8, dst_y_plane_size); + align_buffer_page_end(dst_u_8, dst_uv_plane_size); + align_buffer_page_end(dst_v_8, dst_uv_plane_size); + align_buffer_page_end(dst_y_12, dst_y_plane_size * 2); 
+ align_buffer_page_end(dst_u_12, dst_uv_plane_size * 2); + align_buffer_page_end(dst_v_12, dst_uv_plane_size * 2); + + uint16_t* p_dst_y_12 = reinterpret_cast(dst_y_12); + uint16_t* p_dst_u_12 = reinterpret_cast(dst_u_12); + uint16_t* p_dst_v_12 = reinterpret_cast(dst_v_12); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_8, dst_stride_y, dst_u_8, + dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (i = 0; i < benchmark_iterations; ++i) { + I444Scale_12(p_src_y_12, src_stride_y, p_src_u_12, src_stride_uv, + p_src_v_12, src_stride_uv, src_width, src_height, p_dst_y_12, + dst_stride_y, p_dst_u_12, dst_stride_uv, p_dst_v_12, + dst_stride_uv, dst_width, dst_height, f); + } + + // Expect an exact match. + int max_diff = 0; + for (i = 0; i < dst_y_plane_size; ++i) { + int abs_diff = Abs(dst_y_8[i] - p_dst_y_12[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + for (i = 0; i < dst_uv_plane_size; ++i) { + int abs_diff = Abs(dst_u_8[i] - p_dst_u_12[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_8[i] - p_dst_v_12[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(dst_y_8); + free_aligned_buffer_page_end(dst_u_8); + free_aligned_buffer_page_end(dst_v_8); + free_aligned_buffer_page_end(dst_y_12); + free_aligned_buffer_page_end(dst_u_12); + free_aligned_buffer_page_end(dst_v_12); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + free_aligned_buffer_page_end(src_y_12); + free_aligned_buffer_page_end(src_u_12); + free_aligned_buffer_page_end(src_v_12); + + return max_diff; +} + +// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference. +// 0 = exact. 
+static int I444TestFilter_16(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i; + int src_width_uv = Abs(src_width); + int src_height_uv = Abs(src_height); + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + align_buffer_page_end(src_y_16, src_y_plane_size * 2); + align_buffer_page_end(src_u_16, src_uv_plane_size * 2); + align_buffer_page_end(src_v_16, src_uv_plane_size * 2); + if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + uint16_t* p_src_y_16 = reinterpret_cast(src_y_16); + uint16_t* p_src_u_16 = reinterpret_cast(src_u_16); + uint16_t* p_src_v_16 = reinterpret_cast(src_v_16); + + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + for (i = 0; i < src_y_plane_size; ++i) { + p_src_y_16[i] = src_y[i]; + } + for (i = 0; i < src_uv_plane_size; ++i) { + p_src_u_16[i] = src_u[i]; + p_src_v_16[i] = src_v[i]; + } + + int dst_width_uv = dst_width; + int dst_height_uv = dst_height; + + int dst_y_plane_size = (dst_width) * (dst_height); + int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + align_buffer_page_end(dst_y_8, dst_y_plane_size); + align_buffer_page_end(dst_u_8, dst_uv_plane_size); + align_buffer_page_end(dst_v_8, dst_uv_plane_size); + align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); 
+ align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2); + align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2); + + uint16_t* p_dst_y_16 = reinterpret_cast(dst_y_16); + uint16_t* p_dst_u_16 = reinterpret_cast(dst_u_16); + uint16_t* p_dst_v_16 = reinterpret_cast(dst_v_16); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_8, dst_stride_y, dst_u_8, + dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (i = 0; i < benchmark_iterations; ++i) { + I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv, + p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16, + dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16, + dst_stride_uv, dst_width, dst_height, f); + } + + // Expect an exact match. + int max_diff = 0; + for (i = 0; i < dst_y_plane_size; ++i) { + int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + for (i = 0; i < dst_uv_plane_size; ++i) { + int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(dst_y_8); + free_aligned_buffer_page_end(dst_u_8); + free_aligned_buffer_page_end(dst_v_8); + free_aligned_buffer_page_end(dst_y_16); + free_aligned_buffer_page_end(dst_u_16); + free_aligned_buffer_page_end(dst_v_16); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + free_aligned_buffer_page_end(src_y_16); + free_aligned_buffer_page_end(src_u_16); + free_aligned_buffer_page_end(src_v_16); + + return max_diff; +} + +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. 
+static int NV12TestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i, j; + int src_width_uv = (Abs(src_width) + 1) >> 1; + int src_height_uv = (Abs(src_height) + 1) >> 1; + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv)*2; + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv * 2; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_uv, src_uv_plane_size); + if (!src_y || !src_uv) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_uv, src_uv_plane_size); + + int dst_width_uv = (dst_width + 1) >> 1; + int dst_height_uv = (dst_height + 1) >> 1; + + int64_t dst_y_plane_size = (dst_width) * (dst_height); + int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv)*2; + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv * 2; + + align_buffer_page_end(dst_y_c, dst_y_plane_size); + align_buffer_page_end(dst_uv_c, dst_uv_plane_size); + align_buffer_page_end(dst_y_opt, dst_y_plane_size); + align_buffer_page_end(dst_uv_opt, dst_uv_plane_size); + if (!dst_y_c || !dst_uv_c || !dst_y_opt || !dst_uv_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + double c_time = get_time(); + NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height, + dst_y_c, dst_stride_y, dst_uv_c, dst_stride_uv, dst_width, + dst_height, f); + c_time = (get_time() - c_time); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. 
+ double opt_time = get_time(); + for (i = 0; i < benchmark_iterations; ++i) { + NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height, + dst_y_opt, dst_stride_y, dst_uv_opt, dst_stride_uv, dst_width, + dst_height, f); + } + opt_time = (get_time() - opt_time) / benchmark_iterations; + // Report performance of C vs OPT. + printf("filter %d - %8d us C - %8d us OPT\n", f, + static_cast(c_time * 1e6), static_cast(opt_time * 1e6)); + + // C version may be a little off from the optimized. Order of + // operations may introduce rounding somewhere. So do a difference + // of the buffers and look to see that the max difference is not + // over 3. + int max_diff = 0; + for (i = 0; i < (dst_height); ++i) { + for (j = 0; j < (dst_width); ++j) { + int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] - + dst_y_opt[(i * dst_stride_y) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + for (i = 0; i < (dst_height_uv); ++i) { + for (j = 0; j < (dst_width_uv * 2); ++j) { + int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] - + dst_uv_opt[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + free_aligned_buffer_page_end(dst_y_c); + free_aligned_buffer_page_end(dst_uv_c); + free_aligned_buffer_page_end(dst_y_opt); + free_aligned_buffer_page_end(dst_uv_opt); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_uv); + + return max_diff; +} + +// The following adjustments in dimensions ensure the scale factor will be +// exactly achieved. +// 2 is chroma subsample. 
+#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
+#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
+
+#define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff) \
+  TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
+    int diff = I420TestFilter( \
+        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+        kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+        benchmark_cpu_info_); \
+    EXPECT_LE(diff, max_diff); \
+  } \
+  TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
+    int diff = I444TestFilter( \
+        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+        kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+        benchmark_cpu_info_); \
+    EXPECT_LE(diff, max_diff); \
+  } \
+  TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_12) { \
+    int diff = I420TestFilter_12( \
+        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+        kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+        benchmark_cpu_info_); \
+    EXPECT_LE(diff, max_diff); \
+  } \
+  TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_12) { \
+    int diff = I444TestFilter_12( \
+        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+        kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+        benchmark_cpu_info_); \
+    EXPECT_LE(diff, max_diff); \
+  } \
+  TEST_F(LibYUVScaleTest, NV12ScaleDownBy##name##_##filter) { \
+    int diff = NV12TestFilter( \
+        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+        kFilter##filter,
benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but +// filtering is different fixed point implementations for SSSE3, Neon and C. +#ifndef DISABLE_SLOW_TESTS +#define TEST_FACTOR(name, nom, denom, boxdiff) \ + TEST_FACTOR1(, name, None, nom, denom, 0) \ + TEST_FACTOR1(, name, Linear, nom, denom, 3) \ + TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \ + TEST_FACTOR1(, name, Box, nom, denom, boxdiff) +#else +#if defined(ENABLE_FULL_TESTS) +#define TEST_FACTOR(name, nom, denom, boxdiff) \ + TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0) \ + TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3) \ + TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \ + TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff) +#else +#define TEST_FACTOR(name, nom, denom, boxdiff) \ + TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \ + TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff) +#endif +#endif + +TEST_FACTOR(2, 1, 2, 0) +TEST_FACTOR(4, 1, 4, 0) +#ifndef DISABLE_SLOW_TESTS +TEST_FACTOR(8, 1, 8, 0) +#endif +TEST_FACTOR(3by4, 3, 4, 1) +TEST_FACTOR(3by8, 3, 8, 1) +TEST_FACTOR(3, 1, 3, 0) +#undef TEST_FACTOR1 +#undef TEST_FACTOR +#undef SX +#undef DX + +#define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff) \ + TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \ + int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \ + height, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \ + int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \ + height, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, \ + 
DISABLED_##I420##name##To##width##x##height##_##filter##_12) { \ + int diff = I420TestFilter_12( \ + benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, \ + DISABLED_##I444##name##To##width##x##height##_##filter##_12) { \ + int diff = I444TestFilter_12( \ + benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, \ + DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \ + int diff = I420TestFilter_16( \ + benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, \ + DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \ + int diff = I444TestFilter_16( \ + benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, NV12##name##To##width##x##height##_##filter) { \ + int diff = NV12TestFilter(benchmark_width_, benchmark_height_, width, \ + height, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \ + int diff = I420TestFilter(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \ + int diff = I444TestFilter(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + 
benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, \ + DISABLED_##I420##name##From##width##x##height##_##filter##_12) { \ + int diff = I420TestFilter_12(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, \ + DISABLED_##I444##name##From##width##x##height##_##filter##_12) { \ + int diff = I444TestFilter_12(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, \ + DISABLED_##I420##name##From##width##x##height##_##filter##_16) { \ + int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, \ + DISABLED_##I444##name##From##width##x##height##_##filter##_16) { \ + int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, NV12##name##From##width##x##height##_##filter) { \ + int diff = NV12TestFilter(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +#ifndef DISABLE_SLOW_TESTS +// Test scale to a specified size with all 4 filters. 
+#define TEST_SCALETO(name, width, height) \ + TEST_SCALETO1(, name, width, height, None, 0) \ + TEST_SCALETO1(, name, width, height, Linear, 3) \ + TEST_SCALETO1(, name, width, height, Bilinear, 3) \ + TEST_SCALETO1(, name, width, height, Box, 3) +#else +#if defined(ENABLE_FULL_TESTS) +#define TEST_SCALETO(name, width, height) \ + TEST_SCALETO1(DISABLED_, name, width, height, None, 0) \ + TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \ + TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \ + TEST_SCALETO1(DISABLED_, name, width, height, Box, 3) +#else +#define TEST_SCALETO(name, width, height) \ + TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \ + TEST_SCALETO1(DISABLED_, name, width, height, Box, 3) +#endif +#endif + +TEST_SCALETO(Scale, 1, 1) +TEST_SCALETO(Scale, 569, 480) +TEST_SCALETO(Scale, 640, 360) +#ifndef DISABLE_SLOW_TESTS +TEST_SCALETO(Scale, 256, 144) /* 128x72 * 2 */ +TEST_SCALETO(Scale, 320, 240) +TEST_SCALETO(Scale, 1280, 720) +TEST_SCALETO(Scale, 1920, 1080) +#endif // DISABLE_SLOW_TESTS +#undef TEST_SCALETO1 +#undef TEST_SCALETO + +#define TEST_SCALESWAPXY1(DISABLED_, name, filter, max_diff) \ + TEST_F(LibYUVScaleTest, I420##name##SwapXY_##filter) { \ + int diff = I420TestFilter(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444##name##SwapXY_##filter) { \ + int diff = I444TestFilter(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_12) { \ + int diff = I420TestFilter_12(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, 
benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_12) { \ + int diff = I444TestFilter_12(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_16) { \ + int diff = I420TestFilter_16(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_16) { \ + int diff = I444TestFilter_16(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, NV12##name##SwapXY_##filter) { \ + int diff = NV12TestFilter(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +// Test scale to a specified size with all 4 filters. 
+#ifndef DISABLE_SLOW_TESTS +TEST_SCALESWAPXY1(, Scale, None, 0) +TEST_SCALESWAPXY1(, Scale, Linear, 3) +TEST_SCALESWAPXY1(, Scale, Bilinear, 3) +TEST_SCALESWAPXY1(, Scale, Box, 3) +#else +#if defined(ENABLE_FULL_TESTS) +TEST_SCALESWAPXY1(DISABLED_, Scale, None, 0) +TEST_SCALESWAPXY1(DISABLED_, Scale, Linear, 3) +TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3) +TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3) +#else +TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3) +TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3) +#endif +#endif + +#undef TEST_SCALESWAPXY1 + +#ifdef ENABLE_ROW_TESTS +#ifdef HAS_SCALEROWDOWN2_SSSE3 +TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { + SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[64]); + SIMD_ALIGNED(uint8_t dst_pixels_c[64]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt)); + memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); + + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + if (!has_ssse3) { + printf("Warning SSSE3 not detected; Skipping test.\n"); + } else { + // TL. + orig_pixels[0] = 255u; + orig_pixels[1] = 0u; + orig_pixels[128 + 0] = 0u; + orig_pixels[128 + 1] = 0u; + // TR. + orig_pixels[2] = 0u; + orig_pixels[3] = 100u; + orig_pixels[128 + 2] = 0u; + orig_pixels[128 + 3] = 0u; + // BL. + orig_pixels[4] = 0u; + orig_pixels[5] = 0u; + orig_pixels[128 + 4] = 50u; + orig_pixels[128 + 5] = 0u; + // BR. + orig_pixels[6] = 0u; + orig_pixels[7] = 0u; + orig_pixels[128 + 6] = 0u; + orig_pixels[128 + 7] = 20u; + // Odd. + orig_pixels[126] = 4u; + orig_pixels[127] = 255u; + orig_pixels[128 + 126] = 16u; + orig_pixels[128 + 127] = 255u; + + // Test regular half size. 
+ ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(133u, dst_pixels_c[63]); + + // Test Odd width version - Last pixel is just 1 horizontal pixel. + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(10u, dst_pixels_c[63]); + + // Test one pixel less, should skip the last pixel. + memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(0u, dst_pixels_c[63]); + + // Test regular half size SSSE3. + ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); + + EXPECT_EQ(64u, dst_pixels_opt[0]); + EXPECT_EQ(25u, dst_pixels_opt[1]); + EXPECT_EQ(13u, dst_pixels_opt[2]); + EXPECT_EQ(5u, dst_pixels_opt[3]); + EXPECT_EQ(0u, dst_pixels_opt[4]); + EXPECT_EQ(133u, dst_pixels_opt[63]); + + // Compare C and SSSE3 match. + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); + ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); + for (int i = 0; i < 64; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + } +} +#endif // HAS_SCALEROWDOWN2_SSSE3 + +extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); + +TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) { + SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun. 
+ SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); + SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); + + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt)); + memset(dst_pixels_c, 2, sizeof(dst_pixels_c)); + + for (int i = 0; i < 640 * 2 + 1; ++i) { + orig_pixels[i] = i; + } + ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); + } else { + ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); + } +#else + ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); +#endif + } + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16); + EXPECT_EQ(dst_pixels_c[1279], 800); +} + +extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); + +TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) { + SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]); + SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); + SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); + + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); + + for (int i = 0; i < 2560 * 2; ++i) { + orig_pixels[i] = i; + } + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); + } else { + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); + } +#else + 
ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
+#endif
+  }
+
+  for (int i = 0; i < 1280; ++i) {
+    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+  }
+
+  EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
+  EXPECT_EQ(dst_pixels_c[1279], 3839);
+}
+#endif  // ENABLE_ROW_TESTS
+
+// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel
+// difference.
+// 0 = exact.
+static int TestPlaneFilter_16(int src_width,
+                              int src_height,
+                              int dst_width,
+                              int dst_height,
+                              FilterMode f,
+                              int benchmark_iterations,
+                              int disable_cpu_flags,
+                              int benchmark_cpu_info) {
+  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+    return 0;
+  }
+
+  int i;
+  int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+  int src_stride_y = Abs(src_width);
+  int dst_y_plane_size = dst_width * dst_height;
+  int dst_stride_y = dst_width;
+
+  align_buffer_page_end(src_y, src_y_plane_size);
+  align_buffer_page_end(src_y_16, src_y_plane_size * 2);
+  align_buffer_page_end(dst_y_8, dst_y_plane_size);
+  align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
+  uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
+  uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
+
+  MemRandomize(src_y, src_y_plane_size);
+  memset(dst_y_8, 0, dst_y_plane_size);
+  memset(dst_y_16, 1, dst_y_plane_size * 2);
+
+  for (i = 0; i < src_y_plane_size; ++i) {
+    p_src_y_16[i] = src_y[i] & 255;
+  }
+
+  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
+  ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
+             dst_width, dst_height, f);
+  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
+
+  for (i = 0; i < benchmark_iterations; ++i) {
+    ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
+                  dst_stride_y, dst_width, dst_height, f);
+  }
+
+  // Expect an exact match.
+  int max_diff = 0;
+  for (i = 0; i < dst_y_plane_size; ++i) {
+    int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+
+  free_aligned_buffer_page_end(dst_y_8);
+  free_aligned_buffer_page_end(dst_y_16);
+  free_aligned_buffer_page_end(src_y);
+  free_aligned_buffer_page_end(src_y_16);
+
+  return max_diff;
+}
+
+// The following adjustments in dimensions ensure the scale factor will be
+// exactly achieved.
+// 2 is chroma subsample.
+#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
+#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
+
+#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
+  TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
+    int diff = TestPlaneFilter_16( \
+        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+        kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+        benchmark_cpu_info_); \
+    EXPECT_LE(diff, max_diff); \
+  }
+
+// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
+// filtering is different fixed point implementations for SSSE3, Neon and C.
+#define TEST_FACTOR(name, nom, denom, boxdiff) \
+  TEST_FACTOR1(name, None, nom, denom, 0) \
+  TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \
+  TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
+  TEST_FACTOR1(name, Box, nom, denom, boxdiff)
+
+TEST_FACTOR(2, 1, 2, 0)
+TEST_FACTOR(4, 1, 4, 0)
+// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
+TEST_FACTOR(3by4, 3, 4, 1) +TEST_FACTOR(3by8, 3, 8, 1) +TEST_FACTOR(3, 1, 3, 0) +#undef TEST_FACTOR1 +#undef TEST_FACTOR +#undef SX +#undef DX + +TEST_F(LibYUVScaleTest, PlaneTest3x) { + const int kSrcStride = 480; + const int kDstStride = 160; + const int kSize = kSrcStride * 3; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 480 * 3; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(225, dest_pixels[0]); + + ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterNone); + + EXPECT_EQ(225, dest_pixels[0]); + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest4x) { + const int kSrcStride = 640; + const int kDstStride = 160; + const int kSize = kSrcStride * 4; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 640 * 4; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(66, dest_pixels[0]); + + ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, + kFilterNone); + + EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +// Intent is to test 200x50 to 50x200 but width and height can be parameters. 
+TEST_F(LibYUVScaleTest, PlaneTestRotate_None) { + const int kSize = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < kSize; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_opt_pixels, kSize); + align_buffer_page_end(dest_c_pixels, kSize); + + MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterNone); + MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. + + for (int i = 0; i < benchmark_iterations_; ++i) { + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterNone); + } + + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + } + + free_aligned_buffer_page_end(dest_c_pixels); + free_aligned_buffer_page_end(dest_opt_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) { + const int kSize = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < kSize; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_opt_pixels, kSize); + align_buffer_page_end(dest_c_pixels, kSize); + + MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterBilinear); + MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. 
+ + for (int i = 0; i < benchmark_iterations_; ++i) { + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterBilinear); + } + + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + } + + free_aligned_buffer_page_end(dest_c_pixels); + free_aligned_buffer_page_end(dest_opt_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +// Intent is to test 200x50 to 50x200 but width and height can be parameters. +TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) { + const int kSize = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < kSize; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_opt_pixels, kSize); + align_buffer_page_end(dest_c_pixels, kSize); + + MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterBox); + MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. + + for (int i = 0; i < benchmark_iterations_; ++i) { + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterBox); + } + + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + } + + free_aligned_buffer_page_end(dest_c_pixels); + free_aligned_buffer_page_end(dest_opt_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest1_Box) { + align_buffer_page_end(orig_pixels, 3); + align_buffer_page_end(dst_pixels, 3); + + // Pad the 1x1 byte image with invalid values before and after in case libyuv + // reads outside the memory boundaries. 
+ orig_pixels[0] = 0; + orig_pixels[1] = 1; // scale this pixel + orig_pixels[2] = 2; + dst_pixels[0] = 3; + dst_pixels[1] = 3; + dst_pixels[2] = 3; + + libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, + /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, + /* dst_width= */ 1, /* dst_height= */ 2, + libyuv::kFilterBox); + + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); + + free_aligned_buffer_page_end(dst_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) { + align_buffer_page_end(orig_pixels_alloc, 3 * 2); + align_buffer_page_end(dst_pixels_alloc, 3 * 2); + uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc; + uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc; + + // Pad the 1x1 byte image with invalid values before and after in case libyuv + // reads outside the memory boundaries. + orig_pixels[0] = 0; + orig_pixels[1] = 1; // scale this pixel + orig_pixels[2] = 2; + dst_pixels[0] = 3; + dst_pixels[1] = 3; + dst_pixels[2] = 3; + + libyuv::ScalePlane_16( + orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, + /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, + /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone); + + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); + + free_aligned_buffer_page_end(dst_pixels_alloc); + free_aligned_buffer_page_end(orig_pixels_alloc); +} +} // namespace libyuv diff --git a/unit_test/scale_uv_test.cc b/unit_test/scale_uv_test.cc new file mode 100644 index 00000000..dab217c9 --- /dev/null +++ b/unit_test/scale_uv_test.cc @@ -0,0 +1,249 @@ +/* + * Copyright 2011 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <time.h>
+
+#include "../unit_test/unit_test.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/scale_uv.h"
+
+namespace libyuv {
+
+#define STRINGIZE(line) #line
+#define FILELINESTR(file, line) file ":" STRINGIZE(line)
+
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+// SLOW TESTS are those that are unoptimized C code.
+// FULL TESTS are optimized but test many variations of the same code.
+#define ENABLE_FULL_TESTS
+#endif
+
+// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
+static int UVTestFilter(int src_width,
+                        int src_height,
+                        int dst_width,
+                        int dst_height,
+                        FilterMode f,
+                        int benchmark_iterations,
+                        int disable_cpu_flags,
+                        int benchmark_cpu_info) {
+  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+    return 0;
+  }
+
+  int i;
+  int64_t src_uv_plane_size = Abs(src_width) * Abs(src_height) * 2LL;
+  int src_stride_uv = Abs(src_width) * 2;
+  int64_t dst_uv_plane_size = dst_width * dst_height * 2LL;
+  int dst_stride_uv = dst_width * 2;
+
+  align_buffer_page_end(src_uv, src_uv_plane_size);
+  align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
+  align_buffer_page_end(dst_uv_opt, dst_uv_plane_size);
+
+  if (!src_uv || !dst_uv_c || !dst_uv_opt) {
+    printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+    return 0;
+  }
+  MemRandomize(src_uv, src_uv_plane_size);
+  memset(dst_uv_c, 2, dst_uv_plane_size);
+  memset(dst_uv_opt, 123, dst_uv_plane_size);
+
+  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
+  double c_time = get_time();
+  UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv_c, dst_stride_uv,
+          dst_width, dst_height, f);
+  c_time = (get_time() - c_time);
+
+  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
+  double opt_time = get_time();
+  for (i = 0; i < benchmark_iterations; ++i) {
+    UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv_opt,
+            dst_stride_uv, dst_width, dst_height, f);
+  }
+  opt_time = (get_time() - opt_time) / benchmark_iterations;
+
+  // Report performance of C vs OPT
+  printf("filter %d - %8d us C - %8d us OPT\n", f,
+         static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
+
+  int max_diff = 0;
+  for (i = 0; i < dst_uv_plane_size; ++i) {
+    int abs_diff = Abs(dst_uv_c[i] - dst_uv_opt[i]);
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+
+  free_aligned_buffer_page_end(dst_uv_c);
+  free_aligned_buffer_page_end(dst_uv_opt);
+  free_aligned_buffer_page_end(src_uv);
+  return max_diff;
+}
+
+// The following adjustments in dimensions ensure the scale factor will be
+// exactly achieved.
+#define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom)
+#define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
+
+#define TEST_FACTOR1(name, filter, nom, denom) \
+  TEST_F(LibYUVScaleTest, UVScaleDownBy##name##_##filter) { \
+    int diff = UVTestFilter( \
+        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+        kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+        benchmark_cpu_info_); \
+    EXPECT_EQ(0, diff); \
+  }
+
+#if defined(ENABLE_FULL_TESTS)
+// Test a scale factor with all 4 filters. Expect exact for SIMD vs C.
+#define TEST_FACTOR(name, nom, denom) \
+  TEST_FACTOR1(name, None, nom, denom) \
+  TEST_FACTOR1(name, Linear, nom, denom) \
+  TEST_FACTOR1(name, Bilinear, nom, denom) \
+  TEST_FACTOR1(name, Box, nom, denom)
+#else
+// Test a scale factor with Bilinear.
+#define TEST_FACTOR(name, nom, denom) TEST_FACTOR1(name, Bilinear, nom, denom)
+#endif
+
+TEST_FACTOR(2, 1, 2)
+TEST_FACTOR(4, 1, 4)
+// TEST_FACTOR(8, 1, 8) Disable for benchmark performance.
+TEST_FACTOR(3by4, 3, 4) +TEST_FACTOR(3by8, 3, 8) +TEST_FACTOR(3, 1, 3) +#undef TEST_FACTOR1 +#undef TEST_FACTOR +#undef SX +#undef DX + +#define TEST_SCALETO1(name, width, height, filter, max_diff) \ + TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \ + int diff = UVTestFilter(benchmark_width_, benchmark_height_, width, \ + height, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \ + int diff = UVTestFilter(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +#if defined(ENABLE_FULL_TESTS) +/// Test scale to a specified size with all 4 filters. +#define TEST_SCALETO(name, width, height) \ + TEST_SCALETO1(name, width, height, None, 0) \ + TEST_SCALETO1(name, width, height, Linear, 3) \ + TEST_SCALETO1(name, width, height, Bilinear, 3) +#else +#define TEST_SCALETO(name, width, height) \ + TEST_SCALETO1(name, width, height, Bilinear, 3) +#endif + +TEST_SCALETO(UVScale, 1, 1) +TEST_SCALETO(UVScale, 569, 480) +TEST_SCALETO(UVScale, 640, 360) +#ifndef DISABLE_SLOW_TESTS +TEST_SCALETO(UVScale, 256, 144) /* 128x72 * 2 */ +TEST_SCALETO(UVScale, 320, 240) +TEST_SCALETO(UVScale, 1280, 720) +TEST_SCALETO(UVScale, 1920, 1080) +#endif // DISABLE_SLOW_TESTS +#undef TEST_SCALETO1 +#undef TEST_SCALETO + +#define TEST_SCALESWAPXY1(name, filter, max_diff) \ + TEST_F(LibYUVScaleTest, name##SwapXY_##filter) { \ + int diff = \ + UVTestFilter(benchmark_width_, benchmark_height_, benchmark_height_, \ + benchmark_width_, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +#if defined(ENABLE_FULL_TESTS) +// Test scale with swapped width and height with all 3 filters. 
+TEST_SCALESWAPXY1(UVScale, None, 0) +TEST_SCALESWAPXY1(UVScale, Linear, 0) +TEST_SCALESWAPXY1(UVScale, Bilinear, 0) +#else +TEST_SCALESWAPXY1(UVScale, Bilinear, 0) +#endif +#undef TEST_SCALESWAPXY1 + +TEST_F(LibYUVScaleTest, UVTest3x) { + const int kSrcStride = 480 * 2; + const int kDstStride = 160 * 2; + const int kSize = kSrcStride * 3; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 480 * 3; ++i) { + orig_pixels[i * 2 + 0] = i; + orig_pixels[i * 2 + 1] = 255 - i; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + UVScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(225, dest_pixels[0]); + EXPECT_EQ(255 - 225, dest_pixels[1]); + + UVScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterNone); + + EXPECT_EQ(225, dest_pixels[0]); + EXPECT_EQ(255 - 225, dest_pixels[1]); + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, UVTest4x) { + const int kSrcStride = 640 * 2; + const int kDstStride = 160 * 2; + const int kSize = kSrcStride * 4; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 640 * 4; ++i) { + orig_pixels[i * 2 + 0] = i; + orig_pixels[i * 2 + 1] = 255 - i; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + UVScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(66, dest_pixels[0]); + EXPECT_EQ(190, dest_pixels[1]); + + UVScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1, + kFilterNone); + + EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row + EXPECT_EQ(255 - 2, 
dest_pixels[1]); + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +} // namespace libyuv diff --git a/unit_test/testdata/arm_v7.txt b/unit_test/testdata/arm_v7.txt new file mode 100644 index 00000000..5d7dbd04 --- /dev/null +++ b/unit_test/testdata/arm_v7.txt @@ -0,0 +1,12 @@ +Processor : ARMv7 Processor rev 5 (v7l) +BogoMIPS : 795.44 +Features : swp half thumb fastmult vfp edsp iwmmxt thumbee vfpv3 vfpv3d16 +CPU implementer : 0x56 +CPU architecture: 7 +CPU variant : 0x0 +CPU part : 0x581 +CPU revision : 5 + +Hardware : OLPC XO-1.75 +Revision : 0000 +Serial : 0000000000000000 diff --git a/unit_test/testdata/juno.txt b/unit_test/testdata/juno.txt new file mode 100644 index 00000000..dd465272 --- /dev/null +++ b/unit_test/testdata/juno.txt @@ -0,0 +1,15 @@ +Processor : AArch64 Processor rev 0 (aarch64) +processor : 0 +processor : 1 +processor : 2 +processor : 3 +processor : 4 +processor : 5 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 +CPU implementer : 0x41 +CPU architecture: AArch64 +CPU variant : 0x0 +CPU part : 0xd07 +CPU revision : 0 + +Hardware : Juno diff --git a/unit_test/testdata/mips.txt b/unit_test/testdata/mips.txt new file mode 100644 index 00000000..d9f28cbf --- /dev/null +++ b/unit_test/testdata/mips.txt @@ -0,0 +1,7 @@ +system type : generic-loongson-machine +machine : loongson,generic +processor : 0 + +isa : mips1 mips2 mips3 mips4 mips5 mips32r1 mips32r2 mips64r1 mips64r2 +ASEs implemented : vz +shadow register sets : 1 diff --git a/unit_test/testdata/mips_loongson2k.txt b/unit_test/testdata/mips_loongson2k.txt new file mode 100644 index 00000000..8a88d38f --- /dev/null +++ b/unit_test/testdata/mips_loongson2k.txt @@ -0,0 +1,5 @@ +system type : Loongson2K-SBC +machine : loongson,LS2k1000-EVP +processor : 0 +cpu model : Loongson-2K V0.3 FPU V0.1 +BogoMIPS : 1980.41 diff --git a/unit_test/testdata/mips_loongson3.txt b/unit_test/testdata/mips_loongson3.txt new file mode 100644 index 
00000000..1f540b12 --- /dev/null +++ b/unit_test/testdata/mips_loongson3.txt @@ -0,0 +1,10 @@ +system type : generic-loongson-machine +machine : Unknown +processor : 0 +cpu model : ICT Loongson-3 V0.9 FPU V0.1 +model name : ICT Loongson-3A R3 (Loongson-3A3000) @ 1500MHz +BogoMIPS : 2990.15 + +isa : mips1 mips2 mips3 mips4 mips5 mips32r1 mips32r2 mips64r1 mips64r2 +ASEs implemented : dsp dsp2 vz +shadow register sets : 1 diff --git a/unit_test/testdata/mips_loongson_mmi.txt b/unit_test/testdata/mips_loongson_mmi.txt new file mode 100644 index 00000000..0f10b8bb --- /dev/null +++ b/unit_test/testdata/mips_loongson_mmi.txt @@ -0,0 +1,7 @@ +system type : generic-loongson-machine +machine : loongson,generic +processor : 0 + +isa : mips1 mips2 mips3 mips4 mips5 mips32r1 mips32r2 mips64r1 mips64r2 +ASEs implemented : vz loongson-mmi loongson-ext +shadow register sets : 1 diff --git a/unit_test/testdata/mips_msa.txt b/unit_test/testdata/mips_msa.txt new file mode 100644 index 00000000..ac930615 --- /dev/null +++ b/unit_test/testdata/mips_msa.txt @@ -0,0 +1,7 @@ +system type : generic-loongson-machine +machine : loongson,generic +processor : 0 + +isa : mips1 mips2 mips3 mips4 mips5 mips32r1 mips32r2 mips64r1 mips64r2 +ASEs implemented : vz msa +shadow register sets : 1 diff --git a/unit_test/testdata/riscv64.txt b/unit_test/testdata/riscv64.txt new file mode 100644 index 00000000..fbb4200f --- /dev/null +++ b/unit_test/testdata/riscv64.txt @@ -0,0 +1,4 @@ +processor : 0 +hart : 1 +isa : rv64imac +mmu : sv48 \ No newline at end of file diff --git a/unit_test/testdata/riscv64_rvv.txt b/unit_test/testdata/riscv64_rvv.txt new file mode 100644 index 00000000..af1b3f36 --- /dev/null +++ b/unit_test/testdata/riscv64_rvv.txt @@ -0,0 +1,4 @@ +processor : 0 +hart : 1 +isa : rv64imafdcv +mmu : sv48 \ No newline at end of file diff --git a/unit_test/testdata/riscv64_rvv_zvfh.txt b/unit_test/testdata/riscv64_rvv_zvfh.txt new file mode 100644 index 00000000..c416c1af --- /dev/null +++ 
b/unit_test/testdata/riscv64_rvv_zvfh.txt @@ -0,0 +1,4 @@ +processor : 0 +hart : 1 +isa : rv64imafdcv_zfh_zvfh +mmu : sv48 \ No newline at end of file diff --git a/unit_test/testdata/tegra3.txt b/unit_test/testdata/tegra3.txt new file mode 100644 index 00000000..d1b09f6b --- /dev/null +++ b/unit_test/testdata/tegra3.txt @@ -0,0 +1,23 @@ +Processor : ARMv7 Processor rev 9 (v7l) +processor : 0 +BogoMIPS : 1992.29 + +processor : 1 +BogoMIPS : 1992.29 + +processor : 2 +BogoMIPS : 1992.29 + +processor : 3 +BogoMIPS : 1992.29 + +Features : swp half thumb fastmult vfp edsp neon vfpv3 +CPU implementer : 0x41 +CPU architecture: 7 +CPU variant : 0x2 +CPU part : 0xc09 +CPU revision : 9 + +Hardware : cardhu +Revision : 0000 + diff --git a/unit_test/testdata/test0.jpg b/unit_test/testdata/test0.jpg new file mode 100644 index 00000000..f4461a81 Binary files /dev/null and b/unit_test/testdata/test0.jpg differ diff --git a/unit_test/testdata/test1.jpg b/unit_test/testdata/test1.jpg new file mode 100644 index 00000000..a0210e9d Binary files /dev/null and b/unit_test/testdata/test1.jpg differ diff --git a/unit_test/testdata/test2.jpg b/unit_test/testdata/test2.jpg new file mode 100644 index 00000000..816ca767 Binary files /dev/null and b/unit_test/testdata/test2.jpg differ diff --git a/unit_test/testdata/test3.jpg b/unit_test/testdata/test3.jpg new file mode 100644 index 00000000..792d91dc Binary files /dev/null and b/unit_test/testdata/test3.jpg differ diff --git a/unit_test/testdata/test4.jpg b/unit_test/testdata/test4.jpg new file mode 100644 index 00000000..1ef41668 Binary files /dev/null and b/unit_test/testdata/test4.jpg differ diff --git a/unit_test/unit_test.cc b/unit_test/unit_test.cc new file mode 100644 index 00000000..b66ebfab --- /dev/null +++ b/unit_test/unit_test.cc @@ -0,0 +1,562 @@ +/* + * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "../unit_test/unit_test.h" + +#include // For getenv() + +#include + +#ifdef LIBYUV_USE_ABSL_FLAGS +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#endif +#include "libyuv/cpu_id.h" + +unsigned int fastrand_seed = 0xfb; + +#ifdef LIBYUV_USE_ABSL_FLAGS +ABSL_FLAG(int32_t, libyuv_width, 0, "width of test image."); +ABSL_FLAG(int32_t, libyuv_height, 0, "height of test image."); +ABSL_FLAG(int32_t, libyuv_repeat, 0, "number of times to repeat test."); +ABSL_FLAG(int32_t, + libyuv_flags, + 0, + "cpu flags for reference code. 1 = C, -1 = SIMD"); +ABSL_FLAG(int32_t, + libyuv_cpu_info, + 0, + "cpu flags for benchmark code. 1 = C, -1 = SIMD"); +#else +// Disable command line parameters if absl/flags disabled. +static const int32_t FLAGS_libyuv_width = 0; +static const int32_t FLAGS_libyuv_height = 0; +static const int32_t FLAGS_libyuv_repeat = 0; +static const int32_t FLAGS_libyuv_flags = 0; +static const int32_t FLAGS_libyuv_cpu_info = 0; +#endif + +#ifdef LIBYUV_USE_ABSL_FLAGS +#define LIBYUV_GET_FLAG(f) absl::GetFlag(f) +#else +#define LIBYUV_GET_FLAG(f) f +#endif + +// Test environment variable for disabling CPU features. Any non-zero value +// to disable. Zero ignored to make it easy to set the variable on/off. +#if !defined(__native_client__) && !defined(_M_ARM) +static LIBYUV_BOOL TestEnv(const char* name) { + const char* var = getenv(name); + if (var) { + if (var[0] != '0') { + return LIBYUV_TRUE; + } + } + return LIBYUV_FALSE; +} +#else // nacl does not support getenv(). 
+static LIBYUV_BOOL TestEnv(const char*) { + return LIBYUV_FALSE; +} +#endif + +int TestCpuEnv(int cpu_info) { +#if defined(__arm__) || defined(__aarch64__) + if (TestEnv("LIBYUV_DISABLE_NEON")) { + cpu_info &= ~libyuv::kCpuHasNEON; + } +#endif +#if defined(__mips__) && defined(__linux__) + if (TestEnv("LIBYUV_DISABLE_MSA")) { + cpu_info &= ~libyuv::kCpuHasMSA; + } +#endif +#if defined(__longarch__) && defined(__linux__) + if (TestEnv("LIBYUV_DISABLE_LSX")) { + cpu_info &= ~libyuv::kCpuHasLSX; + } +#endif +#if defined(__longarch__) && defined(__linux__) + if (TestEnv("LIBYUV_DISABLE_LASX")) { + cpu_info &= ~libyuv::kCpuHasLASX; + } +#endif +#if defined(__riscv) && defined(__linux__) + if (TestEnv("LIBYUV_DISABLE_RVV")) { + cpu_info &= ~libyuv::kCpuHasRVV; + } +#endif +#if !defined(__pnacl__) && !defined(__CLR_VER) && \ + (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ + defined(_M_IX86)) + if (TestEnv("LIBYUV_DISABLE_X86")) { + cpu_info &= ~libyuv::kCpuHasX86; + } + if (TestEnv("LIBYUV_DISABLE_SSE2")) { + cpu_info &= ~libyuv::kCpuHasSSE2; + } + if (TestEnv("LIBYUV_DISABLE_SSSE3")) { + cpu_info &= ~libyuv::kCpuHasSSSE3; + } + if (TestEnv("LIBYUV_DISABLE_SSE41")) { + cpu_info &= ~libyuv::kCpuHasSSE41; + } + if (TestEnv("LIBYUV_DISABLE_SSE42")) { + cpu_info &= ~libyuv::kCpuHasSSE42; + } + if (TestEnv("LIBYUV_DISABLE_AVX")) { + cpu_info &= ~libyuv::kCpuHasAVX; + } + if (TestEnv("LIBYUV_DISABLE_AVX2")) { + cpu_info &= ~libyuv::kCpuHasAVX2; + } + if (TestEnv("LIBYUV_DISABLE_ERMS")) { + cpu_info &= ~libyuv::kCpuHasERMS; + } + if (TestEnv("LIBYUV_DISABLE_FMA3")) { + cpu_info &= ~libyuv::kCpuHasFMA3; + } + if (TestEnv("LIBYUV_DISABLE_F16C")) { + cpu_info &= ~libyuv::kCpuHasF16C; + } + if (TestEnv("LIBYUV_DISABLE_AVX512BW")) { + cpu_info &= ~libyuv::kCpuHasAVX512BW; + } + if (TestEnv("LIBYUV_DISABLE_AVX512VL")) { + cpu_info &= ~libyuv::kCpuHasAVX512VL; + } + if (TestEnv("LIBYUV_DISABLE_AVX512VNNI")) { + cpu_info &= ~libyuv::kCpuHasAVX512VNNI; + } + if 
(TestEnv("LIBYUV_DISABLE_AVX512VBMI")) { + cpu_info &= ~libyuv::kCpuHasAVX512VBMI; + } + if (TestEnv("LIBYUV_DISABLE_AVX512VBMI2")) { + cpu_info &= ~libyuv::kCpuHasAVX512VBMI2; + } + if (TestEnv("LIBYUV_DISABLE_AVX512VBITALG")) { + cpu_info &= ~libyuv::kCpuHasAVX512VBITALG; + } + if (TestEnv("LIBYUV_DISABLE_AVX512VPOPCNTDQ")) { + cpu_info &= ~libyuv::kCpuHasAVX512VPOPCNTDQ; + } + if (TestEnv("LIBYUV_DISABLE_GFNI")) { + cpu_info &= ~libyuv::kCpuHasGFNI; + } +#endif + if (TestEnv("LIBYUV_DISABLE_ASM")) { + cpu_info = libyuv::kCpuInitialized; + } + return cpu_info; +} + +// For quicker unittests, default is 128 x 72. But when benchmarking, +// default to 720p. Allow size to specify. +// Set flags to -1 for benchmarking to avoid slower C code. + +LibYUVConvertTest::LibYUVConvertTest() + : benchmark_iterations_(1), + benchmark_width_(128), + benchmark_height_(72), + disable_cpu_flags_(1), + benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) { + benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat); + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) { + benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width); + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) { + benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height); + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) { + disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags); + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + 
benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) { + benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info); + } + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); + benchmark_pixels_div1280_ = + static_cast((static_cast(Abs(benchmark_width_)) * + static_cast(Abs(benchmark_height_)) * + static_cast(benchmark_iterations_) + + 1279.0) / + 1280.0); +} + +LibYUVColorTest::LibYUVColorTest() + : benchmark_iterations_(1), + benchmark_width_(128), + benchmark_height_(72), + disable_cpu_flags_(1), + benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) { + benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat); + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) { + benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width); + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) { + benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height); + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) { + disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags); + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) { + benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info); + } + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ 
= TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); + benchmark_pixels_div1280_ = + static_cast((static_cast(Abs(benchmark_width_)) * + static_cast(Abs(benchmark_height_)) * + static_cast(benchmark_iterations_) + + 1279.0) / + 1280.0); +} + +LibYUVScaleTest::LibYUVScaleTest() + : benchmark_iterations_(1), + benchmark_width_(128), + benchmark_height_(72), + disable_cpu_flags_(1), + benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) { + benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat); + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) { + benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width); + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) { + benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height); + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) { + disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags); + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) { + benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info); + } + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); + benchmark_pixels_div1280_ = + static_cast((static_cast(Abs(benchmark_width_)) * + static_cast(Abs(benchmark_height_)) * + static_cast(benchmark_iterations_) + + 
1279.0) / + 1280.0); +} + +LibYUVRotateTest::LibYUVRotateTest() + : benchmark_iterations_(1), + benchmark_width_(128), + benchmark_height_(72), + disable_cpu_flags_(1), + benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) { + benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat); + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) { + benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width); + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) { + benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height); + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) { + disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags); + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) { + benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info); + } + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); + benchmark_pixels_div1280_ = + static_cast((static_cast(Abs(benchmark_width_)) * + static_cast(Abs(benchmark_height_)) * + static_cast(benchmark_iterations_) + + 1279.0) / + 1280.0); +} + +LibYUVPlanarTest::LibYUVPlanarTest() + : benchmark_iterations_(1), + benchmark_width_(128), + benchmark_height_(72), + disable_cpu_flags_(1), + benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); 
+ if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) { + benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat); + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) { + benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width); + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) { + benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height); + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) { + disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags); + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) { + benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info); + } + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); + benchmark_pixels_div1280_ = + static_cast((static_cast(Abs(benchmark_width_)) * + static_cast(Abs(benchmark_height_)) * + static_cast(benchmark_iterations_) + + 1279.0) / + 1280.0); +} + +LibYUVBaseTest::LibYUVBaseTest() + : benchmark_iterations_(1), + benchmark_width_(128), + benchmark_height_(72), + disable_cpu_flags_(1), + benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) { + benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat); + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + 
benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) { + benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width); + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) { + benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height); + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) { + disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags); + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) { + benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info); + } + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); + benchmark_pixels_div1280_ = + static_cast((static_cast(Abs(benchmark_width_)) * + static_cast(Abs(benchmark_height_)) * + static_cast(benchmark_iterations_) + + 1279.0) / + 1280.0); +} + +LibYUVCompareTest::LibYUVCompareTest() + : benchmark_iterations_(1), + benchmark_width_(128), + benchmark_height_(72), + disable_cpu_flags_(1), + benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_repeat)) { + benchmark_iterations_ = LIBYUV_GET_FLAG(FLAGS_libyuv_repeat); + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_width)) { + benchmark_width_ = LIBYUV_GET_FLAG(FLAGS_libyuv_width); + 
} + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_height)) { + benchmark_height_ = LIBYUV_GET_FLAG(FLAGS_libyuv_height); + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_flags)) { + disable_cpu_flags_ = LIBYUV_GET_FLAG(FLAGS_libyuv_flags); + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT + } + if (LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info)) { + benchmark_cpu_info_ = LIBYUV_GET_FLAG(FLAGS_libyuv_cpu_info); + } + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); + benchmark_pixels_div1280_ = + static_cast((static_cast(Abs(benchmark_width_)) * + static_cast(Abs(benchmark_height_)) * + static_cast(benchmark_iterations_) + + 1279.0) / + 1280.0); +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); +#ifdef LIBYUV_USE_ABSL_FLAGS + absl::ParseCommandLine(argc, argv); +#endif + return RUN_ALL_TESTS(); +} diff --git a/unit_test/unit_test.h b/unit_test/unit_test.h new file mode 100644 index 00000000..99cc8d19 --- /dev/null +++ b/unit_test/unit_test.h @@ -0,0 +1,223 @@ +/* + * Copyright 2011 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef UNIT_TEST_UNIT_TEST_H_ // NOLINT +#define UNIT_TEST_UNIT_TEST_H_ + +#include // For NULL +#ifdef _WIN32 +#include +#else +#include +#endif + +#include + +#include "libyuv/basic_types.h" + +#ifndef SIMD_ALIGNED +#if defined(_MSC_VER) && !defined(__CLR_VER) +#define SIMD_ALIGNED(var) __declspec(align(16)) var +#elif defined(__GNUC__) && !defined(__pnacl__) +#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) +#else +#define SIMD_ALIGNED(var) var +#endif +#endif + +static __inline int Abs(int v) { + return v >= 0 ? v : -v; +} + +static __inline float FAbs(float v) { + return v >= 0 ? v : -v; +} +#define OFFBY 0 + +// Scaling uses 16.16 fixed point to step thru the source image, so a +// maximum size of 32767.999 can be expressed. 32768 is valid because +// the step is 1 beyond the image but not used. +// Destination size is mainly constrained by valid scale step not the +// absolute size, so it may be possible to relax the destination size +// constraint. +// Source size is unconstrained for most specialized scalers. e.g. +// An image of 65536 scaled to half size would be valid. The test +// could be relaxed for special scale factors. +// If this test is removed, the scaling function should gracefully +// fail with a return code. The test could be changed to know that +// libyuv failed in a controlled way. + +static const int kMaxWidth = 32768; +static const int kMaxHeight = 32768; + +static inline bool SizeValid(int src_width, + int src_height, + int dst_width, + int dst_height) { + if (src_width > kMaxWidth || src_height > kMaxHeight || + dst_width > kMaxWidth || dst_height > kMaxHeight) { + printf("Warning - size too large to test. 
Skipping\n"); + return false; + } + return true; +} + +#define align_buffer_page_end(var, size) \ + uint8_t* var##_mem = \ + reinterpret_cast(malloc(((size) + 4095 + 63) & ~4095)); \ + uint8_t* var = reinterpret_cast( \ + (intptr_t)(var##_mem + (((size) + 4095 + 63) & ~4095) - (size)) & ~63) + +#define free_aligned_buffer_page_end(var) \ + free(var##_mem); \ + var = NULL + +#define align_buffer_page_end_16(var, size) \ + uint8_t* var##_mem = \ + reinterpret_cast(malloc(((size)*2 + 4095 + 63) & ~4095)); \ + uint16_t* var = reinterpret_cast( \ + (intptr_t)(var##_mem + (((size)*2 + 4095 + 63) & ~4095) - (size)*2) & \ + ~63) + +#define free_aligned_buffer_page_end_16(var) \ + free(var##_mem); \ + var = NULL + +#ifdef WIN32 +static inline double get_time() { + LARGE_INTEGER t, f; + QueryPerformanceCounter(&t); + QueryPerformanceFrequency(&f); + return static_cast(t.QuadPart) / static_cast(f.QuadPart); +} +#else +static inline double get_time() { + struct timeval t; + struct timezone tzp; + gettimeofday(&t, &tzp); + return t.tv_sec + t.tv_usec * 1e-6; +} +#endif + +#ifndef SIMD_ALIGNED +#if defined(_MSC_VER) && !defined(__CLR_VER) +#define SIMD_ALIGNED(var) __declspec(align(16)) var +#elif defined(__GNUC__) && !defined(__pnacl__) +#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) +#else +#define SIMD_ALIGNED(var) var +#endif +#endif + +extern unsigned int fastrand_seed; +inline int fastrand() { + fastrand_seed = fastrand_seed * 214013u + 2531011u; + return static_cast((fastrand_seed >> 16) & 0xffff); +} + +// ubsan fails if dst is unaligned unless we use uint8 +static inline void MemRandomize(uint8_t* dst, int64_t len) { + int64_t i; + for (i = 0; i < len - 1; i += 2) { + int r = fastrand(); + dst[0] = static_cast(r); + dst[1] = static_cast(r >> 8); + dst += 2; + } + for (; i < len; ++i) { + *dst++ = fastrand(); + } +} + +class LibYUVColorTest : public ::testing::Test { + protected: + LibYUVColorTest(); + + int benchmark_iterations_; // Default 1. 
Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. + int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. +}; + +class LibYUVConvertTest : public ::testing::Test { + protected: + LibYUVConvertTest(); + + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. + int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. +}; + +class LibYUVScaleTest : public ::testing::Test { + protected: + LibYUVScaleTest(); + + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. + int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. +}; + +class LibYUVRotateTest : public ::testing::Test { + protected: + LibYUVRotateTest(); + + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. + int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. 
+}; + +class LibYUVPlanarTest : public ::testing::Test { + protected: + LibYUVPlanarTest(); + + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. + int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. +}; + +class LibYUVBaseTest : public ::testing::Test { + protected: + LibYUVBaseTest(); + + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. + int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. +}; + +class LibYUVCompareTest : public ::testing::Test { + protected: + LibYUVCompareTest(); + + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. + int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. +}; + +#endif // UNIT_TEST_UNIT_TEST_H_ NOLINT diff --git a/unit_test/video_common_test.cc b/unit_test/video_common_test.cc new file mode 100644 index 00000000..36728ea9 --- /dev/null +++ b/unit_test/video_common_test.cc @@ -0,0 +1,112 @@ +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "../unit_test/unit_test.h" +#include "libyuv/video_common.h" + +namespace libyuv { + +// Tests FourCC codes in video common, which are used for ConvertToI420(). + +static bool TestValidChar(uint32_t onecc) { + return (onecc >= '0' && onecc <= '9') || (onecc >= 'A' && onecc <= 'Z') || + (onecc >= 'a' && onecc <= 'z') || (onecc == ' ') || (onecc == 0xff); +} + +static bool TestValidFourCC(uint32_t fourcc, int bpp) { + if (!TestValidChar(fourcc & 0xff) || !TestValidChar((fourcc >> 8) & 0xff) || + !TestValidChar((fourcc >> 16) & 0xff) || + !TestValidChar((fourcc >> 24) & 0xff)) { + return false; + } + if (bpp < 0 || bpp > 64) { + return false; + } + return true; +} + +TEST_F(LibYUVBaseTest, TestCanonicalFourCC) { + EXPECT_EQ(static_cast(FOURCC_I420), CanonicalFourCC(FOURCC_IYUV)); + EXPECT_EQ(static_cast(FOURCC_I420), CanonicalFourCC(FOURCC_YU12)); + EXPECT_EQ(static_cast(FOURCC_I422), CanonicalFourCC(FOURCC_YU16)); + EXPECT_EQ(static_cast(FOURCC_I444), CanonicalFourCC(FOURCC_YU24)); + EXPECT_EQ(static_cast(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUYV)); + EXPECT_EQ(static_cast(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUVS)); + EXPECT_EQ(static_cast(FOURCC_UYVY), CanonicalFourCC(FOURCC_HDYC)); + EXPECT_EQ(static_cast(FOURCC_UYVY), CanonicalFourCC(FOURCC_2VUY)); + EXPECT_EQ(static_cast(FOURCC_MJPG), CanonicalFourCC(FOURCC_JPEG)); + EXPECT_EQ(static_cast(FOURCC_MJPG), CanonicalFourCC(FOURCC_DMB1)); + EXPECT_EQ(static_cast(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3)); + EXPECT_EQ(static_cast(FOURCC_24BG), CanonicalFourCC(FOURCC_BGR3)); + EXPECT_EQ(static_cast(FOURCC_BGRA), CanonicalFourCC(FOURCC_CM32)); + EXPECT_EQ(static_cast(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24)); + EXPECT_EQ(static_cast(FOURCC_RGBO), CanonicalFourCC(FOURCC_L555)); + 
EXPECT_EQ(static_cast(FOURCC_RGBP), CanonicalFourCC(FOURCC_L565)); + EXPECT_EQ(static_cast(FOURCC_RGBO), CanonicalFourCC(FOURCC_5551)); +} + +TEST_F(LibYUVBaseTest, TestFourCC) { + EXPECT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I422, FOURCC_BPP_I422)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I444, FOURCC_BPP_I444)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I400, FOURCC_BPP_I400)); + EXPECT_TRUE(TestValidFourCC(FOURCC_NV21, FOURCC_BPP_NV21)); + EXPECT_TRUE(TestValidFourCC(FOURCC_NV12, FOURCC_BPP_NV12)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YUY2, FOURCC_BPP_YUY2)); + EXPECT_TRUE(TestValidFourCC(FOURCC_UYVY, FOURCC_BPP_UYVY)); + EXPECT_TRUE(TestValidFourCC(FOURCC_M420, FOURCC_BPP_M420)); // deprecated. + EXPECT_TRUE(TestValidFourCC(FOURCC_Q420, FOURCC_BPP_Q420)); // deprecated. + EXPECT_TRUE(TestValidFourCC(FOURCC_ARGB, FOURCC_BPP_ARGB)); + EXPECT_TRUE(TestValidFourCC(FOURCC_BGRA, FOURCC_BPP_BGRA)); + EXPECT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR)); + EXPECT_TRUE(TestValidFourCC(FOURCC_AR30, FOURCC_BPP_AR30)); + EXPECT_TRUE(TestValidFourCC(FOURCC_AB30, FOURCC_BPP_AB30)); + EXPECT_TRUE(TestValidFourCC(FOURCC_AR64, FOURCC_BPP_AR64)); + EXPECT_TRUE(TestValidFourCC(FOURCC_AB64, FOURCC_BPP_AB64)); + EXPECT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RGBP, FOURCC_BPP_RGBP)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RGBO, FOURCC_BPP_RGBO)); + EXPECT_TRUE(TestValidFourCC(FOURCC_R444, FOURCC_BPP_R444)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H420, FOURCC_BPP_H420)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H422, FOURCC_BPP_H422)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H010, FOURCC_BPP_H010)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H210, FOURCC_BPP_H210)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I010, 
FOURCC_BPP_I010)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I210, FOURCC_BPP_I210)); + EXPECT_TRUE(TestValidFourCC(FOURCC_P010, FOURCC_BPP_P010)); + EXPECT_TRUE(TestValidFourCC(FOURCC_P210, FOURCC_BPP_P210)); + EXPECT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YV16, FOURCC_BPP_YV16)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YV24, FOURCC_BPP_YV24)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YU12, FOURCC_BPP_YU12)); + EXPECT_TRUE(TestValidFourCC(FOURCC_IYUV, FOURCC_BPP_IYUV)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YU16, FOURCC_BPP_YU16)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YU24, FOURCC_BPP_YU24)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YUYV, FOURCC_BPP_YUYV)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YUVS, FOURCC_BPP_YUVS)); + EXPECT_TRUE(TestValidFourCC(FOURCC_HDYC, FOURCC_BPP_HDYC)); + EXPECT_TRUE(TestValidFourCC(FOURCC_2VUY, FOURCC_BPP_2VUY)); + EXPECT_TRUE(TestValidFourCC(FOURCC_JPEG, FOURCC_BPP_JPEG)); + EXPECT_TRUE(TestValidFourCC(FOURCC_DMB1, FOURCC_BPP_DMB1)); + EXPECT_TRUE(TestValidFourCC(FOURCC_BA81, FOURCC_BPP_BA81)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RGB3, FOURCC_BPP_RGB3)); + EXPECT_TRUE(TestValidFourCC(FOURCC_BGR3, FOURCC_BPP_BGR3)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H264, FOURCC_BPP_H264)); + EXPECT_TRUE(TestValidFourCC(FOURCC_ANY, FOURCC_BPP_ANY)); +} + +} // namespace libyuv -- cgit v1.2.3