diff options
author | Hangyu Kuang <hkuang@google.com> | 2016-07-06 14:21:45 -0700 |
---|---|---|
committer | Hangyu Kuang <hkuang@google.com> | 2016-07-08 09:51:10 -0700 |
commit | f047e7ca6983218eed7703c7afd51fed7bd3b5c9 (patch) | |
tree | 2667579566b6270c21ee4b495b4cd119af5ccf5b /files/unit_test | |
parent | bb74e3e19b98261031216de8cadcef34cccd9e4a (diff) | |
download | libyuv-f047e7ca6983218eed7703c7afd51fed7bd3b5c9.tar.gz |
Update libyuv to r1602 version to get best performance.android-cts_7.1_r1android-cts-7.1_r9android-cts-7.1_r8android-cts-7.1_r7android-cts-7.1_r6android-cts-7.1_r5android-cts-7.1_r4android-cts-7.1_r3android-cts-7.1_r29android-cts-7.1_r28android-cts-7.1_r27android-cts-7.1_r26android-cts-7.1_r25android-cts-7.1_r24android-cts-7.1_r23android-cts-7.1_r22android-cts-7.1_r21android-cts-7.1_r20android-cts-7.1_r2android-cts-7.1_r19android-cts-7.1_r18android-cts-7.1_r17android-cts-7.1_r16android-cts-7.1_r15android-cts-7.1_r14android-cts-7.1_r13android-cts-7.1_r12android-cts-7.1_r11android-cts-7.1_r10android-cts-7.1_r1android-7.1.2_r9android-7.1.2_r8android-7.1.2_r6android-7.1.2_r5android-7.1.2_r4android-7.1.2_r39android-7.1.2_r38android-7.1.2_r37android-7.1.2_r36android-7.1.2_r33android-7.1.2_r32android-7.1.2_r30android-7.1.2_r3android-7.1.2_r29android-7.1.2_r28android-7.1.2_r27android-7.1.2_r25android-7.1.2_r24android-7.1.2_r23android-7.1.2_r2android-7.1.2_r19android-7.1.2_r18android-7.1.2_r17android-7.1.2_r16android-7.1.2_r15android-7.1.2_r14android-7.1.2_r13android-7.1.2_r12android-7.1.2_r11android-7.1.2_r10android-7.1.2_r1android-7.1.1_r9android-7.1.1_r8android-7.1.1_r7android-7.1.1_r61android-7.1.1_r60android-7.1.1_r6android-7.1.1_r59android-7.1.1_r58android-7.1.1_r57android-7.1.1_r56android-7.1.1_r55android-7.1.1_r54android-7.1.1_r53android-7.1.1_r52android-7.1.1_r51android-7.1.1_r50android-7.1.1_r49android-7.1.1_r48android-7.1.1_r47android-7.1.1_r46android-7.1.1_r45android-7.1.1_r44android-7.1.1_r43android-7.1.1_r42android-7.1.1_r41android-7.1.1_r40android-7.1.1_r4android-7.1.1_r39android-7.1.1_r38android-7.1.1_r35android-7.1.1_r33android-7.1.1_r32android-7.1.1_r31android-7.1.1_r3android-7.1.1_r28android-7.1.1_r27android-7.1.1_r26android-7.1.1_r25android-7.1.1_r24android-7.1.1_r23android-7.1.1_r22android-7.1.1_r21android-7.1.1_r20android-7.1.1_r2android-7.1.1_r17android-7.1.1_r16android-7.1.1_r15android-7.1.1_r14android-7.1.1_r13android-7.1.1_r12android-7.1.1_r11androi
d-7.1.1_r10android-7.1.1_r1android-7.1.0_r7android-7.1.0_r6android-7.1.0_r5android-7.1.0_r4android-7.1.0_r3android-7.1.0_r2android-7.1.0_r1nougat-mr2.3-releasenougat-mr2.2-releasenougat-mr2.1-releasenougat-mr2-security-releasenougat-mr2-releasenougat-mr2-pixel-releasenougat-mr2-devnougat-mr1.8-releasenougat-mr1.7-releasenougat-mr1.6-releasenougat-mr1.5-releasenougat-mr1.4-releasenougat-mr1.3-releasenougat-mr1.2-releasenougat-mr1.1-releasenougat-mr1-volantis-releasenougat-mr1-security-releasenougat-mr1-releasenougat-mr1-flounder-releasenougat-mr1-devnougat-mr1-cts-releasenougat-dr1-release
Bug: 29870647
Change-Id: I8ec9fab7f55765fa33ebe7ba1c7ad2147f418de2
Diffstat (limited to 'files/unit_test')
-rw-r--r-- | files/unit_test/basictypes_test.cc | 60 | ||||
-rw-r--r-- | files/unit_test/color_test.cc | 570 | ||||
-rw-r--r-- | files/unit_test/compare_test.cc | 419 | ||||
-rw-r--r-- | files/unit_test/convert_test.cc | 1861 | ||||
-rw-r--r-- | files/unit_test/cpu_test.cc | 71 | ||||
-rw-r--r-- | files/unit_test/math_test.cc | 155 | ||||
-rw-r--r-- | files/unit_test/planar_test.cc | 2520 | ||||
-rw-r--r-- | files/unit_test/rotate_argb_test.cc | 287 | ||||
-rw-r--r-- | files/unit_test/rotate_test.cc | 1725 | ||||
-rw-r--r-- | files/unit_test/scale_argb_test.cc | 515 | ||||
-rw-r--r-- | files/unit_test/scale_test.cc | 494 | ||||
-rw-r--r-- | files/unit_test/testdata/juno.txt | 15 | ||||
-rw-r--r-- | files/unit_test/unit_test.cc | 339 | ||||
-rw-r--r-- | files/unit_test/unit_test.h | 183 | ||||
-rw-r--r-- | files/unit_test/version_test.cc | 42 | ||||
-rw-r--r-- | files/unit_test/video_common_test.cc | 107 |
16 files changed, 6473 insertions, 2890 deletions
diff --git a/files/unit_test/basictypes_test.cc b/files/unit_test/basictypes_test.cc new file mode 100644 index 00000000..89f7644d --- /dev/null +++ b/files/unit_test/basictypes_test.cc @@ -0,0 +1,60 @@ +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "../unit_test/unit_test.h" +#include "libyuv/basic_types.h" + +namespace libyuv { + +TEST_F(LibYUVBaseTest, Endian) { + uint16 v16 = 0x1234u; + uint8 first_byte = *reinterpret_cast<uint8*>(&v16); +#if defined(LIBYUV_LITTLE_ENDIAN) + EXPECT_EQ(0x34u, first_byte); +#else + EXPECT_EQ(0x12u, first_byte); +#endif +} + +TEST_F(LibYUVBaseTest, SizeOfTypes) { + int8 i8 = -1; + uint8 u8 = 1u; + int16 i16 = -1; + uint16 u16 = 1u; + int32 i32 = -1; + uint32 u32 = 1u; + int64 i64 = -1; + uint64 u64 = 1u; + EXPECT_EQ(1u, sizeof(i8)); + EXPECT_EQ(1u, sizeof(u8)); + EXPECT_EQ(2u, sizeof(i16)); + EXPECT_EQ(2u, sizeof(u16)); + EXPECT_EQ(4u, sizeof(i32)); + EXPECT_EQ(4u, sizeof(u32)); + EXPECT_EQ(8u, sizeof(i64)); + EXPECT_EQ(8u, sizeof(u64)); + EXPECT_GT(0, i8); + EXPECT_LT(0u, u8); + EXPECT_GT(0, i16); + EXPECT_LT(0u, u16); + EXPECT_GT(0, i32); + EXPECT_LT(0u, u32); + EXPECT_GT(0, i64); + EXPECT_LT(0u, u64); +} + +TEST_F(LibYUVBaseTest, SizeOfConstants) { + EXPECT_EQ(8u, sizeof(INT64_C(0))); + EXPECT_EQ(8u, sizeof(UINT64_C(0))); + EXPECT_EQ(8u, sizeof(INT64_C(0x1234567887654321))); + EXPECT_EQ(8u, sizeof(UINT64_C(0x8765432112345678))); +} + +} // namespace libyuv diff --git a/files/unit_test/color_test.cc b/files/unit_test/color_test.cc new file mode 100644 index 00000000..36041d99 --- /dev/null +++ b/files/unit_test/color_test.cc @@ -0,0 +1,570 @@ +/* + * Copyright 
2015 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> + +#include "libyuv/basic_types.h" +#include "libyuv/convert.h" +#include "libyuv/convert_argb.h" +#include "libyuv/convert_from.h" +#include "libyuv/convert_from_argb.h" +#include "libyuv/cpu_id.h" +#include "../unit_test/unit_test.h" + +namespace libyuv { + +// TODO(fbarchard): Port high accuracy YUV to RGB to Neon. +#if !defined(LIBYUV_DISABLE_NEON) && \ + (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) +#define ERROR_R 1 +#define ERROR_G 1 +#define ERROR_B 3 +#define ERROR_FULL 6 +#define ERROR_J420 5 +#else +#define ERROR_R 1 +#define ERROR_G 1 +#define ERROR_B 3 +#define ERROR_FULL 5 +#define ERROR_J420 3 +#endif + +#define TESTCS(TESTNAME, YUVTOARGB, ARGBTOYUV, HS1, HS, HN, DIFF) \ + TEST_F(LibYUVColorTest, TESTNAME) { \ + const int kPixels = benchmark_width_ * benchmark_height_; \ + const int kHalfPixels = ((benchmark_width_ + 1) / 2) * \ + ((benchmark_height_ + HS1) / HS); \ + align_buffer_page_end(orig_y, kPixels); \ + align_buffer_page_end(orig_u, kHalfPixels); \ + align_buffer_page_end(orig_v, kHalfPixels); \ + align_buffer_page_end(orig_pixels, kPixels * 4); \ + align_buffer_page_end(temp_y, kPixels); \ + align_buffer_page_end(temp_u, kHalfPixels); \ + align_buffer_page_end(temp_v, kHalfPixels); \ + align_buffer_page_end(dst_pixels_opt, kPixels * 4); \ + align_buffer_page_end(dst_pixels_c, kPixels * 4); \ + \ + MemRandomize(orig_pixels, kPixels * 4); \ + MemRandomize(orig_y, kPixels); \ + MemRandomize(orig_u, kHalfPixels); \ + MemRandomize(orig_v, kHalfPixels); \ + MemRandomize(temp_y, kPixels); \ + MemRandomize(temp_u, 
kHalfPixels); \ + MemRandomize(temp_v, kHalfPixels); \ + MemRandomize(dst_pixels_opt, kPixels * 4); \ + MemRandomize(dst_pixels_c, kPixels * 4); \ + \ + /* The test is overall for color conversion matrix being reversible, so */ \ + /* this initializes the pixel with 2x2 blocks to eliminate subsampling. */ \ + uint8* p = orig_y; \ + for (int y = 0; y < benchmark_height_ - HS1; y += HS) { \ + for (int x = 0; x < benchmark_width_ - 1; x += 2) { \ + uint8 r = static_cast<uint8>(fastrand()); \ + p[0] = r; \ + p[1] = r; \ + p[HN] = r; \ + p[HN + 1] = r; \ + p += 2; \ + } \ + if (benchmark_width_ & 1) { \ + uint8 r = static_cast<uint8>(fastrand()); \ + p[0] = r; \ + p[HN] = r; \ + p += 1; \ + } \ + p += HN; \ + } \ + if ((benchmark_height_ & 1) && HS == 2) { \ + for (int x = 0; x < benchmark_width_ - 1; x += 2) { \ + uint8 r = static_cast<uint8>(fastrand()); \ + p[0] = r; \ + p[1] = r; \ + p += 2; \ + } \ + if (benchmark_width_ & 1) { \ + uint8 r = static_cast<uint8>(fastrand()); \ + p[0] = r; \ + p += 1; \ + } \ + } \ + /* Start with YUV converted to ARGB. 
*/ \ + YUVTOARGB(orig_y, benchmark_width_, \ + orig_u, (benchmark_width_ + 1) / 2, \ + orig_v, (benchmark_width_ + 1) / 2, \ + orig_pixels, benchmark_width_ * 4, \ + benchmark_width_, benchmark_height_); \ + \ + ARGBTOYUV(orig_pixels, benchmark_width_ * 4, \ + temp_y, benchmark_width_, \ + temp_u, (benchmark_width_ + 1) / 2, \ + temp_v, (benchmark_width_ + 1) / 2, \ + benchmark_width_, benchmark_height_); \ + \ + MaskCpuFlags(disable_cpu_flags_); \ + YUVTOARGB(temp_y, benchmark_width_, \ + temp_u, (benchmark_width_ + 1) / 2, \ + temp_v, (benchmark_width_ + 1) / 2, \ + dst_pixels_c, benchmark_width_ * 4, \ + benchmark_width_, benchmark_height_); \ + MaskCpuFlags(benchmark_cpu_info_); \ + \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + YUVTOARGB(temp_y, benchmark_width_, \ + temp_u, (benchmark_width_ + 1) / 2, \ + temp_v, (benchmark_width_ + 1) / 2, \ + dst_pixels_opt, benchmark_width_ * 4, \ + benchmark_width_, benchmark_height_); \ + } \ + /* Test C and SIMD match. */ \ + for (int i = 0; i < kPixels * 4; ++i) { \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ + } \ + /* Test SIMD is close to original. 
*/ \ + for (int i = 0; i < kPixels * 4; ++i) { \ + EXPECT_NEAR(static_cast<int>(orig_pixels[i]), \ + static_cast<int>(dst_pixels_opt[i]), DIFF); \ + } \ + \ + free_aligned_buffer_page_end(orig_pixels); \ + free_aligned_buffer_page_end(orig_y); \ + free_aligned_buffer_page_end(orig_u); \ + free_aligned_buffer_page_end(orig_v); \ + free_aligned_buffer_page_end(temp_y); \ + free_aligned_buffer_page_end(temp_u); \ + free_aligned_buffer_page_end(temp_v); \ + free_aligned_buffer_page_end(dst_pixels_opt); \ + free_aligned_buffer_page_end(dst_pixels_c); \ +} \ + +TESTCS(TestI420, I420ToARGB, ARGBToI420, 1, 2, benchmark_width_, ERROR_FULL) +TESTCS(TestI422, I422ToARGB, ARGBToI422, 0, 1, 0, ERROR_FULL) +TESTCS(TestJ420, J420ToARGB, ARGBToJ420, 1, 2, benchmark_width_, ERROR_J420) +TESTCS(TestJ422, J422ToARGB, ARGBToJ422, 0, 1, 0, ERROR_J420) + +static void YUVToRGB(int y, int u, int v, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); + + SIMD_ALIGNED(uint8 orig_y[16]); + SIMD_ALIGNED(uint8 orig_u[8]); + SIMD_ALIGNED(uint8 orig_v[8]); + SIMD_ALIGNED(uint8 orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + memset(orig_u, u, kHalfPixels); + memset(orig_v, v, kHalfPixels); + + /* YUV converted to ARGB. 
*/ + I422ToARGB(orig_y, kWidth, + orig_u, (kWidth + 1) / 2, + orig_v, (kWidth + 1) / 2, + orig_pixels, kWidth * 4, + kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +static void YUVJToRGB(int y, int u, int v, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); + + SIMD_ALIGNED(uint8 orig_y[16]); + SIMD_ALIGNED(uint8 orig_u[8]); + SIMD_ALIGNED(uint8 orig_v[8]); + SIMD_ALIGNED(uint8 orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + memset(orig_u, u, kHalfPixels); + memset(orig_v, v, kHalfPixels); + + /* YUV converted to ARGB. */ + J422ToARGB(orig_y, kWidth, + orig_u, (kWidth + 1) / 2, + orig_v, (kWidth + 1) / 2, + orig_pixels, kWidth * 4, + kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +static void YToRGB(int y, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + + SIMD_ALIGNED(uint8 orig_y[16]); + SIMD_ALIGNED(uint8 orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + + /* YUV converted to ARGB. */ + I400ToARGB(orig_y, kWidth, orig_pixels, kWidth * 4, kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +static void YJToRGB(int y, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + + SIMD_ALIGNED(uint8 orig_y[16]); + SIMD_ALIGNED(uint8 orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + + /* YUV converted to ARGB. */ + J400ToARGB(orig_y, kWidth, orig_pixels, kWidth * 4, kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +// Pick a method for clamping. +// #define CLAMPMETHOD_IF 1 +// #define CLAMPMETHOD_TABLE 1 +#define CLAMPMETHOD_TERNARY 1 +// #define CLAMPMETHOD_MASK 1 + +// Pick a method for rounding. 
+#define ROUND(f) static_cast<int>(f + 0.5f) +// #define ROUND(f) lrintf(f) +// #define ROUND(f) static_cast<int>(round(f)) +// #define ROUND(f) _mm_cvt_ss2si(_mm_load_ss(&f)) + +#if defined(CLAMPMETHOD_IF) +static int RoundToByte(float f) { + int i = ROUND(f); + if (i < 0) { + i = 0; + } + if (i > 255) { + i = 255; + } + return i; +} +#elif defined(CLAMPMETHOD_TABLE) +static const unsigned char clamptable[811] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, + 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, + 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, + 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, + 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, + 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, + 
164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, + 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, + 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, + 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, + 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, + 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, + 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 +}; + +static int RoundToByte(float f) { + return clamptable[ROUND(f) + 276]; +} +#elif 
defined(CLAMPMETHOD_TERNARY) +static int RoundToByte(float f) { + int i = ROUND(f); + return (i < 0) ? 0 : ((i > 255) ? 255 : i); +} +#elif defined(CLAMPMETHOD_MASK) +static int RoundToByte(float f) { + int i = ROUND(f); + i = ((-(i) >> 31) & (i)); // clamp to 0. + return (((255 - (i)) >> 31) | (i)) & 255; // clamp to 255. +} +#endif + +#define RANDOM256(s) ((s & 1) ? ((s >> 1) ^ 0xb8) : (s >> 1)) + +TEST_F(LibYUVColorTest, TestRoundToByte) { + int allb = 0; + int count = benchmark_width_ * benchmark_height_; + for (int i = 0; i < benchmark_iterations_; ++i) { + float f = (fastrand() & 255) * 3.14f - 260.f; + for (int j = 0; j < count; ++j) { + int b = RoundToByte(f); + f += 0.91f; + allb |= b; + } + } + EXPECT_GE(allb, 0); + EXPECT_LE(allb, 255); +} + +static void YUVToRGBReference(int y, int u, int v, int* r, int* g, int* b) { + *r = RoundToByte((y - 16) * 1.164 - (v - 128) * -1.596); + *g = RoundToByte((y - 16) * 1.164 - (u - 128) * 0.391 - (v - 128) * 0.813); + *b = RoundToByte((y - 16) * 1.164 - (u - 128) * -2.018); +} + +static void YUVJToRGBReference(int y, int u, int v, int* r, int* g, int* b) { + *r = RoundToByte(y - (v - 128) * -1.40200); + *g = RoundToByte(y - (u - 128) * 0.34414 - (v - 128) * 0.71414); + *b = RoundToByte(y - (u - 128) * -1.77200); +} + +TEST_F(LibYUVColorTest, TestYUV) { + int r0, g0, b0, r1, g1, b1; + + // cyan (less red) + YUVToRGBReference(240, 255, 0, &r0, &g0, &b0); + EXPECT_EQ(56, r0); + EXPECT_EQ(255, g0); + EXPECT_EQ(255, b0); + + YUVToRGB(240, 255, 0, &r1, &g1, &b1); + EXPECT_EQ(57, r1); + EXPECT_EQ(255, g1); + EXPECT_EQ(255, b1); + + // green (less red and blue) + YUVToRGBReference(240, 0, 0, &r0, &g0, &b0); + EXPECT_EQ(56, r0); + EXPECT_EQ(255, g0); + EXPECT_EQ(2, b0); + + YUVToRGB(240, 0, 0, &r1, &g1, &b1); + EXPECT_EQ(57, r1); + EXPECT_EQ(255, g1); + EXPECT_EQ(5, b1); + + for (int i = 0; i < 256; ++i) { + YUVToRGBReference(i, 128, 128, &r0, &g0, &b0); + YUVToRGB(i, 128, 128, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, ERROR_R); + 
EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); + + YUVToRGBReference(i, 0, 0, &r0, &g0, &b0); + YUVToRGB(i, 0, 0, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); + + YUVToRGBReference(i, 0, 255, &r0, &g0, &b0); + YUVToRGB(i, 0, 255, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); + } +} + +TEST_F(LibYUVColorTest, TestGreyYUV) { + int r0, g0, b0, r1, g1, b1, r2, g2, b2; + + // black + YUVToRGBReference(16, 128, 128, &r0, &g0, &b0); + EXPECT_EQ(0, r0); + EXPECT_EQ(0, g0); + EXPECT_EQ(0, b0); + + YUVToRGB(16, 128, 128, &r1, &g1, &b1); + EXPECT_EQ(0, r1); + EXPECT_EQ(0, g1); + EXPECT_EQ(0, b1); + + // white + YUVToRGBReference(240, 128, 128, &r0, &g0, &b0); + EXPECT_EQ(255, r0); + EXPECT_EQ(255, g0); + EXPECT_EQ(255, b0); + + YUVToRGB(240, 128, 128, &r1, &g1, &b1); + EXPECT_EQ(255, r1); + EXPECT_EQ(255, g1); + EXPECT_EQ(255, b1); + + // grey + YUVToRGBReference(128, 128, 128, &r0, &g0, &b0); + EXPECT_EQ(130, r0); + EXPECT_EQ(130, g0); + EXPECT_EQ(130, b0); + + YUVToRGB(128, 128, 128, &r1, &g1, &b1); + EXPECT_EQ(130, r1); + EXPECT_EQ(130, g1); + EXPECT_EQ(130, b1); + + + for (int y = 0; y < 256; ++y) { + YUVToRGBReference(y, 128, 128, &r0, &g0, &b0); + YUVToRGB(y, 128, 128, &r1, &g1, &b1); + YToRGB(y, &r2, &g2, &b2); + EXPECT_EQ(r0, r1); + EXPECT_EQ(g0, g1); + EXPECT_EQ(b0, b1); + EXPECT_EQ(r0, r2); + EXPECT_EQ(g0, g2); + EXPECT_EQ(b0, b2); + } +} + +static void PrintHistogram(int rh[256], int gh[256], int bh[256]) { + int i; + printf("hist"); + for (i = 0; i < 256; ++i) { + if (rh[i] || gh[i] || bh[i]) { + printf("\t%8d", i - 128); + } + } + printf("\nred"); + for (i = 0; i < 256; ++i) { + if (rh[i] || gh[i] || bh[i]) { + printf("\t%8d", rh[i]); + } + } + printf("\ngreen"); + for (i = 0; i < 256; ++i) { + if (rh[i] || gh[i] || bh[i]) { + printf("\t%8d", gh[i]); + } + } + printf("\nblue"); + for (i = 0; i < 256; ++i) { + if 
(rh[i] || gh[i] || bh[i]) { + printf("\t%8d", bh[i]); + } + } + printf("\n"); +} + +TEST_F(LibYUVColorTest, TestFullYUV) { + int rh[256] = { 0, }, gh[256] = { 0, }, bh[256] = { 0, }; + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { + for (int y2 = 0; y2 < 256; ++y2) { + int r0, g0, b0, r1, g1, b1; + int y = RANDOM256(y2); + YUVToRGBReference(y, u, v, &r0, &g0, &b0); + YUVToRGB(y, u, v, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); + ++rh[r1 - r0 + 128]; + ++gh[g1 - g0 + 128]; + ++bh[b1 - b0 + 128]; + } + } + } + PrintHistogram(rh, gh, bh); +} + +TEST_F(LibYUVColorTest, TestFullYUVJ) { + int rh[256] = { 0, }, gh[256] = { 0, }, bh[256] = { 0, }; + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { + for (int y2 = 0; y2 < 256; ++y2) { + int r0, g0, b0, r1, g1, b1; + int y = RANDOM256(y2); + YUVJToRGBReference(y, u, v, &r0, &g0, &b0); + YUVJToRGB(y, u, v, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, 1); + EXPECT_NEAR(g0, g1, 1); + EXPECT_NEAR(b0, b1, 1); + ++rh[r1 - r0 + 128]; + ++gh[g1 - g0 + 128]; + ++bh[b1 - b0 + 128]; + } + } + } + PrintHistogram(rh, gh, bh); +} + +TEST_F(LibYUVColorTest, TestGreyYUVJ) { + int r0, g0, b0, r1, g1, b1, r2, g2, b2; + + // black + YUVJToRGBReference(0, 128, 128, &r0, &g0, &b0); + EXPECT_EQ(0, r0); + EXPECT_EQ(0, g0); + EXPECT_EQ(0, b0); + + YUVJToRGB(0, 128, 128, &r1, &g1, &b1); + EXPECT_EQ(0, r1); + EXPECT_EQ(0, g1); + EXPECT_EQ(0, b1); + + // white + YUVJToRGBReference(255, 128, 128, &r0, &g0, &b0); + EXPECT_EQ(255, r0); + EXPECT_EQ(255, g0); + EXPECT_EQ(255, b0); + + YUVJToRGB(255, 128, 128, &r1, &g1, &b1); + EXPECT_EQ(255, r1); + EXPECT_EQ(255, g1); + EXPECT_EQ(255, b1); + + // grey + YUVJToRGBReference(128, 128, 128, &r0, &g0, &b0); + EXPECT_EQ(128, r0); + EXPECT_EQ(128, g0); + EXPECT_EQ(128, b0); + + YUVJToRGB(128, 128, 128, &r1, &g1, &b1); + EXPECT_EQ(128, r1); + EXPECT_EQ(128, g1); + EXPECT_EQ(128, b1); + + for (int y = 0; y < 256; ++y) 
{ + YUVJToRGBReference(y, 128, 128, &r0, &g0, &b0); + YUVJToRGB(y, 128, 128, &r1, &g1, &b1); + YJToRGB(y, &r2, &g2, &b2); + EXPECT_EQ(r0, r1); + EXPECT_EQ(g0, g1); + EXPECT_EQ(b0, b1); + EXPECT_EQ(r0, r2); + EXPECT_EQ(g0, g2); + EXPECT_EQ(b0, b2); + } +} + +} // namespace libyuv diff --git a/files/unit_test/compare_test.cc b/files/unit_test/compare_test.cc index 8a49a612..a8ce671d 100644 --- a/files/unit_test/compare_test.cc +++ b/files/unit_test/compare_test.cc @@ -4,7 +4,7 @@ * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ @@ -16,6 +16,7 @@ #include "libyuv/basic_types.h" #include "libyuv/compare.h" #include "libyuv/cpu_id.h" +#include "libyuv/video_common.h" namespace libyuv { @@ -30,50 +31,93 @@ static uint32 ReferenceHashDjb2(const uint8* src, uint64 count, uint32 seed) { return hash; } -TEST_F(libyuvTest, TestDjb2) { - const int kMaxTest = 2049; - align_buffer_16(src_a, kMaxTest) +TEST_F(LibYUVBaseTest, Djb2_Test) { + const int kMaxTest = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_a, kMaxTest); + align_buffer_page_end(src_b, kMaxTest); - for (int i = 0; i < kMaxTest; ++i) { - src_a[i] = i; - } - for (int i = 0; i < kMaxTest; ++i) { - uint32 h1 = HashDjb2(src_a, kMaxTest, 5381); - uint32 h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381); - EXPECT_EQ(h1, h2); - } - // Hash constant generator using for tables in compare - int h = 1; - for (int i = 0; i <= 16 ; ++i) { - printf("%08x ", h); - h *= 33; - } - printf("\n"); - - free_aligned_buffer_16(src_a) -} - -TEST_F(libyuvTest, BenchmakDjb2_C) { - const int kMaxTest = 1280 * 720; - align_buffer_16(src_a, kMaxTest) + const char* fox = "The quick brown fox jumps 
over the lazy dog" + " and feels as if he were in the seventh heaven of typography" + " together with Hermann Zapf"; + uint32 foxhash = HashDjb2(reinterpret_cast<const uint8*>(fox), 131, 5381); + const uint32 kExpectedFoxHash = 2611006483u; + EXPECT_EQ(kExpectedFoxHash, foxhash); for (int i = 0; i < kMaxTest; ++i) { - src_a[i] = i; - } - uint32 h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381); - uint32 h1; - MaskCpuFlags(kCpuInitialized); - for (int i = 0; i < benchmark_iterations_; ++i) { - h1 = HashDjb2(src_a, kMaxTest, 5381); + src_a[i] = (fastrand() & 0xff); + src_b[i] = (fastrand() & 0xff); } - MaskCpuFlags(-1); + // Compare different buffers. Expect hash is different. + uint32 h1 = HashDjb2(src_a, kMaxTest, 5381); + uint32 h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_NE(h1, h2); + + // Make last half same. Expect hash is different. + memcpy(src_a + kMaxTest / 2, src_b + kMaxTest / 2, kMaxTest / 2); + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_NE(h1, h2); + + // Make first half same. Expect hash is different. + memcpy(src_a + kMaxTest / 2, src_a, kMaxTest / 2); + memcpy(src_b + kMaxTest / 2, src_b, kMaxTest / 2); + memcpy(src_a, src_b, kMaxTest / 2); + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_NE(h1, h2); + + // Make same. Expect hash is same. + memcpy(src_a, src_b, kMaxTest); + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_EQ(h1, h2); + + // Mask seed different. Expect hash is different. + memcpy(src_a, src_b, kMaxTest); + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 1234); + EXPECT_NE(h1, h2); + + // Make one byte different in middle. Expect hash is different. + memcpy(src_a, src_b, kMaxTest); + ++src_b[kMaxTest / 2]; + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_NE(h1, h2); + + // Make first byte different. Expect hash is different. 
+ memcpy(src_a, src_b, kMaxTest); + ++src_b[0]; + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_NE(h1, h2); + + // Make last byte different. Expect hash is different. + memcpy(src_a, src_b, kMaxTest); + ++src_b[kMaxTest - 1]; + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_b, kMaxTest, 5381); + EXPECT_NE(h1, h2); + + // Make a zeros. Test different lengths. Expect hash is different. + memset(src_a, 0, kMaxTest); + h1 = HashDjb2(src_a, kMaxTest, 5381); + h2 = HashDjb2(src_a, kMaxTest / 2, 5381); + EXPECT_NE(h1, h2); + + // Make a zeros and seed of zero. Test different lengths. Expect hash is same. + memset(src_a, 0, kMaxTest); + h1 = HashDjb2(src_a, kMaxTest, 0); + h2 = HashDjb2(src_a, kMaxTest / 2, 0); EXPECT_EQ(h1, h2); - free_aligned_buffer_16(src_a) + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); } -TEST_F(libyuvTest, BenchmakDjb2_OPT) { - const int kMaxTest = 1280 * 720; - align_buffer_16(src_a, kMaxTest) +TEST_F(LibYUVBaseTest, BenchmarkDjb2_Opt) { + const int kMaxTest = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_a, kMaxTest); for (int i = 0; i < kMaxTest; ++i) { src_a[i] = i; @@ -84,13 +128,12 @@ TEST_F(libyuvTest, BenchmakDjb2_OPT) { h1 = HashDjb2(src_a, kMaxTest, 5381); } EXPECT_EQ(h1, h2); - free_aligned_buffer_16(src_a) + free_aligned_buffer_page_end(src_a); } -TEST_F(libyuvTest, BenchmakDjb2_Unaligned_OPT) { - const int kMaxTest = 1280 * 720; - align_buffer_16(src_a, kMaxTest + 1) - +TEST_F(LibYUVBaseTest, BenchmarkDjb2_Unaligned) { + const int kMaxTest = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_a, kMaxTest + 1); for (int i = 0; i < kMaxTest; ++i) { src_a[i + 1] = i; } @@ -100,64 +143,106 @@ TEST_F(libyuvTest, BenchmakDjb2_Unaligned_OPT) { h1 = HashDjb2(src_a + 1, kMaxTest, 5381); } EXPECT_EQ(h1, h2); - free_aligned_buffer_16(src_a) + free_aligned_buffer_page_end(src_a); } -TEST_F(libyuvTest, BenchmarkSumSquareError_C) { - 
const int kMaxWidth = 4096 * 3; - align_buffer_16(src_a, kMaxWidth) - align_buffer_16(src_b, kMaxWidth) - - for (int i = 0; i < kMaxWidth; ++i) { - src_a[i] = i; - src_b[i] = i; +TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Opt) { + uint32 fourcc; + const int kMaxTest = benchmark_width_ * benchmark_height_ * 4; + align_buffer_page_end(src_a, kMaxTest); + for (int i = 0; i < kMaxTest; ++i) { + src_a[i] = 255; } - MaskCpuFlags(kCpuInitialized); + src_a[0] = 0; + fourcc = ARGBDetect(src_a, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + EXPECT_EQ(libyuv::FOURCC_BGRA, fourcc); + src_a[0] = 255; + src_a[3] = 0; + fourcc = ARGBDetect(src_a, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + EXPECT_EQ(libyuv::FOURCC_ARGB, fourcc); + src_a[3] = 255; + for (int i = 0; i < benchmark_iterations_; ++i) { - ComputeSumSquareError(src_a, src_b, kMaxWidth); + fourcc = ARGBDetect(src_a, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); } + EXPECT_EQ(0, fourcc); - MaskCpuFlags(-1); + free_aligned_buffer_page_end(src_a); +} - EXPECT_EQ(0, 0); +TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Unaligned) { + uint32 fourcc; + const int kMaxTest = benchmark_width_ * benchmark_height_ * 4 + 1; + align_buffer_page_end(src_a, kMaxTest); + for (int i = 1; i < kMaxTest; ++i) { + src_a[i] = 255; + } - free_aligned_buffer_16(src_a) - free_aligned_buffer_16(src_b) -} + src_a[0 + 1] = 0; + fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + EXPECT_EQ(libyuv::FOURCC_BGRA, fourcc); + src_a[0 + 1] = 255; + src_a[3 + 1] = 0; + fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + EXPECT_EQ(libyuv::FOURCC_ARGB, fourcc); + src_a[3 + 1] = 255; + + for (int i = 0; i < benchmark_iterations_; ++i) { + fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + } + EXPECT_EQ(0, fourcc); -TEST_F(libyuvTest, BenchmarkSumSquareError_OPT) { + 
free_aligned_buffer_page_end(src_a); +} +TEST_F(LibYUVBaseTest, BenchmarkSumSquareError_Opt) { const int kMaxWidth = 4096 * 3; - align_buffer_16(src_a, kMaxWidth) - align_buffer_16(src_b, kMaxWidth) + align_buffer_page_end(src_a, kMaxWidth); + align_buffer_page_end(src_b, kMaxWidth); + memset(src_a, 0, kMaxWidth); + memset(src_b, 0, kMaxWidth); + + memcpy(src_a, "test0123test4567", 16); + memcpy(src_b, "tick0123tock4567", 16); + uint64 h1 = ComputeSumSquareError(src_a, src_b, 16); + EXPECT_EQ(790u, h1); for (int i = 0; i < kMaxWidth; ++i) { src_a[i] = i; src_b[i] = i; } + memset(src_a, 0, kMaxWidth); + memset(src_b, 0, kMaxWidth); - for (int i = 0; i < benchmark_iterations_; ++i) { - ComputeSumSquareError(src_a, src_b, kMaxWidth); + int count = benchmark_iterations_ * + ((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth); + for (int i = 0; i < count; ++i) { + h1 = ComputeSumSquareError(src_a, src_b, kMaxWidth); } - EXPECT_EQ(0, 0); + EXPECT_EQ(0, h1); - free_aligned_buffer_16(src_a) - free_aligned_buffer_16(src_b) + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); } -TEST_F(libyuvTest, SumSquareError) { +TEST_F(LibYUVBaseTest, SumSquareError) { const int kMaxWidth = 4096 * 3; - align_buffer_16(src_a, kMaxWidth) - align_buffer_16(src_b, kMaxWidth) - + align_buffer_page_end(src_a, kMaxWidth); + align_buffer_page_end(src_b, kMaxWidth); memset(src_a, 0, kMaxWidth); memset(src_b, 0, kMaxWidth); uint64 err; err = ComputeSumSquareError(src_a, src_b, kMaxWidth); - EXPECT_EQ(err, 0); + EXPECT_EQ(0, err); memset(src_a, 1, kMaxWidth); err = ComputeSumSquareError(src_a, src_b, kMaxWidth); @@ -168,90 +253,83 @@ TEST_F(libyuvTest, SumSquareError) { memset(src_b, 193, kMaxWidth); err = ComputeSumSquareError(src_a, src_b, kMaxWidth); - EXPECT_EQ(err, (kMaxWidth * 3 * 3)); - - srandom(time(NULL)); + EXPECT_EQ(kMaxWidth * 3 * 3, err); for (int i = 0; i < kMaxWidth; ++i) { - src_a[i] = (random() & 0xff); - src_b[i] = (random() & 0xff); + 
src_a[i] = (fastrand() & 0xff); + src_b[i] = (fastrand() & 0xff); } - MaskCpuFlags(kCpuInitialized); + MaskCpuFlags(disable_cpu_flags_); uint64 c_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); - MaskCpuFlags(-1); + MaskCpuFlags(benchmark_cpu_info_); uint64 opt_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); EXPECT_EQ(c_err, opt_err); - free_aligned_buffer_16(src_a) - free_aligned_buffer_16(src_b) + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); } -TEST_F(libyuvTest, BenchmarkPsnr_C) { - align_buffer_16(src_a, benchmark_width_ * benchmark_height_) - align_buffer_16(src_b, benchmark_width_ * benchmark_height_) - +TEST_F(LibYUVBaseTest, BenchmarkPsnr_Opt) { + align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_); + align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_); for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { src_a[i] = i; src_b[i] = i; } - MaskCpuFlags(kCpuInitialized); + MaskCpuFlags(benchmark_cpu_info_); - double c_time = get_time(); + double opt_time = get_time(); for (int i = 0; i < benchmark_iterations_; ++i) CalcFramePsnr(src_a, benchmark_width_, src_b, benchmark_width_, benchmark_width_, benchmark_height_); - c_time = (get_time() - c_time) / benchmark_iterations_; - printf("BenchmarkPsnr_C - %8.2f us c\n", c_time * 1e6); - - MaskCpuFlags(-1); + opt_time = (get_time() - opt_time) / benchmark_iterations_; + printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6); EXPECT_EQ(0, 0); - free_aligned_buffer_16(src_a) - free_aligned_buffer_16(src_b) + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); } -TEST_F(libyuvTest, BenchmarkPsnr_OPT) { - align_buffer_16(src_a, benchmark_width_ * benchmark_height_) - align_buffer_16(src_b, benchmark_width_ * benchmark_height_) - +TEST_F(LibYUVBaseTest, BenchmarkPsnr_Unaligned) { + align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_ + 1); + align_buffer_page_end(src_b, benchmark_width_ * 
benchmark_height_); for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { - src_a[i] = i; + src_a[i + 1] = i; src_b[i] = i; } - MaskCpuFlags(-1); + MaskCpuFlags(benchmark_cpu_info_); double opt_time = get_time(); for (int i = 0; i < benchmark_iterations_; ++i) - CalcFramePsnr(src_a, benchmark_width_, + CalcFramePsnr(src_a + 1, benchmark_width_, src_b, benchmark_width_, benchmark_width_, benchmark_height_); opt_time = (get_time() - opt_time) / benchmark_iterations_; - printf("BenchmarkPsnr_OPT - %8.2f us opt\n", opt_time * 1e6); + printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6); EXPECT_EQ(0, 0); - free_aligned_buffer_16(src_a) - free_aligned_buffer_16(src_b) + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); } -TEST_F(libyuvTest, Psnr) { - const int kSrcWidth = 1280; - const int kSrcHeight = 720; +TEST_F(LibYUVBaseTest, Psnr) { + const int kSrcWidth = benchmark_width_; + const int kSrcHeight = benchmark_height_; const int b = 128; const int kSrcPlaneSize = (kSrcWidth + b * 2) * (kSrcHeight + b * 2); const int kSrcStride = 2 * b + kSrcWidth; - align_buffer_16(src_a, kSrcPlaneSize) - align_buffer_16(src_b, kSrcPlaneSize) - + align_buffer_page_end(src_a, kSrcPlaneSize); + align_buffer_page_end(src_b, kSrcPlaneSize); memset(src_a, 0, kSrcPlaneSize); memset(src_b, 0, kSrcPlaneSize); @@ -279,36 +357,37 @@ TEST_F(libyuvTest, Psnr) { EXPECT_GT(err, 48.0); EXPECT_LT(err, 49.0); - for (int i = 0; i < kSrcPlaneSize; ++i) + for (int i = 0; i < kSrcPlaneSize; ++i) { src_a[i] = i; + } err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride, src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - EXPECT_GT(err, 4.0); - EXPECT_LT(err, 5.0); - - srandom(time(NULL)); + EXPECT_GT(err, 2.0); + if (kSrcWidth * kSrcHeight >= 256) { + EXPECT_LT(err, 6.0); + } memset(src_a, 0, kSrcPlaneSize); memset(src_b, 0, kSrcPlaneSize); for (int i = b; i < (kSrcHeight + b); ++i) { for (int j = b; j < (kSrcWidth + b); ++j) { - src_a[(i * 
kSrcStride) + j] = (random() & 0xff); - src_b[(i * kSrcStride) + j] = (random() & 0xff); + src_a[(i * kSrcStride) + j] = (fastrand() & 0xff); + src_b[(i * kSrcStride) + j] = (fastrand() & 0xff); } } - MaskCpuFlags(kCpuInitialized); + MaskCpuFlags(disable_cpu_flags_); double c_err, opt_err; c_err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride, src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - MaskCpuFlags(-1); + MaskCpuFlags(benchmark_cpu_info_); opt_err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride, src_b + kSrcStride * b + b, kSrcStride, @@ -316,48 +395,19 @@ TEST_F(libyuvTest, Psnr) { EXPECT_EQ(opt_err, c_err); - free_aligned_buffer_16(src_a) - free_aligned_buffer_16(src_b) -} - -TEST_F(libyuvTest, BenchmarkSsim_C) { - align_buffer_16(src_a, benchmark_width_ * benchmark_height_) - align_buffer_16(src_b, benchmark_width_ * benchmark_height_) - - for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { - src_a[i] = i; - src_b[i] = i; - } - - MaskCpuFlags(kCpuInitialized); - - double c_time = get_time(); - for (int i = 0; i < benchmark_iterations_; ++i) - CalcFrameSsim(src_a, benchmark_width_, - src_b, benchmark_width_, - benchmark_width_, benchmark_height_); - - c_time = (get_time() - c_time) / benchmark_iterations_; - printf("BenchmarkSsim_C - %8.2f us c\n", c_time * 1e6); - - MaskCpuFlags(-1); - - EXPECT_EQ(0, 0); - - free_aligned_buffer_16(src_a) - free_aligned_buffer_16(src_b) + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); } -TEST_F(libyuvTest, BenchmarkSsim_OPT) { - align_buffer_16(src_a, benchmark_width_ * benchmark_height_) - align_buffer_16(src_b, benchmark_width_ * benchmark_height_) - +TEST_F(LibYUVBaseTest, DISABLED_BenchmarkSsim_Opt) { + align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_); + align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_); for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { src_a[i] = i; src_b[i] = i; } - 
MaskCpuFlags(-1); + MaskCpuFlags(benchmark_cpu_info_); double opt_time = get_time(); for (int i = 0; i < benchmark_iterations_; ++i) @@ -366,32 +416,37 @@ TEST_F(libyuvTest, BenchmarkSsim_OPT) { benchmark_width_, benchmark_height_); opt_time = (get_time() - opt_time) / benchmark_iterations_; - printf("BenchmarkPsnr_OPT - %8.2f us opt\n", opt_time * 1e6); + printf("BenchmarkSsim_Opt - %8.2f us opt\n", opt_time * 1e6); - EXPECT_EQ(0, 0); + EXPECT_EQ(0, 0); // Pass if we get this far. - free_aligned_buffer_16(src_a) - free_aligned_buffer_16(src_b) + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); } -TEST_F(libyuvTest, Ssim) { - const int kSrcWidth = 1280; - const int kSrcHeight = 720; +TEST_F(LibYUVBaseTest, Ssim) { + const int kSrcWidth = benchmark_width_; + const int kSrcHeight = benchmark_height_; const int b = 128; const int kSrcPlaneSize = (kSrcWidth + b * 2) * (kSrcHeight + b * 2); const int kSrcStride = 2 * b + kSrcWidth; - align_buffer_16(src_a, kSrcPlaneSize) - align_buffer_16(src_b, kSrcPlaneSize) - + align_buffer_page_end(src_a, kSrcPlaneSize); + align_buffer_page_end(src_b, kSrcPlaneSize); memset(src_a, 0, kSrcPlaneSize); memset(src_b, 0, kSrcPlaneSize); + if (kSrcWidth <=8 || kSrcHeight <= 8) { + printf("warning - Ssim size too small. 
Testing function executes.\n"); + } + double err; err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride, src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - EXPECT_EQ(err, 1.0); + if (kSrcWidth > 8 && kSrcHeight > 8) { + EXPECT_EQ(err, 1.0); + } memset(src_a, 255, kSrcPlaneSize); @@ -399,7 +454,9 @@ TEST_F(libyuvTest, Ssim) { src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - EXPECT_LT(err, 0.0001); + if (kSrcWidth > 8 && kSrcHeight > 8) { + EXPECT_LT(err, 0.0001); + } memset(src_a, 1, kSrcPlaneSize); @@ -407,44 +464,50 @@ TEST_F(libyuvTest, Ssim) { src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - EXPECT_GT(err, 0.8); - EXPECT_LT(err, 0.9); + if (kSrcWidth > 8 && kSrcHeight > 8) { + EXPECT_GT(err, 0.0001); + EXPECT_LT(err, 0.9); + } - for (int i = 0; i < kSrcPlaneSize; ++i) + for (int i = 0; i < kSrcPlaneSize; ++i) { src_a[i] = i; + } err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride, src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - EXPECT_GT(err, 0.008); - EXPECT_LT(err, 0.009); + if (kSrcWidth > 8 && kSrcHeight > 8) { + EXPECT_GT(err, 0.0); + EXPECT_LT(err, 0.01); + } - srandom(time(NULL)); for (int i = b; i < (kSrcHeight + b); ++i) { for (int j = b; j < (kSrcWidth + b); ++j) { - src_a[(i * kSrcStride) + j] = (random() & 0xff); - src_b[(i * kSrcStride) + j] = (random() & 0xff); + src_a[(i * kSrcStride) + j] = (fastrand() & 0xff); + src_b[(i * kSrcStride) + j] = (fastrand() & 0xff); } } - MaskCpuFlags(kCpuInitialized); + MaskCpuFlags(disable_cpu_flags_); double c_err, opt_err; c_err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride, src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - MaskCpuFlags(-1); + MaskCpuFlags(benchmark_cpu_info_); opt_err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride, src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - EXPECT_EQ(opt_err, c_err); + if (kSrcWidth > 8 && kSrcHeight > 8) { + EXPECT_EQ(opt_err, c_err); + } - 
free_aligned_buffer_16(src_a) - free_aligned_buffer_16(src_b) + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); } } // namespace libyuv diff --git a/files/unit_test/convert_test.cc b/files/unit_test/convert_test.cc new file mode 100644 index 00000000..56a2bfd8 --- /dev/null +++ b/files/unit_test/convert_test.cc @@ -0,0 +1,1861 @@ +/* + * Copyright 2011 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> +#include <time.h> + +#include "libyuv/basic_types.h" +#include "libyuv/compare.h" +#include "libyuv/convert.h" +#include "libyuv/convert_argb.h" +#include "libyuv/convert_from.h" +#include "libyuv/convert_from_argb.h" +#include "libyuv/cpu_id.h" +#ifdef HAVE_JPEG +#include "libyuv/mjpeg_decoder.h" +#endif +#include "libyuv/planar_functions.h" +#include "libyuv/rotate.h" +#include "libyuv/video_common.h" +#include "../unit_test/unit_test.h" + +namespace libyuv { + +#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) + +#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ +TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = benchmark_height_; \ + align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_u, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ + align_buffer_page_end(src_v, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ + align_buffer_page_end(dst_y_c, kWidth * kHeight); \ + align_buffer_page_end(dst_u_c, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_c, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth * kHeight); \ + align_buffer_page_end(dst_u_opt, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_opt, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ + src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ + (fastrand() & 0xff); \ + src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ + (fastrand() & 0xff); \ + } \ + } \ + memset(dst_y_c, 1, kWidth * kHeight); \ + memset(dst_u_c, 2, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_c, 3, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth * kHeight); \ + memset(dst_u_opt, 102, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_opt, 103, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ + src_u + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + src_v + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + 
dst_y_c, kWidth, \ + dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ + dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ + src_u + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + src_v + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + dst_y_opt, kWidth, \ + dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ + dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ + kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + int abs_diff = \ + abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \ + static_cast<int>(dst_y_opt[i * kWidth + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_EQ(0, max_diff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ + int abs_diff = \ + abs(static_cast<int>(dst_u_c[i * \ + SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ + static_cast<int>(dst_u_opt[i * \ + SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 3); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ + int abs_diff = \ + abs(static_cast<int>(dst_v_c[i * \ + SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ + static_cast<int>(dst_v_opt[i * \ + SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 3); \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_u_c); \ + free_aligned_buffer_page_end(dst_v_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_u_opt); \ + free_aligned_buffer_page_end(dst_v_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); 
\ + free_aligned_buffer_page_end(src_v); \ +} + +#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ - 4, _Any, +, 0) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 1) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) + +TESTPLANARTOP(I420, 2, 2, I420, 2, 2) +TESTPLANARTOP(I422, 2, 1, I420, 2, 2) +TESTPLANARTOP(I444, 1, 1, I420, 2, 2) +TESTPLANARTOP(I411, 4, 1, I420, 2, 2) +TESTPLANARTOP(I420, 2, 2, I422, 2, 1) +TESTPLANARTOP(I420, 2, 2, I444, 1, 1) +TESTPLANARTOP(I420, 2, 2, I411, 4, 1) +TESTPLANARTOP(I420, 2, 2, I420Mirror, 2, 2) +TESTPLANARTOP(I422, 2, 1, I422, 2, 1) +TESTPLANARTOP(I444, 1, 1, I444, 1, 1) + +#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ +TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = benchmark_height_; \ + align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_u, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ + align_buffer_page_end(src_v, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ + align_buffer_page_end(dst_y_c, kWidth * kHeight); \ + align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth * kHeight); \ + align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ + src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ + (fastrand() & 0xff); \ + src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ + (fastrand() & 0xff); \ + } \ + } \ + memset(dst_y_c, 1, kWidth * kHeight); \ + memset(dst_uv_c, 2, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth * kHeight); \ + memset(dst_uv_opt, 102, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ + src_u + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + src_v + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + dst_y_c, kWidth, \ + dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ + src_u + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + src_v + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + dst_y_opt, kWidth, \ + 
dst_uv_opt, \ + SUBSAMPLE(kWidth * 2, SUBSAMP_X), \ + kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + int abs_diff = \ + abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \ + static_cast<int>(dst_y_opt[i * kWidth + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 1); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \ + int abs_diff = \ + abs(static_cast<int>(dst_uv_c[i * \ + SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \ + static_cast<int>(dst_uv_opt[i * \ + SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 1); \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ +} + +#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ - 4, _Any, +, 0) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 1) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) + +TESTPLANARTOBP(I420, 2, 2, NV12, 2, 2) +TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2) + +#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, 
N, NEG, OFF) \ +TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kHeight = benchmark_height_; \ + align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ + align_buffer_page_end(dst_y_c, kWidth * kHeight); \ + align_buffer_page_end(dst_u_c, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_c, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth * kHeight); \ + align_buffer_page_end(dst_u_opt, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_opt, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ + for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ + src_uv[(i * 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ + (fastrand() & 0xff); \ + } \ + } \ + memset(dst_y_c, 1, kWidth * kHeight); \ + memset(dst_u_c, 2, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_c, 3, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth * kHeight); \ + memset(dst_u_opt, 102, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_opt, 103, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ + src_uv + OFF, \ + 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + dst_y_c, kWidth, \ + dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ + dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ + kWidth, NEG 
kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ + src_uv + OFF, \ + 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + dst_y_opt, kWidth, \ + dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ + dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ + kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + int abs_diff = \ + abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \ + static_cast<int>(dst_y_opt[i * kWidth + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 1); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ + int abs_diff = \ + abs(static_cast<int>(dst_u_c[i * \ + SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ + static_cast<int>(dst_u_opt[i * \ + SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 1); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ + int abs_diff = \ + abs(static_cast<int>(dst_v_c[i * \ + SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ + static_cast<int>(dst_v_opt[i * \ + SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 1); \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_u_c); \ + free_aligned_buffer_page_end(dst_v_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_u_opt); \ + free_aligned_buffer_page_end(dst_v_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ +} + +#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, 
SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ - 4, _Any, +, 0) \ + TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 1) \ + TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) + +TESTBIPLANARTOP(NV12, 2, 2, I420, 2, 2) +TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2) + +#define ALIGNINT(V, ALIGN) (((V) + (ALIGN) - 1) / (ALIGN) * (ALIGN)) + +#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C) \ +TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ + src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, \ + dst_argb_c + OFF, kStrideB, \ + kWidth, NEG kHeight); \ + 
MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ + src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, \ + dst_argb_opt + OFF, kStrideB, \ + kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \ + align_buffer_page_end(dst_argb32_c, kWidth * BPP_C * kHeight); \ + align_buffer_page_end(dst_argb32_opt, kWidth * BPP_C * kHeight); \ + memset(dst_argb32_c, 2, kWidth * BPP_C * kHeight); \ + memset(dst_argb32_opt, 102, kWidth * BPP_C * kHeight); \ + FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, \ + dst_argb32_c, kWidth * BPP_C , \ + kWidth, kHeight); \ + FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, \ + dst_argb32_opt, kWidth * BPP_C , \ + kWidth, kHeight); \ + for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \ + int abs_diff = \ + abs(static_cast<int>(dst_argb32_c[i]) - \ + static_cast<int>(dst_argb32_opt[i])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + EXPECT_LE(max_diff, DIFF); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + free_aligned_buffer_page_end(dst_argb32_c); \ + free_aligned_buffer_page_end(dst_argb32_opt); \ +} + +#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, DIFF, FMT_C, BPP_C) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, BPP_C) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, BPP_C) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, 
BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C) + +TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(J420, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(J420, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(H420, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(H420, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1, 2, ARGB, 4) +TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1, 2, ARGB, 4) +TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4) +TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1, 9, ARGB, 4) +TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1, 17, ARGB, 4) +TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(J422, 2, 1, ARGB, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(J422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(I411, 4, 1, ARGB, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1, 2, ARGB, 4) +TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1, 1, ARGB, 4) +TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1, 1, ARGB, 4) +TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1, 0, ARGB, 4) +TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1, 0, ARGB, 4) +TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1, 0, ARGB, 4) +TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1, 0, ARGB, 4) + +#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, DIFF, N, NEG, OFF, ATTEN) \ +TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(src_a, kWidth * kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + src_a[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ + src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, \ + src_a + OFF, kWidth, \ + dst_argb_c + OFF, kStrideB, \ + kWidth, NEG kHeight, ATTEN); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ + src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, \ + src_a + OFF, kWidth, \ + dst_argb_opt + OFF, kStrideB, \ + kWidth, NEG kHeight, ATTEN); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + int abs_diff = \ + abs(static_cast<int>(dst_argb_c[i + OFF]) - \ + static_cast<int>(dst_argb_opt[i + OFF])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + EXPECT_LE(max_diff, DIFF); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(src_a); \ + 
free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ +} + +#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, DIFF) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Invert, -, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Opt, +, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Premult, +, 0, 1) + +TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2) +TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2) + +#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + W1280, DIFF, N, NEG, OFF) \ +TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = benchmark_height_; \ + const int kStrideB = kWidth * BPP_B; \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_uv, \ + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB * kHeight); \ + align_buffer_page_end(dst_argb_opt, kStrideB * kHeight); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < kStrideUV * 2; ++j) { \ + src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff); \ + } \ + } \ + memset(dst_argb_c, 1, kStrideB * kHeight); \ + memset(dst_argb_opt, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ + src_uv + OFF, kStrideUV * 2, \ + dst_argb_c, kWidth * BPP_B, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ + src_uv + OFF, kStrideUV * 2, \ + dst_argb_opt, kWidth * BPP_B, \ + kWidth, NEG kHeight); \ + } \ + /* Convert to ARGB so 565 is expanded to bytes that can be compared. 
*/ \ + align_buffer_page_end(dst_argb32_c, kWidth * 4 * kHeight); \ + align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \ + memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \ + memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \ + FMT_B##ToARGB(dst_argb_c, kStrideB, \ + dst_argb32_c, kWidth * 4, \ + kWidth, kHeight); \ + FMT_B##ToARGB(dst_argb_opt, kStrideB, \ + dst_argb32_opt, kWidth * 4, \ + kWidth, kHeight); \ + int max_diff = 0; \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth * 4; ++j) { \ + int abs_diff = \ + abs(static_cast<int>(dst_argb32_c[i * kWidth * 4 + j]) - \ + static_cast<int>(dst_argb32_opt[i * kWidth * 4 + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, DIFF); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + free_aligned_buffer_page_end(dst_argb32_c); \ + free_aligned_buffer_page_end(dst_argb32_opt); \ +} + +#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + benchmark_width_ - 4, DIFF, _Any, +, 0) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + benchmark_width_, DIFF, _Unaligned, +, 1) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + benchmark_width_, DIFF, _Invert, -, 0) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + benchmark_width_, DIFF, _Opt, +, 0) + +TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2) +TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2) +TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) + +#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, DIFF, N, NEG, OFF) \ +TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kStride = \ + (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ + align_buffer_page_end(src_argb, kStride * kHeight + OFF); \ + align_buffer_page_end(dst_y_c, kWidth * kHeight); \ + align_buffer_page_end(dst_u_c, \ + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_c, \ + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth * kHeight); \ + align_buffer_page_end(dst_u_opt, \ + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_opt, \ + kStrideUV * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_c, 1, kWidth * kHeight); \ + memset(dst_u_c, 2, \ + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_c, 3, \ + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth * kHeight); \ + memset(dst_u_opt, 102, \ + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_opt, 103, \ + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ + dst_y_c, kWidth, \ + dst_u_c, kStrideUV, \ + dst_v_c, kStrideUV, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ + dst_y_opt, kWidth, \ + dst_u_opt, kStrideUV, \ + dst_v_opt, kStrideUV, \ + kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_NEAR(static_cast<int>(dst_y_c[i * kWidth + j]), \ + static_cast<int>(dst_y_opt[i * kWidth + j]), DIFF); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < kStrideUV; ++j) { \ + 
EXPECT_NEAR(static_cast<int>(dst_u_c[i * kStrideUV + j]), \ + static_cast<int>(dst_u_opt[i * kStrideUV + j]), DIFF); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < kStrideUV; ++j) { \ + EXPECT_NEAR(static_cast<int>(dst_v_c[i * \ + kStrideUV + j]), \ + static_cast<int>(dst_v_opt[i * \ + kStrideUV + j]), DIFF); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_u_c); \ + free_aligned_buffer_page_end(dst_v_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_u_opt); \ + free_aligned_buffer_page_end(dst_v_opt); \ + free_aligned_buffer_page_end(src_argb); \ +} + +#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + DIFF) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ - 4, DIFF, _Any, +, 0) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, DIFF, _Unaligned, +, 1) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, DIFF, _Invert, -, 0) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, DIFF, _Opt, +, 0) + +TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4) +#if defined(__arm__) || defined (__aarch64__) +// arm version subsamples by summing 4 pixels then multiplying by matrix with +// 4x smaller coefficients which are rounded to nearest integer. +TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 4) +TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, 4) +#else +TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 0) +TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, 0) +#endif +TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4) +TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4) +TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4) +TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4) +TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4) +TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5) +// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9. 
+TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15) +TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17) +TESTATOPLANAR(ARGB, 4, 1, I411, 4, 1, 4) +TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2) +TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2) +TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2) +TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2) +TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2) +TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2) +TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2) +TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2) + +#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ +TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kHeight = benchmark_height_; \ + const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + align_buffer_page_end(src_argb, kStride * kHeight + OFF); \ + align_buffer_page_end(dst_y_c, kWidth * kHeight); \ + align_buffer_page_end(dst_uv_c, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth * kHeight); \ + align_buffer_page_end(dst_uv_opt, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ + memset(dst_y_c, 1, kWidth * kHeight); \ + memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth * kHeight); \ + memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ + dst_y_c, kWidth, dst_uv_c, kStrideUV * 2, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ + dst_y_opt, kWidth, \ + dst_uv_opt, kStrideUV * 2, kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i 
< kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + int abs_diff = \ + abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \ + static_cast<int>(dst_y_opt[i * kWidth + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 4); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < kStrideUV * 2; ++j) { \ + int abs_diff = \ + abs(static_cast<int>(dst_uv_c[i * kStrideUV * 2 + j]) - \ + static_cast<int>(dst_uv_opt[i * kStrideUV * 2 + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 4); \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_argb); \ +} + +#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ - 4, _Any, +, 0) \ + TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 1) \ + TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) + +TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2) +TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2) +TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2) +TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) + +#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, \ + W1280, DIFF, N, NEG, OFF) \ +TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = benchmark_height_; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, kStrideA * kHeightA + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \ + align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \ + for (int i = 0; i < kStrideA * kHeightA; ++i) { \ + src_argb[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c, 1, kStrideB * kHeightB); \ + memset(dst_argb_opt, 101, kStrideB * kHeightB); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \ + dst_argb_c, kStrideB, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \ + dst_argb_opt, kStrideB, \ + kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kStrideB * kHeightB; ++i) { \ + int abs_diff = \ + abs(static_cast<int>(dst_argb_c[i]) - \ + static_cast<int>(dst_argb_opt[i])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + EXPECT_LE(max_diff, DIFF); \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ +} + +#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \ +TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \ + for (int times = 0; times < benchmark_iterations_; ++times) { \ + const int kWidth = (fastrand() & 63) + 1; \ + const int kHeight = (fastrand() & 31) + 1; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = 
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;\ + const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;\ + align_buffer_page_end(src_argb, kStrideA * kHeightA); \ + align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \ + align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \ + for (int i = 0; i < kStrideA * kHeightA; ++i) { \ + src_argb[i] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c, 123, kStrideB * kHeightB); \ + memset(dst_argb_opt, 123, kStrideB * kHeightB); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B(src_argb, kStrideA, \ + dst_argb_c, kStrideB, \ + kWidth, kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + FMT_A##To##FMT_B(src_argb, kStrideA, \ + dst_argb_opt, kStrideB, \ + kWidth, kHeight); \ + int max_diff = 0; \ + for (int i = 0; i < kStrideB * kHeightB; ++i) { \ + int abs_diff = \ + abs(static_cast<int>(dst_argb_c[i]) - \ + static_cast<int>(dst_argb_opt[i])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + EXPECT_LE(max_diff, DIFF); \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } \ +} + +#define TESTATOB(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \ + TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, \ + benchmark_width_ - 4, DIFF, _Any, +, 0) \ + TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, \ + benchmark_width_, DIFF, _Unaligned, +, 1) \ + TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, \ + benchmark_width_, DIFF, _Invert, -, 0) \ + TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, \ + benchmark_width_, DIFF, _Opt, +, 0) \ + TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) + +TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0) 
+TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0) +TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0) +TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0) +TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4) +TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4) +TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2) +TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2) +TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0) +TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0) +TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0) +TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0) +TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, 4) +TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, 4) +TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0) +TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0) +TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0) +TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0) +TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0) + +#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, \ + W1280, DIFF, N, NEG, OFF) \ +TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = benchmark_height_; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, kStrideA * kHeightA + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \ + align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \ + for (int i = 0; i < kStrideA * kHeightA; ++i) { \ + src_argb[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c, 1, kStrideB * kHeightB); \ + memset(dst_argb_opt, 101, kStrideB * kHeightB); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, \ + dst_argb_c, kStrideB, \ + NULL, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, \ + dst_argb_opt, kStrideB, \ + NULL, kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kStrideB * kHeightB; ++i) { \ + int abs_diff = \ + abs(static_cast<int>(dst_argb_c[i]) - \ + static_cast<int>(dst_argb_opt[i])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + EXPECT_LE(max_diff, DIFF); \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ +} + +#define TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \ +TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither_Random) { \ + for (int times = 0; times < benchmark_iterations_; ++times) { \ + const int kWidth = (fastrand() & 63) + 1; \ + const int kHeight = (fastrand() & 31) + 1; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * 
HEIGHT_B; \ + const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;\ + const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;\ + align_buffer_page_end(src_argb, kStrideA * kHeightA); \ + align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \ + align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \ + for (int i = 0; i < kStrideA * kHeightA; ++i) { \ + src_argb[i] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c, 123, kStrideB * kHeightB); \ + memset(dst_argb_opt, 123, kStrideB * kHeightB); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B##Dither(src_argb, kStrideA, \ + dst_argb_c, kStrideB, \ + NULL, kWidth, kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + FMT_A##To##FMT_B##Dither(src_argb, kStrideA, \ + dst_argb_opt, kStrideB, \ + NULL, kWidth, kHeight); \ + int max_diff = 0; \ + for (int i = 0; i < kStrideB * kHeightB; ++i) { \ + int abs_diff = \ + abs(static_cast<int>(dst_argb_c[i]) - \ + static_cast<int>(dst_argb_opt[i])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + EXPECT_LE(max_diff, DIFF); \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } \ +} + +#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \ + TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, \ + benchmark_width_ - 4, DIFF, _Any, +, 0) \ + TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, \ + benchmark_width_, DIFF, _Unaligned, +, 1) \ + TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, \ + benchmark_width_, DIFF, _Invert, -, 0) \ + TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, \ + benchmark_width_, DIFF, _Opt, +, 0) \ + TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) + +TESTATOBD(ARGB, 4, 4, 
1, RGB565, 2, 2, 1, 0) + +#define TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, \ + W1280, N, NEG, OFF) \ +TEST_F(LibYUVConvertTest, FMT_ATOB##_Symetric##N) { \ + const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kHeight = benchmark_height_; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + align_buffer_page_end(src_argb, kStrideA * kHeightA + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideA * kHeightA); \ + align_buffer_page_end(dst_argb_opt, kStrideA * kHeightA); \ + for (int i = 0; i < kStrideA * kHeightA; ++i) { \ + src_argb[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c, 1, kStrideA * kHeightA); \ + memset(dst_argb_opt, 101, kStrideA * kHeightA); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_ATOB(src_argb + OFF, kStrideA, \ + dst_argb_c, kStrideA, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_ATOB(src_argb + OFF, kStrideA, \ + dst_argb_opt, kStrideA, \ + kWidth, NEG kHeight); \ + } \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_ATOB(dst_argb_c, kStrideA, \ + dst_argb_c, kStrideA, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + FMT_ATOB(dst_argb_opt, kStrideA, \ + dst_argb_opt, kStrideA, \ + kWidth, NEG kHeight); \ + for (int i = 0; i < kStrideA * kHeightA; ++i) { \ + EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]); \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ +} + +#define TESTSYM(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A) \ + TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, \ + benchmark_width_ - 4, _Any, +, 0) \ + TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, \ + benchmark_width_, _Unaligned, +, 1) \ + TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, \ + benchmark_width_, _Opt, +, 0) + 
+TESTSYM(ARGBToARGB, 4, 4, 1) +TESTSYM(ARGBToBGRA, 4, 4, 1) +TESTSYM(ARGBToABGR, 4, 4, 1) +TESTSYM(BGRAToARGB, 4, 4, 1) +TESTSYM(ABGRToARGB, 4, 4, 1) + +TEST_F(LibYUVConvertTest, Test565) { + SIMD_ALIGNED(uint8 orig_pixels[256][4]); + SIMD_ALIGNED(uint8 pixels565[256][2]); + + for (int i = 0; i < 256; ++i) { + for (int j = 0; j < 4; ++j) { + orig_pixels[i][j] = i; + } + } + ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1); + uint32 checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); + EXPECT_EQ(610919429u, checksum); +} + +#ifdef HAVE_JPEG +TEST_F(LibYUVConvertTest, ValidateJpeg) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ? + benchmark_width_ * benchmark_height_ : kMinJpeg; + const int kSize = kImageSize + kOff; + align_buffer_page_end(orig_pixels, kSize); + + // No SOI or EOI. Expect fail. + memset(orig_pixels, 0, kSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + // Test special value that matches marker start. + memset(orig_pixels, 0xff, kSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + // EOI, SOI. Expect pass. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + for (int times = 0; times < benchmark_iterations_; ++times) { + EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize)); + } + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, ValidateJpegLarge) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ? + benchmark_width_ * benchmark_height_ : kMinJpeg; + const int kSize = kImageSize + kOff; + const int kMultiple = 10; + const int kBufSize = kImageSize * kMultiple + kOff; + align_buffer_page_end(orig_pixels, kBufSize); + + // No SOI or EOI. Expect fail. 
+ memset(orig_pixels, 0, kBufSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kBufSize)); + + // EOI, SOI. Expect pass. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + for (int times = 0; times < benchmark_iterations_; ++times) { + EXPECT_TRUE(ValidateJpeg(orig_pixels, kBufSize)); + } + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, InvalidateJpeg) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ? + benchmark_width_ * benchmark_height_ : kMinJpeg; + const int kSize = kImageSize + kOff; + align_buffer_page_end(orig_pixels, kSize); + + // NULL pointer. Expect fail. + EXPECT_FALSE(ValidateJpeg(NULL, kSize)); + + // Negative size. Expect fail. + EXPECT_FALSE(ValidateJpeg(orig_pixels, -1)); + + // Too large size. Expect fail. + EXPECT_FALSE(ValidateJpeg(orig_pixels, 0xfb000000ull)); + + // No SOI or EOI. Expect fail. + memset(orig_pixels, 0, kSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + // SOI but no EOI. Expect fail. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + for (int times = 0; times < benchmark_iterations_; ++times) { + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + } + + // EOI but no SOI. Expect fail. + orig_pixels[0] = 0; + orig_pixels[1] = 0; + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, FuzzJpeg) { + // SOI but no EOI. Expect fail. + for (int times = 0; times < benchmark_iterations_; ++times) { + const int kSize = fastrand() % 5000 + 2; + align_buffer_page_end(orig_pixels, kSize); + MemRandomize(orig_pixels, kSize); + + // Add SOI so frame will be scanned. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. 
+ orig_pixels[kSize - 1] = 0xff; + ValidateJpeg(orig_pixels, kSize); // Failure normally expected. + free_aligned_buffer_page_end(orig_pixels); + } +} + +TEST_F(LibYUVConvertTest, MJPGToI420) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ? + benchmark_width_ * benchmark_height_ : kMinJpeg; + const int kSize = kImageSize + kOff; + align_buffer_page_end(orig_pixels, kSize); + align_buffer_page_end(dst_y_opt, benchmark_width_ * benchmark_height_); + align_buffer_page_end(dst_u_opt, + SUBSAMPLE(benchmark_width_, 2) * + SUBSAMPLE(benchmark_height_, 2)); + align_buffer_page_end(dst_v_opt, + SUBSAMPLE(benchmark_width_, 2) * + SUBSAMPLE(benchmark_height_, 2)); + + // EOI, SOI to make MJPG appear valid. + memset(orig_pixels, 0, kSize); + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + + for (int times = 0; times < benchmark_iterations_; ++times) { + int ret = MJPGToI420(orig_pixels, kSize, + dst_y_opt, benchmark_width_, + dst_u_opt, SUBSAMPLE(benchmark_width_, 2), + dst_v_opt, SUBSAMPLE(benchmark_width_, 2), + benchmark_width_, benchmark_height_, + benchmark_width_, benchmark_height_); + // Expect failure because image is not really valid. + EXPECT_EQ(1, ret); + } + + free_aligned_buffer_page_end(dst_y_opt); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_opt); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, MJPGToARGB) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ? + benchmark_width_ * benchmark_height_ : kMinJpeg; + const int kSize = kImageSize + kOff; + align_buffer_page_end(orig_pixels, kSize); + align_buffer_page_end(dst_argb_opt, benchmark_width_ * benchmark_height_ * 4); + + // EOI, SOI to make MJPG appear valid. 
+ memset(orig_pixels, 0, kSize); + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + + for (int times = 0; times < benchmark_iterations_; ++times) { + int ret = MJPGToARGB(orig_pixels, kSize, + dst_argb_opt, benchmark_width_ * 4, + benchmark_width_, benchmark_height_, + benchmark_width_, benchmark_height_); + // Expect failure because image is not really valid. + EXPECT_EQ(1, ret); + } + + free_aligned_buffer_page_end(dst_argb_opt); + free_aligned_buffer_page_end(orig_pixels); +} + +#endif // HAVE_JPEG + +TEST_F(LibYUVConvertTest, NV12Crop) { + const int SUBSAMP_X = 2; + const int SUBSAMP_Y = 2; + const int kWidth = benchmark_width_; + const int kHeight = benchmark_height_; + const int crop_y = + ((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1; + const int kDestWidth = benchmark_width_; + const int kDestHeight = benchmark_height_ - crop_y * 2; + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); + const int sample_size = kWidth * kHeight + + kStrideUV * + SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; + align_buffer_page_end(src_y, sample_size); + uint8* src_uv = src_y + kWidth * kHeight; + + align_buffer_page_end(dst_y, kDestWidth * kDestHeight); + align_buffer_page_end(dst_u, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + align_buffer_page_end(dst_v, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight); + align_buffer_page_end(dst_u_2, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + align_buffer_page_end(dst_v_2, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + for (int i = 0; i < kHeight * kWidth; ++i) { + src_y[i] = (fastrand() & 0xff); + } + for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) * + kStrideUV) * 2; ++i) { + src_uv[i] = (fastrand() & 0xff); + } + memset(dst_y, 1, kDestWidth * 
kDestHeight); + memset(dst_u, 2, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_v, 3, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_y_2, 1, kDestWidth * kDestHeight); + memset(dst_u_2, 2, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_v_2, 3, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + ConvertToI420(src_y, sample_size, + dst_y_2, kDestWidth, + dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X), + dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X), + 0, crop_y, + kWidth, kHeight, + kDestWidth, kDestHeight, + libyuv::kRotate0, libyuv::FOURCC_NV12); + + NV12ToI420(src_y + crop_y * kWidth, kWidth, + src_uv + (crop_y / 2) * kStrideUV * 2, + kStrideUV * 2, + dst_y, kDestWidth, + dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X), + dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X), + kDestWidth, kDestHeight); + + for (int i = 0; i < kDestHeight; ++i) { + for (int j = 0; j < kDestWidth; ++j) { + EXPECT_EQ(dst_y[i * kWidth + j], dst_y_2[i * kWidth + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], + dst_u_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], + dst_v_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); + free_aligned_buffer_page_end(dst_y_2); + free_aligned_buffer_page_end(dst_u_2); + free_aligned_buffer_page_end(dst_v_2); + free_aligned_buffer_page_end(src_y); +} + +TEST_F(LibYUVConvertTest, TestYToARGB) { + uint8 y[32]; + uint8 expectedg[32]; + for (int i = 0; i < 32; ++i) { + y[i] = i * 5 + 17; 
+ expectedg[i] = static_cast<int>((y[i] - 16) * 1.164f + 0.5f); + } + uint8 argb[32 * 4]; + YToARGB(y, 0, argb, 0, 32, 1); + + for (int i = 0; i < 32; ++i) { + printf("%2d %d: %d <-> %d,%d,%d,%d\n", i, y[i], expectedg[i], + argb[i * 4 + 0], + argb[i * 4 + 1], + argb[i * 4 + 2], + argb[i * 4 + 3]); + } + for (int i = 0; i < 32; ++i) { + EXPECT_EQ(expectedg[i], argb[i * 4 + 0]); + } +} + +static const uint8 kNoDither4x4[16] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, +}; + +TEST_F(LibYUVConvertTest, TestNoDither) { + align_buffer_page_end(src_argb, benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_rgb565dither, + benchmark_width_ * benchmark_height_ * 2); + MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4); + MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2); + ARGBToRGB565(src_argb, benchmark_width_ * 4, + dst_rgb565, benchmark_width_ * 2, + benchmark_width_, benchmark_height_); + ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, + dst_rgb565dither, benchmark_width_ * 2, + kNoDither4x4, benchmark_width_, benchmark_height_); + for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) { + EXPECT_EQ(dst_rgb565[i], dst_rgb565dither[i]); + } + + free_aligned_buffer_page_end(src_argb); + free_aligned_buffer_page_end(dst_rgb565); + free_aligned_buffer_page_end(dst_rgb565dither); +} + +// Ordered 4x4 dither for 888 to 565. Values from 0 to 7. 
+static const uint8 kDither565_4x4[16] = { + 0, 4, 1, 5, + 6, 2, 7, 3, + 1, 5, 0, 4, + 7, 3, 6, 2, +}; + +TEST_F(LibYUVConvertTest, TestDither) { + align_buffer_page_end(src_argb, benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_rgb565dither, + benchmark_width_ * benchmark_height_ * 2); + align_buffer_page_end(dst_argb, benchmark_width_ * benchmark_height_ * 4); + align_buffer_page_end(dst_argbdither, + benchmark_width_ * benchmark_height_ * 4); + MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4); + MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2); + MemRandomize(dst_argb, benchmark_width_ * benchmark_height_ * 4); + MemRandomize(dst_argbdither, benchmark_width_ * benchmark_height_ * 4); + ARGBToRGB565(src_argb, benchmark_width_ * 4, + dst_rgb565, benchmark_width_ * 2, + benchmark_width_, benchmark_height_); + ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, + dst_rgb565dither, benchmark_width_ * 2, + kDither565_4x4, benchmark_width_, benchmark_height_); + RGB565ToARGB(dst_rgb565, benchmark_width_ * 2, + dst_argb, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + RGB565ToARGB(dst_rgb565dither, benchmark_width_ * 2, + dst_argbdither, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + + for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) { + EXPECT_NEAR(dst_argb[i], dst_argbdither[i], 9); + } + free_aligned_buffer_page_end(src_argb); + free_aligned_buffer_page_end(dst_rgb565); + free_aligned_buffer_page_end(dst_rgb565dither); + free_aligned_buffer_page_end(dst_argb); + free_aligned_buffer_page_end(dst_argbdither); +} + +#define TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C) \ +TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \ + 
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, \ + src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, \ + dst_argb_c + OFF, kStrideB, \ + NULL, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, \ + src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, \ + dst_argb_opt + OFF, kStrideB, \ + NULL, kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + /* Convert to ARGB so 565 is expanded to bytes that can be compared. 
*/ \ + align_buffer_page_end(dst_argb32_c, kWidth * BPP_C * kHeight); \ + align_buffer_page_end(dst_argb32_opt, kWidth * BPP_C * kHeight); \ + memset(dst_argb32_c, 2, kWidth * BPP_C * kHeight); \ + memset(dst_argb32_opt, 102, kWidth * BPP_C * kHeight); \ + FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, \ + dst_argb32_c, kWidth * BPP_C , \ + kWidth, kHeight); \ + FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, \ + dst_argb32_opt, kWidth * BPP_C , \ + kWidth, kHeight); \ + for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \ + int abs_diff = \ + abs(static_cast<int>(dst_argb32_c[i]) - \ + static_cast<int>(dst_argb32_opt[i])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + EXPECT_LE(max_diff, DIFF); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + free_aligned_buffer_page_end(dst_argb32_c); \ + free_aligned_buffer_page_end(dst_argb32_opt); \ +} + +#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, DIFF, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C) + +TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4) + +#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \ +TEST_F(LibYUVConvertTest, NAME) { \ + const int kWidth = benchmark_width_; \ + const int kHeight = benchmark_height_; \ + \ + align_buffer_page_end(orig_uyvy, \ + 
4 * SUBSAMPLE(kWidth, 2) * kHeight); \ + align_buffer_page_end(orig_y, kWidth * kHeight); \ + align_buffer_page_end(orig_u, \ + SUBSAMPLE(kWidth, 2) * \ + SUBSAMPLE(kHeight, 2)); \ + align_buffer_page_end(orig_v, \ + SUBSAMPLE(kWidth, 2) * \ + SUBSAMPLE(kHeight, 2)); \ + \ + align_buffer_page_end(dst_y_orig, kWidth * kHeight); \ + align_buffer_page_end(dst_uv_orig, 2 * \ + SUBSAMPLE(kWidth, 2) * \ + SUBSAMPLE(kHeight, 2)); \ + \ + align_buffer_page_end(dst_y, kWidth * kHeight); \ + align_buffer_page_end(dst_uv, 2 * \ + SUBSAMPLE(kWidth, 2) * \ + SUBSAMPLE(kHeight, 2)); \ + \ + MemRandomize(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \ + \ + /* Convert UYVY to NV12 in 2 steps for reference */ \ + libyuv::UYVYTOI420(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), \ + orig_y, kWidth, \ + orig_u, SUBSAMPLE(kWidth, 2), \ + orig_v, SUBSAMPLE(kWidth, 2), \ + kWidth, kHeight); \ + libyuv::I420ToNV12(orig_y, kWidth, \ + orig_u, SUBSAMPLE(kWidth, 2), \ + orig_v, SUBSAMPLE(kWidth, 2), \ + dst_y_orig, kWidth, \ + dst_uv_orig, 2 * SUBSAMPLE(kWidth, 2), \ + kWidth, kHeight); \ + \ + /* Convert to NV12 */ \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + libyuv::UYVYTONV12(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), \ + dst_y, kWidth, \ + dst_uv, 2 * SUBSAMPLE(kWidth, 2), \ + kWidth, kHeight); \ + } \ + \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + EXPECT_EQ(orig_y[i], dst_y[i]); \ + } \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + EXPECT_EQ(dst_y_orig[i], dst_y[i]); \ + } \ + for (int i = 0; i < 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2); ++i) { \ + EXPECT_EQ(dst_uv_orig[i], dst_uv[i]); \ + } \ + \ + free_aligned_buffer_page_end(orig_uyvy); \ + free_aligned_buffer_page_end(orig_y); \ + free_aligned_buffer_page_end(orig_u); \ + free_aligned_buffer_page_end(orig_v); \ + free_aligned_buffer_page_end(dst_y_orig); \ + free_aligned_buffer_page_end(dst_uv_orig); \ + free_aligned_buffer_page_end(dst_y); \ + free_aligned_buffer_page_end(dst_uv); \ +} + 
+TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12) +TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12) + +#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + W1280, N, NEG, OFF, FMT_C, BPP_C) \ +TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \ + const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kHeight = benchmark_height_; \ + const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(dst_argb_b, kStrideB * kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ + src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, \ + dst_argb_b + OFF, kStrideB, \ + kWidth, NEG kHeight); \ + } \ + /* Convert to a 3rd format in 1 step and 2 steps and compare */ \ + const int kStrideC = kWidth * BPP_C; \ + align_buffer_page_end(dst_argb_c, kStrideC * kHeight + OFF); \ + align_buffer_page_end(dst_argb_bc, kStrideC * kHeight + OFF); \ + memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ + memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ + FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, \ + src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, \ + dst_argb_c + OFF, kStrideC, \ + kWidth, NEG kHeight); \ + /* Convert B to C */ \ + FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, \ + dst_argb_bc + OFF, kStrideC, \ + kWidth, kHeight); \ + for (int i = 0; i < kStrideC * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i 
+ OFF], dst_argb_bc[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_b); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_bc); \ +} + +#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_ - 4, _Any, +, 0, FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Unaligned, +, 1, FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) + +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTPLANARTOE(J420, 2, 2, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(J420, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(H420, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3) +TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3) +TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2) +TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2) +TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(J422, 2, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(J422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(H422, 2, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(H422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, 
ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4) +TESTPLANARTOE(I411, 4, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(J444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4) + +#define TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + W1280, N, NEG, OFF, FMT_C, BPP_C, ATTEN) \ +TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \ + const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kHeight = benchmark_height_; \ + const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ + const int kSizeUV = \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(src_a, kWidth * kHeight + OFF); \ + align_buffer_page_end(dst_argb_b, kStrideB * kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + src_a[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ + src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ + src_a + OFF, kWidth, \ + dst_argb_b + OFF, kStrideB, \ + kWidth, NEG kHeight, ATTEN); \ + } \ + int max_diff = 0; \ + /* Convert to a 3rd format in 1 step and 2 steps and compare */ \ + const int kStrideC = kWidth * BPP_C; \ + align_buffer_page_end(dst_argb_c, kStrideC * kHeight + OFF); \ + 
align_buffer_page_end(dst_argb_bc, kStrideC * kHeight + OFF); \ + memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ + memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ + FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, \ + src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ + src_a + OFF, kWidth, \ + dst_argb_c + OFF, kStrideC, \ + kWidth, NEG kHeight, ATTEN); \ + /* Convert B to C */ \ + FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, \ + dst_argb_bc + OFF, kStrideC, \ + kWidth, kHeight); \ + for (int i = 0; i < kStrideC * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(src_a); \ + free_aligned_buffer_page_end(dst_argb_b); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_bc); \ +} + +#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + FMT_C, BPP_C) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_ - 4, _Any, +, 0, FMT_C, BPP_C, 0) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Unaligned, +, 1, FMT_C, BPP_C, 0) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Invert, -, 0, FMT_C, BPP_C, 0) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0) \ + TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ + benchmark_width_, _Premult, +, 0, FMT_C, BPP_C, 1) + +TESTQPLANARTOE(I420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) + +} // namespace libyuv diff --git a/files/unit_test/cpu_test.cc b/files/unit_test/cpu_test.cc index 52810e80..0cd06f9b 100644 --- a/files/unit_test/cpu_test.cc +++ 
b/files/unit_test/cpu_test.cc @@ -4,7 +4,7 @@ * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ @@ -18,8 +18,8 @@ namespace libyuv { -TEST_F(libyuvTest, TestCpuHas) { - int cpu_flags = TestCpuFlag(~kCpuInitialized); +TEST_F(LibYUVBaseTest, TestCpuHas) { + int cpu_flags = TestCpuFlag(-1); printf("Cpu Flags %x\n", cpu_flags); int has_arm = TestCpuFlag(kCpuHasARM); printf("Has ARM %x\n", has_arm); @@ -39,14 +39,46 @@ TEST_F(libyuvTest, TestCpuHas) { printf("Has AVX %x\n", has_avx); int has_avx2 = TestCpuFlag(kCpuHasAVX2); printf("Has AVX2 %x\n", has_avx2); + int has_erms = TestCpuFlag(kCpuHasERMS); + printf("Has ERMS %x\n", has_erms); + int has_fma3 = TestCpuFlag(kCpuHasFMA3); + printf("Has FMA3 %x\n", has_fma3); + int has_avx3 = TestCpuFlag(kCpuHasAVX3); + printf("Has AVX3 %x\n", has_avx3); + int has_mips = TestCpuFlag(kCpuHasMIPS); + printf("Has MIPS %x\n", has_mips); + int has_dspr2 = TestCpuFlag(kCpuHasDSPR2); + printf("Has DSPR2 %x\n", has_dspr2); +} + +TEST_F(LibYUVBaseTest, TestCpuCompilerEnabled) { +#if defined(__aarch64__) + printf("Arm64 build\n"); +#endif +#if defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON) + printf("Neon build enabled\n"); +#endif +#if defined(__x86_64__) || defined(_M_X64) + printf("x64 build\n"); +#endif +#ifdef _MSC_VER +printf("_MSC_VER %d\n", _MSC_VER); +#endif +#if !defined(LIBYUV_DISABLE_X86) && (defined(GCC_HAS_AVX2) || \ + defined(CLANG_HAS_AVX2) || defined(VISUALC_HAS_AVX2)) + printf("Has AVX2 1\n"); +#else + printf("Has AVX2 0\n"); + // If compiler does not support AVX2, the following function not expected: +#endif } #if defined(__i386__) || defined(__x86_64__) || \ 
defined(_M_IX86) || defined(_M_X64) -TEST_F(libyuvTest, TestCpuId) { +TEST_F(LibYUVBaseTest, TestCpuId) { int has_x86 = TestCpuFlag(kCpuHasX86); if (has_x86) { - int cpu_info[4]; + uint32 cpu_info[4]; // Vendor ID: // AuthenticAMD AMD processor // CentaurHauls Centaur processor @@ -58,7 +90,7 @@ TEST_F(libyuvTest, TestCpuId) { // RiseRiseRise Rise Technology processor // SiS SiS SiS SiS processor // UMC UMC UMC UMC processor - CpuId(cpu_info, 0); + CpuId(0, 0, cpu_info); cpu_info[0] = cpu_info[1]; // Reorder output cpu_info[1] = cpu_info[3]; cpu_info[3] = 0; @@ -73,7 +105,7 @@ TEST_F(libyuvTest, TestCpuId) { // 13:12 - Processor Type // 19:16 - Extended Model // 27:20 - Extended Family - CpuId(cpu_info, 1); + CpuId(1, 0, cpu_info); int family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0); int model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0); printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family, @@ -82,18 +114,25 @@ TEST_F(libyuvTest, TestCpuId) { } #endif -TEST_F(libyuvTest, TestLinuxNeon) { - int testdata = ArmCpuCaps("unit_test/testdata/arm_v7.txt"); - if (testdata) { - EXPECT_EQ(kCpuInitialized, - ArmCpuCaps("unit_test/testdata/arm_v7.txt")); - EXPECT_EQ((kCpuInitialized | kCpuHasNEON), - ArmCpuCaps("unit_test/testdata/tegra3.txt")); +static int FileExists(const char* file_name) { + FILE* f = fopen(file_name, "r"); + if (!f) { + return 0; + } + fclose(f); + return 1; +} + +TEST_F(LibYUVBaseTest, TestLinuxNeon) { + if (FileExists("../../unit_test/testdata/arm_v7.txt")) { + EXPECT_EQ(0, ArmCpuCaps("../../unit_test/testdata/arm_v7.txt")); + EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/tegra3.txt")); + EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/juno.txt")); } else { - printf("WARNING: unable to load \"unit_test/testdata/arm_v7.txt\"\n"); + printf("WARNING: unable to load \"../../unit_test/testdata/arm_v7.txt\"\n"); } #if defined(__linux__) && defined(__ARM_NEON__) - EXPECT_NE(0, 
ArmCpuCaps("/proc/cpuinfo")); + EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("/proc/cpuinfo")); #endif } diff --git a/files/unit_test/math_test.cc b/files/unit_test/math_test.cc new file mode 100644 index 00000000..19af9f6b --- /dev/null +++ b/files/unit_test/math_test.cc @@ -0,0 +1,155 @@ +/* + * Copyright 2013 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "libyuv/basic_types.h" +#include "libyuv/cpu_id.h" +#include "libyuv/scale.h" +#include "libyuv/scale_row.h" +#include "../unit_test/unit_test.h" + +namespace libyuv { + +TEST_F(LibYUVBaseTest, TestFixedDiv) { + int num[1280]; + int div[1280]; + int result_opt[1280]; + int result_c[1280]; + + EXPECT_EQ(0x10000, libyuv::FixedDiv(1, 1)); + EXPECT_EQ(0x7fff0000, libyuv::FixedDiv(0x7fff, 1)); + // TODO(fbarchard): Avoid the following that throw exceptions. 
+ // EXPECT_EQ(0x100000000, libyuv::FixedDiv(0x10000, 1)); + // EXPECT_EQ(0x80000000, libyuv::FixedDiv(0x8000, 1)); + + EXPECT_EQ(0x20000, libyuv::FixedDiv(640 * 2, 640)); + EXPECT_EQ(0x30000, libyuv::FixedDiv(640 * 3, 640)); + EXPECT_EQ(0x40000, libyuv::FixedDiv(640 * 4, 640)); + EXPECT_EQ(0x50000, libyuv::FixedDiv(640 * 5, 640)); + EXPECT_EQ(0x60000, libyuv::FixedDiv(640 * 6, 640)); + EXPECT_EQ(0x70000, libyuv::FixedDiv(640 * 7, 640)); + EXPECT_EQ(0x80000, libyuv::FixedDiv(640 * 8, 640)); + EXPECT_EQ(0xa0000, libyuv::FixedDiv(640 * 10, 640)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(960 * 2, 960)); + EXPECT_EQ(0x08000, libyuv::FixedDiv(640 / 2, 640)); + EXPECT_EQ(0x04000, libyuv::FixedDiv(640 / 4, 640)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(1080 * 2, 1080)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(200000, 100000)); + EXPECT_EQ(0x18000, libyuv::FixedDiv(150000, 100000)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(40000, 20000)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(-40000, -20000)); + EXPECT_EQ(-0x20000, libyuv::FixedDiv(40000, -20000)); + EXPECT_EQ(-0x20000, libyuv::FixedDiv(-40000, 20000)); + EXPECT_EQ(0x10000, libyuv::FixedDiv(4095, 4095)); + EXPECT_EQ(0x10000, libyuv::FixedDiv(4096, 4096)); + EXPECT_EQ(0x10000, libyuv::FixedDiv(4097, 4097)); + EXPECT_EQ(123 * 65536, libyuv::FixedDiv(123, 1)); + + for (int i = 1; i < 4100; ++i) { + EXPECT_EQ(0x10000, libyuv::FixedDiv(i, i)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(i * 2, i)); + EXPECT_EQ(0x30000, libyuv::FixedDiv(i * 3, i)); + EXPECT_EQ(0x40000, libyuv::FixedDiv(i * 4, i)); + EXPECT_EQ(0x08000, libyuv::FixedDiv(i, i * 2)); + EXPECT_NEAR(16384 * 65536 / i, libyuv::FixedDiv(16384, i), 1); + } + EXPECT_EQ(123 * 65536, libyuv::FixedDiv(123, 1)); + + MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num)); + MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div)); + for (int j = 0; j < 1280; ++j) { + if (div[j] == 0) { + div[j] = 1280; + } + num[j] &= 0xffff; // Clamp to avoid divide overflow. 
+ } + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + for (int j = 0; j < 1280; ++j) { + result_opt[j] = libyuv::FixedDiv(num[j], div[j]); + } + } + for (int j = 0; j < 1280; ++j) { + result_c[j] = libyuv::FixedDiv_C(num[j], div[j]); + EXPECT_NEAR(result_c[j], result_opt[j], 1); + } +} + +TEST_F(LibYUVBaseTest, TestFixedDiv_Opt) { + int num[1280]; + int div[1280]; + int result_opt[1280]; + int result_c[1280]; + + MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num)); + MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div)); + for (int j = 0; j < 1280; ++j) { + num[j] &= 4095; // Make numerator smaller. + div[j] &= 4095; // Make divisor smaller. + if (div[j] == 0) { + div[j] = 1280; + } + } + + int has_x86 = TestCpuFlag(kCpuHasX86); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + if (has_x86) { + for (int j = 0; j < 1280; ++j) { + result_opt[j] = libyuv::FixedDiv(num[j], div[j]); + } + } else { + for (int j = 0; j < 1280; ++j) { + result_opt[j] = libyuv::FixedDiv_C(num[j], div[j]); + } + } + } + for (int j = 0; j < 1280; ++j) { + result_c[j] = libyuv::FixedDiv_C(num[j], div[j]); + EXPECT_NEAR(result_c[j], result_opt[j], 1); + } +} + +TEST_F(LibYUVBaseTest, TestFixedDiv1_Opt) { + int num[1280]; + int div[1280]; + int result_opt[1280]; + int result_c[1280]; + + MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num)); + MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div)); + for (int j = 0; j < 1280; ++j) { + num[j] &= 4095; // Make numerator smaller. + div[j] &= 4095; // Make divisor smaller. 
+ if (div[j] <= 1) { + div[j] = 1280; + } + } + + int has_x86 = TestCpuFlag(kCpuHasX86); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + if (has_x86) { + for (int j = 0; j < 1280; ++j) { + result_opt[j] = libyuv::FixedDiv1(num[j], div[j]); + } + } else { + for (int j = 0; j < 1280; ++j) { + result_opt[j] = libyuv::FixedDiv1_C(num[j], div[j]); + } + } + } + for (int j = 0; j < 1280; ++j) { + result_c[j] = libyuv::FixedDiv1_C(num[j], div[j]); + EXPECT_NEAR(result_c[j], result_opt[j], 1); + } +} + +} // namespace libyuv diff --git a/files/unit_test/planar_test.cc b/files/unit_test/planar_test.cc index e9053a35..bc0eebb5 100644 --- a/files/unit_test/planar_test.cc +++ b/files/unit_test/planar_test.cc @@ -4,460 +4,257 @@ * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <stdlib.h> #include <time.h> +#include "libyuv/compare.h" +#include "libyuv/convert.h" #include "libyuv/convert_argb.h" #include "libyuv/convert_from.h" -#include "libyuv/compare.h" +#include "libyuv/convert_from_argb.h" #include "libyuv/cpu_id.h" -#include "libyuv/format_conversion.h" #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "../unit_test/unit_test.h" -#if defined(_MSC_VER) -#define SIMD_ALIGNED(var) __declspec(align(16)) var -#else // __GNUC__ -#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) -#endif - namespace libyuv { -#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, N, NEG) \ -TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N##_OptVsC) { \ - const int kWidth = 1280; \ - const int kHeight = 720; \ - const int kStride = (kWidth * 8 * BPP_B + 7) / 8; \ - align_buffer_16(src_y, kWidth * kHeight); \ - align_buffer_16(src_u, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \ - align_buffer_16(src_v, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \ - align_buffer_16(dst_argb_c, kStride * kHeight); \ - align_buffer_16(dst_argb_opt, kStride * kHeight); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[(i * kWidth) + j] = (random() & 0xff); \ - for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) \ - for (int j = 0; j < kWidth / SUBSAMP_X; ++j) { \ - src_u[(i * kWidth / SUBSAMP_X) + j] = (random() & 0xff); \ - src_v[(i * kWidth / SUBSAMP_X) + j] = (random() & 0xff); \ - } \ - MaskCpuFlags(kCpuInitialized); \ - FMT_PLANAR##To##FMT_B(src_y, kWidth, \ - src_u, kWidth / SUBSAMP_X, \ - src_v, kWidth / SUBSAMP_X, \ - dst_argb_c, kStride, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y, kWidth, \ - src_u, kWidth / SUBSAMP_X, \ - src_v, kWidth / SUBSAMP_X, \ - dst_argb_opt, kStride, \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - 
for (int j = 0; j < kWidth * BPP_B; ++j) { \ - int abs_diff = \ - abs(static_cast<int>(dst_argb_c[i * kWidth * BPP_B + j]) - \ - static_cast<int>(dst_argb_opt[i * kWidth * BPP_B + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 2); \ - free_aligned_buffer_16(src_y) \ - free_aligned_buffer_16(src_u) \ - free_aligned_buffer_16(src_v) \ - free_aligned_buffer_16(dst_argb_c) \ - free_aligned_buffer_16(dst_argb_opt) \ -} - -#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, , +) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, Invert, -) - -TESTPLANARTOB(I420, 2, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, BGRA, 4) -TESTPLANARTOB(I420, 2, 2, ABGR, 4) -TESTPLANARTOB(I420, 2, 2, RGBA, 4) -TESTPLANARTOB(I420, 2, 2, RAW, 3) -TESTPLANARTOB(I420, 2, 2, RGB24, 3) -TESTPLANARTOB(I420, 2, 2, RGB565, 2) -TESTPLANARTOB(I420, 2, 2, ARGB1555, 2) -TESTPLANARTOB(I420, 2, 2, ARGB4444, 2) -TESTPLANARTOB(I422, 2, 1, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, BGRA, 4) -TESTPLANARTOB(I422, 2, 1, ABGR, 4) -TESTPLANARTOB(I422, 2, 1, RGBA, 4) -TESTPLANARTOB(I411, 4, 1, ARGB, 4) -TESTPLANARTOB(I444, 1, 1, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, YUY2, 2) -TESTPLANARTOB(I420, 2, 2, UYVY, 2) -// TODO(fbarchard): Re-enable test and fix valgrind. 
-// TESTPLANARTOB(I420, 2, 2, V210, 16 / 6) -TESTPLANARTOB(I420, 2, 2, I400, 1) -TESTPLANARTOB(I420, 2, 2, BayerBGGR, 1) -TESTPLANARTOB(I420, 2, 2, BayerRGGB, 1) -TESTPLANARTOB(I420, 2, 2, BayerGBRG, 1) -TESTPLANARTOB(I420, 2, 2, BayerGRBG, 1) - -#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - N, NEG) \ -TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N##_OptVsC) { \ - const int kWidth = 1280; \ - const int kHeight = 720; \ - align_buffer_16(src_y, kWidth * kHeight); \ - align_buffer_16(src_uv, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y * 2); \ - align_buffer_16(dst_argb_c, (kWidth * BPP_B) * kHeight); \ - align_buffer_16(dst_argb_opt, (kWidth * BPP_B) * kHeight); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[(i * kWidth) + j] = (random() & 0xff); \ - for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) \ - for (int j = 0; j < kWidth / SUBSAMP_X * 2; ++j) { \ - src_uv[(i * kWidth / SUBSAMP_X) * 2 + j] = (random() & 0xff); \ - } \ - MaskCpuFlags(kCpuInitialized); \ - FMT_PLANAR##To##FMT_B(src_y, kWidth, \ - src_uv, kWidth / SUBSAMP_X * 2, \ - dst_argb_c, kWidth * BPP_B, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y, kWidth, \ - src_uv, kWidth / SUBSAMP_X * 2, \ - dst_argb_opt, kWidth * BPP_B, \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth * BPP_B; ++j) { \ - int abs_diff = \ - abs(static_cast<int>(dst_argb_c[i * kWidth * BPP_B + j]) - \ - static_cast<int>(dst_argb_opt[i * kWidth * BPP_B + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 3); \ - free_aligned_buffer_16(src_y) \ - free_aligned_buffer_16(src_uv) \ - free_aligned_buffer_16(dst_argb_c) \ - free_aligned_buffer_16(dst_argb_opt) \ -} - -#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B) \ - 
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, , +) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, Invert, -) - -TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4) -TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4) -TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2) -TESTBIPLANARTOB(NV21, 2, 2, RGB565, 2) - -#define TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, N, NEG) \ -TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N##_OptVsC) { \ - const int kWidth = 1280; \ - const int kHeight = 720; \ - const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \ - align_buffer_16(src_argb, kStride * kHeight); \ - align_buffer_16(dst_y_c, kWidth * kHeight); \ - align_buffer_16(dst_u_c, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \ - align_buffer_16(dst_v_c, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \ - align_buffer_16(dst_y_opt, kWidth * kHeight); \ - align_buffer_16(dst_u_opt, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \ - align_buffer_16(dst_v_opt, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kStride; ++j) \ - src_argb[(i * kStride) + j] = (random() & 0xff); \ - MaskCpuFlags(kCpuInitialized); \ - FMT_A##To##FMT_PLANAR(src_argb, kStride, \ - dst_y_c, kWidth, \ - dst_u_c, kWidth / SUBSAMP_X, \ - dst_v_c, kWidth / SUBSAMP_X, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_PLANAR(src_argb, kStride, \ - dst_y_opt, kWidth, \ - dst_u_opt, kWidth / SUBSAMP_X, \ - dst_v_opt, kWidth / SUBSAMP_X, \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = \ - abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \ - static_cast<int>(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 2); \ - for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) { \ - for (int j = 0; j < 
kWidth / SUBSAMP_X; ++j) { \ - int abs_diff = \ - abs(static_cast<int>(dst_u_c[i * kWidth / SUBSAMP_X + j]) - \ - static_cast<int>(dst_u_opt[i * kWidth / SUBSAMP_X + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 2); \ - for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) { \ - for (int j = 0; j < kWidth / SUBSAMP_X; ++j) { \ - int abs_diff = \ - abs(static_cast<int>(dst_v_c[i * kWidth / SUBSAMP_X + j]) - \ - static_cast<int>(dst_v_opt[i * kWidth / SUBSAMP_X + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 2); \ - free_aligned_buffer_16(dst_y_c) \ - free_aligned_buffer_16(dst_u_c) \ - free_aligned_buffer_16(dst_v_c) \ - free_aligned_buffer_16(dst_y_opt) \ - free_aligned_buffer_16(dst_u_opt) \ - free_aligned_buffer_16(dst_v_opt) \ - free_aligned_buffer_16(src_argb) \ -} - -#define TESTATOPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, , +) \ - TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, Invert, -) - -TESTATOPLANAR(ARGB, 4, I420, 2, 2) -TESTATOPLANAR(BGRA, 4, I420, 2, 2) -TESTATOPLANAR(ABGR, 4, I420, 2, 2) -TESTATOPLANAR(RGBA, 4, I420, 2, 2) -TESTATOPLANAR(RAW, 3, I420, 2, 2) -TESTATOPLANAR(RGB24, 3, I420, 2, 2) -TESTATOPLANAR(RGB565, 2, I420, 2, 2) -TESTATOPLANAR(ARGB1555, 2, I420, 2, 2) -TESTATOPLANAR(ARGB4444, 2, I420, 2, 2) -// TESTATOPLANAR(ARGB, 4, I411, 4, 1) -TESTATOPLANAR(ARGB, 4, I422, 2, 1) -// TESTATOPLANAR(ARGB, 4, I444, 1, 1) -// TODO(fbarchard): Implement and test 411 and 444 -TESTATOPLANAR(YUY2, 2, I420, 2, 2) -TESTATOPLANAR(UYVY, 2, I420, 2, 2) -TESTATOPLANAR(YUY2, 2, I422, 2, 1) -TESTATOPLANAR(UYVY, 2, I422, 2, 1) -TESTATOPLANAR(V210, 16 / 6, I420, 2, 2) -TESTATOPLANAR(I400, 1, I420, 2, 2) -TESTATOPLANAR(BayerBGGR, 1, I420, 2, 2) -TESTATOPLANAR(BayerRGGB, 1, I420, 2, 2) -TESTATOPLANAR(BayerGBRG, 1, I420, 2, 2) -TESTATOPLANAR(BayerGRBG, 1, I420, 2, 2) - 
-#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, N, NEG) \ -TEST_F(libyuvTest, FMT_A##To##FMT_B##N##_OptVsC) { \ - const int kWidth = 1280; \ - const int kHeight = 720; \ - align_buffer_16(src_argb, (kWidth * BPP_A) * kHeight); \ - align_buffer_16(dst_argb_c, (kWidth * BPP_B) * kHeight); \ - align_buffer_16(dst_argb_opt, (kWidth * BPP_B) * kHeight); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight * kWidth * BPP_A; ++i) { \ - src_argb[i] = (random() & 0xff); \ - } \ - MaskCpuFlags(kCpuInitialized); \ - FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \ - dst_argb_c, kWidth * BPP_B, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \ - dst_argb_opt, kWidth * BPP_B, \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight * kWidth * BPP_B; ++i) { \ - int abs_diff = \ - abs(static_cast<int>(dst_argb_c[i]) - \ - static_cast<int>(dst_argb_opt[i])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - EXPECT_LE(max_diff, 2); \ - free_aligned_buffer_16(src_argb) \ - free_aligned_buffer_16(dst_argb_c) \ - free_aligned_buffer_16(dst_argb_opt) \ -} -#define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, , +) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, Invert, -) - -TESTATOB(I400, 1, 1, I400, 1) -TESTATOB(ARGB, 4, 4, ARGB, 4) -TESTATOB(ARGB, 4, 4, BGRA, 4) -TESTATOB(ARGB, 4, 4, ABGR, 4) -TESTATOB(ARGB, 4, 4, RGBA, 4) -TESTATOB(ARGB, 4, 4, RAW, 3) -TESTATOB(ARGB, 4, 4, RGB24, 3) -TESTATOB(ARGB, 4, 4, RGB565, 2) -TESTATOB(ARGB, 4, 4, ARGB1555, 2) -TESTATOB(ARGB, 4, 4, ARGB4444, 2) -TESTATOB(BGRA, 4, 4, ARGB, 4) -TESTATOB(ABGR, 4, 4, ARGB, 4) -TESTATOB(RGBA, 4, 4, ARGB, 4) -TESTATOB(RAW, 3, 3, ARGB, 4) -TESTATOB(RGB24, 3, 3, ARGB, 4) -TESTATOB(RGB565, 2, 2, ARGB, 4) -TESTATOB(ARGB1555, 2, 2, ARGB, 4) -TESTATOB(ARGB4444, 2, 2, ARGB, 4) -TESTATOB(YUY2, 2, 2, 
ARGB, 4) -TESTATOB(UYVY, 2, 2, ARGB, 4) -TESTATOB(M420, 3 / 2, 1, ARGB, 4) - -static const int kReadPad = 16; // Allow overread of 16 bytes. -#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B) \ -TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \ - srandom(time(NULL)); \ - for (int times = 0; times < benchmark_iterations_; ++times) { \ - const int kWidth = (random() & 63) + 1; \ - const int kHeight = (random() & 31) + 1; \ - align_buffer_page_end(src_argb, (kWidth * BPP_A) * kHeight + kReadPad); \ - align_buffer_page_end(dst_argb_c, (kWidth * BPP_B) * kHeight); \ - align_buffer_page_end(dst_argb_opt, (kWidth * BPP_B) * kHeight); \ - for (int i = 0; i < kHeight * kWidth * BPP_A; ++i) { \ - src_argb[i] = (random() & 0xff); \ - } \ - MaskCpuFlags(kCpuInitialized); \ - FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \ - dst_argb_c, kWidth * BPP_B, \ - kWidth, kHeight); \ - MaskCpuFlags(-1); \ - FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \ - dst_argb_opt, kWidth * BPP_B, \ - kWidth, kHeight); \ - int max_diff = 0; \ - for (int i = 0; i < kHeight * kWidth * BPP_B; ++i) { \ - int abs_diff = \ - abs(static_cast<int>(dst_argb_c[i]) - \ - static_cast<int>(dst_argb_opt[i])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - EXPECT_LE(max_diff, 2); \ - free_aligned_buffer_page_end(src_argb) \ - free_aligned_buffer_page_end(dst_argb_c) \ - free_aligned_buffer_page_end(dst_argb_opt) \ - } \ -} - -TESTATOBRANDOM(ARGB, 4, 4, ARGB, 4) -TESTATOBRANDOM(ARGB, 4, 4, BGRA, 4) -TESTATOBRANDOM(ARGB, 4, 4, ABGR, 4) -TESTATOBRANDOM(ARGB, 4, 4, RGBA, 4) -TESTATOBRANDOM(ARGB, 4, 4, RAW, 3) -TESTATOBRANDOM(ARGB, 4, 4, RGB24, 3) -TESTATOBRANDOM(ARGB, 4, 4, RGB565, 2) -TESTATOBRANDOM(ARGB, 4, 4, ARGB1555, 2) -TESTATOBRANDOM(ARGB, 4, 4, ARGB4444, 2) - -TESTATOBRANDOM(BGRA, 4, 4, ARGB, 4) -TESTATOBRANDOM(ABGR, 4, 4, ARGB, 4) -TESTATOBRANDOM(RGBA, 4, 4, ARGB, 4) -TESTATOBRANDOM(RAW, 3, 3, ARGB, 4) -TESTATOBRANDOM(RGB24, 3, 3, ARGB, 4) -TESTATOBRANDOM(RGB565, 2, 2, 
ARGB, 4) -TESTATOBRANDOM(ARGB1555, 2, 2, ARGB, 4) -TESTATOBRANDOM(ARGB4444, 2, 2, ARGB, 4) - -TEST_F(libyuvTest, TestAttenuate) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); - SIMD_ALIGNED(uint8 atten_pixels[256][4]); - SIMD_ALIGNED(uint8 unatten_pixels[256][4]); - SIMD_ALIGNED(uint8 atten2_pixels[256][4]); +TEST_F(LibYUVPlanarTest, TestAttenuate) { + const int kSize = 1280 * 4; + align_buffer_page_end(orig_pixels, kSize); + align_buffer_page_end(atten_pixels, kSize); + align_buffer_page_end(unatten_pixels, kSize); + align_buffer_page_end(atten2_pixels, kSize); // Test unattenuation clamps - orig_pixels[0][0] = 200u; - orig_pixels[0][1] = 129u; - orig_pixels[0][2] = 127u; - orig_pixels[0][3] = 128u; + orig_pixels[0 * 4 + 0] = 200u; + orig_pixels[0 * 4 + 1] = 129u; + orig_pixels[0 * 4 + 2] = 127u; + orig_pixels[0 * 4 + 3] = 128u; // Test unattenuation transparent and opaque are unaffected - orig_pixels[1][0] = 16u; - orig_pixels[1][1] = 64u; - orig_pixels[1][2] = 192u; - orig_pixels[1][3] = 0u; - orig_pixels[2][0] = 16u; - orig_pixels[2][1] = 64u; - orig_pixels[2][2] = 192u; - orig_pixels[2][3] = 255u; - orig_pixels[3][0] = 16u; - orig_pixels[3][1] = 64u; - orig_pixels[3][2] = 192u; - orig_pixels[3][3] = 128u; - ARGBUnattenuate(&orig_pixels[0][0], 0, &unatten_pixels[0][0], 0, 4, 1); - EXPECT_EQ(255u, unatten_pixels[0][0]); - EXPECT_EQ(255u, unatten_pixels[0][1]); - EXPECT_EQ(254u, unatten_pixels[0][2]); - EXPECT_EQ(128u, unatten_pixels[0][3]); - EXPECT_EQ(16u, unatten_pixels[1][0]); - EXPECT_EQ(64u, unatten_pixels[1][1]); - EXPECT_EQ(192u, unatten_pixels[1][2]); - EXPECT_EQ(0u, unatten_pixels[1][3]); - EXPECT_EQ(16u, unatten_pixels[2][0]); - EXPECT_EQ(64u, unatten_pixels[2][1]); - EXPECT_EQ(192u, unatten_pixels[2][2]); - EXPECT_EQ(255u, unatten_pixels[2][3]); - EXPECT_EQ(32u, unatten_pixels[3][0]); - EXPECT_EQ(128u, unatten_pixels[3][1]); - EXPECT_EQ(255u, unatten_pixels[3][2]); - EXPECT_EQ(128u, unatten_pixels[3][3]); - - for (int i = 0; i < 256; ++i) { - 
orig_pixels[i][0] = i; - orig_pixels[i][1] = i / 2; - orig_pixels[i][2] = i / 3; - orig_pixels[i][3] = i; + orig_pixels[1 * 4 + 0] = 16u; + orig_pixels[1 * 4 + 1] = 64u; + orig_pixels[1 * 4 + 2] = 192u; + orig_pixels[1 * 4 + 3] = 0u; + orig_pixels[2 * 4 + 0] = 16u; + orig_pixels[2 * 4 + 1] = 64u; + orig_pixels[2 * 4 + 2] = 192u; + orig_pixels[2 * 4 + 3] = 255u; + orig_pixels[3 * 4 + 0] = 16u; + orig_pixels[3 * 4 + 1] = 64u; + orig_pixels[3 * 4 + 2] = 192u; + orig_pixels[3 * 4 + 3] = 128u; + ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1); + EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]); + EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]); + EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]); + EXPECT_EQ(128u, unatten_pixels[0 * 4 + 3]); + EXPECT_EQ(0u, unatten_pixels[1 * 4 + 0]); + EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]); + EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]); + EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]); + EXPECT_EQ(16u, unatten_pixels[2 * 4 + 0]); + EXPECT_EQ(64u, unatten_pixels[2 * 4 + 1]); + EXPECT_EQ(192u, unatten_pixels[2 * 4 + 2]); + EXPECT_EQ(255u, unatten_pixels[2 * 4 + 3]); + EXPECT_EQ(32u, unatten_pixels[3 * 4 + 0]); + EXPECT_EQ(128u, unatten_pixels[3 * 4 + 1]); + EXPECT_EQ(255u, unatten_pixels[3 * 4 + 2]); + EXPECT_EQ(128u, unatten_pixels[3 * 4 + 3]); + + for (int i = 0; i < 1280; ++i) { + orig_pixels[i * 4 + 0] = i; + orig_pixels[i * 4 + 1] = i / 2; + orig_pixels[i * 4 + 2] = i / 3; + orig_pixels[i * 4 + 3] = i; } - ARGBAttenuate(&orig_pixels[0][0], 0, &atten_pixels[0][0], 0, 256, 1); - ARGBUnattenuate(&atten_pixels[0][0], 0, &unatten_pixels[0][0], 0, 256, 1); - for (int i = 0; i < benchmark_iterations_ * 1280 * 720 / 256; ++i) { - ARGBAttenuate(&unatten_pixels[0][0], 0, &atten2_pixels[0][0], 0, 256, 1); + ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 1280, 1); + ARGBUnattenuate(atten_pixels, 0, unatten_pixels, 0, 1280, 1); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1); } - for (int i 
= 0; i < 256; ++i) { - EXPECT_NEAR(atten_pixels[i][0], atten2_pixels[i][0], 2); - EXPECT_NEAR(atten_pixels[i][1], atten2_pixels[i][1], 2); - EXPECT_NEAR(atten_pixels[i][2], atten2_pixels[i][2], 2); - EXPECT_NEAR(atten_pixels[i][3], atten2_pixels[i][3], 2); + for (int i = 0; i < 1280; ++i) { + EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 2); + EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 2); + EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 2); + EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 2); } // Make sure transparent, 50% and opaque are fully accurate. - EXPECT_EQ(0, atten_pixels[0][0]); - EXPECT_EQ(0, atten_pixels[0][1]); - EXPECT_EQ(0, atten_pixels[0][2]); - EXPECT_EQ(0, atten_pixels[0][3]); - EXPECT_EQ(64, atten_pixels[128][0]); - EXPECT_EQ(32, atten_pixels[128][1]); - EXPECT_EQ(21, atten_pixels[128][2]); - EXPECT_EQ(128, atten_pixels[128][3]); - EXPECT_EQ(255, atten_pixels[255][0]); - EXPECT_EQ(127, atten_pixels[255][1]); - EXPECT_EQ(85, atten_pixels[255][2]); - EXPECT_EQ(255, atten_pixels[255][3]); -} - -TEST_F(libyuvTest, TestARGBComputeCumulativeSum) { + EXPECT_EQ(0, atten_pixels[0 * 4 + 0]); + EXPECT_EQ(0, atten_pixels[0 * 4 + 1]); + EXPECT_EQ(0, atten_pixels[0 * 4 + 2]); + EXPECT_EQ(0, atten_pixels[0 * 4 + 3]); + EXPECT_EQ(64, atten_pixels[128 * 4 + 0]); + EXPECT_EQ(32, atten_pixels[128 * 4 + 1]); + EXPECT_EQ(21, atten_pixels[128 * 4 + 2]); + EXPECT_EQ(128, atten_pixels[128 * 4 + 3]); + EXPECT_NEAR(255, atten_pixels[255 * 4 + 0], 1); + EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], 1); + EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1); + EXPECT_EQ(255, atten_pixels[255 * 4 + 3]); + + free_aligned_buffer_page_end(atten2_pixels); + free_aligned_buffer_page_end(unatten_pixels); + free_aligned_buffer_page_end(atten_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +static int TestAttenuateI(int width, int height, int benchmark_iterations, + int disable_cpu_flags, int 
benchmark_cpu_info, + int invert, int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb[i + off] = (fastrand() & 0xff); + } + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBAttenuate(src_argb + off, kStride, + dst_argb_c, kStride, + width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBAttenuate(src_argb + off, kStride, + dst_argb_opt, kStride, + width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = + abs(static_cast<int>(dst_argb_c[i]) - + static_cast<int>(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) { + int max_diff = TestAttenuateI(benchmark_width_ - 1, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0); + EXPECT_LE(max_diff, 2); +} + +TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) { + int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 1); + EXPECT_LE(max_diff, 2); +} + +TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) { + int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + -1, 0); + EXPECT_LE(max_diff, 2); +} + +TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) { + int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, + 
benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0); + EXPECT_LE(max_diff, 2); +} + +static int TestUnattenuateI(int width, int height, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info, + int invert, int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb[i + off] = (fastrand() & 0xff); + } + ARGBAttenuate(src_argb + off, kStride, + src_argb + off, kStride, + width, height); + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBUnattenuate(src_argb + off, kStride, + dst_argb_c, kStride, + width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBUnattenuate(src_argb + off, kStride, + dst_argb_opt, kStride, + width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = + abs(static_cast<int>(dst_argb_c[i]) - + static_cast<int>(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) { + int max_diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0); + EXPECT_LE(max_diff, 2); +} + +TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) { + int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 1); + EXPECT_LE(max_diff, 2); +} + +TEST_F(LibYUVPlanarTest, 
ARGBUnattenuate_Invert) { + int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + -1, 0); + EXPECT_LE(max_diff, 2); +} + +TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) { + int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0); + EXPECT_LE(max_diff, 2); +} + +TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { SIMD_ALIGNED(uint8 orig_pixels[16][16][4]); SIMD_ALIGNED(int32 added_pixels[16][16][4]); @@ -484,8 +281,9 @@ TEST_F(libyuvTest, TestARGBComputeCumulativeSum) { } } -TEST_F(libyuvTest, TestARGBGray) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); +TEST_F(LibYUVPlanarTest, TestARGBGray) { + SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); // Test blue orig_pixels[0][0] = 255u; @@ -502,45 +300,62 @@ TEST_F(libyuvTest, TestARGBGray) { orig_pixels[2][1] = 0u; orig_pixels[2][2] = 255u; orig_pixels[2][3] = 255u; + // Test black + orig_pixels[3][0] = 0u; + orig_pixels[3][1] = 0u; + orig_pixels[3][2] = 0u; + orig_pixels[3][3] = 255u; + // Test white + orig_pixels[4][0] = 255u; + orig_pixels[4][1] = 255u; + orig_pixels[4][2] = 255u; + orig_pixels[4][3] = 255u; // Test color - orig_pixels[3][0] = 16u; - orig_pixels[3][1] = 64u; - orig_pixels[3][2] = 192u; - orig_pixels[3][3] = 224u; + orig_pixels[5][0] = 16u; + orig_pixels[5][1] = 64u; + orig_pixels[5][2] = 192u; + orig_pixels[5][3] = 224u; // Do 16 to test asm version. 
ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1); - EXPECT_EQ(27u, orig_pixels[0][0]); - EXPECT_EQ(27u, orig_pixels[0][1]); - EXPECT_EQ(27u, orig_pixels[0][2]); + EXPECT_EQ(30u, orig_pixels[0][0]); + EXPECT_EQ(30u, orig_pixels[0][1]); + EXPECT_EQ(30u, orig_pixels[0][2]); EXPECT_EQ(128u, orig_pixels[0][3]); - EXPECT_EQ(151u, orig_pixels[1][0]); - EXPECT_EQ(151u, orig_pixels[1][1]); - EXPECT_EQ(151u, orig_pixels[1][2]); + EXPECT_EQ(149u, orig_pixels[1][0]); + EXPECT_EQ(149u, orig_pixels[1][1]); + EXPECT_EQ(149u, orig_pixels[1][2]); EXPECT_EQ(0u, orig_pixels[1][3]); - EXPECT_EQ(75u, orig_pixels[2][0]); - EXPECT_EQ(75u, orig_pixels[2][1]); - EXPECT_EQ(75u, orig_pixels[2][2]); + EXPECT_EQ(76u, orig_pixels[2][0]); + EXPECT_EQ(76u, orig_pixels[2][1]); + EXPECT_EQ(76u, orig_pixels[2][2]); EXPECT_EQ(255u, orig_pixels[2][3]); - EXPECT_EQ(96u, orig_pixels[3][0]); - EXPECT_EQ(96u, orig_pixels[3][1]); - EXPECT_EQ(96u, orig_pixels[3][2]); - EXPECT_EQ(224u, orig_pixels[3][3]); - - for (int i = 0; i < 256; ++i) { + EXPECT_EQ(0u, orig_pixels[3][0]); + EXPECT_EQ(0u, orig_pixels[3][1]); + EXPECT_EQ(0u, orig_pixels[3][2]); + EXPECT_EQ(255u, orig_pixels[3][3]); + EXPECT_EQ(255u, orig_pixels[4][0]); + EXPECT_EQ(255u, orig_pixels[4][1]); + EXPECT_EQ(255u, orig_pixels[4][2]); + EXPECT_EQ(255u, orig_pixels[4][3]); + EXPECT_EQ(96u, orig_pixels[5][0]); + EXPECT_EQ(96u, orig_pixels[5][1]); + EXPECT_EQ(96u, orig_pixels[5][2]); + EXPECT_EQ(224u, orig_pixels[5][3]); + for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; orig_pixels[i][1] = i / 2; orig_pixels[i][2] = i / 3; orig_pixels[i][3] = i; } - - for (int i = 0; i < benchmark_iterations_ * 1280 * 720 / 256; ++i) { - ARGBGray(&orig_pixels[0][0], 0, 0, 0, 256, 1); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBGray(&orig_pixels[0][0], 0, 0, 0, 1280, 1); } } -TEST_F(libyuvTest, TestARGBGrayTo) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); - SIMD_ALIGNED(uint8 gray_pixels[256][4]); +TEST_F(LibYUVPlanarTest, TestARGBGrayTo) { + 
SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8 gray_pixels[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); // Test blue orig_pixels[0][0] = 255u; @@ -557,44 +372,61 @@ TEST_F(libyuvTest, TestARGBGrayTo) { orig_pixels[2][1] = 0u; orig_pixels[2][2] = 255u; orig_pixels[2][3] = 255u; + // Test black + orig_pixels[3][0] = 0u; + orig_pixels[3][1] = 0u; + orig_pixels[3][2] = 0u; + orig_pixels[3][3] = 255u; + // Test white + orig_pixels[4][0] = 255u; + orig_pixels[4][1] = 255u; + orig_pixels[4][2] = 255u; + orig_pixels[4][3] = 255u; // Test color - orig_pixels[3][0] = 16u; - orig_pixels[3][1] = 64u; - orig_pixels[3][2] = 192u; - orig_pixels[3][3] = 224u; + orig_pixels[5][0] = 16u; + orig_pixels[5][1] = 64u; + orig_pixels[5][2] = 192u; + orig_pixels[5][3] = 224u; // Do 16 to test asm version. ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1); - EXPECT_EQ(27u, gray_pixels[0][0]); - EXPECT_EQ(27u, gray_pixels[0][1]); - EXPECT_EQ(27u, gray_pixels[0][2]); + EXPECT_EQ(30u, gray_pixels[0][0]); + EXPECT_EQ(30u, gray_pixels[0][1]); + EXPECT_EQ(30u, gray_pixels[0][2]); EXPECT_EQ(128u, gray_pixels[0][3]); - EXPECT_EQ(151u, gray_pixels[1][0]); - EXPECT_EQ(151u, gray_pixels[1][1]); - EXPECT_EQ(151u, gray_pixels[1][2]); + EXPECT_EQ(149u, gray_pixels[1][0]); + EXPECT_EQ(149u, gray_pixels[1][1]); + EXPECT_EQ(149u, gray_pixels[1][2]); EXPECT_EQ(0u, gray_pixels[1][3]); - EXPECT_EQ(75u, gray_pixels[2][0]); - EXPECT_EQ(75u, gray_pixels[2][1]); - EXPECT_EQ(75u, gray_pixels[2][2]); + EXPECT_EQ(76u, gray_pixels[2][0]); + EXPECT_EQ(76u, gray_pixels[2][1]); + EXPECT_EQ(76u, gray_pixels[2][2]); EXPECT_EQ(255u, gray_pixels[2][3]); - EXPECT_EQ(96u, gray_pixels[3][0]); - EXPECT_EQ(96u, gray_pixels[3][1]); - EXPECT_EQ(96u, gray_pixels[3][2]); - EXPECT_EQ(224u, gray_pixels[3][3]); - - for (int i = 0; i < 256; ++i) { + EXPECT_EQ(0u, gray_pixels[3][0]); + EXPECT_EQ(0u, gray_pixels[3][1]); + EXPECT_EQ(0u, gray_pixels[3][2]); + EXPECT_EQ(255u, gray_pixels[3][3]); + 
EXPECT_EQ(255u, gray_pixels[4][0]); + EXPECT_EQ(255u, gray_pixels[4][1]); + EXPECT_EQ(255u, gray_pixels[4][2]); + EXPECT_EQ(255u, gray_pixels[4][3]); + EXPECT_EQ(96u, gray_pixels[5][0]); + EXPECT_EQ(96u, gray_pixels[5][1]); + EXPECT_EQ(96u, gray_pixels[5][2]); + EXPECT_EQ(224u, gray_pixels[5][3]); + for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; orig_pixels[i][1] = i / 2; orig_pixels[i][2] = i / 3; orig_pixels[i][3] = i; } - - for (int i = 0; i < benchmark_iterations_ * 1280 * 720 / 256; ++i) { - ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 256, 1); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1); } } -TEST_F(libyuvTest, TestARGBSepia) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); +TEST_F(LibYUVPlanarTest, TestARGBSepia) { + SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); // Test blue orig_pixels[0][0] = 255u; @@ -611,11 +443,21 @@ TEST_F(libyuvTest, TestARGBSepia) { orig_pixels[2][1] = 0u; orig_pixels[2][2] = 255u; orig_pixels[2][3] = 255u; + // Test black + orig_pixels[3][0] = 0u; + orig_pixels[3][1] = 0u; + orig_pixels[3][2] = 0u; + orig_pixels[3][3] = 255u; + // Test white + orig_pixels[4][0] = 255u; + orig_pixels[4][1] = 255u; + orig_pixels[4][2] = 255u; + orig_pixels[4][3] = 255u; // Test color - orig_pixels[3][0] = 16u; - orig_pixels[3][1] = 64u; - orig_pixels[3][2] = 192u; - orig_pixels[3][3] = 224u; + orig_pixels[5][0] = 16u; + orig_pixels[5][1] = 64u; + orig_pixels[5][2] = 192u; + orig_pixels[5][3] = 224u; // Do 16 to test asm version. 
ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1); EXPECT_EQ(33u, orig_pixels[0][0]); @@ -630,32 +472,119 @@ TEST_F(libyuvTest, TestARGBSepia) { EXPECT_EQ(89u, orig_pixels[2][1]); EXPECT_EQ(99u, orig_pixels[2][2]); EXPECT_EQ(255u, orig_pixels[2][3]); - EXPECT_EQ(88u, orig_pixels[3][0]); - EXPECT_EQ(114u, orig_pixels[3][1]); - EXPECT_EQ(127u, orig_pixels[3][2]); - EXPECT_EQ(224u, orig_pixels[3][3]); + EXPECT_EQ(0u, orig_pixels[3][0]); + EXPECT_EQ(0u, orig_pixels[3][1]); + EXPECT_EQ(0u, orig_pixels[3][2]); + EXPECT_EQ(255u, orig_pixels[3][3]); + EXPECT_EQ(239u, orig_pixels[4][0]); + EXPECT_EQ(255u, orig_pixels[4][1]); + EXPECT_EQ(255u, orig_pixels[4][2]); + EXPECT_EQ(255u, orig_pixels[4][3]); + EXPECT_EQ(88u, orig_pixels[5][0]); + EXPECT_EQ(114u, orig_pixels[5][1]); + EXPECT_EQ(127u, orig_pixels[5][2]); + EXPECT_EQ(224u, orig_pixels[5][3]); + + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 1280, 1); + } +} + +TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) { + SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); + + // Matrix for Sepia. + SIMD_ALIGNED(static const int8 kRGBToSepia[]) = { + 17 / 2, 68 / 2, 35 / 2, 0, + 22 / 2, 88 / 2, 45 / 2, 0, + 24 / 2, 98 / 2, 50 / 2, 0, + 0, 0, 0, 64, // Copy alpha. 
+ }; + memset(orig_pixels, 0, sizeof(orig_pixels)); - for (int i = 0; i < 256; ++i) { + // Test blue + orig_pixels[0][0] = 255u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 128u; + // Test green + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 255u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 0u; + // Test red + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 255u; + orig_pixels[2][3] = 255u; + // Test color + orig_pixels[3][0] = 16u; + orig_pixels[3][1] = 64u; + orig_pixels[3][2] = 192u; + orig_pixels[3][3] = 224u; + // Do 16 to test asm version. + ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, + &kRGBToSepia[0], 16, 1); + EXPECT_EQ(31u, dst_pixels_opt[0][0]); + EXPECT_EQ(43u, dst_pixels_opt[0][1]); + EXPECT_EQ(47u, dst_pixels_opt[0][2]); + EXPECT_EQ(128u, dst_pixels_opt[0][3]); + EXPECT_EQ(135u, dst_pixels_opt[1][0]); + EXPECT_EQ(175u, dst_pixels_opt[1][1]); + EXPECT_EQ(195u, dst_pixels_opt[1][2]); + EXPECT_EQ(0u, dst_pixels_opt[1][3]); + EXPECT_EQ(67u, dst_pixels_opt[2][0]); + EXPECT_EQ(87u, dst_pixels_opt[2][1]); + EXPECT_EQ(99u, dst_pixels_opt[2][2]); + EXPECT_EQ(255u, dst_pixels_opt[2][3]); + EXPECT_EQ(87u, dst_pixels_opt[3][0]); + EXPECT_EQ(112u, dst_pixels_opt[3][1]); + EXPECT_EQ(127u, dst_pixels_opt[3][2]); + EXPECT_EQ(224u, dst_pixels_opt[3][3]); + + for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; orig_pixels[i][1] = i / 2; orig_pixels[i][2] = i / 3; orig_pixels[i][3] = i; } + MaskCpuFlags(disable_cpu_flags_); + ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, + &kRGBToSepia[0], 1280, 1); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, + &kRGBToSepia[0], 1280, 1); + } - for (int i = 0; i < benchmark_iterations_ * 1280 * 720 / 256; ++i) { - ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 256, 1); + for (int i = 0; i < 1280; ++i) { + 
EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); + EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); + EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); + EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); } } -TEST_F(libyuvTest, TestARGBColorMatrix) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); +TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) { + SIMD_ALIGNED(uint8 orig_pixels[1280][4]); // Matrix for Sepia. - static const int8 kARGBToSepia[] = { + SIMD_ALIGNED(static const int8 kRGBToSepia[]) = { 17, 68, 35, 0, 22, 88, 45, 0, 24, 98, 50, 0, + 0, 0, 0, 0, // Unused but makes matrix 16 bytes. }; + memset(orig_pixels, 0, sizeof(orig_pixels)); // Test blue orig_pixels[0][0] = 255u; @@ -678,8 +607,8 @@ TEST_F(libyuvTest, TestARGBColorMatrix) { orig_pixels[3][2] = 192u; orig_pixels[3][3] = 224u; // Do 16 to test asm version. - ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepia[0], 0, 0, 16, 1); - EXPECT_EQ(33u, orig_pixels[0][0]); + RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1); + EXPECT_EQ(31u, orig_pixels[0][0]); EXPECT_EQ(43u, orig_pixels[0][1]); EXPECT_EQ(47u, orig_pixels[0][2]); EXPECT_EQ(128u, orig_pixels[0][3]); @@ -687,29 +616,28 @@ TEST_F(libyuvTest, TestARGBColorMatrix) { EXPECT_EQ(175u, orig_pixels[1][1]); EXPECT_EQ(195u, orig_pixels[1][2]); EXPECT_EQ(0u, orig_pixels[1][3]); - EXPECT_EQ(69u, orig_pixels[2][0]); - EXPECT_EQ(89u, orig_pixels[2][1]); + EXPECT_EQ(67u, orig_pixels[2][0]); + EXPECT_EQ(87u, orig_pixels[2][1]); EXPECT_EQ(99u, orig_pixels[2][2]); EXPECT_EQ(255u, orig_pixels[2][3]); - EXPECT_EQ(88u, orig_pixels[3][0]); - EXPECT_EQ(114u, orig_pixels[3][1]); + EXPECT_EQ(87u, orig_pixels[3][0]); + EXPECT_EQ(112u, orig_pixels[3][1]); EXPECT_EQ(127u, orig_pixels[3][2]); EXPECT_EQ(224u, orig_pixels[3][3]); - for (int i = 0; i < 256; ++i) { + for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; orig_pixels[i][1] = i / 2; orig_pixels[i][2] = i / 3; orig_pixels[i][3] = i; } - - for (int i = 0; i < benchmark_iterations_ * 1280 
* 720 / 256; ++i) { - ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepia[0], 0, 0, 256, 1); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 1280, 1); } } -TEST_F(libyuvTest, TestARGBColorTable) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); +TEST_F(LibYUVPlanarTest, TestARGBColorTable) { + SIMD_ALIGNED(uint8 orig_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); // Matrix for Sepia. @@ -755,68 +683,127 @@ TEST_F(libyuvTest, TestARGBColorTable) { EXPECT_EQ(11u, orig_pixels[3][2]); EXPECT_EQ(16u, orig_pixels[3][3]); - for (int i = 0; i < 256; ++i) { + for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; orig_pixels[i][1] = i / 2; orig_pixels[i][2] = i / 3; orig_pixels[i][3] = i; } + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1); + } +} + +// Same as TestARGBColorTable except alpha does not change. +TEST_F(LibYUVPlanarTest, TestRGBColorTable) { + SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + + // Matrix for Sepia. + static const uint8 kARGBTable[256 * 4] = { + 1u, 2u, 3u, 4u, + 5u, 6u, 7u, 8u, + 9u, 10u, 11u, 12u, + 13u, 14u, 15u, 16u, + }; + + orig_pixels[0][0] = 0u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 0u; + orig_pixels[1][0] = 1u; + orig_pixels[1][1] = 1u; + orig_pixels[1][2] = 1u; + orig_pixels[1][3] = 1u; + orig_pixels[2][0] = 2u; + orig_pixels[2][1] = 2u; + orig_pixels[2][2] = 2u; + orig_pixels[2][3] = 2u; + orig_pixels[3][0] = 0u; + orig_pixels[3][1] = 1u; + orig_pixels[3][2] = 2u; + orig_pixels[3][3] = 3u; + // Do 16 to test asm version. + RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1); + EXPECT_EQ(1u, orig_pixels[0][0]); + EXPECT_EQ(2u, orig_pixels[0][1]); + EXPECT_EQ(3u, orig_pixels[0][2]); + EXPECT_EQ(0u, orig_pixels[0][3]); // Alpha unchanged. 
+ EXPECT_EQ(5u, orig_pixels[1][0]); + EXPECT_EQ(6u, orig_pixels[1][1]); + EXPECT_EQ(7u, orig_pixels[1][2]); + EXPECT_EQ(1u, orig_pixels[1][3]); // Alpha unchanged. + EXPECT_EQ(9u, orig_pixels[2][0]); + EXPECT_EQ(10u, orig_pixels[2][1]); + EXPECT_EQ(11u, orig_pixels[2][2]); + EXPECT_EQ(2u, orig_pixels[2][3]); // Alpha unchanged. + EXPECT_EQ(1u, orig_pixels[3][0]); + EXPECT_EQ(6u, orig_pixels[3][1]); + EXPECT_EQ(11u, orig_pixels[3][2]); + EXPECT_EQ(3u, orig_pixels[3][3]); // Alpha unchanged. - for (int i = 0; i < benchmark_iterations_ * 1280 * 720 / 256; ++i) { - ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 256, 1); + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1); } } -TEST_F(libyuvTest, TestARGBQuantize) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); +TEST_F(LibYUVPlanarTest, TestARGBQuantize) { + SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - for (int i = 0; i < 256; ++i) { + for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; orig_pixels[i][1] = i / 2; orig_pixels[i][2] = i / 3; orig_pixels[i][3] = i; } ARGBQuantize(&orig_pixels[0][0], 0, - (65536 + (8 / 2)) / 8, 8, 8 / 2, 0, 0, 256, 1); + (65536 + (8 / 2)) / 8, 8, 8 / 2, 0, 0, 1280, 1); - for (int i = 0; i < 256; ++i) { - EXPECT_EQ(i / 8 * 8 + 8 / 2, orig_pixels[i][0]); - EXPECT_EQ(i / 2 / 8 * 8 + 8 / 2, orig_pixels[i][1]); - EXPECT_EQ(i / 3 / 8 * 8 + 8 / 2, orig_pixels[i][2]); - EXPECT_EQ(i, orig_pixels[i][3]); + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ((i / 8 * 8 + 8 / 2) & 255, orig_pixels[i][0]); + EXPECT_EQ((i / 2 / 8 * 8 + 8 / 2) & 255, orig_pixels[i][1]); + EXPECT_EQ((i / 3 / 8 * 8 + 8 / 2) & 255, orig_pixels[i][2]); + EXPECT_EQ(i & 255, orig_pixels[i][3]); } - for (int i = 0; i < benchmark_iterations_ * 1280 * 720 / 256; ++i) { + for (int i = 0; i < 
benchmark_pixels_div1280_; ++i) { ARGBQuantize(&orig_pixels[0][0], 0, - (65536 + (8 / 2)) / 8, 8, 8 / 2, 0, 0, 256, 1); + (65536 + (8 / 2)) / 8, 8, 8 / 2, 0, 0, 1280, 1); } } -TEST_F(libyuvTest, TestARGBMirror) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); - SIMD_ALIGNED(uint8 dst_pixels[256][4]); +TEST_F(LibYUVPlanarTest, TestARGBMirror) { + SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8 dst_pixels[1280][4]); - for (int i = 0; i < 256; ++i) { + for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; orig_pixels[i][1] = i / 2; orig_pixels[i][2] = i / 3; orig_pixels[i][3] = i / 4; } - ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 256, 1); + ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1); - for (int i = 0; i < 256; ++i) { - EXPECT_EQ(i, dst_pixels[255 - i][0]); - EXPECT_EQ(i / 2, dst_pixels[255 - i][1]); - EXPECT_EQ(i / 3, dst_pixels[255 - i][2]); - EXPECT_EQ(i / 4, dst_pixels[255 - i][3]); + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(i & 255, dst_pixels[1280 - 1 - i][0]); + EXPECT_EQ((i / 2) & 255, dst_pixels[1280 - 1 - i][1]); + EXPECT_EQ((i / 3) & 255, dst_pixels[1280 - 1 - i][2]); + EXPECT_EQ((i / 4) & 255, dst_pixels[1280 - 1 - i][3]); } - for (int i = 0; i < benchmark_iterations_ * 1280 * 720 / 256; ++i) { - ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 256, 1); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1); } } -TEST_F(libyuvTest, TestShade) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); - SIMD_ALIGNED(uint8 shade_pixels[256][4]); +TEST_F(LibYUVPlanarTest, TestShade) { + SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8 shade_pixels[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); orig_pixels[0][0] = 10u; orig_pixels[0][1] = 20u; @@ -834,7 +821,8 @@ TEST_F(libyuvTest, TestShade) { orig_pixels[3][1] = 0u; orig_pixels[3][2] = 0u; orig_pixels[3][3] = 0u; - ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 
4, 1, 0x80ffffff); + // Do 8 pixels to allow opt version to be used. + ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80ffffff); EXPECT_EQ(10u, shade_pixels[0][0]); EXPECT_EQ(20u, shade_pixels[0][1]); EXPECT_EQ(40u, shade_pixels[0][2]); @@ -852,22 +840,30 @@ TEST_F(libyuvTest, TestShade) { EXPECT_EQ(0u, shade_pixels[3][2]); EXPECT_EQ(0u, shade_pixels[3][3]); - ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 4, 1, 0x80808080); + ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80808080); EXPECT_EQ(5u, shade_pixels[0][0]); EXPECT_EQ(10u, shade_pixels[0][1]); EXPECT_EQ(20u, shade_pixels[0][2]); EXPECT_EQ(40u, shade_pixels[0][3]); - for (int i = 0; i < benchmark_iterations_ * 1280 * 720 / 256; ++i) { - ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 256, 1, + ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x10204080); + EXPECT_EQ(5u, shade_pixels[0][0]); + EXPECT_EQ(5u, shade_pixels[0][1]); + EXPECT_EQ(5u, shade_pixels[0][2]); + EXPECT_EQ(5u, shade_pixels[0][3]); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 1280, 1, 0x80808080); } } -TEST_F(libyuvTest, TestInterpolate) { - SIMD_ALIGNED(uint8 orig_pixels_0[256][4]); - SIMD_ALIGNED(uint8 orig_pixels_1[256][4]); - SIMD_ALIGNED(uint8 interpolate_pixels[256][4]); +TEST_F(LibYUVPlanarTest, TestARGBInterpolate) { + SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); + SIMD_ALIGNED(uint8 orig_pixels_1[1280][4]); + SIMD_ALIGNED(uint8 interpolate_pixels[1280][4]); + memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); + memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); orig_pixels_0[0][0] = 16u; orig_pixels_0[0][1] = 32u; @@ -912,15 +908,15 @@ TEST_F(libyuvTest, TestInterpolate) { EXPECT_EQ(0u, interpolate_pixels[1][0]); EXPECT_EQ(0u, interpolate_pixels[1][1]); EXPECT_EQ(0u, interpolate_pixels[1][2]); - EXPECT_NEAR(128u, interpolate_pixels[1][3], 1); // C = 127, SSE = 128. 
+ EXPECT_EQ(128u, interpolate_pixels[1][3]); EXPECT_EQ(0u, interpolate_pixels[2][0]); EXPECT_EQ(0u, interpolate_pixels[2][1]); EXPECT_EQ(0u, interpolate_pixels[2][2]); EXPECT_EQ(0u, interpolate_pixels[2][3]); - EXPECT_NEAR(128u, interpolate_pixels[3][0], 1); - EXPECT_NEAR(128u, interpolate_pixels[3][1], 1); - EXPECT_NEAR(128u, interpolate_pixels[3][2], 1); - EXPECT_NEAR(128u, interpolate_pixels[3][3], 1); + EXPECT_EQ(128u, interpolate_pixels[3][0]); + EXPECT_EQ(128u, interpolate_pixels[3][1]); + EXPECT_EQ(128u, interpolate_pixels[3][2]); + EXPECT_EQ(128u, interpolate_pixels[3][3]); ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, &interpolate_pixels[0][0], 0, 4, 1, 0); @@ -937,20 +933,418 @@ TEST_F(libyuvTest, TestInterpolate) { EXPECT_EQ(16u, interpolate_pixels[0][2]); EXPECT_EQ(32u, interpolate_pixels[0][3]); - for (int i = 0; i < benchmark_iterations_ * (1280 * 720 / 256); ++i) { + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, - &interpolate_pixels[0][0], 0, 256, 1, 128); + &interpolate_pixels[0][0], 0, 1280, 1, 128); } } -TEST_F(libyuvTest, TestAffine) { - SIMD_ALIGNED(uint8 orig_pixels_0[256][4]); - SIMD_ALIGNED(uint8 interpolate_pixels_C[256][4]); -#if defined(HAS_ARGBAFFINEROW_SSE2) - SIMD_ALIGNED(uint8 interpolate_pixels_Opt[256][4]); -#endif +TEST_F(LibYUVPlanarTest, TestInterpolatePlane) { + SIMD_ALIGNED(uint8 orig_pixels_0[1280]); + SIMD_ALIGNED(uint8 orig_pixels_1[1280]); + SIMD_ALIGNED(uint8 interpolate_pixels[1280]); + memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); + memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); + + orig_pixels_0[0] = 16u; + orig_pixels_0[1] = 32u; + orig_pixels_0[2] = 64u; + orig_pixels_0[3] = 128u; + orig_pixels_0[4] = 0u; + orig_pixels_0[5] = 0u; + orig_pixels_0[6] = 0u; + orig_pixels_0[7] = 255u; + orig_pixels_0[8] = 0u; + orig_pixels_0[9] = 0u; + orig_pixels_0[10] = 0u; + orig_pixels_0[11] = 0u; + orig_pixels_0[12] = 0u; + 
orig_pixels_0[13] = 0u; + orig_pixels_0[14] = 0u; + orig_pixels_0[15] = 0u; + + orig_pixels_1[0] = 0u; + orig_pixels_1[1] = 0u; + orig_pixels_1[2] = 0u; + orig_pixels_1[3] = 0u; + orig_pixels_1[4] = 0u; + orig_pixels_1[5] = 0u; + orig_pixels_1[6] = 0u; + orig_pixels_1[7] = 0u; + orig_pixels_1[8] = 0u; + orig_pixels_1[9] = 0u; + orig_pixels_1[10] = 0u; + orig_pixels_1[11] = 0u; + orig_pixels_1[12] = 255u; + orig_pixels_1[13] = 255u; + orig_pixels_1[14] = 255u; + orig_pixels_1[15] = 255u; + + InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 128); + EXPECT_EQ(8u, interpolate_pixels[0]); + EXPECT_EQ(16u, interpolate_pixels[1]); + EXPECT_EQ(32u, interpolate_pixels[2]); + EXPECT_EQ(64u, interpolate_pixels[3]); + EXPECT_EQ(0u, interpolate_pixels[4]); + EXPECT_EQ(0u, interpolate_pixels[5]); + EXPECT_EQ(0u, interpolate_pixels[6]); + EXPECT_EQ(128u, interpolate_pixels[7]); + EXPECT_EQ(0u, interpolate_pixels[8]); + EXPECT_EQ(0u, interpolate_pixels[9]); + EXPECT_EQ(0u, interpolate_pixels[10]); + EXPECT_EQ(0u, interpolate_pixels[11]); + EXPECT_EQ(128u, interpolate_pixels[12]); + EXPECT_EQ(128u, interpolate_pixels[13]); + EXPECT_EQ(128u, interpolate_pixels[14]); + EXPECT_EQ(128u, interpolate_pixels[15]); + + InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 0); + EXPECT_EQ(16u, interpolate_pixels[0]); + EXPECT_EQ(32u, interpolate_pixels[1]); + EXPECT_EQ(64u, interpolate_pixels[2]); + EXPECT_EQ(128u, interpolate_pixels[3]); + + InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 192); + + EXPECT_EQ(4u, interpolate_pixels[0]); + EXPECT_EQ(8u, interpolate_pixels[1]); + EXPECT_EQ(16u, interpolate_pixels[2]); + EXPECT_EQ(32u, interpolate_pixels[3]); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 1280, 1, 123); + } +} + +#define TESTTERP(FMT_A, 
BPP_A, STRIDE_A, \ + FMT_B, BPP_B, STRIDE_B, \ + W1280, TERP, N, NEG, OFF) \ +TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \ + const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kHeight = benchmark_height_; \ + const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb_a, kStrideA * kHeight + OFF); \ + align_buffer_page_end(src_argb_b, kStrideA * kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB * kHeight); \ + align_buffer_page_end(dst_argb_opt, kStrideB * kHeight); \ + for (int i = 0; i < kStrideA * kHeight; ++i) { \ + src_argb_a[i + OFF] = (fastrand() & 0xff); \ + src_argb_b[i + OFF] = (fastrand() & 0xff); \ + } \ + MaskCpuFlags(disable_cpu_flags_); \ + ARGBInterpolate(src_argb_a + OFF, kStrideA, \ + src_argb_b + OFF, kStrideA, \ + dst_argb_c, kStrideB, \ + kWidth, NEG kHeight, TERP); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + ARGBInterpolate(src_argb_a + OFF, kStrideA, \ + src_argb_b + OFF, kStrideA, \ + dst_argb_opt, kStrideB, \ + kWidth, NEG kHeight, TERP); \ + } \ + for (int i = 0; i < kStrideB * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb_a); \ + free_aligned_buffer_page_end(src_argb_b); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ +} + +#define TESTINTERPOLATE(TERP) \ + TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ - 1, TERP, _Any, +, 0) \ + TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \ + TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0) \ + TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0) + +TESTINTERPOLATE(0) +TESTINTERPOLATE(64) +TESTINTERPOLATE(128) +TESTINTERPOLATE(192) +TESTINTERPOLATE(255) + +static int TestBlend(int width, int height, int 
benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info, + int invert, int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(src_argb_b, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + src_argb_b[i + off] = (fastrand() & 0xff); + } + ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width, + height); + ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width, + height); + memset(dst_argb_c, 255, kStride * height); + memset(dst_argb_opt, 255, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBBlend(src_argb_a + off, kStride, + src_argb_b + off, kStride, + dst_argb_c, kStride, + width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBBlend(src_argb_a + off, kStride, + src_argb_b + off, kStride, + dst_argb_opt, kStride, + width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = + abs(static_cast<int>(dst_argb_c[i]) - + static_cast<int>(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(src_argb_b); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBBlend_Any) { + int max_diff = TestBlend(benchmark_width_ - 4, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} - for (int i = 0; i < 256; ++i) { +TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) { + int max_diff = TestBlend(benchmark_width_, benchmark_height_, + 
benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) { + int max_diff = TestBlend(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { + int max_diff = TestBlend(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +static void TestBlendPlane(int width, int height, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info, + int invert, int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 1; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(src_argb_b, kStride * height + off); + align_buffer_page_end(src_argb_alpha, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height + off); + align_buffer_page_end(dst_argb_opt, kStride * height + off); + memset(dst_argb_c, 255, kStride * height + off); + memset(dst_argb_opt, 255, kStride * height + off); + + // Test source is maintained exactly if alpha is 255. + for (int i = 0; i < width; ++i) { + src_argb_a[i + off] = i & 255; + src_argb_b[i + off] = 255 - (i & 255); + } + memset(src_argb_alpha + off, 255, width); + BlendPlane(src_argb_a + off, width, + src_argb_b + off, width, + src_argb_alpha + off, width, + dst_argb_opt + off, width, + width, 1); + for (int i = 0; i < width; ++i) { + EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]); + } + // Test destination is maintained exactly if alpha is 0. 
+ memset(src_argb_alpha + off, 0, width); + BlendPlane(src_argb_a + off, width, + src_argb_b + off, width, + src_argb_alpha + off, width, + dst_argb_opt + off, width, + width, 1); + for (int i = 0; i < width; ++i) { + EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]); + } + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + src_argb_b[i + off] = (fastrand() & 0xff); + src_argb_alpha[i + off] = (fastrand() & 0xff); + } + + MaskCpuFlags(disable_cpu_flags); + BlendPlane(src_argb_a + off, width, + src_argb_b + off, width, + src_argb_alpha + off, width, + dst_argb_c + off, width, + width, height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + BlendPlane(src_argb_a + off, width, + src_argb_b + off, width, + src_argb_alpha + off, width, + dst_argb_opt + off, width, + width, height); + } + for (int i = 0; i < kStride * height; ++i) { + EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]); + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(src_argb_b); + free_aligned_buffer_page_end(src_argb_alpha); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return; +} + +TEST_F(LibYUVPlanarTest, BlendPlane_Opt) { + TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); +} +TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) { + TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); +} +TEST_F(LibYUVPlanarTest, BlendPlane_Any) { + TestBlendPlane(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); +} +TEST_F(LibYUVPlanarTest, BlendPlane_Invert) { + TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 1); +} + +#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) + 
+static void TestI420Blend(int width, int height, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info, + int invert, int off) { + width = ((width) > 0) ? (width) : 1; + const int kStrideUV = SUBSAMPLE(width, 2); + const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2); + align_buffer_page_end(src_y0, width * height + off); + align_buffer_page_end(src_u0, kSizeUV + off); + align_buffer_page_end(src_v0, kSizeUV + off); + align_buffer_page_end(src_y1, width * height + off); + align_buffer_page_end(src_u1, kSizeUV + off); + align_buffer_page_end(src_v1, kSizeUV + off); + align_buffer_page_end(src_a, width * height + off); + align_buffer_page_end(dst_y_c, width * height + off); + align_buffer_page_end(dst_u_c, kSizeUV + off); + align_buffer_page_end(dst_v_c, kSizeUV + off); + align_buffer_page_end(dst_y_opt, width * height + off); + align_buffer_page_end(dst_u_opt, kSizeUV + off); + align_buffer_page_end(dst_v_opt, kSizeUV + off); + + MemRandomize(src_y0, width * height + off); + MemRandomize(src_u0, kSizeUV + off); + MemRandomize(src_v0, kSizeUV + off); + MemRandomize(src_y1, width * height + off); + MemRandomize(src_u1, kSizeUV + off); + MemRandomize(src_v1, kSizeUV + off); + MemRandomize(src_a, width * height + off); + memset(dst_y_c, 255, width * height + off); + memset(dst_u_c, 255, kSizeUV + off); + memset(dst_v_c, 255, kSizeUV + off); + memset(dst_y_opt, 255, width * height + off); + memset(dst_u_opt, 255, kSizeUV + off); + memset(dst_v_opt, 255, kSizeUV + off); + + MaskCpuFlags(disable_cpu_flags); + I420Blend(src_y0 + off, width, + src_u0 + off, kStrideUV, + src_v0 + off, kStrideUV, + src_y1 + off, width, + src_u1 + off, kStrideUV, + src_v1 + off, kStrideUV, + src_a + off, width, + dst_y_c + off, width, + dst_u_c + off, kStrideUV, + dst_v_c + off, kStrideUV, + width, height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + I420Blend(src_y0 + off, width, + src_u0 + off, kStrideUV, + src_v0 + off, 
kStrideUV, + src_y1 + off, width, + src_u1 + off, kStrideUV, + src_v1 + off, kStrideUV, + src_a + off, width, + dst_y_opt + off, width, + dst_u_opt + off, kStrideUV, + dst_v_opt + off, kStrideUV, + width, height); + } + for (int i = 0; i < width * height; ++i) { + EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]); + } + for (int i = 0; i < kSizeUV; ++i) { + EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]); + EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]); + } + free_aligned_buffer_page_end(src_y0); + free_aligned_buffer_page_end(src_u0); + free_aligned_buffer_page_end(src_v0); + free_aligned_buffer_page_end(src_y1); + free_aligned_buffer_page_end(src_u1); + free_aligned_buffer_page_end(src_v1); + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(dst_y_c); + free_aligned_buffer_page_end(dst_u_c); + free_aligned_buffer_page_end(dst_v_c); + free_aligned_buffer_page_end(dst_y_opt); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_opt); + return; +} + +TEST_F(LibYUVPlanarTest, I420Blend_Opt) { + TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); +} +TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) { + TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); +} + +// TODO(fbarchard): DISABLED because _Any uses C. Avoid C and re-enable. 
+TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) { + TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); +} +TEST_F(LibYUVPlanarTest, I420Blend_Invert) { + TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); +} + +TEST_F(LibYUVPlanarTest, TestAffine) { + SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); + SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]); + + for (int i = 0; i < 1280; ++i) { for (int j = 0; j < 4; ++j) { orig_pixels_0[i][j] = i; } @@ -959,47 +1353,1009 @@ TEST_F(libyuvTest, TestAffine) { float uv_step[4] = { 0.f, 0.f, 0.75f, 0.f }; ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], - uv_step, 256); + uv_step, 1280); EXPECT_EQ(0u, interpolate_pixels_C[0][0]); EXPECT_EQ(96u, interpolate_pixels_C[128][0]); EXPECT_EQ(191u, interpolate_pixels_C[255][3]); #if defined(HAS_ARGBAFFINEROW_SSE2) + SIMD_ALIGNED(uint8 interpolate_pixels_Opt[1280][4]); ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0], - uv_step, 256); - EXPECT_EQ(0, memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 256 * 4)); -#endif + uv_step, 1280); + EXPECT_EQ(0, memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4)); -#if defined(HAS_ARGBAFFINEROW_SSE2) int has_sse2 = TestCpuFlag(kCpuHasSSE2); if (has_sse2) { - for (int i = 0; i < benchmark_iterations_ * 1280 * 720 / 256; ++i) { + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0], - uv_step, 256); + uv_step, 1280); } - } else { + } #endif - for (int i = 0; i < benchmark_iterations_ * 1280 * 720 / 256; ++i) { - ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], - uv_step, 256); +} + +TEST_F(LibYUVPlanarTest, TestCopyPlane) { + int err = 0; + int yw = benchmark_width_; + int yh = benchmark_height_; + int b = 12; + int i, j; + + int y_plane_size = (yw 
+ b * 2) * (yh + b * 2); + align_buffer_page_end(orig_y, y_plane_size); + align_buffer_page_end(dst_c, y_plane_size); + align_buffer_page_end(dst_opt, y_plane_size); + + memset(orig_y, 0, y_plane_size); + memset(dst_c, 0, y_plane_size); + memset(dst_opt, 0, y_plane_size); + + // Fill image buffers with random data. + for (i = b; i < (yh + b); ++i) { + for (j = b; j < (yw + b); ++j) { + orig_y[i * (yw + b * 2) + j] = fastrand() & 0xff; } -#if defined(HAS_ARGBAFFINEROW_SSE2) } -#endif + + // Fill destination buffers with random data. + for (i = 0; i < y_plane_size; ++i) { + uint8 random_number = fastrand() & 0x7f; + dst_c[i] = random_number; + dst_opt[i] = dst_c[i]; + } + + int y_off = b * (yw + b * 2) + b; + + int y_st = yw + b * 2; + int stride = 8; + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + double c_time = get_time(); + for (j = 0; j < benchmark_iterations_; j++) { + CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh); + } + c_time = (get_time() - c_time) / benchmark_iterations_; + + // Enable optimizations. 
+ MaskCpuFlags(benchmark_cpu_info_); + double opt_time = get_time(); + for (j = 0; j < benchmark_iterations_; j++) { + CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh); + } + opt_time = (get_time() - opt_time) / benchmark_iterations_; + + for (i = 0; i < y_plane_size; ++i) { + if (dst_c[i] != dst_opt[i]) + ++err; + } + + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(dst_c); + free_aligned_buffer_page_end(dst_opt); + + EXPECT_EQ(0, err); } -TEST_F(libyuvTest, Test565) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); - SIMD_ALIGNED(uint8 pixels565[256][2]); +static int TestMultiply(int width, int height, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info, + int invert, int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(src_argb_b, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + src_argb_b[i + off] = (fastrand() & 0xff); + } + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBMultiply(src_argb_a + off, kStride, + src_argb_b + off, kStride, + dst_argb_c, kStride, + width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBMultiply(src_argb_a + off, kStride, + src_argb_b + off, kStride, + dst_argb_opt, kStride, + width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = + abs(static_cast<int>(dst_argb_c[i]) - + static_cast<int>(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(src_argb_b); + 
free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} - for (int i = 0; i < 256; ++i) { - for (int j = 0; j < 4; ++j) { - orig_pixels[i][j] = i; +TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) { + int max_diff = TestMultiply(benchmark_width_ - 1, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) { + int max_diff = TestMultiply(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) { + int max_diff = TestMultiply(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) { + int max_diff = TestMultiply(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +static int TestAdd(int width, int height, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info, + int invert, int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(src_argb_b, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + src_argb_b[i + off] = (fastrand() & 0xff); + } + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBAdd(src_argb_a + off, kStride, + src_argb_b + off, kStride, + dst_argb_c, kStride, + width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 
0; i < benchmark_iterations; ++i) { + ARGBAdd(src_argb_a + off, kStride, + src_argb_b + off, kStride, + dst_argb_opt, kStride, + width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = + abs(static_cast<int>(dst_argb_c[i]) - + static_cast<int>(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(src_argb_b); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBAdd_Any) { + int max_diff = TestAdd(benchmark_width_ - 1, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) { + int max_diff = TestAdd(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) { + int max_diff = TestAdd(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) { + int max_diff = TestAdd(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +static int TestSubtract(int width, int height, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info, + int invert, int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(src_argb_b, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 
0xff); + src_argb_b[i + off] = (fastrand() & 0xff); + } + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBSubtract(src_argb_a + off, kStride, + src_argb_b + off, kStride, + dst_argb_c, kStride, + width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBSubtract(src_argb_a + off, kStride, + src_argb_b + off, kStride, + dst_argb_opt, kStride, + width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = + abs(static_cast<int>(dst_argb_c[i]) - + static_cast<int>(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(src_argb_b); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) { + int max_diff = TestSubtract(benchmark_width_ - 1, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) { + int max_diff = TestSubtract(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) { + int max_diff = TestSubtract(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) { + int max_diff = TestSubtract(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 1); +} + +static int TestSobel(int width, int height, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info, + int invert, int off) { + if (width < 1) { + 
width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + memset(src_argb_a, 0, kStride * height + off); + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + } + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBSobel(src_argb_a + off, kStride, + dst_argb_c, kStride, + width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBSobel(src_argb_a + off, kStride, + dst_argb_opt, kStride, + width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = + abs(static_cast<int>(dst_argb_c[i]) - + static_cast<int>(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBSobel_Any) { + int max_diff = TestSobel(benchmark_width_ - 1, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) { + int max_diff = TestSobel(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) { + int max_diff = TestSobel(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) { + int max_diff = TestSobel(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, 
+1, 0); + EXPECT_EQ(0, max_diff); +} + +static int TestSobelToPlane(int width, int height, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info, + int invert, int off) { + if (width < 1) { + width = 1; + } + const int kSrcBpp = 4; + const int kDstBpp = 1; + const int kSrcStride = (width * kSrcBpp + 15) & ~15; + const int kDstStride = (width * kDstBpp + 15) & ~15; + align_buffer_page_end(src_argb_a, kSrcStride * height + off); + align_buffer_page_end(dst_argb_c, kDstStride * height); + align_buffer_page_end(dst_argb_opt, kDstStride * height); + memset(src_argb_a, 0, kSrcStride * height + off); + for (int i = 0; i < kSrcStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + } + memset(dst_argb_c, 0, kDstStride * height); + memset(dst_argb_opt, 0, kDstStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBSobelToPlane(src_argb_a + off, kSrcStride, + dst_argb_c, kDstStride, + width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBSobelToPlane(src_argb_a + off, kSrcStride, + dst_argb_opt, kDstStride, + width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kDstStride * height; ++i) { + int abs_diff = + abs(static_cast<int>(dst_argb_c[i]) - + static_cast<int>(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) { + int max_diff = TestSobelToPlane(benchmark_width_ - 1, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) { + int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 1); + EXPECT_EQ(0, max_diff); +} 
+ +TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) { + int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + -1, 0); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) { + int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0); + EXPECT_EQ(0, max_diff); +} + +static int TestSobelXY(int width, int height, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info, + int invert, int off) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + memset(src_argb_a, 0, kStride * height + off); + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + } + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBSobelXY(src_argb_a + off, kStride, + dst_argb_c, kStride, + width, invert * height); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBSobelXY(src_argb_a + off, kStride, + dst_argb_opt, kStride, + width, invert * height); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = + abs(static_cast<int>(dst_argb_c[i]) - + static_cast<int>(dst_argb_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) { + int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + 
EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) { + int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) { + int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) { + int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_EQ(0, max_diff); +} + +static int TestBlur(int width, int height, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info, + int invert, int off, int radius) { + if (width < 1) { + width = 1; + } + const int kBpp = 4; + const int kStride = width * kBpp; + align_buffer_page_end(src_argb_a, kStride * height + off); + align_buffer_page_end(dst_cumsum, width * height * 16); + align_buffer_page_end(dst_argb_c, kStride * height); + align_buffer_page_end(dst_argb_opt, kStride * height); + for (int i = 0; i < kStride * height; ++i) { + src_argb_a[i + off] = (fastrand() & 0xff); + } + memset(dst_cumsum, 0, width * height * 16); + memset(dst_argb_c, 0, kStride * height); + memset(dst_argb_opt, 0, kStride * height); + + MaskCpuFlags(disable_cpu_flags); + ARGBBlur(src_argb_a + off, kStride, + dst_argb_c, kStride, + reinterpret_cast<int32*>(dst_cumsum), width * 4, + width, invert * height, radius); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBBlur(src_argb_a + off, kStride, + dst_argb_opt, kStride, + reinterpret_cast<int32*>(dst_cumsum), width * 4, + width, invert * height, radius); + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = + abs(static_cast<int>(dst_argb_c[i]) - + static_cast<int>(dst_argb_opt[i])); + if 
(abs_diff > max_diff) { + max_diff = abs_diff; } } - ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1); - uint32 checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); - EXPECT_EQ(610919429u, checksum); + free_aligned_buffer_page_end(src_argb_a); + free_aligned_buffer_page_end(dst_cumsum); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +static const int kBlurSize = 55; +TEST_F(LibYUVPlanarTest, ARGBBlur_Any) { + int max_diff = TestBlur(benchmark_width_ - 1, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0, kBlurSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) { + int max_diff = TestBlur(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 1, kBlurSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) { + int max_diff = TestBlur(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + -1, 0, kBlurSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) { + int max_diff = TestBlur(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0, kBlurSize); + EXPECT_LE(max_diff, 1); +} + +static const int kBlurSmallSize = 5; +TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) { + int max_diff = TestBlur(benchmark_width_ - 1, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0, kBlurSmallSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) { + int max_diff = TestBlur(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 1, kBlurSmallSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) { + int max_diff = TestBlur(benchmark_width_, 
benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + -1, 0, kBlurSmallSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) { + int max_diff = TestBlur(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0, kBlurSmallSize); + EXPECT_LE(max_diff, 1); +} + +TEST_F(LibYUVPlanarTest, TestARGBPolynomial) { + SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + + SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = { + 0.94230f, -3.03300f, -2.92500f, 0.f, // C0 + 0.584500f, 1.112000f, 1.535000f, 1.f, // C1 x + 0.001313f, -0.002503f, -0.004496f, 0.f, // C2 x * x + 0.0f, 0.000006965f, 0.000008781f, 0.f, // C3 x * x * x + }; + + // Test blue + orig_pixels[0][0] = 255u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 128u; + // Test green + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 255u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 0u; + // Test red + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 255u; + orig_pixels[2][3] = 255u; + // Test white + orig_pixels[3][0] = 255u; + orig_pixels[3][1] = 255u; + orig_pixels[3][2] = 255u; + orig_pixels[3][3] = 255u; + // Test color + orig_pixels[4][0] = 16u; + orig_pixels[4][1] = 64u; + orig_pixels[4][2] = 192u; + orig_pixels[4][3] = 224u; + // Do 16 to test asm version. 
+ ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, + &kWarmifyPolynomial[0], 16, 1); + EXPECT_EQ(235u, dst_pixels_opt[0][0]); + EXPECT_EQ(0u, dst_pixels_opt[0][1]); + EXPECT_EQ(0u, dst_pixels_opt[0][2]); + EXPECT_EQ(128u, dst_pixels_opt[0][3]); + EXPECT_EQ(0u, dst_pixels_opt[1][0]); + EXPECT_EQ(233u, dst_pixels_opt[1][1]); + EXPECT_EQ(0u, dst_pixels_opt[1][2]); + EXPECT_EQ(0u, dst_pixels_opt[1][3]); + EXPECT_EQ(0u, dst_pixels_opt[2][0]); + EXPECT_EQ(0u, dst_pixels_opt[2][1]); + EXPECT_EQ(241u, dst_pixels_opt[2][2]); + EXPECT_EQ(255u, dst_pixels_opt[2][3]); + EXPECT_EQ(235u, dst_pixels_opt[3][0]); + EXPECT_EQ(233u, dst_pixels_opt[3][1]); + EXPECT_EQ(241u, dst_pixels_opt[3][2]); + EXPECT_EQ(255u, dst_pixels_opt[3][3]); + EXPECT_EQ(10u, dst_pixels_opt[4][0]); + EXPECT_EQ(59u, dst_pixels_opt[4][1]); + EXPECT_EQ(188u, dst_pixels_opt[4][2]); + EXPECT_EQ(224u, dst_pixels_opt[4][3]); + + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + + MaskCpuFlags(disable_cpu_flags_); + ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, + &kWarmifyPolynomial[0], 1280, 1); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, + &kWarmifyPolynomial[0], 1280, 1); + } + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); + EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); + EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); + EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); + } +} + +TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { + SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + + align_buffer_page_end(lumacolortable, 32768); + int v = 0; + for (int i = 0; i < 32768; ++i) { + 
lumacolortable[i] = v; + v += 3; + } + // Test blue + orig_pixels[0][0] = 255u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 128u; + // Test green + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 255u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 0u; + // Test red + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 255u; + orig_pixels[2][3] = 255u; + // Test color + orig_pixels[3][0] = 16u; + orig_pixels[3][1] = 64u; + orig_pixels[3][2] = 192u; + orig_pixels[3][3] = 224u; + // Do 16 to test asm version. + ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, + &lumacolortable[0], 16, 1); + EXPECT_EQ(253u, dst_pixels_opt[0][0]); + EXPECT_EQ(0u, dst_pixels_opt[0][1]); + EXPECT_EQ(0u, dst_pixels_opt[0][2]); + EXPECT_EQ(128u, dst_pixels_opt[0][3]); + EXPECT_EQ(0u, dst_pixels_opt[1][0]); + EXPECT_EQ(253u, dst_pixels_opt[1][1]); + EXPECT_EQ(0u, dst_pixels_opt[1][2]); + EXPECT_EQ(0u, dst_pixels_opt[1][3]); + EXPECT_EQ(0u, dst_pixels_opt[2][0]); + EXPECT_EQ(0u, dst_pixels_opt[2][1]); + EXPECT_EQ(253u, dst_pixels_opt[2][2]); + EXPECT_EQ(255u, dst_pixels_opt[2][3]); + EXPECT_EQ(48u, dst_pixels_opt[3][0]); + EXPECT_EQ(192u, dst_pixels_opt[3][1]); + EXPECT_EQ(64u, dst_pixels_opt[3][2]); + EXPECT_EQ(224u, dst_pixels_opt[3][3]); + + for (int i = 0; i < 1280; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + + MaskCpuFlags(disable_cpu_flags_); + ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, + lumacolortable, 1280, 1); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, + lumacolortable, 1280, 1); + } + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); + EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); + EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); + 
EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); + } + + free_aligned_buffer_page_end(lumacolortable); +} + +TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) { + const int kSize = benchmark_width_ * benchmark_height_ * 4; + align_buffer_page_end(orig_pixels, kSize); + align_buffer_page_end(dst_pixels_opt, kSize); + align_buffer_page_end(dst_pixels_c, kSize); + + MemRandomize(orig_pixels, kSize); + MemRandomize(dst_pixels_opt, kSize); + memcpy(dst_pixels_c, dst_pixels_opt, kSize); + + MaskCpuFlags(disable_cpu_flags_); + ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, + dst_pixels_c, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, + dst_pixels_opt, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + } + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(dst_pixels_c); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 4); + align_buffer_page_end(dst_pixels_opt, kPixels); + align_buffer_page_end(dst_pixels_c, kPixels); + + MemRandomize(src_pixels, kPixels * 4); + MemRandomize(dst_pixels_opt, kPixels); + memcpy(dst_pixels_c, dst_pixels_opt, kPixels); + + MaskCpuFlags(disable_cpu_flags_); + ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, + dst_pixels_c, benchmark_width_, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, + dst_pixels_opt, benchmark_width_, + benchmark_width_, benchmark_height_); + } + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + 
free_aligned_buffer_page_end(dst_pixels_c); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(src_pixels); +} + +TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 4); + align_buffer_page_end(dst_pixels_c, kPixels * 4); + + MemRandomize(orig_pixels, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 4); + memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4); + + MaskCpuFlags(disable_cpu_flags_); + ARGBCopyYToAlpha(orig_pixels, benchmark_width_, + dst_pixels_c, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + ARGBCopyYToAlpha(orig_pixels, benchmark_width_, + dst_pixels_opt, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + } + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(dst_pixels_c); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(orig_pixels); +} + +static int TestARGBRect(int width, int height, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info, + int invert, int off, int bpp) { + if (width < 1) { + width = 1; + } + const int kStride = width * bpp; + const int kSize = kStride * height; + const uint32 v32 = fastrand() & (bpp == 4 ? 
0xffffffff : 0xff); + + align_buffer_page_end(dst_argb_c, kSize + off); + align_buffer_page_end(dst_argb_opt, kSize + off); + + MemRandomize(dst_argb_c + off, kSize); + memcpy(dst_argb_opt + off, dst_argb_c + off, kSize); + + MaskCpuFlags(disable_cpu_flags); + if (bpp == 4) { + ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32); + } else { + SetPlane(dst_argb_c + off, kStride, width, invert * height, v32); + } + + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; ++i) { + if (bpp == 4) { + ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32); + } else { + SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32); + } + } + int max_diff = 0; + for (int i = 0; i < kStride * height; ++i) { + int abs_diff = + abs(static_cast<int>(dst_argb_c[i + off]) - + static_cast<int>(dst_argb_opt[i + off])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, ARGBRect_Any) { + int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0, 4); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) { + int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 1, 4); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBRect_Invert) { + int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + -1, 0, 4); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, ARGBRect_Opt) { + int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0, 4); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, SetPlane_Any) { + int 
max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0, 1); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) { + int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 1, 1); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, SetPlane_Invert) { + int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + -1, 0, 1); + EXPECT_EQ(0, max_diff); +} + +TEST_F(LibYUVPlanarTest, SetPlane_Opt) { + int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, + +1, 0, 1); + EXPECT_EQ(0, max_diff); } } // namespace libyuv diff --git a/files/unit_test/rotate_argb_test.cc b/files/unit_test/rotate_argb_test.cc index fe8435e1..9c83c356 100644 --- a/files/unit_test/rotate_argb_test.cc +++ b/files/unit_test/rotate_argb_test.cc @@ -4,12 +4,11 @@ * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <stdlib.h> -#include <time.h> #include "libyuv/cpu_id.h" #include "libyuv/rotate_argb.h" @@ -17,179 +16,181 @@ namespace libyuv { -static int ARGBTestRotate(int src_width, int src_height, - int dst_width, int dst_height, - libyuv::RotationMode mode, int runs) { - const int b = 128; - int src_argb_plane_size = (src_width + b * 2) * (src_height + b * 2) * 4; - int src_stride_argb = (b * 2 + src_width) * 4; - - align_buffer_16(src_argb, src_argb_plane_size) - memset(src_argb, 1, src_argb_plane_size); - - int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; - int dst_stride_argb = (b * 2 + dst_width) * 4; - - srandom(time(NULL)); - - int i, j; - for (i = b; i < (src_height + b); ++i) { - for (j = b; j < (src_width + b) * 4; ++j) { - src_argb[(i * src_stride_argb) + j] = (random() & 0xff); - } +void TestRotateBpp(int src_width, int src_height, + int dst_width, int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + const int kBpp) { + if (src_width < 1) { + src_width = 1; + } + if (src_height < 1) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_stride_argb = src_width * kBpp; + int src_argb_plane_size = src_stride_argb * abs(src_height); + align_buffer_page_end(src_argb, src_argb_plane_size); + for (int i = 0; i < src_argb_plane_size; ++i) { + src_argb[i] = fastrand() & 0xff; } - align_buffer_16(dst_argb_c, dst_argb_plane_size) - align_buffer_16(dst_argb_opt, dst_argb_plane_size) + int dst_stride_argb = dst_width * kBpp; + int dst_argb_plane_size = dst_stride_argb * dst_height; + align_buffer_page_end(dst_argb_c, dst_argb_plane_size); + align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); memset(dst_argb_c, 2, dst_argb_plane_size); memset(dst_argb_opt, 3, dst_argb_plane_size); - // Warm up both versions for consistent benchmarks. - MaskCpuFlags(0); // Disable all CPU optimization. 
- ARGBRotate(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, - dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, - src_width, src_height, mode); - MaskCpuFlags(-1); // Enable all CPU optimization. - ARGBRotate(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, - dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, - src_width, src_height, mode); - - MaskCpuFlags(0); // Disable all CPU optimization. - double c_time = get_time(); - for (i = 0; i < runs; ++i) { - ARGBRotate(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, - dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, + if (kBpp == 1) { + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + RotatePlane(src_argb, src_stride_argb, + dst_argb_c, dst_stride_argb, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + RotatePlane(src_argb, src_stride_argb, + dst_argb_opt, dst_stride_argb, + src_width, src_height, mode); + } + } else if (kBpp == 4) { + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + ARGBRotate(src_argb, src_stride_argb, + dst_argb_c, dst_stride_argb, src_width, src_height, mode); - } - c_time = (get_time() - c_time) / runs; - MaskCpuFlags(-1); // Enable all CPU optimization. - double opt_time = get_time(); - for (i = 0; i < runs; ++i) { - ARGBRotate(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, - dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, - src_width, src_height, mode); - } - opt_time = (get_time() - opt_time) / runs; - - // Report performance of C vs OPT - printf("filter %d - %8d us C - %8d us OPT\n", - mode, static_cast<int>(c_time*1e6), static_cast<int>(opt_time*1e6)); - - // C version may be a little off from the optimized. Order of - // operations may introduce rounding somewhere. 
So do a difference - // of the buffers and look to see that the max difference isn't - // over 2. - int max_diff = 0; - for (i = b; i < (dst_height + b); ++i) { - for (j = b * 4; j < (dst_width + b) * 4; ++j) { - int abs_diff = abs(dst_argb_c[(i * dst_stride_argb) + j] - - dst_argb_opt[(i * dst_stride_argb) + j]); - if (abs_diff > max_diff) - max_diff = abs_diff; + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + ARGBRotate(src_argb, src_stride_argb, + dst_argb_opt, dst_stride_argb, + src_width, src_height, mode); } } - free_aligned_buffer_16(dst_argb_c) - free_aligned_buffer_16(dst_argb_opt) - free_aligned_buffer_16(src_argb) - return max_diff; -} - -TEST_F(libyuvTest, ARGBRotate0) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = 1280; - const int dst_height = 720; + // Rotation should be exact. + for (int i = 0; i < dst_argb_plane_size; ++i) { + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); + } - int err = ARGBTestRotate(src_width, src_height, - dst_width, dst_height, kRotate0, - benchmark_iterations_); - EXPECT_GE(1, err); + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + free_aligned_buffer_page_end(src_argb); } -TEST_F(libyuvTest, ARGBRotate90) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = 720; - const int dst_height = 1280; +static void ARGBTestRotate(int src_width, int src_height, + int dst_width, int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + TestRotateBpp(src_width, src_height, + dst_width, dst_height, + mode, benchmark_iterations, + disable_cpu_flags, benchmark_cpu_info, 4); +} - int err = ARGBTestRotate(src_width, src_height, - dst_width, dst_height, kRotate90, - benchmark_iterations_); - EXPECT_GE(1, err); +TEST_F(LibYUVRotateTest, ARGBRotate0_Opt) { + ARGBTestRotate(benchmark_width_, 
benchmark_height_, + benchmark_width_, benchmark_height_, + kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, ARGBRotate180) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = 1280; - const int dst_height = 720; +TEST_F(LibYUVRotateTest, ARGBRotate90_Opt) { + ARGBTestRotate(benchmark_width_, benchmark_height_, + benchmark_height_, benchmark_width_, + kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} - int err = ARGBTestRotate(src_width, src_height, - dst_width, dst_height, kRotate180, - benchmark_iterations_); - EXPECT_GE(1, err); +TEST_F(LibYUVRotateTest, ARGBRotate180_Opt) { + ARGBTestRotate(benchmark_width_, benchmark_height_, + benchmark_width_, benchmark_height_, + kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, ARGBRotate270) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = 720; - const int dst_height = 1280; +TEST_F(LibYUVRotateTest, ARGBRotate270_Opt) { + ARGBTestRotate(benchmark_width_, benchmark_height_, + benchmark_height_, benchmark_width_, + kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} - int err = ARGBTestRotate(src_width, src_height, - dst_width, dst_height, kRotate270, - benchmark_iterations_); - EXPECT_GE(1, err); +static void TestRotatePlane(int src_width, int src_height, + int dst_width, int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + TestRotateBpp(src_width, src_height, + dst_width, dst_height, + mode, benchmark_iterations, + disable_cpu_flags, benchmark_cpu_info, 1); } -TEST_F(libyuvTest, ARGBRotate0_Odd) { - const int src_width = 1277; - const int src_height = 719; - const int dst_width = 1277; - const int dst_height = 719; +TEST_F(LibYUVRotateTest, RotatePlane0_Opt) { + TestRotatePlane(benchmark_width_, benchmark_height_, + 
benchmark_width_, benchmark_height_, + kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} - int err = ARGBTestRotate(src_width, src_height, - dst_width, dst_height, kRotate0, - benchmark_iterations_); - EXPECT_GE(1, err); +TEST_F(LibYUVRotateTest, RotatePlane90_Opt) { + TestRotatePlane(benchmark_width_, benchmark_height_, + benchmark_height_, benchmark_width_, + kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, ARGBRotate90_Odd) { - const int src_width = 1277; - const int src_height = 719; - const int dst_width = 719; - const int dst_height = 1277; +TEST_F(LibYUVRotateTest, RotatePlane180_Opt) { + TestRotatePlane(benchmark_width_, benchmark_height_, + benchmark_width_, benchmark_height_, + kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} - int err = ARGBTestRotate(src_width, src_height, - dst_width, dst_height, kRotate90, - benchmark_iterations_); - EXPECT_GE(1, err); +TEST_F(LibYUVRotateTest, RotatePlane270_Opt) { + TestRotatePlane(benchmark_width_, benchmark_height_, + benchmark_height_, benchmark_width_, + kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, ARGBRotate180_Odd) { - const int src_width = 1277; - const int src_height = 719; - const int dst_width = 1277; - const int dst_height = 719; +TEST_F(LibYUVRotateTest, DISABLED_RotatePlane0_Odd) { + TestRotatePlane(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_width_ - 3, benchmark_height_ - 1, + kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} - int err = ARGBTestRotate(src_width, src_height, - dst_width, dst_height, kRotate180, - benchmark_iterations_); - EXPECT_GE(1, err); +TEST_F(LibYUVRotateTest, DISABLED_RotatePlane90_Odd) { + TestRotatePlane(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_height_ - 1, benchmark_width_ - 3, + kRotate90, benchmark_iterations_, + disable_cpu_flags_, 
benchmark_cpu_info_); } -TEST_F(libyuvTest, ARGBRotate270_Odd) { - const int src_width = 1277; - const int src_height = 719; - const int dst_width = 719; - const int dst_height = 1277; +TEST_F(LibYUVRotateTest, DISABLED_RotatePlane180_Odd) { + TestRotatePlane(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_width_ - 3, benchmark_height_ - 1, + kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} - int err = ARGBTestRotate(src_width, src_height, - dst_width, dst_height, kRotate270, - benchmark_iterations_); - EXPECT_GE(1, err); +TEST_F(LibYUVRotateTest, DISABLED_RotatePlane270_Odd) { + TestRotatePlane(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_height_ - 1, benchmark_width_ - 3, + kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } } // namespace libyuv diff --git a/files/unit_test/rotate_test.cc b/files/unit_test/rotate_test.cc index 788e511e..07e2f73a 100644 --- a/files/unit_test/rotate_test.cc +++ b/files/unit_test/rotate_test.cc @@ -1,1549 +1,296 @@ /* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. + * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <stdlib.h> -#include <time.h> +#include "libyuv/cpu_id.h" #include "libyuv/rotate.h" #include "../unit_test/unit_test.h" namespace libyuv { -void PrintArray(uint8 *array, int w, int h) { - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; ++j) { - printf("%4d", (signed char)array[i * w + j]); - } - printf("\n"); - } +static void I420TestRotate(int src_width, int src_height, + int dst_width, int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i420_y_size = src_width * Abs(src_height); + int src_i420_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2); + int src_i420_size = src_i420_y_size + src_i420_uv_size * 2; + align_buffer_page_end(src_i420, src_i420_size); + for (int i = 0; i < src_i420_size; ++i) { + src_i420[i] = fastrand() & 0xff; + } + + int dst_i420_y_size = dst_width * dst_height; + int dst_i420_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2); + int dst_i420_size = dst_i420_y_size + dst_i420_uv_size * 2; + align_buffer_page_end(dst_i420_c, dst_i420_size); + align_buffer_page_end(dst_i420_opt, dst_i420_size); + memset(dst_i420_c, 2, dst_i420_size); + memset(dst_i420_opt, 3, dst_i420_size); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I420Rotate(src_i420, src_width, + src_i420 + src_i420_y_size, (src_width + 1) / 2, + src_i420 + src_i420_y_size + src_i420_uv_size, (src_width + 1) / 2, + dst_i420_c, dst_width, + dst_i420_c + dst_i420_y_size, (dst_width + 1) / 2, + dst_i420_c + dst_i420_y_size + dst_i420_uv_size, + (dst_width + 1) / 2, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. 
+ for (int i = 0; i < benchmark_iterations; ++i) { + I420Rotate(src_i420, src_width, + src_i420 + src_i420_y_size, (src_width + 1) / 2, + src_i420 + src_i420_y_size + src_i420_uv_size, + (src_width + 1) / 2, + dst_i420_opt, dst_width, + dst_i420_opt + dst_i420_y_size, (dst_width + 1) / 2, + dst_i420_opt + dst_i420_y_size + dst_i420_uv_size, + (dst_width + 1) / 2, + src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i420_size; ++i) { + EXPECT_EQ(dst_i420_c[i], dst_i420_opt[i]); + } + + free_aligned_buffer_page_end(dst_i420_c); + free_aligned_buffer_page_end(dst_i420_opt); + free_aligned_buffer_page_end(src_i420); } -TEST_F(libyuvTest, Transpose) { - int iw, ih, ow, oh; - int err = 0; - - for (iw = 8; iw < rotate_max_w_ && !err; ++iw) { - for (ih = 8; ih < rotate_max_h_ && !err; ++ih) { - int i; - ow = ih; - oh = iw; - - align_buffer_16(input, iw * ih) - align_buffer_16(output_1, ow * oh) - align_buffer_16(output_2, iw * ih) - - for (i = 0; i < iw * ih; ++i) { - input[i] = i; - } - - TransposePlane(input, iw, output_1, ow, iw, ih); - TransposePlane(output_1, ow, output_2, oh, ow, oh); - - for (i = 0; i < iw * ih; ++i) { - if (input[i] != output_2[i]) { - err++; - } - } - - if (err) { - printf("input %dx%d \n", iw, ih); - PrintArray(input, iw, ih); - - printf("transpose 1\n"); - PrintArray(output_1, ow, oh); - - printf("transpose 2\n"); - PrintArray(output_2, iw, ih); - } - - free_aligned_buffer_16(input) - free_aligned_buffer_16(output_1) - free_aligned_buffer_16(output_2) - } - } - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, I420Rotate0_Opt) { + I420TestRotate(benchmark_width_, benchmark_height_, + benchmark_width_, benchmark_height_, + kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, TransposeUV) { - int iw, ih, ow, oh; - int err = 0; - - for (iw = 16; iw < rotate_max_w_ && !err; iw += 2) { - for (ih = 8; ih < rotate_max_h_ && !err; ++ih) { - int i; - - ow = ih; - oh = iw 
>> 1; - - align_buffer_16(input, iw * ih) - align_buffer_16(output_a1, ow * oh) - align_buffer_16(output_b1, ow * oh) - align_buffer_16(output_a2, iw * ih) - align_buffer_16(output_b2, iw * ih) - - for (i = 0; i < iw * ih; i += 2) { - input[i] = i >> 1; - input[i + 1] = -(i >> 1); - } - - TransposeUV(input, iw, output_a1, ow, output_b1, ow, iw >> 1, ih); - - TransposePlane(output_a1, ow, output_a2, oh, ow, oh); - TransposePlane(output_b1, ow, output_b2, oh, ow, oh); - - for (i = 0; i < iw * ih; i += 2) { - if (input[i] != output_a2[i >> 1]) { - err++; - } - if (input[i + 1] != output_b2[i >> 1]) { - err++; - } - } - - if (err) { - printf("input %dx%d \n", iw, ih); - PrintArray(input, iw, ih); - - printf("transpose 1\n"); - PrintArray(output_a1, ow, oh); - PrintArray(output_b1, ow, oh); - - printf("transpose 2\n"); - PrintArray(output_a2, oh, ow); - PrintArray(output_b2, oh, ow); - } - - free_aligned_buffer_16(input) - free_aligned_buffer_16(output_a1) - free_aligned_buffer_16(output_b1) - free_aligned_buffer_16(output_a2) - free_aligned_buffer_16(output_b2) - } - } - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, I420Rotate90_Opt) { + I420TestRotate(benchmark_width_, benchmark_height_, + benchmark_height_, benchmark_width_, + kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, RotatePlane90) { - int iw, ih, ow, oh; - int err = 0; - - for (iw = 8; iw < rotate_max_w_ && !err; ++iw) { - for (ih = 8; ih < rotate_max_h_ && !err; ++ih) { - int i; - - ow = ih; - oh = iw; - - align_buffer_16(input, iw * ih) - align_buffer_16(output_0, iw * ih) - align_buffer_16(output_90, ow * oh) - align_buffer_16(output_180, iw * ih) - align_buffer_16(output_270, ow * oh) - - for (i = 0; i < iw * ih; ++i) { - input[i] = i; - } - - RotatePlane90(input, iw, output_90, ow, iw, ih); - RotatePlane90(output_90, ow, output_180, oh, ow, oh); - RotatePlane90(output_180, oh, output_270, ow, oh, ow); - RotatePlane90(output_270, ow, output_0, iw, ow, 
oh); - - for (i = 0; i < iw * ih; ++i) { - if (input[i] != output_0[i]) { - err++; - } - } - - if (err) { - printf("input %dx%d \n", iw, ih); - PrintArray(input, iw, ih); - - printf("output 90\n"); - PrintArray(output_90, ow, oh); - - printf("output 180\n"); - PrintArray(output_180, iw, ih); - - printf("output 270\n"); - PrintArray(output_270, ow, oh); - - printf("output 0\n"); - PrintArray(output_0, iw, ih); - } - - free_aligned_buffer_16(input) - free_aligned_buffer_16(output_0) - free_aligned_buffer_16(output_90) - free_aligned_buffer_16(output_180) - free_aligned_buffer_16(output_270) - } - } - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, I420Rotate180_Opt) { + I420TestRotate(benchmark_width_, benchmark_height_, + benchmark_width_, benchmark_height_, + kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, RotateUV90) { - int iw, ih, ow, oh; - int err = 0; - - for (iw = 16; iw < rotate_max_w_ && !err; iw += 2) { - for (ih = 8; ih < rotate_max_h_ && !err; ++ih) { - int i; - - ow = ih; - oh = iw >> 1; - - align_buffer_16(input, iw * ih) - align_buffer_16(output_0_u, ow * oh) - align_buffer_16(output_0_v, ow * oh) - align_buffer_16(output_90_u, ow * oh) - align_buffer_16(output_90_v, ow * oh) - align_buffer_16(output_180_u, ow * oh) - align_buffer_16(output_180_v, ow * oh) - - for (i = 0; i < iw * ih; i += 2) { - input[i] = i >> 1; - input[i + 1] = -(i >> 1); - } - - RotateUV90(input, iw, output_90_u, ow, output_90_v, ow, iw >> 1, ih); - - RotatePlane90(output_90_u, ow, output_180_u, oh, ow, oh); - RotatePlane90(output_90_v, ow, output_180_v, oh, ow, oh); - - RotatePlane180(output_180_u, ow, output_0_u, ow, ow, oh); - RotatePlane180(output_180_v, ow, output_0_v, ow, ow, oh); - - for (i = 0; i < (ow * oh); ++i) { - if (output_0_u[i] != (uint8)i) { - err++; - } - if (output_0_v[i] != (uint8)(-i)) { - err++; - } - } - - if (err) { - printf("input %dx%d \n", iw, ih); - PrintArray(input, iw, ih); - - printf("output 
90_u\n"); - PrintArray(output_90_u, ow, oh); - - printf("output 90_v\n"); - PrintArray(output_90_v, ow, oh); - - printf("output 180_u\n"); - PrintArray(output_180_u, oh, ow); - - printf("output 180_v\n"); - PrintArray(output_180_v, oh, ow); - - printf("output 0_u\n"); - PrintArray(output_0_u, oh, ow); - - printf("output 0_v\n"); - PrintArray(output_0_v, oh, ow); - } - - free_aligned_buffer_16(input) - free_aligned_buffer_16(output_0_u) - free_aligned_buffer_16(output_0_v) - free_aligned_buffer_16(output_90_u) - free_aligned_buffer_16(output_90_v) - free_aligned_buffer_16(output_180_u) - free_aligned_buffer_16(output_180_v) - } - } - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, I420Rotate270_Opt) { + I420TestRotate(benchmark_width_, benchmark_height_, + benchmark_height_, benchmark_width_, + kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, RotateUV180) { - int iw, ih, ow, oh; - int err = 0; - - for (iw = 16; iw < rotate_max_w_ && !err; iw += 2) { - for (ih = 8; ih < rotate_max_h_ && !err; ++ih) { - int i; - - ow = iw >> 1; - oh = ih; - - align_buffer_16(input, iw * ih) - align_buffer_16(output_0_u, ow * oh) - align_buffer_16(output_0_v, ow * oh) - align_buffer_16(output_90_u, ow * oh) - align_buffer_16(output_90_v, ow * oh) - align_buffer_16(output_180_u, ow * oh) - align_buffer_16(output_180_v, ow * oh) - - for (i = 0; i < iw * ih; i += 2) { - input[i] = i >> 1; - input[i + 1] = -(i >> 1); - } - - RotateUV180(input, iw, output_180_u, ow, output_180_v, ow, iw >> 1, ih); - - RotatePlane90(output_180_u, ow, output_90_u, oh, ow, oh); - RotatePlane90(output_180_v, ow, output_90_v, oh, ow, oh); - - RotatePlane90(output_90_u, oh, output_0_u, ow, oh, ow); - RotatePlane90(output_90_v, oh, output_0_v, ow, oh, ow); - - for (i = 0; i < (ow * oh); ++i) { - if (output_0_u[i] != (uint8)i) { - err++; - } - if (output_0_v[i] != (uint8)(-i)) { - err++; - } - } - - if (err) { - printf("input %dx%d \n", iw, ih); - PrintArray(input, 
iw, ih); - - printf("output 180_u\n"); - PrintArray(output_180_u, oh, ow); - - printf("output 180_v\n"); - PrintArray(output_180_v, oh, ow); - - printf("output 90_u\n"); - PrintArray(output_90_u, oh, ow); - - printf("output 90_v\n"); - PrintArray(output_90_v, oh, ow); - - printf("output 0_u\n"); - PrintArray(output_0_u, ow, oh); - - printf("output 0_v\n"); - PrintArray(output_0_v, ow, oh); - } - - free_aligned_buffer_16(input) - free_aligned_buffer_16(output_0_u) - free_aligned_buffer_16(output_0_v) - free_aligned_buffer_16(output_90_u) - free_aligned_buffer_16(output_90_v) - free_aligned_buffer_16(output_180_u) - free_aligned_buffer_16(output_180_v) - } - } - - EXPECT_EQ(0, err); +// TODO(fbarchard): Remove odd width tests. +// Odd width tests work but disabled because they use C code and can be +// tested by passing an odd width command line or environment variable. +TEST_F(LibYUVRotateTest, DISABLED_I420Rotate0_Odd) { + I420TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_width_ - 3, benchmark_height_ - 1, + kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, RotateUV270) { - int iw, ih, ow, oh; - int err = 0; - - for (iw = 16; iw < rotate_max_w_ && !err; iw += 2) { - for (ih = 8; ih < rotate_max_h_ && !err; ++ih) { - int i; - - ow = ih; - oh = iw >> 1; - - align_buffer_16(input, iw * ih) - align_buffer_16(output_0_u, ow * oh) - align_buffer_16(output_0_v, ow * oh) - align_buffer_16(output_270_u, ow * oh) - align_buffer_16(output_270_v, ow * oh) - align_buffer_16(output_180_u, ow * oh) - align_buffer_16(output_180_v, ow * oh) - - for (i = 0; i < iw * ih; i += 2) { - input[i] = i >> 1; - input[i + 1] = -(i >> 1); - } - - RotateUV270(input, iw, output_270_u, ow, output_270_v, ow, - iw >> 1, ih); - - RotatePlane270(output_270_u, ow, output_180_u, oh, ow, oh); - RotatePlane270(output_270_v, ow, output_180_v, oh, ow, oh); - - RotatePlane180(output_180_u, ow, output_0_u, ow, ow, oh); - 
RotatePlane180(output_180_v, ow, output_0_v, ow, ow, oh); - - for (i = 0; i < (ow * oh); ++i) { - if (output_0_u[i] != (uint8)i) { - err++; - } - if (output_0_v[i] != (uint8)(-i)) { - err++; - } - } - - if (err) { - printf("input %dx%d \n", iw, ih); - PrintArray(input, iw, ih); - - printf("output 270_u\n"); - PrintArray(output_270_u, ow, oh); - - printf("output 270_v\n"); - PrintArray(output_270_v, ow, oh); - - printf("output 180_u\n"); - PrintArray(output_180_u, oh, ow); - - printf("output 180_v\n"); - PrintArray(output_180_v, oh, ow); - - printf("output 0_u\n"); - PrintArray(output_0_u, oh, ow); - - printf("output 0_v\n"); - PrintArray(output_0_v, oh, ow); - } - - free_aligned_buffer_16(input) - free_aligned_buffer_16(output_0_u) - free_aligned_buffer_16(output_0_v) - free_aligned_buffer_16(output_270_u) - free_aligned_buffer_16(output_270_v) - free_aligned_buffer_16(output_180_u) - free_aligned_buffer_16(output_180_v) - } - } - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, DISABLED_I420Rotate90_Odd) { + I420TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_height_ - 1, benchmark_width_ - 3, + kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, RotatePlane180) { - int iw, ih, ow, oh; - int err = 0; - - for (iw = 8; iw < rotate_max_w_ && !err; ++iw) - for (ih = 8; ih < rotate_max_h_ && !err; ++ih) { - int i; - - ow = iw; - oh = ih; - - align_buffer_16(input, iw * ih) - align_buffer_16(output_0, iw * ih) - align_buffer_16(output_180, iw * ih) - - for (i = 0; i < iw * ih; ++i) { - input[i] = i; - } - - RotatePlane180(input, iw, output_180, ow, iw, ih); - RotatePlane180(output_180, ow, output_0, iw, ow, oh); - - for (i = 0; i < iw * ih; ++i) { - if (input[i] != output_0[i]) { - err++; - } - } - - if (err) { - printf("input %dx%d \n", iw, ih); - PrintArray(input, iw, ih); - - printf("output 180\n"); - PrintArray(output_180, iw, ih); - - printf("output 0\n"); - PrintArray(output_0, iw, ih); - } - - 
free_aligned_buffer_16(input) - free_aligned_buffer_16(output_0) - free_aligned_buffer_16(output_180) - } - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, DISABLED_I420Rotate180_Odd) { + I420TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_width_ - 3, benchmark_height_ - 1, + kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, RotatePlane270) { - int iw, ih, ow, oh; - int err = 0; - - for (iw = 8; iw < rotate_max_w_ && !err; ++iw) { - for (ih = 8; ih < rotate_max_h_ && !err; ++ih) { - int i; - - ow = ih; - oh = iw; - - align_buffer_16(input, iw * ih) - align_buffer_16(output_0, iw * ih) - align_buffer_16(output_90, ow * oh) - align_buffer_16(output_180, iw * ih) - align_buffer_16(output_270, ow * oh) - - for (i = 0; i < iw * ih; ++i) - input[i] = i; - - RotatePlane270(input, iw, output_270, ow, iw, ih); - RotatePlane270(output_270, ow, output_180, oh, ow, oh); - RotatePlane270(output_180, oh, output_90, ow, oh, ow); - RotatePlane270(output_90, ow, output_0, iw, ow, oh); - - for (i = 0; i < iw * ih; ++i) { - if (input[i] != output_0[i]) { - err++; - } - } - - if (err) { - printf("input %dx%d \n", iw, ih); - PrintArray(input, iw, ih); - - printf("output 270\n"); - PrintArray(output_270, ow, oh); - - printf("output 180\n"); - PrintArray(output_180, iw, ih); - - printf("output 90\n"); - PrintArray(output_90, ow, oh); - - printf("output 0\n"); - PrintArray(output_0, iw, ih); - } - - free_aligned_buffer_16(input) - free_aligned_buffer_16(output_0) - free_aligned_buffer_16(output_90) - free_aligned_buffer_16(output_180) - free_aligned_buffer_16(output_270) - } - } - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) { + I420TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_height_ - 1, benchmark_width_ - 3, + kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, RotatePlane90and270) { - int iw, ih, ow, oh; - int err = 
0; - - for (iw = 16; iw < rotate_max_w_ && !err; iw += 4) - for (ih = 16; ih < rotate_max_h_ && !err; ih += 4) { - int i; - - ow = ih; - oh = iw; - - align_buffer_16(input, iw * ih) - align_buffer_16(output_0, iw * ih) - align_buffer_16(output_90, ow * oh) - - for (i = 0; i < iw * ih; ++i) { - input[i] = i; - } - - RotatePlane90(input, iw, output_90, ow, iw, ih); - RotatePlane270(output_90, ow, output_0, iw, ow, oh); - - for (i = 0; i < iw * ih; ++i) { - if (input[i] != output_0[i]) { - err++; - } - } - - if (err) { - printf("intput %dx%d\n", iw, ih); - PrintArray(input, iw, ih); - - printf("output \n"); - PrintArray(output_90, ow, oh); - - printf("output \n"); - PrintArray(output_0, iw, ih); - } - - free_aligned_buffer_16(input) - free_aligned_buffer_16(output_0) - free_aligned_buffer_16(output_90) - } - - EXPECT_EQ(0, err); +static void NV12TestRotate(int src_width, int src_height, + int dst_width, int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { // allow negative for inversion test. 
+ src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_nv12_y_size = src_width * Abs(src_height); + int src_nv12_uv_size = + ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2) * 2; + int src_nv12_size = src_nv12_y_size + src_nv12_uv_size; + align_buffer_page_end(src_nv12, src_nv12_size); + for (int i = 0; i < src_nv12_size; ++i) { + src_nv12[i] = fastrand() & 0xff; + } + + int dst_i420_y_size = dst_width * dst_height; + int dst_i420_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2); + int dst_i420_size = dst_i420_y_size + dst_i420_uv_size * 2; + align_buffer_page_end(dst_i420_c, dst_i420_size); + align_buffer_page_end(dst_i420_opt, dst_i420_size); + memset(dst_i420_c, 2, dst_i420_size); + memset(dst_i420_opt, 3, dst_i420_size); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + NV12ToI420Rotate(src_nv12, src_width, + src_nv12 + src_nv12_y_size, (src_width + 1) & ~1, + dst_i420_c, dst_width, + dst_i420_c + dst_i420_y_size, (dst_width + 1) / 2, + dst_i420_c + dst_i420_y_size + dst_i420_uv_size, + (dst_width + 1) / 2, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + NV12ToI420Rotate(src_nv12, src_width, + src_nv12 + src_nv12_y_size, (src_width + 1) & ~1, + dst_i420_opt, dst_width, + dst_i420_opt + dst_i420_y_size, (dst_width + 1) / 2, + dst_i420_opt + dst_i420_y_size + dst_i420_uv_size, + (dst_width + 1) / 2, + src_width, src_height, mode); + } + + // Rotation should be exact. 
+ for (int i = 0; i < dst_i420_size; ++i) { + EXPECT_EQ(dst_i420_c[i], dst_i420_opt[i]); + } + + free_aligned_buffer_page_end(dst_i420_c); + free_aligned_buffer_page_end(dst_i420_opt); + free_aligned_buffer_page_end(src_nv12); } -TEST_F(libyuvTest, RotatePlane90Pitch) { - int iw, ih; - int err = 0; - - for (iw = 16; iw < rotate_max_w_ && !err; iw += 4) - for (ih = 16; ih < rotate_max_h_ && !err; ih += 4) { - int i; - - int ow = ih; - int oh = iw; - - align_buffer_16(input, iw * ih) - align_buffer_16(output_0, iw * ih) - align_buffer_16(output_90, ow * oh) - - for (i = 0; i < iw * ih; ++i) { - input[i] = i; - } - - RotatePlane90(input, iw, - output_90 + (ow >> 1), ow, - iw >> 1, ih >> 1); - RotatePlane90(input + (iw >> 1), iw, - output_90 + (ow >> 1) + ow * (oh >> 1), ow, - iw >> 1, ih >> 1); - RotatePlane90(input + iw * (ih >> 1), iw, - output_90, ow, - iw >> 1, ih >> 1); - RotatePlane90(input + (iw >> 1) + iw * (ih >> 1), iw, - output_90 + ow * (oh >> 1), ow, - iw >> 1, ih >> 1); - - RotatePlane270(output_90, ih, output_0, iw, ow, oh); - - for (i = 0; i < iw * ih; ++i) { - if (input[i] != output_0[i]) { - err++; - } - } - - if (err) { - printf("intput %dx%d\n", iw, ih); - PrintArray(input, iw, ih); - - printf("output \n"); - PrintArray(output_90, ow, oh); - - printf("output \n"); - PrintArray(output_0, iw, ih); - } - - free_aligned_buffer_16(input) - free_aligned_buffer_16(output_0) - free_aligned_buffer_16(output_90) - } - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, NV12Rotate0_Opt) { + NV12TestRotate(benchmark_width_, benchmark_height_, + benchmark_width_, benchmark_height_, + kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, RotatePlane270Pitch) { - int iw, ih, ow, oh; - int err = 0; - - for (iw = 16; iw < rotate_max_w_ && !err; iw += 4) { - for (ih = 16; ih < rotate_max_h_ && !err; ih += 4) { - int i; - - ow = ih; - oh = iw; - - align_buffer_16(input, iw * ih) - align_buffer_16(output_0, iw * ih) - 
align_buffer_16(output_270, ow * oh) - - for (i = 0; i < iw * ih; ++i) { - input[i] = i; - } - - RotatePlane270(input, iw, - output_270 + ow * (oh >> 1), ow, - iw >> 1, ih >> 1); - RotatePlane270(input + (iw >> 1), iw, - output_270, ow, - iw >> 1, ih >> 1); - RotatePlane270(input + iw * (ih >> 1), iw, - output_270 + (ow >> 1) + ow * (oh >> 1), ow, - iw >> 1, ih >> 1); - RotatePlane270(input + (iw >> 1) + iw * (ih >> 1), iw, - output_270 + (ow >> 1), ow, - iw >> 1, ih >> 1); - - RotatePlane90(output_270, ih, output_0, iw, ow, oh); - - for (i = 0; i < iw * ih; ++i) { - if (input[i] != output_0[i]) { - err++; - } - } - - if (err) { - printf("intput %dx%d\n", iw, ih); - PrintArray(input, iw, ih); - - printf("output \n"); - PrintArray(output_270, ow, oh); - - printf("output \n"); - PrintArray(output_0, iw, ih); - } - - free_aligned_buffer_16(input) - free_aligned_buffer_16(output_0) - free_aligned_buffer_16(output_270) - } - } - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, NV12Rotate90_Opt) { + NV12TestRotate(benchmark_width_, benchmark_height_, + benchmark_height_, benchmark_width_, + kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, I420Rotate90) { - int err = 0; - - int yw = 1024; - int yh = 768; - int b = 128; - int uvw = (yw + 1) >> 1; - int uvh = (yh + 1) >> 1; - - int i, j; - - int y_plane_size = (yw + b * 2) * (yh + b * 2); - int uv_plane_size = (uvw + b * 2) * (uvh + b * 2); - - srandom(time(NULL)); - - align_buffer_16(orig_y, y_plane_size) - align_buffer_16(orig_u, uv_plane_size) - align_buffer_16(orig_v, uv_plane_size) - align_buffer_16(ro0_y, y_plane_size) - align_buffer_16(ro0_u, uv_plane_size) - align_buffer_16(ro0_v, uv_plane_size) - align_buffer_16(ro90_y, y_plane_size) - align_buffer_16(ro90_u, uv_plane_size) - align_buffer_16(ro90_v, uv_plane_size) - align_buffer_16(ro270_y, y_plane_size) - align_buffer_16(ro270_u, uv_plane_size) - align_buffer_16(ro270_v, uv_plane_size) - memset(orig_y, 0, 
y_plane_size); - memset(orig_u, 0, uv_plane_size); - memset(orig_v, 0, uv_plane_size); - memset(ro0_y, 0, y_plane_size); - memset(ro0_u, 0, uv_plane_size); - memset(ro0_v, 0, uv_plane_size); - memset(ro90_y, 0, y_plane_size); - memset(ro90_u, 0, uv_plane_size); - memset(ro90_v, 0, uv_plane_size); - memset(ro270_y, 0, y_plane_size); - memset(ro270_u, 0, uv_plane_size); - memset(ro270_v, 0, uv_plane_size); - - // fill image buffers with random data - for (i = b; i < (yh + b); ++i) { - for (j = b; j < (yw + b); ++j) { - orig_y[i * (yw + b * 2) + j] = random() & 0xff; - } - } - - for (i = b; i < (uvh + b); ++i) { - for (j = b; j < (uvw + b); ++j) { - orig_u[i * (uvw + b * 2) + j] = random() & 0xff; - orig_v[i * (uvw + b * 2) + j] = random() & 0xff; - } - } - - int y_off_0 = b * (yw + b * 2) + b; - int uv_off_0 = b * (uvw + b * 2) + b; - int y_off_90 = b * (yh + b * 2) + b; - int uv_off_90 = b * (uvh + b * 2) + b; - - int y_st_0 = yw + b * 2; - int uv_st_0 = uvw + b * 2; - int y_st_90 = yh + b * 2; - int uv_st_90 = uvh + b * 2; - - I420Rotate(orig_y+y_off_0, y_st_0, - orig_u+uv_off_0, uv_st_0, - orig_v+uv_off_0, uv_st_0, - ro90_y+y_off_90, y_st_90, - ro90_u+uv_off_90, uv_st_90, - ro90_v+uv_off_90, uv_st_90, - yw, yh, - kRotateClockwise); - - I420Rotate(ro90_y+y_off_90, y_st_90, - ro90_u+uv_off_90, uv_st_90, - ro90_v+uv_off_90, uv_st_90, - ro270_y+y_off_90, y_st_90, - ro270_u+uv_off_90, uv_st_90, - ro270_v+uv_off_90, uv_st_90, - yh, yw, - kRotate180); - - I420Rotate(ro270_y+y_off_90, y_st_90, - ro270_u+uv_off_90, uv_st_90, - ro270_v+uv_off_90, uv_st_90, - ro0_y+y_off_0, y_st_0, - ro0_u+uv_off_0, uv_st_0, - ro0_v+uv_off_0, uv_st_0, - yh, yw, - kRotateClockwise); - - for (i = 0; i < y_plane_size; ++i) { - if (orig_y[i] != ro0_y[i]) { - ++err; - } - } - - for (i = 0; i < uv_plane_size; ++i) { - if (orig_u[i] != ro0_u[i]) { - ++err; - } - if (orig_v[i] != ro0_v[i]) { - ++err; - } - } - - free_aligned_buffer_16(orig_y) - free_aligned_buffer_16(orig_u) - 
free_aligned_buffer_16(orig_v) - free_aligned_buffer_16(ro0_y) - free_aligned_buffer_16(ro0_u) - free_aligned_buffer_16(ro0_v) - free_aligned_buffer_16(ro90_y) - free_aligned_buffer_16(ro90_u) - free_aligned_buffer_16(ro90_v) - free_aligned_buffer_16(ro270_y) - free_aligned_buffer_16(ro270_u) - free_aligned_buffer_16(ro270_v) - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, NV12Rotate180_Opt) { + NV12TestRotate(benchmark_width_, benchmark_height_, + benchmark_width_, benchmark_height_, + kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, I420Rotate270) { - int err = 0; - - int yw = 1024; - int yh = 768; - int b = 128; - int uvw = (yw + 1) >> 1; - int uvh = (yh + 1) >> 1; - - int i, j; - - int y_plane_size = (yw + b * 2) * (yh + b * 2); - int uv_plane_size = (uvw + b * 2) * (uvh + b * 2); - - srandom(time(NULL)); - - align_buffer_16(orig_y, y_plane_size) - align_buffer_16(orig_u, uv_plane_size) - align_buffer_16(orig_v, uv_plane_size) - align_buffer_16(ro0_y, y_plane_size) - align_buffer_16(ro0_u, uv_plane_size) - align_buffer_16(ro0_v, uv_plane_size) - align_buffer_16(ro90_y, y_plane_size) - align_buffer_16(ro90_u, uv_plane_size) - align_buffer_16(ro90_v, uv_plane_size) - align_buffer_16(ro270_y, y_plane_size) - align_buffer_16(ro270_u, uv_plane_size) - align_buffer_16(ro270_v, uv_plane_size) - memset(orig_y, 0, y_plane_size); - memset(orig_u, 0, uv_plane_size); - memset(orig_v, 0, uv_plane_size); - memset(ro0_y, 0, y_plane_size); - memset(ro0_u, 0, uv_plane_size); - memset(ro0_v, 0, uv_plane_size); - memset(ro90_y, 0, y_plane_size); - memset(ro90_u, 0, uv_plane_size); - memset(ro90_v, 0, uv_plane_size); - memset(ro270_y, 0, y_plane_size); - memset(ro270_u, 0, uv_plane_size); - memset(ro270_v, 0, uv_plane_size); - - // fill image buffers with random data - for (i = b; i < (yh + b); ++i) { - for (j = b; j < (yw + b); ++j) { - orig_y[i * (yw + b * 2) + j] = random() & 0xff; - } - } - - for (i = b; i < (uvh + b); ++i) 
{ - for (j = b; j < (uvw + b); ++j) { - orig_u[i * (uvw + b * 2) + j] = random() & 0xff; - orig_v[i * (uvw + b * 2) + j] = random() & 0xff; - } - } - - int y_off_0 = b * (yw + b * 2) + b; - int uv_off_0 = b * (uvw + b * 2) + b; - int y_off_90 = b * (yh + b * 2) + b; - int uv_off_90 = b * (uvh + b * 2) + b; - - int y_st_0 = yw + b * 2; - int uv_st_0 = uvw + b * 2; - int y_st_90 = yh + b * 2; - int uv_st_90 = uvh + b * 2; - - I420Rotate(orig_y+y_off_0, y_st_0, - orig_u+uv_off_0, uv_st_0, - orig_v+uv_off_0, uv_st_0, - ro270_y+y_off_90, y_st_90, - ro270_u+uv_off_90, uv_st_90, - ro270_v+uv_off_90, uv_st_90, - yw, yh, - kRotateCounterClockwise); - - I420Rotate(ro270_y+y_off_90, y_st_90, - ro270_u+uv_off_90, uv_st_90, - ro270_v+uv_off_90, uv_st_90, - ro90_y+y_off_90, y_st_90, - ro90_u+uv_off_90, uv_st_90, - ro90_v+uv_off_90, uv_st_90, - yh, yw, - kRotate180); - - I420Rotate(ro90_y+y_off_90, y_st_90, - ro90_u+uv_off_90, uv_st_90, - ro90_v+uv_off_90, uv_st_90, - ro0_y+y_off_0, y_st_0, - ro0_u+uv_off_0, uv_st_0, - ro0_v+uv_off_0, uv_st_0, - yh, yw, - kRotateCounterClockwise); - - for (i = 0; i < y_plane_size; ++i) { - if (orig_y[i] != ro0_y[i]) { - ++err; - } - } - - for (i = 0; i < uv_plane_size; ++i) { - if (orig_u[i] != ro0_u[i]) { - ++err; - } - if (orig_v[i] != ro0_v[i]) { - ++err; - } - } - - free_aligned_buffer_16(orig_y) - free_aligned_buffer_16(orig_u) - free_aligned_buffer_16(orig_v) - free_aligned_buffer_16(ro0_y) - free_aligned_buffer_16(ro0_u) - free_aligned_buffer_16(ro0_v) - free_aligned_buffer_16(ro90_y) - free_aligned_buffer_16(ro90_u) - free_aligned_buffer_16(ro90_v) - free_aligned_buffer_16(ro270_y) - free_aligned_buffer_16(ro270_u) - free_aligned_buffer_16(ro270_v) - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, NV12Rotate270_Opt) { + NV12TestRotate(benchmark_width_, benchmark_height_, + benchmark_height_, benchmark_width_, + kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, NV12ToI420Rotate90) { - int 
err = 0; - - int yw = 1024; - int yh = 768; - int b = 128; - int uvw = (yw + 1) >> 1; - int uvh = (yh + 1) >> 1; - int i, j; - - int y_plane_size = (yw + b * 2) * (yh + b * 2); - int uv_plane_size = (uvw + b * 2) * (uvh + b * 2); - int o_uv_plane_size = (uvw * 2 + b * 2) * (uvh + b * 2); - - srandom(time(NULL)); - - align_buffer_16(orig_y, y_plane_size) - align_buffer_16(orig_uv, o_uv_plane_size) - align_buffer_16(ro0_y, y_plane_size) - align_buffer_16(ro0_u, uv_plane_size) - align_buffer_16(ro0_v, uv_plane_size) - align_buffer_16(ro90_y, y_plane_size) - align_buffer_16(ro90_u, uv_plane_size) - align_buffer_16(ro90_v, uv_plane_size) - memset(orig_y, 0, y_plane_size); - memset(orig_uv, 0, uv_plane_size); - memset(ro0_y, 0, y_plane_size); - memset(ro0_u, 0, uv_plane_size); - memset(ro0_v, 0, uv_plane_size); - memset(ro90_y, 0, y_plane_size); - memset(ro90_u, 0, uv_plane_size); - memset(ro90_v, 0, uv_plane_size); - - // fill image buffers with random data - for (i = b; i < (yh + b); ++i) { - for (j = b; j < (yw + b); ++j) { - orig_y[i * (yw + b * 2) + j] = random() & 0xff; - } - } - - for (i = b; i < (uvh + b); ++i) { - for (j = b; j < (uvw * 2 + b); j += 2) { - uint8 random_number = random() & 0x7f; - orig_uv[i * (uvw * 2 + b * 2) + j] = random_number; - orig_uv[i * (uvw * 2 + b * 2) + j + 1] = -random_number; - } - } - - int y_off_0 = b * (yw + b * 2) + b; - int uv_off_0 = b * (uvw + b * 2) + b; - int y_off_90 = b * (yh + b * 2) + b; - int uv_off_90 = b * (uvh + b * 2) + b; - - int y_st_0 = yw + b * 2; - int uv_st_0 = uvw + b * 2; - int y_st_90 = yh + b * 2; - int uv_st_90 = uvh + b * 2; - - NV12ToI420Rotate(orig_y+y_off_0, y_st_0, - orig_uv+y_off_0, y_st_0, - ro90_y+y_off_90, y_st_90, - ro90_u+uv_off_90, uv_st_90, - ro90_v+uv_off_90, uv_st_90, - yw, yh, - kRotateClockwise); - - I420Rotate(ro90_y+y_off_90, y_st_90, - ro90_u+uv_off_90, uv_st_90, - ro90_v+uv_off_90, uv_st_90, - ro0_y+y_off_0, y_st_0, - ro0_u+uv_off_0, uv_st_0, - ro0_v+uv_off_0, uv_st_0, - yh, yw, - 
kRotateCounterClockwise); - - for (i = 0; i < y_plane_size; ++i) { - if (orig_y[i] != ro0_y[i]) - ++err; - } - - int zero_cnt = 0; - - for (i = 0; i < uv_plane_size; ++i) { - if ((signed char)ro0_u[i] != -(signed char)ro0_v[i]) { - ++err; - } - if (ro0_u[i] != 0) { - ++zero_cnt; - } - } - - if (!zero_cnt) { - ++err; - } - - free_aligned_buffer_16(orig_y) - free_aligned_buffer_16(orig_uv) - free_aligned_buffer_16(ro0_y) - free_aligned_buffer_16(ro0_u) - free_aligned_buffer_16(ro0_v) - free_aligned_buffer_16(ro90_y) - free_aligned_buffer_16(ro90_u) - free_aligned_buffer_16(ro90_v) - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate0_Odd) { + NV12TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_width_ - 3, benchmark_height_ - 1, + kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, NV12ToI420Rotate270) { - int err = 0; - - int yw = 1024; - int yh = 768; - int b = 128; - int uvw = (yw + 1) >> 1; - int uvh = (yh + 1) >> 1; - - int i, j; - - int y_plane_size = (yw + b * 2) * (yh + b * 2); - int uv_plane_size = (uvw + b * 2) * (uvh + b * 2); - int o_uv_plane_size = (uvw * 2 + b * 2) * (uvh + b * 2); - - srandom(time(NULL)); - - align_buffer_16(orig_y, y_plane_size) - align_buffer_16(orig_uv, o_uv_plane_size) - align_buffer_16(ro0_y, y_plane_size) - align_buffer_16(ro0_u, uv_plane_size) - align_buffer_16(ro0_v, uv_plane_size) - align_buffer_16(ro270_y, y_plane_size) - align_buffer_16(ro270_u, uv_plane_size) - align_buffer_16(ro270_v, uv_plane_size) - memset(orig_y, 0, y_plane_size); - memset(orig_uv, 0, o_uv_plane_size); - memset(ro0_y, 0, y_plane_size); - memset(ro0_u, 0, uv_plane_size); - memset(ro0_v, 0, uv_plane_size); - memset(ro270_y, 0, y_plane_size); - memset(ro270_u, 0, uv_plane_size); - memset(ro270_v, 0, uv_plane_size); - - // fill image buffers with random data - for (i = b; i < (yh + b); ++i) { - for (j = b; j < (yw + b); ++j) { - orig_y[i * (yw + b * 2) + j] = random() & 0xff; 
- } - } - - for (i = b; i < (uvh + b); ++i) { - for (j = b; j < (uvw * 2 + b); j += 2) { - uint8 random_number = random() & 0x7f; - orig_uv[i * (uvw * 2 + b * 2) + j] = random_number; - orig_uv[i * (uvw * 2 + b * 2) + j + 1] = -random_number; - } - } - - int y_off_0 = b * (yw + b * 2) + b; - int uv_off_0 = b * (uvw + b * 2) + b; - int y_off_270 = b * (yh + b * 2) + b; - int uv_off_270 = b * (uvh + b * 2) + b; - - int y_st_0 = yw + b * 2; - int uv_st_0 = uvw + b * 2; - int y_st_270 = yh + b * 2; - int uv_st_270 = uvh + b * 2; - - NV12ToI420Rotate(orig_y+y_off_0, y_st_0, - orig_uv+y_off_0, y_st_0, - ro270_y+y_off_270, y_st_270, - ro270_u+uv_off_270, uv_st_270, - ro270_v+uv_off_270, uv_st_270, - yw, yh, - kRotateCounterClockwise); - - I420Rotate(ro270_y+y_off_270, y_st_270, - ro270_u+uv_off_270, uv_st_270, - ro270_v+uv_off_270, uv_st_270, - ro0_y+y_off_0, y_st_0, - ro0_u+uv_off_0, uv_st_0, - ro0_v+uv_off_0, uv_st_0, - yh, yw, - kRotateClockwise); - - for (i = 0; i < y_plane_size; ++i) { - if (orig_y[i] != ro0_y[i]) - ++err; - } - - int zero_cnt = 0; - - for (i = 0; i < uv_plane_size; ++i) { - if ((signed char)ro0_u[i] != -(signed char)ro0_v[i]) { - ++err; - } - if (ro0_u[i] != 0) { - ++zero_cnt; - } - } - - if (!zero_cnt) { - ++err; - } - - free_aligned_buffer_16(orig_y) - free_aligned_buffer_16(orig_uv) - free_aligned_buffer_16(ro0_y) - free_aligned_buffer_16(ro0_u) - free_aligned_buffer_16(ro0_v) - free_aligned_buffer_16(ro270_y) - free_aligned_buffer_16(ro270_u) - free_aligned_buffer_16(ro270_v) - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate90_Odd) { + NV12TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_height_ - 1, benchmark_width_ - 3, + kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, NV12ToI420Rotate180) { - int err = 0; - - int yw = 1024; - int yh = 768; - int b = 128; - int uvw = (yw + 1) >> 1; - int uvh = (yh + 1) >> 1; - - int i, j; - - int y_plane_size = (yw + b * 
2) * (yh + b * 2); - int uv_plane_size = (uvw + b * 2) * (uvh + b * 2); - int o_uv_plane_size = (uvw * 2 + b * 2) * (uvh + b * 2); - - srandom(time(NULL)); - - align_buffer_16(orig_y, y_plane_size) - align_buffer_16(orig_uv, o_uv_plane_size) - align_buffer_16(ro0_y, y_plane_size) - align_buffer_16(ro0_u, uv_plane_size) - align_buffer_16(ro0_v, uv_plane_size) - align_buffer_16(ro180_y, y_plane_size) - align_buffer_16(ro180_u, uv_plane_size) - align_buffer_16(ro180_v, uv_plane_size) - memset(orig_y, 0, y_plane_size); - memset(orig_uv, 0, o_uv_plane_size); - memset(ro0_y, 0, y_plane_size); - memset(ro0_u, 0, uv_plane_size); - memset(ro0_v, 0, uv_plane_size); - memset(ro180_y, 0, y_plane_size); - memset(ro180_u, 0, uv_plane_size); - memset(ro180_v, 0, uv_plane_size); - - // fill image buffers with random data - for (i = b; i < (yh + b); ++i) { - for (j = b; j < (yw + b); ++j) { - orig_y[i * (yw + b * 2) + j] = random() & 0xff; - } - } - - for (i = b; i < (uvh + b); ++i) { - for (j = b; j < (uvw * 2 + b); j += 2) { - uint8 random_number = random() & 0x7f; - orig_uv[i * (uvw * 2 + b * 2) + j] = random_number; - orig_uv[i * (uvw * 2 + b * 2) + j + 1] = -random_number; - } - } - - int y_off = b * (yw + b * 2) + b; - int uv_off = b * (uvw + b * 2) + b; - - int y_st = yw + b * 2; - int uv_st = uvw + b * 2; - - NV12ToI420Rotate(orig_y+y_off, y_st, - orig_uv+y_off, y_st, - ro180_y+y_off, y_st, - ro180_u+uv_off, uv_st, - ro180_v+uv_off, uv_st, - yw, yh, - kRotate180); - - I420Rotate(ro180_y+y_off, y_st, - ro180_u+uv_off, uv_st, - ro180_v+uv_off, uv_st, - ro0_y+y_off, y_st, - ro0_u+uv_off, uv_st, - ro0_v+uv_off, uv_st, - yw, yh, - kRotate180); - - for (i = 0; i < y_plane_size; ++i) { - if (orig_y[i] != ro0_y[i]) { - ++err; - } - } - - int zero_cnt = 0; - - for (i = 0; i < uv_plane_size; ++i) { - if ((signed char)ro0_u[i] != -(signed char)ro0_v[i]) { - ++err; - } - if (ro0_u[i] != 0) { - ++zero_cnt; - } - } - - if (!zero_cnt) { - ++err; - } - - free_aligned_buffer_16(orig_y) - 
free_aligned_buffer_16(orig_uv) - free_aligned_buffer_16(ro0_y) - free_aligned_buffer_16(ro0_u) - free_aligned_buffer_16(ro0_v) - free_aligned_buffer_16(ro180_y) - free_aligned_buffer_16(ro180_u) - free_aligned_buffer_16(ro180_v) - - EXPECT_EQ(0, err); +TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate180_Odd) { + NV12TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_width_ - 3, benchmark_height_ - 1, + kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, NV12ToI420RotateNegHeight90) { - int y_err = 0, uv_err = 0; - - int yw = 1024; - int yh = 768; - int b = 128; - int uvw = (yw + 1) >> 1; - int uvh = (yh + 1) >> 1; - int i, j; - - int y_plane_size = (yw + b * 2) * (yh + b * 2); - int uv_plane_size = (uvw + b * 2) * (uvh + b * 2); - int o_uv_plane_size = (uvw * 2 + b * 2) * (uvh + b * 2); - - srandom(time(NULL)); - - align_buffer_16(orig_y, y_plane_size) - align_buffer_16(orig_uv, o_uv_plane_size) - align_buffer_16(roa_y, y_plane_size) - align_buffer_16(roa_u, uv_plane_size) - align_buffer_16(roa_v, uv_plane_size) - align_buffer_16(rob_y, y_plane_size) - align_buffer_16(rob_u, uv_plane_size) - align_buffer_16(rob_v, uv_plane_size) - align_buffer_16(roc_y, y_plane_size) - align_buffer_16(roc_u, uv_plane_size) - align_buffer_16(roc_v, uv_plane_size) - memset(orig_y, 0, y_plane_size); - memset(orig_uv, 0, o_uv_plane_size); - memset(roa_y, 0, y_plane_size); - memset(roa_u, 0, uv_plane_size); - memset(roa_v, 0, uv_plane_size); - memset(rob_y, 0, y_plane_size); - memset(rob_u, 0, uv_plane_size); - memset(rob_v, 0, uv_plane_size); - memset(roc_y, 0, y_plane_size); - memset(roc_u, 0, uv_plane_size); - memset(roc_v, 0, uv_plane_size); - - // fill image buffers with random data - for (i = b; i < (yh + b); ++i) { - for (j = b; j < (yw + b); ++j) { - orig_y[i * (yw + b * 2) + j] = random() & 0xff; - } - } - - for (i = b; i < (uvh + b); ++i) { - for (j = b; j < (uvw * 2 + b); j += 2) { - uint8 random_number = random() 
& 0x7f; - orig_uv[i * (uvw * 2 + b * 2) + j] = random_number; - orig_uv[i * (uvw * 2 + b * 2) + j + 1] = -random_number; - } - } - - int y_off_0 = b * (yw + b * 2) + b; - int uv_off_0 = b * (uvw + b * 2) + b; - int y_off_90 = b * (yh + b * 2) + b; - int uv_off_90 = b * (uvh + b * 2) + b; - - int y_st_0 = yw + b * 2; - int uv_st_0 = uvw + b * 2; - int y_st_90 = yh + b * 2; - int uv_st_90 = uvh + b * 2; - - NV12ToI420Rotate(orig_y+y_off_0, y_st_0, - orig_uv+y_off_0, y_st_0, - roa_y+y_off_90, y_st_90, - roa_u+uv_off_90, uv_st_90, - roa_v+uv_off_90, uv_st_90, - yw, -yh, - kRotateClockwise); - - I420Rotate(roa_y+y_off_90, y_st_90, - roa_u+uv_off_90, uv_st_90, - roa_v+uv_off_90, uv_st_90, - rob_y+y_off_0, y_st_0, - rob_u+uv_off_0, uv_st_0, - rob_v+uv_off_0, uv_st_0, - yh, -yw, - kRotateCounterClockwise); - - I420Rotate(rob_y+y_off_0, y_st_0, - rob_u+uv_off_0, uv_st_0, - rob_v+uv_off_0, uv_st_0, - roc_y+y_off_0, y_st_0, - roc_u+uv_off_0, uv_st_0, - roc_v+uv_off_0, uv_st_0, - yw, yh, - kRotate180); - - for (i = 0; i < y_plane_size; ++i) { - if (orig_y[i] != roc_y[i]) { - ++y_err; - } - } - - if (y_err) { - printf("input %dx%d \n", yw, yh); - PrintArray(orig_y, y_st_0, yh + b * 2); - - printf("rotate a\n"); - PrintArray(roa_y, y_st_90, y_st_0); - - printf("rotate b\n"); - PrintArray(rob_y, y_st_90, y_st_0); - - printf("rotate c\n"); - PrintArray(roc_y, y_st_0, y_st_90); - } - - int zero_cnt = 0; - - for (i = 0; i < uv_plane_size; ++i) { - if ((signed char)roc_u[i] != -(signed char)roc_v[i]) { - ++uv_err; - } - if (rob_u[i] != 0) { - ++zero_cnt; - } - } - - if (!zero_cnt) { - ++uv_err; - } - - if (uv_err) { - printf("input %dx%d \n", uvw * 2, uvh); - PrintArray(orig_uv, y_st_0, uvh + b * 2); - - printf("rotate a\n"); - PrintArray(roa_u, uv_st_90, uv_st_0); - PrintArray(roa_v, uv_st_90, uv_st_0); - - printf("rotate b\n"); - PrintArray(rob_u, uv_st_90, uv_st_0); - PrintArray(rob_v, uv_st_90, uv_st_0); - - printf("rotate c\n"); - PrintArray(roc_u, uv_st_0, uv_st_90); - 
PrintArray(roc_v, uv_st_0, uv_st_90); - } - - free_aligned_buffer_16(orig_y) - free_aligned_buffer_16(orig_uv) - free_aligned_buffer_16(roa_y) - free_aligned_buffer_16(roa_u) - free_aligned_buffer_16(roa_v) - free_aligned_buffer_16(rob_y) - free_aligned_buffer_16(rob_u) - free_aligned_buffer_16(rob_v) - free_aligned_buffer_16(roc_y) - free_aligned_buffer_16(roc_u) - free_aligned_buffer_16(roc_v) - - EXPECT_EQ(0, y_err + uv_err); +TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate270_Odd) { + NV12TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_height_ - 1, benchmark_width_ - 3, + kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); } -TEST_F(libyuvTest, NV12ToI420RotateNegHeight180) { - int y_err = 0, uv_err = 0; - - int yw = 1024; - int yh = 768; - int b = 128; - int uvw = (yw + 1) >> 1; - int uvh = (yh + 1) >> 1; - int i, j; - - int y_plane_size = (yw + b * 2) * (yh + b * 2); - int uv_plane_size = (uvw + b * 2) * (uvh + b * 2); - int o_uv_plane_size = (uvw * 2 + b * 2) * (uvh + b * 2); - - srandom(time(NULL)); - - align_buffer_16(orig_y, y_plane_size) - align_buffer_16(orig_uv, o_uv_plane_size) - align_buffer_16(roa_y, y_plane_size) - align_buffer_16(roa_u, uv_plane_size) - align_buffer_16(roa_v, uv_plane_size) - align_buffer_16(rob_y, y_plane_size) - align_buffer_16(rob_u, uv_plane_size) - align_buffer_16(rob_v, uv_plane_size) - memset(orig_y, 0, y_plane_size); - memset(orig_uv, 0, o_uv_plane_size); - memset(roa_y, 0, y_plane_size); - memset(roa_u, 0, uv_plane_size); - memset(roa_v, 0, uv_plane_size); - memset(rob_y, 0, y_plane_size); - memset(rob_u, 0, uv_plane_size); - memset(rob_v, 0, uv_plane_size); - - // fill image buffers with random data - for (i = b; i < (yh + b); ++i) { - for (j = b; j < (yw + b); ++j) { - orig_y[i * (yw + b * 2) + j] = random() & 0xff; - } - } - - for (i = b; i < (uvh + b); ++i) { - for (j = b; j < (uvw * 2 + b); j += 2) { - uint8 random_number = random() & 0x7f; - orig_uv[i * (uvw * 2 + b * 2) + 
j] = random_number; - orig_uv[i * (uvw * 2 + b * 2) + j + 1] = -random_number; - } - } - - int y_off = b * (yw + b * 2) + b; - int uv_off = b * (uvw + b * 2) + b; - - int y_st = yw + b * 2; - int uv_st = uvw + b * 2; - - NV12ToI420Rotate(orig_y+y_off, y_st, - orig_uv+y_off, y_st, - roa_y+y_off, y_st, - roa_u+uv_off, uv_st, - roa_v+uv_off, uv_st, - yw, -yh, - kRotate180); - - I420Rotate(roa_y+y_off, y_st, - roa_u+uv_off, uv_st, - roa_v+uv_off, uv_st, - rob_y+y_off, y_st, - rob_u+uv_off, uv_st, - rob_v+uv_off, uv_st, - yw, -yh, - kRotate180); - - for (i = 0; i < y_plane_size; ++i) { - if (orig_y[i] != rob_y[i]) - ++y_err; - } - - if (y_err) { - printf("input %dx%d \n", yw, yh); - PrintArray(orig_y, y_st, yh + b * 2); - - printf("rotate a\n"); - PrintArray(roa_y, y_st, yh + b * 2); - - printf("rotate b\n"); - PrintArray(rob_y, y_st, yh + b * 2); - } - - int zero_cnt = 0; +TEST_F(LibYUVRotateTest, NV12Rotate0_Invert) { + NV12TestRotate(benchmark_width_, -benchmark_height_, + benchmark_width_, benchmark_height_, + kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} - for (i = 0; i < uv_plane_size; ++i) { - if ((signed char)rob_u[i] != -(signed char)rob_v[i]) { - ++uv_err; - } - if (rob_u[i] != 0) { - ++zero_cnt; - } - } +TEST_F(LibYUVRotateTest, NV12Rotate90_Invert) { + NV12TestRotate(benchmark_width_, -benchmark_height_, + benchmark_height_, benchmark_width_, + kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} - if (!zero_cnt) { - ++uv_err; - } +TEST_F(LibYUVRotateTest, NV12Rotate180_Invert) { + NV12TestRotate(benchmark_width_, -benchmark_height_, + benchmark_width_, benchmark_height_, + kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} - if (uv_err) { - printf("input %dx%d \n", uvw * 2, uvh); - PrintArray(orig_uv, y_st, uvh + b * 2); +TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) { + NV12TestRotate(benchmark_width_, -benchmark_height_, + benchmark_height_, benchmark_width_, + 
kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} - printf("rotate a\n"); - PrintArray(roa_u, uv_st, uvh + b * 2); - PrintArray(roa_v, uv_st, uvh + b * 2); - printf("rotate b\n"); - PrintArray(rob_u, uv_st, uvh + b * 2); - PrintArray(rob_v, uv_st, uvh + b * 2); - } - free_aligned_buffer_16(orig_y) - free_aligned_buffer_16(orig_uv) - free_aligned_buffer_16(roa_y) - free_aligned_buffer_16(roa_u) - free_aligned_buffer_16(roa_v) - free_aligned_buffer_16(rob_y) - free_aligned_buffer_16(rob_u) - free_aligned_buffer_16(rob_v) - EXPECT_EQ(0, y_err + uv_err); -} } // namespace libyuv diff --git a/files/unit_test/scale_argb_test.cc b/files/unit_test/scale_argb_test.cc index fef96764..f99782f7 100644 --- a/files/unit_test/scale_argb_test.cc +++ b/files/unit_test/scale_argb_test.cc @@ -4,69 +4,80 @@ * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <stdlib.h> #include <time.h> +#include "libyuv/convert_argb.h" #include "libyuv/cpu_id.h" #include "libyuv/scale_argb.h" +#include "libyuv/video_common.h" #include "../unit_test/unit_test.h" namespace libyuv { +#define STRINGIZE(line) #line +#define FILELINESTR(file, line) file ":" STRINGIZE(line) + +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. 
static int ARGBTestFilter(int src_width, int src_height, int dst_width, int dst_height, - FilterMode f, int benchmark_iterations) { - const int b = 128; - int src_argb_plane_size = (src_width + b * 2) * (src_height + b * 2) * 4; - int src_stride_argb = (b * 2 + src_width) * 4; + FilterMode f, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } - align_buffer_16(src_argb, src_argb_plane_size) - memset(src_argb, 1, src_argb_plane_size); + int i, j; + const int b = 0; // 128 to test for padding/stride. + int64 src_argb_plane_size = (Abs(src_width) + b * 2) * + (Abs(src_height) + b * 2) * 4LL; + int src_stride_argb = (b * 2 + Abs(src_width)) * 4; + + align_buffer_page_end(src_argb, src_argb_plane_size); + if (!src_argb) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + MemRandomize(src_argb, src_argb_plane_size); - int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; + int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4LL; int dst_stride_argb = (b * 2 + dst_width) * 4; - srandom(time(NULL)); - - int i, j; - for (i = b; i < (src_height + b); ++i) { - for (j = b; j < (src_width + b) * 4; ++j) { - src_argb[(i * src_stride_argb) + j] = (random() & 0xff); - } + align_buffer_page_end(dst_argb_c, dst_argb_plane_size); + align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); + if (!dst_argb_c || !dst_argb_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; } - - align_buffer_16(dst_argb_c, dst_argb_plane_size) - align_buffer_16(dst_argb_opt, dst_argb_plane_size) memset(dst_argb_c, 2, dst_argb_plane_size); memset(dst_argb_opt, 3, dst_argb_plane_size); // Warm up both versions for consistent benchmarks. - MaskCpuFlags(0); // Disable all CPU optimization. + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. 
ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, dst_width, dst_height, f); - MaskCpuFlags(-1); // Enable all CPU optimization. + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, src_width, src_height, dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, dst_width, dst_height, f); - MaskCpuFlags(0); // Disable all CPU optimization. + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. double c_time = get_time(); - for (i = 0; i < benchmark_iterations; ++i) { - ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, - src_width, src_height, - dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, - dst_width, dst_height, f); - } - c_time = (get_time() - c_time) / benchmark_iterations; + ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, + src_width, src_height, + dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, + dst_width, dst_height, f); + + c_time = (get_time() - c_time); - MaskCpuFlags(-1); // Enable all CPU optimization. + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. double opt_time = get_time(); for (i = 0; i < benchmark_iterations; ++i) { ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, @@ -78,7 +89,7 @@ static int ARGBTestFilter(int src_width, int src_height, // Report performance of C vs OPT printf("filter %d - %8d us C - %8d us OPT\n", - f, static_cast<int>(c_time*1e6), static_cast<int>(opt_time*1e6)); + f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); // C version may be a little off from the optimized. Order of // operations may introduce rounding somewhere. 
So do a difference @@ -87,7 +98,7 @@ static int ARGBTestFilter(int src_width, int src_height, int max_diff = 0; for (i = b; i < (dst_height + b); ++i) { for (j = b * 4; j < (dst_width + b) * 4; ++j) { - int abs_diff = abs(dst_argb_c[(i * dst_stride_argb) + j] - + int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - dst_argb_opt[(i * dst_stride_argb) + j]); if (abs_diff > max_diff) { max_diff = abs_diff; @@ -95,161 +106,357 @@ static int ARGBTestFilter(int src_width, int src_height, } } - free_aligned_buffer_16(dst_argb_c) - free_aligned_buffer_16(dst_argb_opt) - free_aligned_buffer_16(src_argb) + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + free_aligned_buffer_page_end(src_argb); return max_diff; } -TEST_F(libyuvTest, ARGBScaleDownBy2) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = src_width / 2; - const int dst_height = src_height / 2; - - for (int f = 0; f < 2; ++f) { - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), - benchmark_iterations_); - EXPECT_LE(max_diff, 1); +static const int kTileX = 8; +static const int kTileY = 8; + +static int TileARGBScale(const uint8* src_argb, int src_stride_argb, + int src_width, int src_height, + uint8* dst_argb, int dst_stride_argb, + int dst_width, int dst_height, + FilterMode filtering) { + for (int y = 0; y < dst_height; y += kTileY) { + for (int x = 0; x < dst_width; x += kTileX) { + int clip_width = kTileX; + if (x + clip_width > dst_width) { + clip_width = dst_width - x; + } + int clip_height = kTileY; + if (y + clip_height > dst_height) { + clip_height = dst_height - y; + } + int r = ARGBScaleClip(src_argb, src_stride_argb, + src_width, src_height, + dst_argb, dst_stride_argb, + dst_width, dst_height, + x, y, clip_width, clip_height, filtering); + if (r) { + return r; + } + } } + return 0; } -TEST_F(libyuvTest, ARGBScaleDownBy4) { - const int src_width = 1280; - const int 
src_height = 720; - const int dst_width = src_width / 4; - const int dst_height = src_height / 4; +static int ARGBClipTestFilter(int src_width, int src_height, + int dst_width, int dst_height, + FilterMode f, int benchmark_iterations) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } - for (int f = 0; f < 2; ++f) { - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + const int b = 128; + int64 src_argb_plane_size = (Abs(src_width) + b * 2) * + (Abs(src_height) + b * 2) * 4; + int src_stride_argb = (b * 2 + Abs(src_width)) * 4; + + align_buffer_page_end(src_argb, src_argb_plane_size); + if (!src_argb) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; } -} + memset(src_argb, 1, src_argb_plane_size); -TEST_F(libyuvTest, ARGBScaleDownBy5) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = src_width / 5; - const int dst_height = src_height / 5; + int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; + int dst_stride_argb = (b * 2 + dst_width) * 4; - for (int f = 0; f < 2; ++f) { - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + int i, j; + for (i = b; i < (Abs(src_height) + b); ++i) { + for (j = b; j < (Abs(src_width) + b) * 4; ++j) { + src_argb[(i * src_stride_argb) + j] = (fastrand() & 0xff); + } } -} -TEST_F(libyuvTest, ARGBScaleDownBy8) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = src_width / 8; - const int dst_height = src_height / 8; - - for (int f = 0; f < 2; ++f) { - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + align_buffer_page_end(dst_argb_c, 
dst_argb_plane_size); + align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); + if (!dst_argb_c || !dst_argb_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; } -} + memset(dst_argb_c, 2, dst_argb_plane_size); + memset(dst_argb_opt, 3, dst_argb_plane_size); -TEST_F(libyuvTest, ARGBScaleDownBy16) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = src_width / 16; - const int dst_height = src_height / 16; + // Do full image, no clipping. + double c_time = get_time(); + ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, + src_width, src_height, + dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, + dst_width, dst_height, f); + c_time = (get_time() - c_time); - for (int f = 0; f < 2; ++f) { - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + // Do tiled image, clipping scale to a tile at a time. + double opt_time = get_time(); + for (i = 0; i < benchmark_iterations; ++i) { + TileARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, + src_width, src_height, + dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, + dst_width, dst_height, f); } -} + opt_time = (get_time() - opt_time) / benchmark_iterations; -TEST_F(libyuvTest, ARGBScaleDownBy34) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = src_width * 3 / 4; - const int dst_height = src_height * 3 / 4; + // Report performance of Full vs Tiled. + printf("filter %d - %8d us Full - %8d us Tiled\n", + f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); - for (int f = 0; f < 2; ++f) { - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + // Compare full scaled image vs tiled image. 
+ int max_diff = 0; + for (i = b; i < (dst_height + b); ++i) { + for (j = b * 4; j < (dst_width + b) * 4; ++j) { + int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - + dst_argb_opt[(i * dst_stride_argb) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } } + + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + free_aligned_buffer_page_end(src_argb); + return max_diff; } -TEST_F(libyuvTest, ARGBScaleDownBy38) { - int src_width = 1280; - int src_height = 720; - int dst_width = src_width * 3 / 8; - int dst_height = src_height * 3 / 8; +// The following adjustments in dimensions ensure the scale factor will be +// exactly achieved. +#define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom) +#define SX(x, nom, denom) static_cast<int>((x / nom) * denom) + +#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ + TEST_F(LibYUVScaleTest, ARGBScaleDownBy##name##_##filter) { \ + int diff = ARGBTestFilter(SX(benchmark_width_, nom, denom), \ + SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), \ + DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, ARGBScaleDownClipBy##name##_##filter) { \ + int diff = ARGBClipTestFilter(SX(benchmark_width_, nom, denom), \ + SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), \ + DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_); \ + EXPECT_LE(diff, max_diff); \ + } - for (int f = 0; f < 2; ++f) { - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), - benchmark_iterations_); - EXPECT_LE(max_diff, 1); - } -} +// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but +// filtering is different fixed point implementations for SSSE3, Neon and C. 
+#define TEST_FACTOR(name, nom, denom) \ + TEST_FACTOR1(name, None, nom, denom, 0) \ + TEST_FACTOR1(name, Linear, nom, denom, 3) \ + TEST_FACTOR1(name, Bilinear, nom, denom, 3) \ + TEST_FACTOR1(name, Box, nom, denom, 3) + +TEST_FACTOR(2, 1, 2) +TEST_FACTOR(4, 1, 4) +TEST_FACTOR(8, 1, 8) +TEST_FACTOR(3by4, 3, 4) +TEST_FACTOR(3by8, 3, 8) +TEST_FACTOR(3, 1, 3) +#undef TEST_FACTOR1 +#undef TEST_FACTOR +#undef SX +#undef DX + +#define TEST_SCALETO1(name, width, height, filter, max_diff) \ + TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \ + int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, \ + width, height, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \ + int diff = ARGBTestFilter(width, height, \ + Abs(benchmark_width_), Abs(benchmark_height_), \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, name##ClipTo##width##x##height##_##filter) { \ + int diff = ARGBClipTestFilter(benchmark_width_, benchmark_height_, \ + width, height, \ + kFilter##filter, benchmark_iterations_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, name##ClipFrom##width##x##height##_##filter) { \ + int diff = ARGBClipTestFilter(width, height, \ + Abs(benchmark_width_), \ + Abs(benchmark_height_), \ + kFilter##filter, benchmark_iterations_); \ + EXPECT_LE(diff, max_diff); \ + } -TEST_F(libyuvTest, ARGBScaleTo1366) { - int src_width = 1280; - int src_height = 720; - int dst_width = 1366; - int dst_height = 768; +/// Test scale to a specified size with all 4 filters. 
+#define TEST_SCALETO(name, width, height) \ + TEST_SCALETO1(name, width, height, None, 0) \ + TEST_SCALETO1(name, width, height, Linear, 3) \ + TEST_SCALETO1(name, width, height, Bilinear, 3) + +TEST_SCALETO(ARGBScale, 1, 1) +TEST_SCALETO(ARGBScale, 320, 240) +TEST_SCALETO(ARGBScale, 352, 288) +TEST_SCALETO(ARGBScale, 569, 480) +TEST_SCALETO(ARGBScale, 640, 360) +TEST_SCALETO(ARGBScale, 1280, 720) +#undef TEST_SCALETO1 +#undef TEST_SCALETO + +// Scale with YUV conversion to ARGB and clipping. +LIBYUV_API +int YUVToARGBScaleReference2(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint32 src_fourcc, + int src_width, int src_height, + uint8* dst_argb, int dst_stride_argb, + uint32 dst_fourcc, + int dst_width, int dst_height, + int clip_x, int clip_y, + int clip_width, int clip_height, + enum FilterMode filtering) { + uint8* argb_buffer = static_cast<uint8*>(malloc(src_width * src_height * 4)); + int r; + I420ToARGB(src_y, src_stride_y, + src_u, src_stride_u, + src_v, src_stride_v, + argb_buffer, src_width * 4, + src_width, src_height); + + r = ARGBScaleClip(argb_buffer, src_width * 4, + src_width, src_height, + dst_argb, dst_stride_argb, + dst_width, dst_height, + clip_x, clip_y, clip_width, clip_height, + filtering); + free(argb_buffer); + return r; +} - for (int f = 0; f < 2; ++f) { - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), - benchmark_iterations_); - EXPECT_LE(max_diff, 1); +static void FillRamp(uint8* buf, int width, int height, int v, int dx, int dy) { + int rv = v; + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + *buf++ = v; + v += dx; + if (v < 0 || v > 255) { + dx = -dx; + v += dx; + } + } + v = rv + dy; + if (v < 0 || v > 255) { + dy = -dy; + v += dy; + } + rv = v; } } -TEST_F(libyuvTest, ARGBScaleTo4074) { - int src_width = 2880 * 2; - int src_height = 1800; - int dst_width = 4074; - int 
dst_height = 1272; +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. +static int YUVToARGBTestFilter(int src_width, int src_height, + int dst_width, int dst_height, + FilterMode f, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info) { + int64 src_y_plane_size = Abs(src_width) * Abs(src_height); + int64 src_uv_plane_size = ((Abs(src_width) + 1) / 2) * + ((Abs(src_height) + 1) / 2); + int src_stride_y = Abs(src_width); + int src_stride_uv = (Abs(src_width) + 1) / 2; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + + int64 dst_argb_plane_size = (dst_width) * (dst_height) * 4LL; + int dst_stride_argb = (dst_width) * 4; + align_buffer_page_end(dst_argb_c, dst_argb_plane_size); + align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); + if (!dst_argb_c || !dst_argb_opt || !src_y || !src_u || !src_v) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + // Fill YUV image with continuous ramp, which is less sensitive to + // subsampling and filtering differences for test purposes. 
+ FillRamp(src_y, Abs(src_width), Abs(src_height), 128, 1, 1); + FillRamp(src_u, (Abs(src_width) + 1) / 2, (Abs(src_height) + 1) / 2, 3, 1, 1); + FillRamp(src_v, (Abs(src_width) + 1) / 2, (Abs(src_height) + 1) / 2, 4, 1, 1); + memset(dst_argb_c, 2, dst_argb_plane_size); + memset(dst_argb_opt, 3, dst_argb_plane_size); - for (int f = 0; f < 2; ++f) { - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + YUVToARGBScaleReference2(src_y, src_stride_y, + src_u, src_stride_uv, + src_v, src_stride_uv, + libyuv::FOURCC_I420, + src_width, src_height, + dst_argb_c, dst_stride_argb, + libyuv::FOURCC_I420, + dst_width, dst_height, + 0, 0, dst_width, dst_height, + f); + + for (int i = 0; i < benchmark_iterations; ++i) { + YUVToARGBScaleClip(src_y, src_stride_y, + src_u, src_stride_uv, + src_v, src_stride_uv, + libyuv::FOURCC_I420, + src_width, src_height, + dst_argb_opt, dst_stride_argb, + libyuv::FOURCC_I420, + dst_width, dst_height, + 0, 0, dst_width, dst_height, + f); + } + int max_diff = 0; + for (int i = 0; i < dst_height; ++i) { + for (int j = 0; j < dst_width * 4; ++j) { + int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - + dst_argb_opt[(i * dst_stride_argb) + j]); + if (abs_diff > max_diff) { + printf("error %d at %d,%d c %d opt %d", + abs_diff, + j, i, + dst_argb_c[(i * dst_stride_argb) + j], + dst_argb_opt[(i * dst_stride_argb) + j]); + EXPECT_LE(abs_diff, 40); + max_diff = abs_diff; + } + } } -} + free_aligned_buffer_page_end(dst_argb_c); + free_aligned_buffer_page_end(dst_argb_opt); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + return max_diff; +} -TEST_F(libyuvTest, ARGBScaleTo853) { - int src_width = 1280; - int src_height = 720; - int dst_width = 853; - int dst_height = 480; +TEST_F(LibYUVScaleTest, YUVToRGBScaleUp) { + int diff = YUVToARGBTestFilter(benchmark_width_, 
benchmark_height_, + benchmark_width_ * 3 / 2, + benchmark_height_ * 3 / 2, + libyuv::kFilterBilinear, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); + EXPECT_LE(diff, 10); +} - for (int f = 0; f < 2; ++f) { - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), - benchmark_iterations_); - EXPECT_LE(max_diff, 1); - } +TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) { + int diff = YUVToARGBTestFilter(benchmark_width_ * 3 / 2, + benchmark_height_ * 3 / 2, + benchmark_width_, benchmark_height_, + libyuv::kFilterBilinear, + benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); + EXPECT_LE(diff, 10); } + } // namespace libyuv diff --git a/files/unit_test/scale_test.cc b/files/unit_test/scale_test.cc index 55b4148d..f40443e2 100644 --- a/files/unit_test/scale_test.cc +++ b/files/unit_test/scale_test.cc @@ -4,7 +4,7 @@ * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ @@ -15,59 +15,65 @@ #include "libyuv/scale.h" #include "../unit_test/unit_test.h" +#define STRINGIZE(line) #line +#define FILELINESTR(file, line) file ":" STRINGIZE(line) + namespace libyuv { +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. 
static int TestFilter(int src_width, int src_height, int dst_width, int dst_height, - FilterMode f, int rounding, int benchmark_iterations) { - const int b = 128 * rounding; - int src_width_uv = (src_width + rounding) >> 1; - int src_height_uv = (src_height + rounding) >> 1; + FilterMode f, int benchmark_iterations, + int disable_cpu_flags, int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i, j; + const int b = 0; // 128 to test for padding/stride. + int src_width_uv = (Abs(src_width) + 1) >> 1; + int src_height_uv = (Abs(src_height) + 1) >> 1; - int src_y_plane_size = (src_width + b * 2) * (src_height + b * 2); - int src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2); + int64 src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2); + int64 src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2); - int src_stride_y = b * 2 + src_width; + int src_stride_y = b * 2 + Abs(src_width); int src_stride_uv = b * 2 + src_width_uv; align_buffer_page_end(src_y, src_y_plane_size) align_buffer_page_end(src_u, src_uv_plane_size) align_buffer_page_end(src_v, src_uv_plane_size) + if (!src_y || !src_u || !src_v) { + printf("Skipped. 
Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); - int dst_width_uv = (dst_width + rounding) >> 1; - int dst_height_uv = (dst_height + rounding) >> 1; + int dst_width_uv = (dst_width + 1) >> 1; + int dst_height_uv = (dst_height + 1) >> 1; - int dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2); - int dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2); + int64 dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2); + int64 dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2); int dst_stride_y = b * 2 + dst_width; int dst_stride_uv = b * 2 + dst_width_uv; - srandom(time(NULL)); - - int i, j; - for (i = b; i < (src_height + b); ++i) { - for (j = b; j < (src_width + b); ++j) { - src_y[(i * src_stride_y) + j] = (random() & 0xff); - } - } - - for (i = b; i < (src_height_uv + b); ++i) { - for (j = b; j < (src_width_uv + b); ++j) { - src_u[(i * src_stride_uv) + j] = (random() & 0xff); - src_v[(i * src_stride_uv) + j] = (random() & 0xff); - } - } - align_buffer_page_end(dst_y_c, dst_y_plane_size) align_buffer_page_end(dst_u_c, dst_uv_plane_size) align_buffer_page_end(dst_v_c, dst_uv_plane_size) align_buffer_page_end(dst_y_opt, dst_y_plane_size) align_buffer_page_end(dst_u_opt, dst_uv_plane_size) align_buffer_page_end(dst_v_opt, dst_uv_plane_size) + if (!dst_y_c || !dst_u_c || !dst_v_c || + !dst_y_opt|| !dst_u_opt|| !dst_v_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } - // Warm up both versions for consistent benchmarks. - MaskCpuFlags(0); // Disable all CPU optimization. + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. 
+ double c_time = get_time(); I420Scale(src_y + (src_stride_y * b) + b, src_stride_y, src_u + (src_stride_uv * b) + b, src_stride_uv, src_v + (src_stride_uv * b) + b, src_stride_uv, @@ -76,31 +82,9 @@ static int TestFilter(int src_width, int src_height, dst_u_c + (dst_stride_uv * b) + b, dst_stride_uv, dst_v_c + (dst_stride_uv * b) + b, dst_stride_uv, dst_width, dst_height, f); - MaskCpuFlags(-1); // Enable all CPU optimization. - I420Scale(src_y + (src_stride_y * b) + b, src_stride_y, - src_u + (src_stride_uv * b) + b, src_stride_uv, - src_v + (src_stride_uv * b) + b, src_stride_uv, - src_width, src_height, - dst_y_opt + (dst_stride_y * b) + b, dst_stride_y, - dst_u_opt + (dst_stride_uv * b) + b, dst_stride_uv, - dst_v_opt + (dst_stride_uv * b) + b, dst_stride_uv, - dst_width, dst_height, f); - - MaskCpuFlags(0); // Disable all CPU optimization. - double c_time = get_time(); - for (i = 0; i < benchmark_iterations; ++i) { - I420Scale(src_y + (src_stride_y * b) + b, src_stride_y, - src_u + (src_stride_uv * b) + b, src_stride_uv, - src_v + (src_stride_uv * b) + b, src_stride_uv, - src_width, src_height, - dst_y_c + (dst_stride_y * b) + b, dst_stride_y, - dst_u_c + (dst_stride_uv * b) + b, dst_stride_uv, - dst_v_c + (dst_stride_uv * b) + b, dst_stride_uv, - dst_width, dst_height, f); - } - c_time = (get_time() - c_time) / benchmark_iterations; + c_time = (get_time() - c_time); - MaskCpuFlags(-1); // Enable all CPU optimization. + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. 
double opt_time = get_time(); for (i = 0; i < benchmark_iterations; ++i) { I420Scale(src_y + (src_stride_y * b) + b, src_stride_y, @@ -113,10 +97,11 @@ static int TestFilter(int src_width, int src_height, dst_width, dst_height, f); } opt_time = (get_time() - opt_time) / benchmark_iterations; - // Report performance of C vs OPT printf("filter %d - %8d us C - %8d us OPT\n", - f, static_cast<int>(c_time*1e6), static_cast<int>(opt_time*1e6)); + f, + static_cast<int>(c_time * 1e6), + static_cast<int>(opt_time * 1e6)); // C version may be a little off from the optimized. Order of // operations may introduce rounding somewhere. So do a difference @@ -125,7 +110,7 @@ static int TestFilter(int src_width, int src_height, int max_diff = 0; for (i = b; i < (dst_height + b); ++i) { for (j = b; j < (dst_width + b); ++j) { - int abs_diff = abs(dst_y_c[(i * dst_stride_y) + j] - + int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] - dst_y_opt[(i * dst_stride_y) + j]); if (abs_diff > max_diff) { max_diff = abs_diff; @@ -135,12 +120,12 @@ static int TestFilter(int src_width, int src_height, for (i = b; i < (dst_height_uv + b); ++i) { for (j = b; j < (dst_width_uv + b); ++j) { - int abs_diff = abs(dst_u_c[(i * dst_stride_uv) + j] - + int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] - dst_u_opt[(i * dst_stride_uv) + j]); if (abs_diff > max_diff) { max_diff = abs_diff; } - abs_diff = abs(dst_v_c[(i * dst_stride_uv) + j] - + abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] - dst_v_opt[(i * dst_stride_uv) + j]); if (abs_diff > max_diff) { max_diff = abs_diff; @@ -162,215 +147,226 @@ static int TestFilter(int src_width, int src_height, return max_diff; } -TEST_F(libyuvTest, ScaleDownBy2) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = src_width / 2; - const int dst_height = src_height / 2; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), 1, - 
benchmark_iterations_); - EXPECT_LE(max_diff, 1); +// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference. +// 0 = exact. +static int TestFilter_16(int src_width, int src_height, + int dst_width, int dst_height, + FilterMode f, int benchmark_iterations) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; } -} -TEST_F(libyuvTest, ScaleDownBy4) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = src_width / 4; - const int dst_height = src_height / 4; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), 1, - benchmark_iterations_); - EXPECT_LE(max_diff, 2); // This is the only scale factor with error of 2. - } -} + int i, j; + const int b = 0; // 128 to test for padding/stride. + int src_width_uv = (Abs(src_width) + 1) >> 1; + int src_height_uv = (Abs(src_height) + 1) >> 1; -TEST_F(libyuvTest, ScaleDownBy5) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = src_width / 5; - const int dst_height = src_height / 5; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), 1, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); - } -} + int64 src_y_plane_size = (Abs(src_width) + b * 2) * + (Abs(src_height) + b * 2); + int64 src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2); -TEST_F(libyuvTest, ScaleDownBy8) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = src_width / 8; - const int dst_height = src_height / 8; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), 1, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); - } -} + int src_stride_y = b * 2 + Abs(src_width); + int src_stride_uv = b * 2 + src_width_uv; -TEST_F(libyuvTest, ScaleDownBy16) { - 
const int src_width = 1280; - const int src_height = 720; - const int dst_width = src_width / 16; - const int dst_height = src_height / 16; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), 1, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + align_buffer_page_end(src_y, src_y_plane_size) + align_buffer_page_end(src_u, src_uv_plane_size) + align_buffer_page_end(src_v, src_uv_plane_size) + align_buffer_page_end(src_y_16, src_y_plane_size * 2) + align_buffer_page_end(src_u_16, src_uv_plane_size * 2) + align_buffer_page_end(src_v_16, src_uv_plane_size * 2) + uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16); + uint16* p_src_u_16 = reinterpret_cast<uint16*>(src_u_16); + uint16* p_src_v_16 = reinterpret_cast<uint16*>(src_v_16); + + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + for (i = b; i < src_height + b; ++i) { + for (j = b; j < src_width + b; ++j) { + p_src_y_16[(i * src_stride_y) + j] = src_y[(i * src_stride_y) + j]; + } } -} -TEST_F(libyuvTest, ScaleDownBy34) { - const int src_width = 1280; - const int src_height = 720; - const int dst_width = src_width * 3 / 4; - const int dst_height = src_height * 3 / 4; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), 1, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + for (i = b; i < (src_height_uv + b); ++i) { + for (j = b; j < (src_width_uv + b); ++j) { + p_src_u_16[(i * src_stride_uv) + j] = src_u[(i * src_stride_uv) + j]; + p_src_v_16[(i * src_stride_uv) + j] = src_v[(i * src_stride_uv) + j]; + } } -} -TEST_F(libyuvTest, ScaleDownBy38) { - int src_width = 1280; - int src_height = 720; - int dst_width = src_width * 3 / 8; - int dst_height = src_height * 3 / 8; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - 
dst_width, dst_height, - static_cast<FilterMode>(f), 1, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); - } -} + int dst_width_uv = (dst_width + 1) >> 1; + int dst_height_uv = (dst_height + 1) >> 1; -TEST_F(libyuvTest, ScaleTo1366) { - int src_width = 1280; - int src_height = 720; - int dst_width = 1366; - int dst_height = 768; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), 1, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); - } -} + int dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2); + int dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2); -TEST_F(libyuvTest, ScaleTo4074) { - int src_width = 2880 * 2; - int src_height = 1800; - int dst_width = 4074; - int dst_height = 1272; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), 1, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); - } -} + int dst_stride_y = b * 2 + dst_width; + int dst_stride_uv = b * 2 + dst_width_uv; -TEST_F(libyuvTest, ScaleTo853) { - int src_width = 1280; - int src_height = 720; - int dst_width = 853; - int dst_height = 480; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), 1, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); - } -} + align_buffer_page_end(dst_y_8, dst_y_plane_size) + align_buffer_page_end(dst_u_8, dst_uv_plane_size) + align_buffer_page_end(dst_v_8, dst_uv_plane_size) + align_buffer_page_end(dst_y_16, dst_y_plane_size * 2) + align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2) + align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2) -TEST_F(libyuvTest, ScaleTo853Wrong) { - int src_width = 1280; - int src_height = 720; - int dst_width = 853; - int dst_height = 480; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, 
dst_height, - static_cast<FilterMode>(f), 0, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); - } -} + uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16); + uint16* p_dst_u_16 = reinterpret_cast<uint16*>(dst_u_16); + uint16* p_dst_v_16 = reinterpret_cast<uint16*>(dst_v_16); + + I420Scale(src_y + (src_stride_y * b) + b, src_stride_y, + src_u + (src_stride_uv * b) + b, src_stride_uv, + src_v + (src_stride_uv * b) + b, src_stride_uv, + src_width, src_height, + dst_y_8 + (dst_stride_y * b) + b, dst_stride_y, + dst_u_8 + (dst_stride_uv * b) + b, dst_stride_uv, + dst_v_8 + (dst_stride_uv * b) + b, dst_stride_uv, + dst_width, dst_height, f); -// A one off test for a screen cast resolution scale. -TEST_F(libyuvTest, ScaleTo684) { - int src_width = 686; - int src_height = 557; - int dst_width = 684; - int dst_height = 552; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), 1, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + for (i = 0; i < benchmark_iterations; ++i) { + I420Scale_16(p_src_y_16 + (src_stride_y * b) + b, src_stride_y, + p_src_u_16 + (src_stride_uv * b) + b, src_stride_uv, + p_src_v_16 + (src_stride_uv * b) + b, src_stride_uv, + src_width, src_height, + p_dst_y_16 + (dst_stride_y * b) + b, dst_stride_y, + p_dst_u_16 + (dst_stride_uv * b) + b, dst_stride_uv, + p_dst_v_16 + (dst_stride_uv * b) + b, dst_stride_uv, + dst_width, dst_height, f); } -} -TEST_F(libyuvTest, ScaleTo342) { - int src_width = 686; - int src_height = 557; - int dst_width = 342; - int dst_height = 276; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), 1, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + // Expect an exact match + int max_diff = 0; + for (i = b; i < (dst_height + b); ++i) { + for (j = b; j < (dst_width + b); ++j) { + int abs_diff = Abs(dst_y_8[(i * dst_stride_y) + j] - + 
p_dst_y_16[(i * dst_stride_y) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } } -} -TEST_F(libyuvTest, ScaleToHalf342) { - int src_width = 684; - int src_height = 552; - int dst_width = 342; - int dst_height = 276; - - for (int f = 0; f < 3; ++f) { - int max_diff = TestFilter(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f), 1, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + for (i = b; i < (dst_height_uv + b); ++i) { + for (j = b; j < (dst_width_uv + b); ++j) { + int abs_diff = Abs(dst_u_8[(i * dst_stride_uv) + j] - + p_dst_u_16[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_8[(i * dst_stride_uv) + j] - + p_dst_v_16[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } } + + free_aligned_buffer_page_end(dst_y_8) + free_aligned_buffer_page_end(dst_u_8) + free_aligned_buffer_page_end(dst_v_8) + free_aligned_buffer_page_end(dst_y_16) + free_aligned_buffer_page_end(dst_u_16) + free_aligned_buffer_page_end(dst_v_16) + + free_aligned_buffer_page_end(src_y) + free_aligned_buffer_page_end(src_u) + free_aligned_buffer_page_end(src_v) + free_aligned_buffer_page_end(src_y_16) + free_aligned_buffer_page_end(src_u_16) + free_aligned_buffer_page_end(src_v_16) + + return max_diff; } +// The following adjustments in dimensions ensure the scale factor will be +// exactly achieved. 
+// 2 is chroma subsample +#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2) +#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2) + +#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ + TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) { \ + int diff = TestFilter(SX(benchmark_width_, nom, denom), \ + SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), \ + DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, DISABLED_ScaleDownBy##name##_##filter##_16) { \ + int diff = TestFilter_16(SX(benchmark_width_, nom, denom), \ + SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), \ + DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_); \ + EXPECT_LE(diff, max_diff); \ + } + +// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but +// filtering is different fixed point implementations for SSSE3, Neon and C. 
+#define TEST_FACTOR(name, nom, denom, boxdiff) \ + TEST_FACTOR1(name, None, nom, denom, 0) \ + TEST_FACTOR1(name, Linear, nom, denom, 3) \ + TEST_FACTOR1(name, Bilinear, nom, denom, 3) \ + TEST_FACTOR1(name, Box, nom, denom, boxdiff) + +TEST_FACTOR(2, 1, 2, 0) +TEST_FACTOR(4, 1, 4, 0) +TEST_FACTOR(8, 1, 8, 0) +TEST_FACTOR(3by4, 3, 4, 1) +TEST_FACTOR(3by8, 3, 8, 1) +TEST_FACTOR(3, 1, 3, 0) +#undef TEST_FACTOR1 +#undef TEST_FACTOR +#undef SX +#undef DX + +#define TEST_SCALETO1(name, width, height, filter, max_diff) \ + TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \ + int diff = TestFilter(benchmark_width_, benchmark_height_, \ + width, height, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \ + int diff = TestFilter(width, height, \ + Abs(benchmark_width_), Abs(benchmark_height_), \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, \ + DISABLED_##name##To##width##x##height##_##filter##_16) { \ + int diff = TestFilter_16(benchmark_width_, benchmark_height_, \ + width, height, \ + kFilter##filter, benchmark_iterations_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, \ + DISABLED_##name##From##width##x##height##_##filter##_16) { \ + int diff = TestFilter_16(width, height, \ + Abs(benchmark_width_), Abs(benchmark_height_), \ + kFilter##filter, benchmark_iterations_); \ + EXPECT_LE(diff, max_diff); \ + } + +// Test scale to a specified size with all 4 filters. 
+#define TEST_SCALETO(name, width, height) \ + TEST_SCALETO1(name, width, height, None, 0) \ + TEST_SCALETO1(name, width, height, Linear, 0) \ + TEST_SCALETO1(name, width, height, Bilinear, 0) \ + TEST_SCALETO1(name, width, height, Box, 0) + +TEST_SCALETO(Scale, 1, 1) +TEST_SCALETO(Scale, 320, 240) +TEST_SCALETO(Scale, 352, 288) +TEST_SCALETO(Scale, 569, 480) +TEST_SCALETO(Scale, 640, 360) +TEST_SCALETO(Scale, 1280, 720) +#undef TEST_SCALETO1 +#undef TEST_SCALETO + } // namespace libyuv diff --git a/files/unit_test/testdata/juno.txt b/files/unit_test/testdata/juno.txt new file mode 100644 index 00000000..c275be74 --- /dev/null +++ b/files/unit_test/testdata/juno.txt @@ -0,0 +1,15 @@ +Processor : AArch64 Processor rev 0 (aarch64)
+processor : 0
+processor : 1
+processor : 2
+processor : 3
+processor : 4
+processor : 5
+Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
+CPU implementer : 0x41
+CPU architecture: AArch64
+CPU variant : 0x0
+CPU part : 0xd07
+CPU revision : 0
+
+Hardware : Juno
diff --git a/files/unit_test/unit_test.cc b/files/unit_test/unit_test.cc index 007c81f0..e75510fd 100644 --- a/files/unit_test/unit_test.cc +++ b/files/unit_test/unit_test.cc @@ -4,7 +4,7 @@ * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ @@ -14,20 +14,343 @@ #include <cstring> +#include "gflags/gflags.h" + // Change this to 1000 for benchmarking. // TODO(fbarchard): Add command line parsing to pass this as option. #define BENCHMARK_ITERATIONS 1 -libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128), - benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(1280), - benchmark_height_(720) { - const char* repeat = getenv("LIBYUV_REPEAT"); - if (repeat) { - benchmark_iterations_ = atoi(repeat); // NOLINT - } +unsigned int fastrand_seed = 0xfb; + +DEFINE_int32(libyuv_width, 0, "width of test image."); +DEFINE_int32(libyuv_height, 0, "height of test image."); +DEFINE_int32(libyuv_repeat, 0, "number of times to repeat test."); +DEFINE_int32(libyuv_flags, 0, + "cpu flags for reference code. 1 = C, -1 = SIMD"); +DEFINE_int32(libyuv_cpu_info, 0, + "cpu flags for benchmark code. 1 = C, -1 = SIMD"); + +// For quicker unittests, default is 128 x 72. But when benchmarking, +// default to 720p. Allow size to specify. +// Set flags to -1 for benchmarking to avoid slower C code. 
+ +LibYUVConvertTest::LibYUVConvertTest() : + benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(128), + benchmark_height_(72), disable_cpu_flags_(1), benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (FLAGS_libyuv_repeat) { + benchmark_iterations_ = FLAGS_libyuv_repeat; + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (FLAGS_libyuv_width) { + benchmark_width_ = FLAGS_libyuv_width; + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (FLAGS_libyuv_height) { + benchmark_height_ = FLAGS_libyuv_height; + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (FLAGS_libyuv_flags) { + disable_cpu_flags_ = FLAGS_libyuv_flags; + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_info); // NOLINT + } + if (FLAGS_libyuv_cpu_info) { + benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; + } + benchmark_pixels_div256_ = static_cast<int>(( + static_cast<double>(Abs(benchmark_width_)) * + static_cast<double>(Abs(benchmark_height_)) * + static_cast<double>(benchmark_iterations_) + 255.0) / 256.0); + benchmark_pixels_div1280_ = static_cast<int>(( + static_cast<double>(Abs(benchmark_width_)) * + static_cast<double>(Abs(benchmark_height_)) * + static_cast<double>(benchmark_iterations_) + 1279.0) / 1280.0); +} + +LibYUVColorTest::LibYUVColorTest() : + benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(128), + benchmark_height_(72), disable_cpu_flags_(1), benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if
(FLAGS_libyuv_repeat) { + benchmark_iterations_ = FLAGS_libyuv_repeat; + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (FLAGS_libyuv_width) { + benchmark_width_ = FLAGS_libyuv_width; + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (FLAGS_libyuv_height) { + benchmark_height_ = FLAGS_libyuv_height; + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (FLAGS_libyuv_flags) { + disable_cpu_flags_ = FLAGS_libyuv_flags; + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_info); // NOLINT + } + if (FLAGS_libyuv_cpu_info) { + benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; + } + benchmark_pixels_div256_ = static_cast<int>(( + static_cast<double>(Abs(benchmark_width_)) * + static_cast<double>(Abs(benchmark_height_)) * + static_cast<double>(benchmark_iterations_) + 255.0) / 256.0); + benchmark_pixels_div1280_ = static_cast<int>(( + static_cast<double>(Abs(benchmark_width_)) * + static_cast<double>(Abs(benchmark_height_)) * + static_cast<double>(benchmark_iterations_) + 1279.0) / 1280.0); +} + +LibYUVScaleTest::LibYUVScaleTest() : + benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(128), + benchmark_height_(72), disable_cpu_flags_(1), benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (FLAGS_libyuv_repeat) { + benchmark_iterations_ = FLAGS_libyuv_repeat; + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (FLAGS_libyuv_width) { + benchmark_width_ =
FLAGS_libyuv_width; + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (FLAGS_libyuv_height) { + benchmark_height_ = FLAGS_libyuv_height; + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (FLAGS_libyuv_flags) { + disable_cpu_flags_ = FLAGS_libyuv_flags; + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_info); // NOLINT + } + if (FLAGS_libyuv_cpu_info) { + benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; + } + benchmark_pixels_div256_ = static_cast<int>(( + static_cast<double>(Abs(benchmark_width_)) * + static_cast<double>(Abs(benchmark_height_)) * + static_cast<double>(benchmark_iterations_) + 255.0) / 256.0); + benchmark_pixels_div1280_ = static_cast<int>(( + static_cast<double>(Abs(benchmark_width_)) * + static_cast<double>(Abs(benchmark_height_)) * + static_cast<double>(benchmark_iterations_) + 1279.0) / 1280.0); +} + +LibYUVRotateTest::LibYUVRotateTest() : + benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(128), + benchmark_height_(72), disable_cpu_flags_(1), benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (FLAGS_libyuv_repeat) { + benchmark_iterations_ = FLAGS_libyuv_repeat; + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (FLAGS_libyuv_width) { + benchmark_width_ = FLAGS_libyuv_width; + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (FLAGS_libyuv_height) { + benchmark_height_ = FLAGS_libyuv_height; + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); //
NOLINT + } + if (FLAGS_libyuv_flags) { + disable_cpu_flags_ = FLAGS_libyuv_flags; + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT + } + if (FLAGS_libyuv_cpu_info) { + benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; + } + benchmark_pixels_div256_ = static_cast<int>(( + static_cast<double>(Abs(benchmark_width_)) * + static_cast<double>(Abs(benchmark_height_)) * + static_cast<double>(benchmark_iterations_) + 255.0) / 256.0); + benchmark_pixels_div1280_ = static_cast<int>(( + static_cast<double>(Abs(benchmark_width_)) * + static_cast<double>(Abs(benchmark_height_)) * + static_cast<double>(benchmark_iterations_) + 1279.0) / 1280.0); +} + +LibYUVPlanarTest::LibYUVPlanarTest() : + benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(128), + benchmark_height_(72), disable_cpu_flags_(1), benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (FLAGS_libyuv_repeat) { + benchmark_iterations_ = FLAGS_libyuv_repeat; + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (FLAGS_libyuv_width) { + benchmark_width_ = FLAGS_libyuv_width; + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (FLAGS_libyuv_height) { + benchmark_height_ = FLAGS_libyuv_height; + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (FLAGS_libyuv_flags) { + disable_cpu_flags_ = FLAGS_libyuv_flags; + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT + } + if (FLAGS_libyuv_cpu_info) { + benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; + } + benchmark_pixels_div256_ = 
static_cast<int>(( + static_cast<double>(Abs(benchmark_width_)) * + static_cast<double>(Abs(benchmark_height_)) * + static_cast<double>(benchmark_iterations_) + 255.0) / 256.0); + benchmark_pixels_div1280_ = static_cast<int>(( + static_cast<double>(Abs(benchmark_width_)) * + static_cast<double>(Abs(benchmark_height_)) * + static_cast<double>(benchmark_iterations_) + 1279.0) / 1280.0); +} + +LibYUVBaseTest::LibYUVBaseTest() : + benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(128), + benchmark_height_(72), disable_cpu_flags_(1), benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (FLAGS_libyuv_repeat) { + benchmark_iterations_ = FLAGS_libyuv_repeat; + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (FLAGS_libyuv_width) { + benchmark_width_ = FLAGS_libyuv_width; + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (FLAGS_libyuv_height) { + benchmark_height_ = FLAGS_libyuv_height; + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (FLAGS_libyuv_flags) { + disable_cpu_flags_ = FLAGS_libyuv_flags; + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT + } + if (FLAGS_libyuv_cpu_info) { + benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; + } + benchmark_pixels_div256_ = static_cast<int>(( + static_cast<double>(Abs(benchmark_width_)) * + static_cast<double>(Abs(benchmark_height_)) * + static_cast<double>(benchmark_iterations_) + 255.0) / 256.0); + benchmark_pixels_div1280_ = static_cast<int>(( + static_cast<double>(Abs(benchmark_width_)) * + static_cast<double>(Abs(benchmark_height_)) * + 
static_cast<double>(benchmark_iterations_) + 1279.0) / 1280.0); } int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); + // AllowCommandLineParsing allows us to ignore flags passed on to us by + // Chromium build bots without having to explicitly disable them. + google::AllowCommandLineReparsing(); + google::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } diff --git a/files/unit_test/unit_test.h b/files/unit_test/unit_test.h index 62521e88..f2c4bef0 100644 --- a/files/unit_test/unit_test.h +++ b/files/unit_test/unit_test.h @@ -4,53 +4,85 @@ * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef UNIT_TEST_UNIT_TEST_H_ +#ifndef UNIT_TEST_UNIT_TEST_H_ // NOLINT #define UNIT_TEST_UNIT_TEST_H_ +#ifdef WIN32 +#include <windows.h> +#else +#include <sys/time.h> +#include <sys/resource.h> +#endif + #include <gtest/gtest.h> -#define align_buffer_16(var, size) \ - uint8* var; \ - uint8* var##_mem; \ - var##_mem = reinterpret_cast<uint8*>(malloc((size) + 15)); \ - var = reinterpret_cast<uint8*> \ - ((reinterpret_cast<intptr_t>(var##_mem) + 15) & ~15); +#include "libyuv/basic_types.h" -#define free_aligned_buffer_16(var) \ - free(var##_mem); \ - var = 0; +#ifndef SIMD_ALIGNED +#if defined(_MSC_VER) && !defined(__CLR_VER) +#define SIMD_ALIGNED(var) __declspec(align(16)) var +#elif defined(__GNUC__) && !defined(__pnacl__) +#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) +#else +#define SIMD_ALIGNED(var) var +#endif +#endif +static __inline int Abs(int v) { + return v >= 0 ? 
v : -v; +} + +#define OFFBY 0 + +// Scaling uses 16.16 fixed point to step thru the source image, so a +// maximum size of 32767.999 can be expressed. 32768 is valid because +// the step is 1 beyond the image but not used. +// Destination size is mainly constrained by valid scale step not the +// absolute size, so it may be possible to relax the destination size +// constraint. +// Source size is unconstrained for most specialized scalers. e.g. +// An image of 65536 scaled to half size would be valid. The test +// could be relaxed for special scale factors. +// If this test is removed, the scaling function should gracefully +// fail with a return code. The test could be changed to know that +// libyuv failed in a controlled way. + +static const int kMaxWidth = 32768; +static const int kMaxHeight = 32768; + +static inline bool SizeValid(int src_width, int src_height, + int dst_width, int dst_height) { + if (src_width > kMaxWidth || src_height > kMaxHeight || + dst_width > kMaxWidth || dst_height > kMaxHeight) { + printf("Warning - size too large to test. 
Skipping\n"); + return false; + } + return true; +} #define align_buffer_page_end(var, size) \ uint8* var; \ uint8* var##_mem; \ - var##_mem = reinterpret_cast<uint8*>(malloc(((size) + 4095) & ~4095)); \ - var = var##_mem + (-(size) & 4095); + var##_mem = reinterpret_cast<uint8*>(malloc(((size) + 4095 + 63) & ~4095)); \ + var = (uint8*)((intptr_t)(var##_mem + (((size) + 4095 + 63) & ~4095) - \ + (size)) & ~63); #define free_aligned_buffer_page_end(var) \ free(var##_mem); \ var = 0; #ifdef WIN32 -#include <windows.h> static inline double get_time() { LARGE_INTEGER t, f; QueryPerformanceCounter(&t); QueryPerformanceFrequency(&f); return static_cast<double>(t.QuadPart) / static_cast<double>(f.QuadPart); } - -#define random rand -#define srandom srand #else - -#include <sys/time.h> -#include <sys/resource.h> - static inline double get_time() { struct timeval t; struct timezone tzp; @@ -59,16 +91,109 @@ static inline double get_time() { } #endif -class libyuvTest : public ::testing::Test { +#ifndef SIMD_ALIGNED +#if defined(_MSC_VER) && !defined(__CLR_VER) +#define SIMD_ALIGNED(var) __declspec(align(16)) var +#elif defined(__GNUC__) && !defined(__pnacl__) +#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) +#else +#define SIMD_ALIGNED(var) var +#endif +#endif + +extern unsigned int fastrand_seed; +inline int fastrand() { + fastrand_seed = fastrand_seed * 214013u + 2531011u; + return static_cast<int>((fastrand_seed >> 16) & 0xffff); +} + +static inline void MemRandomize(uint8* dst, int64 len) { + int64 i; + for (i = 0; i < len - 1; i += 2) { + *reinterpret_cast<uint16*>(dst) = fastrand(); + dst += 2; + } + for (; i < len; ++i) { + *dst++ = fastrand(); + } +} + +class LibYUVColorTest : public ::testing::Test { + protected: + LibYUVColorTest(); + + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. 
+ int benchmark_pixels_div256_; // Total pixels to benchmark / 256. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. + int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. +}; + +class LibYUVConvertTest : public ::testing::Test { + protected: + LibYUVConvertTest(); + + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div256_; // Total pixels to benchmark / 256. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. + int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. +}; + +class LibYUVScaleTest : public ::testing::Test { protected: - libyuvTest(); + LibYUVScaleTest(); + + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div256_; // Total pixels to benchmark / 256. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. + int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. +}; - const int rotate_max_w_; - const int rotate_max_h_; +class LibYUVRotateTest : public ::testing::Test { + protected: + LibYUVRotateTest(); + + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div256_; // Total pixels to benchmark / 256. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. 
+ int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. +}; + +class LibYUVPlanarTest : public ::testing::Test { + protected: + LibYUVPlanarTest(); + + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div256_; // Total pixels to benchmark / 256. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. + int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. +}; + +class LibYUVBaseTest : public ::testing::Test { + protected: + LibYUVBaseTest(); - int benchmark_iterations_; - const int benchmark_width_; - const int benchmark_height_; + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div256_; // Total pixels to benchmark / 256. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. + int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. }; -#endif // UNIT_TEST_UNIT_TEST_H_ +#endif // UNIT_TEST_UNIT_TEST_H_ NOLINT diff --git a/files/unit_test/version_test.cc b/files/unit_test/version_test.cc deleted file mode 100644 index c53d754c..00000000 --- a/files/unit_test/version_test.cc +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <stdlib.h> -#include <string.h> - -#include "libyuv/basic_types.h" -#include "libyuv/version.h" -#include "../unit_test/unit_test.h" - -namespace libyuv { - -// Tests SVN version against include/libyuv/version.h -// SVN version is bumped by documentation changes as well as code. -// Although the versions should match, once checked in, a tolerance is allowed. -TEST_F(libyuvTest, TestVersion) { - EXPECT_GE(LIBYUV_VERSION, 169); // 169 is first version to support version. - printf("LIBYUV_VERSION %d\n", LIBYUV_VERSION); -#ifdef LIBYUV_SVNREVISION - const char *ver = strchr(LIBYUV_SVNREVISION, ':'); - if (ver) { - ++ver; - } else { - ver = LIBYUV_SVNREVISION; - } - int svn_revision = atoi(ver); // NOLINT - printf("LIBYUV_SVNREVISION %d\n", svn_revision); - EXPECT_NEAR(LIBYUV_VERSION, svn_revision, 3); // Allow version to be close. - if (LIBYUV_VERSION != svn_revision) { - printf("WARNING - Versions do not match.\n"); - } -#endif -} - -} // namespace libyuv diff --git a/files/unit_test/video_common_test.cc b/files/unit_test/video_common_test.cc new file mode 100644 index 00000000..ac97d0f3 --- /dev/null +++ b/files/unit_test/video_common_test.cc @@ -0,0 +1,107 @@ +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> +#include <string.h> + +#include "libyuv/video_common.h" +#include "../unit_test/unit_test.h" + +namespace libyuv { + +// Tests FourCC codes in video common, which are used for ConvertToI420(). 
+ +static bool TestValidChar(uint32 onecc) { + if ((onecc >= '0' && onecc <= '9') || + (onecc >= 'A' && onecc <= 'Z') || + (onecc >= 'a' && onecc <= 'z') || + (onecc == ' ') || (onecc == 0xff)) { + return true; + } + return false; +} + +static bool TestValidFourCC(uint32 fourcc, int bpp) { + if (!TestValidChar(fourcc & 0xff) || + !TestValidChar((fourcc >> 8) & 0xff) || + !TestValidChar((fourcc >> 16) & 0xff) || + !TestValidChar((fourcc >> 24) & 0xff)) { + return false; + } + if (bpp < 0 || bpp > 32) { + return false; + } + return true; +} + +TEST_F(LibYUVBaseTest, TestCanonicalFourCC) { + EXPECT_EQ(FOURCC_I420, CanonicalFourCC(FOURCC_IYUV)); + EXPECT_EQ(FOURCC_I420, CanonicalFourCC(FOURCC_YU12)); + EXPECT_EQ(FOURCC_I422, CanonicalFourCC(FOURCC_YU16)); + EXPECT_EQ(FOURCC_I444, CanonicalFourCC(FOURCC_YU24)); + EXPECT_EQ(FOURCC_YUY2, CanonicalFourCC(FOURCC_YUYV)); + EXPECT_EQ(FOURCC_YUY2, CanonicalFourCC(FOURCC_YUVS)); + EXPECT_EQ(FOURCC_UYVY, CanonicalFourCC(FOURCC_HDYC)); + EXPECT_EQ(FOURCC_UYVY, CanonicalFourCC(FOURCC_2VUY)); + EXPECT_EQ(FOURCC_MJPG, CanonicalFourCC(FOURCC_JPEG)); + EXPECT_EQ(FOURCC_MJPG, CanonicalFourCC(FOURCC_DMB1)); + EXPECT_EQ(FOURCC_RAW, CanonicalFourCC(FOURCC_RGB3)); + EXPECT_EQ(FOURCC_24BG, CanonicalFourCC(FOURCC_BGR3)); + EXPECT_EQ(FOURCC_BGRA, CanonicalFourCC(FOURCC_CM32)); + EXPECT_EQ(FOURCC_RAW, CanonicalFourCC(FOURCC_CM24)); + EXPECT_EQ(FOURCC_RGBO, CanonicalFourCC(FOURCC_L555)); + EXPECT_EQ(FOURCC_RGBP, CanonicalFourCC(FOURCC_L565)); + EXPECT_EQ(FOURCC_RGBO, CanonicalFourCC(FOURCC_5551)); +} + +TEST_F(LibYUVBaseTest, TestFourCC) { + EXPECT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I422, FOURCC_BPP_I422)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I444, FOURCC_BPP_I444)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I411, FOURCC_BPP_I411)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I400, FOURCC_BPP_I400)); + 
EXPECT_TRUE(TestValidFourCC(FOURCC_NV21, FOURCC_BPP_NV21)); + EXPECT_TRUE(TestValidFourCC(FOURCC_NV12, FOURCC_BPP_NV12)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YUY2, FOURCC_BPP_YUY2)); + EXPECT_TRUE(TestValidFourCC(FOURCC_UYVY, FOURCC_BPP_UYVY)); + EXPECT_TRUE(TestValidFourCC(FOURCC_M420, FOURCC_BPP_M420)); + EXPECT_TRUE(TestValidFourCC(FOURCC_Q420, FOURCC_BPP_Q420)); // deprecated. + EXPECT_TRUE(TestValidFourCC(FOURCC_ARGB, FOURCC_BPP_ARGB)); + EXPECT_TRUE(TestValidFourCC(FOURCC_BGRA, FOURCC_BPP_BGRA)); + EXPECT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR)); + EXPECT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RGBP, FOURCC_BPP_RGBP)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RGBO, FOURCC_BPP_RGBO)); + EXPECT_TRUE(TestValidFourCC(FOURCC_R444, FOURCC_BPP_R444)); + EXPECT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YV16, FOURCC_BPP_YV16)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YV24, FOURCC_BPP_YV24)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YU12, FOURCC_BPP_YU12)); + EXPECT_TRUE(TestValidFourCC(FOURCC_IYUV, FOURCC_BPP_IYUV)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YU16, FOURCC_BPP_YU16)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YU24, FOURCC_BPP_YU24)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YUYV, FOURCC_BPP_YUYV)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YUVS, FOURCC_BPP_YUVS)); + EXPECT_TRUE(TestValidFourCC(FOURCC_HDYC, FOURCC_BPP_HDYC)); + EXPECT_TRUE(TestValidFourCC(FOURCC_2VUY, FOURCC_BPP_2VUY)); + EXPECT_TRUE(TestValidFourCC(FOURCC_JPEG, FOURCC_BPP_JPEG)); + EXPECT_TRUE(TestValidFourCC(FOURCC_DMB1, FOURCC_BPP_DMB1)); + EXPECT_TRUE(TestValidFourCC(FOURCC_BA81, FOURCC_BPP_BA81)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RGB3, FOURCC_BPP_RGB3)); + EXPECT_TRUE(TestValidFourCC(FOURCC_BGR3, FOURCC_BPP_BGR3)); + 
EXPECT_TRUE(TestValidFourCC(FOURCC_H264, FOURCC_BPP_H264)); + EXPECT_TRUE(TestValidFourCC(FOURCC_ANY, FOURCC_BPP_ANY)); +} + +} // namespace libyuv |