diff options
Diffstat (limited to 'files/unit_test')
-rw-r--r-- | files/unit_test/basictypes_test.cc | 33 | ||||
-rw-r--r-- | files/unit_test/color_test.cc | 82 | ||||
-rw-r--r-- | files/unit_test/compare_test.cc | 292 | ||||
-rw-r--r-- | files/unit_test/convert_test.cc | 1979 | ||||
-rw-r--r-- | files/unit_test/cpu_test.cc | 82 | ||||
-rw-r--r-- | files/unit_test/cpu_thread_test.cc | 63 | ||||
-rw-r--r-- | files/unit_test/math_test.cc | 12 | ||||
-rw-r--r-- | files/unit_test/planar_test.cc | 957 | ||||
-rw-r--r-- | files/unit_test/rotate_test.cc | 117 | ||||
-rw-r--r-- | files/unit_test/scale_argb_test.cc | 44 | ||||
-rw-r--r-- | files/unit_test/scale_test.cc | 879 | ||||
-rw-r--r-- | files/unit_test/testdata/juno.txt | 30 | ||||
-rw-r--r-- | files/unit_test/testdata/test0.jpg | bin | 0 -> 421 bytes | |||
-rw-r--r-- | files/unit_test/testdata/test1.jpg | bin | 0 -> 735 bytes | |||
-rw-r--r-- | files/unit_test/testdata/test2.jpg | bin | 0 -> 685 bytes | |||
-rw-r--r-- | files/unit_test/testdata/test3.jpg | bin | 0 -> 704 bytes | |||
-rw-r--r-- | files/unit_test/testdata/test4.jpg | bin | 0 -> 701 bytes | |||
-rw-r--r-- | files/unit_test/unit_test.cc | 224 | ||||
-rw-r--r-- | files/unit_test/unit_test.h | 77 | ||||
-rw-r--r-- | files/unit_test/video_common_test.cc | 50 |
20 files changed, 4086 insertions, 835 deletions
diff --git a/files/unit_test/basictypes_test.cc b/files/unit_test/basictypes_test.cc index 89f7644d..9aaa2dcd 100644 --- a/files/unit_test/basictypes_test.cc +++ b/files/unit_test/basictypes_test.cc @@ -13,25 +13,15 @@ namespace libyuv { -TEST_F(LibYUVBaseTest, Endian) { - uint16 v16 = 0x1234u; - uint8 first_byte = *reinterpret_cast<uint8*>(&v16); -#if defined(LIBYUV_LITTLE_ENDIAN) - EXPECT_EQ(0x34u, first_byte); -#else - EXPECT_EQ(0x12u, first_byte); -#endif -} - TEST_F(LibYUVBaseTest, SizeOfTypes) { - int8 i8 = -1; - uint8 u8 = 1u; - int16 i16 = -1; - uint16 u16 = 1u; - int32 i32 = -1; - uint32 u32 = 1u; - int64 i64 = -1; - uint64 u64 = 1u; + int8_t i8 = -1; + uint8_t u8 = 1u; + int16_t i16 = -1; + uint16_t u16 = 1u; + int32_t i32 = -1; + uint32_t u32 = 1u; + int64_t i64 = -1; + uint64_t u64 = 1u; EXPECT_EQ(1u, sizeof(i8)); EXPECT_EQ(1u, sizeof(u8)); EXPECT_EQ(2u, sizeof(i16)); @@ -50,11 +40,4 @@ TEST_F(LibYUVBaseTest, SizeOfTypes) { EXPECT_LT(0u, u64); } -TEST_F(LibYUVBaseTest, SizeOfConstants) { - EXPECT_EQ(8u, sizeof(INT64_C(0))); - EXPECT_EQ(8u, sizeof(UINT64_C(0))); - EXPECT_EQ(8u, sizeof(INT64_C(0x1234567887654321))); - EXPECT_EQ(8u, sizeof(UINT64_C(0x8765432112345678))); -} - } // namespace libyuv diff --git a/files/unit_test/color_test.cc b/files/unit_test/color_test.cc index 0aa7a54a..4bb448d5 100644 --- a/files/unit_test/color_test.cc +++ b/files/unit_test/color_test.cc @@ -63,10 +63,10 @@ namespace libyuv { \ /* The test is overall for color conversion matrix being reversible, so */ \ /* this initializes the pixel with 2x2 blocks to eliminate subsampling. 
*/ \ - uint8* p = orig_y; \ + uint8_t* p = orig_y; \ for (int y = 0; y < benchmark_height_ - HS1; y += HS) { \ for (int x = 0; x < benchmark_width_ - 1; x += 2) { \ - uint8 r = static_cast<uint8>(fastrand()); \ + uint8_t r = static_cast<uint8_t>(fastrand()); \ p[0] = r; \ p[1] = r; \ p[HN] = r; \ @@ -74,7 +74,7 @@ namespace libyuv { p += 2; \ } \ if (benchmark_width_ & 1) { \ - uint8 r = static_cast<uint8>(fastrand()); \ + uint8_t r = static_cast<uint8_t>(fastrand()); \ p[0] = r; \ p[HN] = r; \ p += 1; \ @@ -83,13 +83,13 @@ namespace libyuv { } \ if ((benchmark_height_ & 1) && HS == 2) { \ for (int x = 0; x < benchmark_width_ - 1; x += 2) { \ - uint8 r = static_cast<uint8>(fastrand()); \ + uint8_t r = static_cast<uint8_t>(fastrand()); \ p[0] = r; \ p[1] = r; \ p += 2; \ } \ if (benchmark_width_ & 1) { \ - uint8 r = static_cast<uint8>(fastrand()); \ + uint8_t r = static_cast<uint8_t>(fastrand()); \ p[0] = r; \ p += 1; \ } \ @@ -147,10 +147,10 @@ static void YUVToRGB(int y, int u, int v, int* r, int* g, int* b) { const int kPixels = kWidth * kHeight; const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); - SIMD_ALIGNED(uint8 orig_y[16]); - SIMD_ALIGNED(uint8 orig_u[8]); - SIMD_ALIGNED(uint8 orig_v[8]); - SIMD_ALIGNED(uint8 orig_pixels[16 * 4]); + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_u[8]); + SIMD_ALIGNED(uint8_t orig_v[8]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); memset(orig_y, y, kPixels); memset(orig_u, u, kHalfPixels); memset(orig_v, v, kHalfPixels); @@ -170,10 +170,10 @@ static void YUVJToRGB(int y, int u, int v, int* r, int* g, int* b) { const int kPixels = kWidth * kHeight; const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); - SIMD_ALIGNED(uint8 orig_y[16]); - SIMD_ALIGNED(uint8 orig_u[8]); - SIMD_ALIGNED(uint8 orig_v[8]); - SIMD_ALIGNED(uint8 orig_pixels[16 * 4]); + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_u[8]); + SIMD_ALIGNED(uint8_t orig_v[8]); + SIMD_ALIGNED(uint8_t orig_pixels[16 
* 4]); memset(orig_y, y, kPixels); memset(orig_u, u, kHalfPixels); memset(orig_v, v, kHalfPixels); @@ -192,8 +192,8 @@ static void YToRGB(int y, int* r, int* g, int* b) { const int kHeight = 1; const int kPixels = kWidth * kHeight; - SIMD_ALIGNED(uint8 orig_y[16]); - SIMD_ALIGNED(uint8 orig_pixels[16 * 4]); + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); memset(orig_y, y, kPixels); /* YUV converted to ARGB. */ @@ -209,8 +209,8 @@ static void YJToRGB(int y, int* r, int* g, int* b) { const int kHeight = 1; const int kPixels = kWidth * kHeight; - SIMD_ALIGNED(uint8 orig_y[16]); - SIMD_ALIGNED(uint8 orig_pixels[16 * 4]); + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); memset(orig_y, y, kPixels); /* YUV converted to ARGB. */ @@ -471,21 +471,22 @@ static void PrintHistogram(int rh[256], int gh[256], int bh[256]) { printf("\n"); } +// Step by 5 on inner loop goes from 0 to 255 inclusive. +// Set to 1 for better coverage. 3, 5 or 17 for faster testing. 
+#define FASTSTEP 5 TEST_F(LibYUVColorTest, TestFullYUV) { - int rh[256] = - { - 0, - }, - gh[256] = - { - 0, - }, - bh[256] = { - 0, - }; + int rh[256] = { + 0, + }; + int gh[256] = { + 0, + }; + int bh[256] = { + 0, + }; for (int u = 0; u < 256; ++u) { for (int v = 0; v < 256; ++v) { - for (int y2 = 0; y2 < 256; ++y2) { + for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { int r0, g0, b0, r1, g1, b1; int y = RANDOM256(y2); YUVToRGBReference(y, u, v, &r0, &g0, &b0); @@ -503,20 +504,18 @@ TEST_F(LibYUVColorTest, TestFullYUV) { } TEST_F(LibYUVColorTest, TestFullYUVJ) { - int rh[256] = - { - 0, - }, - gh[256] = - { - 0, - }, - bh[256] = { - 0, - }; + int rh[256] = { + 0, + }; + int gh[256] = { + 0, + }; + int bh[256] = { + 0, + }; for (int u = 0; u < 256; ++u) { for (int v = 0; v < 256; ++v) { - for (int y2 = 0; y2 < 256; ++y2) { + for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { int r0, g0, b0, r1, g1, b1; int y = RANDOM256(y2); YUVJToRGBReference(y, u, v, &r0, &g0, &b0); @@ -532,6 +531,7 @@ TEST_F(LibYUVColorTest, TestFullYUVJ) { } PrintHistogram(rh, gh, bh); } +#undef FASTSTEP TEST_F(LibYUVColorTest, TestGreyYUVJ) { int r0, g0, b0, r1, g1, b1, r2, g2, b2; diff --git a/files/unit_test/compare_test.cc b/files/unit_test/compare_test.cc index 13f74705..136254e1 100644 --- a/files/unit_test/compare_test.cc +++ b/files/unit_test/compare_test.cc @@ -15,14 +15,17 @@ #include "../unit_test/unit_test.h" #include "libyuv/basic_types.h" #include "libyuv/compare.h" +#include "libyuv/compare_row.h" /* For HammingDistance_C */ #include "libyuv/cpu_id.h" #include "libyuv/video_common.h" namespace libyuv { // hash seed of 5381 recommended. 
-static uint32 ReferenceHashDjb2(const uint8* src, uint64 count, uint32 seed) { - uint32 hash = seed; +static uint32_t ReferenceHashDjb2(const uint8_t* src, + uint64_t count, + uint32_t seed) { + uint32_t hash = seed; if (count > 0) { do { hash = hash * 33 + *src++; @@ -31,7 +34,7 @@ static uint32 ReferenceHashDjb2(const uint8* src, uint64 count, uint32 seed) { return hash; } -TEST_F(LibYUVBaseTest, Djb2_Test) { +TEST_F(LibYUVCompareTest, Djb2_Test) { const int kMaxTest = benchmark_width_ * benchmark_height_; align_buffer_page_end(src_a, kMaxTest); align_buffer_page_end(src_b, kMaxTest); @@ -40,8 +43,8 @@ TEST_F(LibYUVBaseTest, Djb2_Test) { "The quick brown fox jumps over the lazy dog" " and feels as if he were in the seventh heaven of typography" " together with Hermann Zapf"; - uint32 foxhash = HashDjb2(reinterpret_cast<const uint8*>(fox), 131, 5381); - const uint32 kExpectedFoxHash = 2611006483u; + uint32_t foxhash = HashDjb2(reinterpret_cast<const uint8_t*>(fox), 131, 5381); + const uint32_t kExpectedFoxHash = 2611006483u; EXPECT_EQ(kExpectedFoxHash, foxhash); for (int i = 0; i < kMaxTest; ++i) { @@ -49,8 +52,8 @@ TEST_F(LibYUVBaseTest, Djb2_Test) { src_b[i] = (fastrand() & 0xff); } // Compare different buffers. Expect hash is different. - uint32 h1 = HashDjb2(src_a, kMaxTest, 5381); - uint32 h2 = HashDjb2(src_b, kMaxTest, 5381); + uint32_t h1 = HashDjb2(src_a, kMaxTest, 5381); + uint32_t h2 = HashDjb2(src_b, kMaxTest, 5381); EXPECT_NE(h1, h2); // Make last half same. Expect hash is different. 
@@ -116,15 +119,15 @@ TEST_F(LibYUVBaseTest, Djb2_Test) { free_aligned_buffer_page_end(src_b); } -TEST_F(LibYUVBaseTest, BenchmarkDjb2_Opt) { +TEST_F(LibYUVCompareTest, BenchmarkDjb2_Opt) { const int kMaxTest = benchmark_width_ * benchmark_height_; align_buffer_page_end(src_a, kMaxTest); for (int i = 0; i < kMaxTest; ++i) { src_a[i] = i; } - uint32 h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381); - uint32 h1; + uint32_t h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381); + uint32_t h1; for (int i = 0; i < benchmark_iterations_; ++i) { h1 = HashDjb2(src_a, kMaxTest, 5381); } @@ -132,14 +135,14 @@ TEST_F(LibYUVBaseTest, BenchmarkDjb2_Opt) { free_aligned_buffer_page_end(src_a); } -TEST_F(LibYUVBaseTest, BenchmarkDjb2_Unaligned) { +TEST_F(LibYUVCompareTest, BenchmarkDjb2_Unaligned) { const int kMaxTest = benchmark_width_ * benchmark_height_; align_buffer_page_end(src_a, kMaxTest + 1); for (int i = 0; i < kMaxTest; ++i) { src_a[i + 1] = i; } - uint32 h2 = ReferenceHashDjb2(src_a + 1, kMaxTest, 5381); - uint32 h1; + uint32_t h2 = ReferenceHashDjb2(src_a + 1, kMaxTest, 5381); + uint32_t h1; for (int i = 0; i < benchmark_iterations_; ++i) { h1 = HashDjb2(src_a + 1, kMaxTest, 5381); } @@ -147,8 +150,8 @@ TEST_F(LibYUVBaseTest, BenchmarkDjb2_Unaligned) { free_aligned_buffer_page_end(src_a); } -TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Opt) { - uint32 fourcc; +TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Opt) { + uint32_t fourcc; const int kMaxTest = benchmark_width_ * benchmark_height_ * 4; align_buffer_page_end(src_a, kMaxTest); for (int i = 0; i < kMaxTest; ++i) { @@ -158,12 +161,12 @@ TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Opt) { src_a[0] = 0; fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_, benchmark_height_); - EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_BGRA), fourcc); + EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_BGRA), fourcc); src_a[0] = 255; src_a[3] = 0; fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_, benchmark_height_); - 
EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_ARGB), fourcc); + EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_ARGB), fourcc); src_a[3] = 255; for (int i = 0; i < benchmark_iterations_; ++i) { @@ -175,8 +178,8 @@ TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Opt) { free_aligned_buffer_page_end(src_a); } -TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Unaligned) { - uint32 fourcc; +TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Unaligned) { + uint32_t fourcc; const int kMaxTest = benchmark_width_ * benchmark_height_ * 4 + 1; align_buffer_page_end(src_a, kMaxTest); for (int i = 1; i < kMaxTest; ++i) { @@ -186,12 +189,12 @@ TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Unaligned) { src_a[0 + 1] = 0; fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_, benchmark_height_); - EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_BGRA), fourcc); + EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_BGRA), fourcc); src_a[0 + 1] = 255; src_a[3 + 1] = 0; fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_, benchmark_height_); - EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_ARGB), fourcc); + EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_ARGB), fourcc); src_a[3 + 1] = 255; for (int i = 0; i < benchmark_iterations_; ++i) { @@ -202,7 +205,223 @@ TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Unaligned) { free_aligned_buffer_page_end(src_a); } -TEST_F(LibYUVBaseTest, BenchmarkSumSquareError_Opt) { + +TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) { + const int kMaxWidth = 4096 * 3; + align_buffer_page_end(src_a, kMaxWidth); + align_buffer_page_end(src_b, kMaxWidth); + memset(src_a, 0, kMaxWidth); + memset(src_b, 0, kMaxWidth); + + // Test known value + memcpy(src_a, "test0123test4567", 16); + memcpy(src_b, "tick0123tock4567", 16); + uint32_t h1 = HammingDistance_C(src_a, src_b, 16); + EXPECT_EQ(16u, h1); + + // Test C vs OPT on random buffer + MemRandomize(src_a, kMaxWidth); + MemRandomize(src_b, kMaxWidth); + + uint32_t h0 = HammingDistance_C(src_a, src_b, 
kMaxWidth); + + int count = + benchmark_iterations_ * + ((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth); + for (int i = 0; i < count; ++i) { +#if defined(HAS_HAMMINGDISTANCE_NEON) + h1 = HammingDistance_NEON(src_a, src_b, kMaxWidth); +#elif defined(HAS_HAMMINGDISTANCE_AVX2) + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + if (has_avx2) { + h1 = HammingDistance_AVX2(src_a, src_b, kMaxWidth); + } else { + int has_sse42 = TestCpuFlag(kCpuHasSSE42); + if (has_sse42) { + h1 = HammingDistance_SSE42(src_a, src_b, kMaxWidth); + } else { + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + if (has_ssse3) { + h1 = HammingDistance_SSSE3(src_a, src_b, kMaxWidth); + } else { + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); + } + } + } +#elif defined(HAS_HAMMINGDISTANCE_SSE42) + int has_sse42 = TestCpuFlag(kCpuHasSSE42); + if (has_sse42) { + h1 = HammingDistance_SSE42(src_a, src_b, kMaxWidth); + } else { + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); + } +#else + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); +#endif + } + EXPECT_EQ(h0, h1); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_C) { + const int kMaxWidth = 4096 * 3; + align_buffer_page_end(src_a, kMaxWidth); + align_buffer_page_end(src_b, kMaxWidth); + memset(src_a, 0, kMaxWidth); + memset(src_b, 0, kMaxWidth); + + // Test known value + memcpy(src_a, "test0123test4567", 16); + memcpy(src_b, "tick0123tock4567", 16); + uint32_t h1 = HammingDistance_C(src_a, src_b, 16); + EXPECT_EQ(16u, h1); + + // Test C vs OPT on random buffer + MemRandomize(src_a, kMaxWidth); + MemRandomize(src_b, kMaxWidth); + + uint32_t h0 = HammingDistance_C(src_a, src_b, kMaxWidth); + + int count = + benchmark_iterations_ * + ((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth); + for (int i = 0; i < count; ++i) { + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); + } + + EXPECT_EQ(h0, h1); + + 
free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, BenchmarkHammingDistance) { + const int kMaxWidth = 4096 * 3; + align_buffer_page_end(src_a, kMaxWidth); + align_buffer_page_end(src_b, kMaxWidth); + memset(src_a, 0, kMaxWidth); + memset(src_b, 0, kMaxWidth); + + memcpy(src_a, "test0123test4567", 16); + memcpy(src_b, "tick0123tock4567", 16); + uint64_t h1 = ComputeHammingDistance(src_a, src_b, 16); + EXPECT_EQ(16u, h1); + + // Test C vs OPT on random buffer + MemRandomize(src_a, kMaxWidth); + MemRandomize(src_b, kMaxWidth); + + uint32_t h0 = HammingDistance_C(src_a, src_b, kMaxWidth); + + int count = + benchmark_iterations_ * + ((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth); + for (int i = 0; i < count; ++i) { + h1 = ComputeHammingDistance(src_a, src_b, kMaxWidth); + } + + EXPECT_EQ(h0, h1); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +// Tests low levels match reference C for specified size. +// The opt implementations have size limitations +// For NEON the counters are 16 bit so the shorts overflow after 65536 bytes. +// So doing one less iteration of the loop is the maximum. 
+#if defined(HAS_HAMMINGDISTANCE_NEON) +static const int kMaxOptCount = 65536 - 32; // 65504 +#else +static const int kMaxOptCount = (1 << (32 - 3)) - 64; // 536870848 +#endif + +TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) { + uint32_t h1 = 0; + const int kMaxWidth = (benchmark_width_ * benchmark_height_ + 31) & ~31; + align_buffer_page_end(src_a, kMaxWidth); + align_buffer_page_end(src_b, kMaxWidth); + memset(src_a, 255u, kMaxWidth); + memset(src_b, 0u, kMaxWidth); + + uint64_t h0 = ComputeHammingDistance(src_a, src_b, kMaxWidth); + EXPECT_EQ(kMaxWidth * 8ULL, h0); + + for (int i = 0; i < benchmark_iterations_; ++i) { +#if defined(HAS_HAMMINGDISTANCE_NEON) + h1 = HammingDistance_NEON(src_a, src_b, kMaxWidth); +#elif defined(HAS_HAMMINGDISTANCE_AVX2) + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + if (has_avx2) { + h1 = HammingDistance_AVX2(src_a, src_b, kMaxWidth); + } else { + int has_sse42 = TestCpuFlag(kCpuHasSSE42); + if (has_sse42) { + h1 = HammingDistance_SSE42(src_a, src_b, kMaxWidth); + } else { + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + if (has_ssse3) { + h1 = HammingDistance_SSSE3(src_a, src_b, kMaxWidth); + } else { + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); + } + } + } +#elif defined(HAS_HAMMINGDISTANCE_SSE42) + int has_sse42 = TestCpuFlag(kCpuHasSSE42); + if (has_sse42) { + h1 = HammingDistance_SSE42(src_a, src_b, kMaxWidth); + } else { + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); + } +#else + h1 = HammingDistance_C(src_a, src_b, kMaxWidth); +#endif + } + + // A large count will cause the low level to potentially overflow so the + // result can not be expected to be correct. + // TODO(fbarchard): Consider expecting the low 16 bits to match. 
+ if (kMaxWidth <= kMaxOptCount) { + EXPECT_EQ(kMaxWidth * 8U, h1); + } else { + if (kMaxWidth * 8ULL != static_cast<uint64_t>(h1)) { + printf( + "warning - HammingDistance_Opt %u does not match %llu " + "but length of %u is longer than guaranteed.\n", + h1, kMaxWidth * 8ULL, kMaxWidth); + } else { + printf( + "warning - HammingDistance_Opt %u matches but length of %u " + "is longer than guaranteed.\n", + h1, kMaxWidth); + } + } + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, TestHammingDistance) { + align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_); + align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_); + memset(src_a, 255u, benchmark_width_ * benchmark_height_); + memset(src_b, 0, benchmark_width_ * benchmark_height_); + + uint64_t h1 = 0; + for (int i = 0; i < benchmark_iterations_; ++i) { + h1 = ComputeHammingDistance(src_a, src_b, + benchmark_width_ * benchmark_height_); + } + EXPECT_EQ(benchmark_width_ * benchmark_height_ * 8ULL, h1); + + free_aligned_buffer_page_end(src_a); + free_aligned_buffer_page_end(src_b); +} + +TEST_F(LibYUVCompareTest, BenchmarkSumSquareError_Opt) { const int kMaxWidth = 4096 * 3; align_buffer_page_end(src_a, kMaxWidth); align_buffer_page_end(src_b, kMaxWidth); @@ -211,7 +430,7 @@ TEST_F(LibYUVBaseTest, BenchmarkSumSquareError_Opt) { memcpy(src_a, "test0123test4567", 16); memcpy(src_b, "tick0123tock4567", 16); - uint64 h1 = ComputeSumSquareError(src_a, src_b, 16); + uint64_t h1 = ComputeSumSquareError(src_a, src_b, 16); EXPECT_EQ(790u, h1); for (int i = 0; i < kMaxWidth; ++i) { @@ -234,14 +453,14 @@ TEST_F(LibYUVBaseTest, BenchmarkSumSquareError_Opt) { free_aligned_buffer_page_end(src_b); } -TEST_F(LibYUVBaseTest, SumSquareError) { +TEST_F(LibYUVCompareTest, SumSquareError) { const int kMaxWidth = 4096 * 3; align_buffer_page_end(src_a, kMaxWidth); align_buffer_page_end(src_b, kMaxWidth); memset(src_a, 0, kMaxWidth); memset(src_b, 0, 
kMaxWidth); - uint64 err; + uint64_t err; err = ComputeSumSquareError(src_a, src_b, kMaxWidth); EXPECT_EQ(0u, err); @@ -263,10 +482,10 @@ TEST_F(LibYUVBaseTest, SumSquareError) { } MaskCpuFlags(disable_cpu_flags_); - uint64 c_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); + uint64_t c_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); MaskCpuFlags(benchmark_cpu_info_); - uint64 opt_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); + uint64_t opt_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); EXPECT_EQ(c_err, opt_err); @@ -274,7 +493,7 @@ TEST_F(LibYUVBaseTest, SumSquareError) { free_aligned_buffer_page_end(src_b); } -TEST_F(LibYUVBaseTest, BenchmarkPsnr_Opt) { +TEST_F(LibYUVCompareTest, BenchmarkPsnr_Opt) { align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_); align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_); for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { @@ -285,9 +504,10 @@ TEST_F(LibYUVBaseTest, BenchmarkPsnr_Opt) { MaskCpuFlags(benchmark_cpu_info_); double opt_time = get_time(); - for (int i = 0; i < benchmark_iterations_; ++i) + for (int i = 0; i < benchmark_iterations_; ++i) { CalcFramePsnr(src_a, benchmark_width_, src_b, benchmark_width_, benchmark_width_, benchmark_height_); + } opt_time = (get_time() - opt_time) / benchmark_iterations_; printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6); @@ -298,7 +518,7 @@ TEST_F(LibYUVBaseTest, BenchmarkPsnr_Opt) { free_aligned_buffer_page_end(src_b); } -TEST_F(LibYUVBaseTest, BenchmarkPsnr_Unaligned) { +TEST_F(LibYUVCompareTest, BenchmarkPsnr_Unaligned) { align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_ + 1); align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_); for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { @@ -309,9 +529,10 @@ TEST_F(LibYUVBaseTest, BenchmarkPsnr_Unaligned) { MaskCpuFlags(benchmark_cpu_info_); double opt_time = get_time(); - for (int i = 0; i < benchmark_iterations_; ++i) + 
for (int i = 0; i < benchmark_iterations_; ++i) { CalcFramePsnr(src_a + 1, benchmark_width_, src_b, benchmark_width_, benchmark_width_, benchmark_height_); + } opt_time = (get_time() - opt_time) / benchmark_iterations_; printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6); @@ -322,7 +543,7 @@ TEST_F(LibYUVBaseTest, BenchmarkPsnr_Unaligned) { free_aligned_buffer_page_end(src_b); } -TEST_F(LibYUVBaseTest, Psnr) { +TEST_F(LibYUVCompareTest, Psnr) { const int kSrcWidth = benchmark_width_; const int kSrcHeight = benchmark_height_; const int b = 128; @@ -399,7 +620,7 @@ TEST_F(LibYUVBaseTest, Psnr) { free_aligned_buffer_page_end(src_b); } -TEST_F(LibYUVBaseTest, DISABLED_BenchmarkSsim_Opt) { +TEST_F(LibYUVCompareTest, DISABLED_BenchmarkSsim_Opt) { align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_); align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_); for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { @@ -410,9 +631,10 @@ TEST_F(LibYUVBaseTest, DISABLED_BenchmarkSsim_Opt) { MaskCpuFlags(benchmark_cpu_info_); double opt_time = get_time(); - for (int i = 0; i < benchmark_iterations_; ++i) + for (int i = 0; i < benchmark_iterations_; ++i) { CalcFrameSsim(src_a, benchmark_width_, src_b, benchmark_width_, benchmark_width_, benchmark_height_); + } opt_time = (get_time() - opt_time) / benchmark_iterations_; printf("BenchmarkSsim_Opt - %8.2f us opt\n", opt_time * 1e6); @@ -423,7 +645,7 @@ TEST_F(LibYUVBaseTest, DISABLED_BenchmarkSsim_Opt) { free_aligned_buffer_page_end(src_b); } -TEST_F(LibYUVBaseTest, Ssim) { +TEST_F(LibYUVCompareTest, Ssim) { const int kSrcWidth = benchmark_width_; const int kSrcHeight = benchmark_height_; const int b = 128; diff --git a/files/unit_test/convert_test.cc b/files/unit_test/convert_test.cc index 3e2eea85..32a4cd1c 100644 --- a/files/unit_test/convert_test.cc +++ b/files/unit_test/convert_test.cc @@ -8,9 +8,12 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include <assert.h> #include <stdlib.h> #include <time.h> +#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */ + #include "libyuv/basic_types.h" #include "libyuv/compare.h" #include "libyuv/convert.h" @@ -26,102 +29,91 @@ #include "libyuv/rotate.h" #include "libyuv/video_common.h" +#if defined(__arm__) || defined(__aarch64__) +// arm version subsamples by summing 4 pixels then multiplying by matrix with +// 4x smaller coefficients which are rounded to nearest integer. +#define ARM_YUV_ERROR 4 +#else +#define ARM_YUV_ERROR 0 +#endif + namespace libyuv { +// Alias to copy pixels as is +#define AR30ToAR30 ARGBCopy +#define ABGRToABGR ARGBCopy + #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) -#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ +// Planar test + +#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "DST SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "DST SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST DST_SUBSAMP_Y unsupported"); \ const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ const int kHeight = benchmark_height_; \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ - OFF); \ - align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ - OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ - src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (fastrand() & 0xff); \ - src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (fastrand() & 0xff); \ - } \ - } \ - memset(dst_y_c, 1, kWidth* kHeight); \ - memset(dst_u_c, 2, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_c, 3, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth* kHeight); \ - memset(dst_u_opt, 102, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_opt, 103, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = 
SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_u, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_v, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ + MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ MaskCpuFlags(disable_cpu_flags_); \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \ - dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ + reinterpret_cast<SRC_T*>(src_y + OFF), kWidth, \ + reinterpret_cast<SRC_T*>(src_u + OFF), kSrcHalfWidth, \ + reinterpret_cast<SRC_T*>(src_v + OFF), kSrcHalfWidth, \ + reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ + reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \ + reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \ + NEG kHeight); \ MaskCpuFlags(benchmark_cpu_info_); \ for (int i = 0; i 
< benchmark_iterations_; ++i) { \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \ - dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \ - static_cast<int>(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ + reinterpret_cast<SRC_T*>(src_y + OFF), kWidth, \ + reinterpret_cast<SRC_T*>(src_u + OFF), kSrcHalfWidth, \ + reinterpret_cast<SRC_T*>(src_v + OFF), kSrcHalfWidth, \ + reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ + reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \ + reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \ + NEG kHeight); \ } \ - EXPECT_EQ(0, max_diff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = abs( \ - static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast<int>( \ - dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ + for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ } \ - EXPECT_LE(max_diff, 3); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = abs( \ - static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast<int>( \ - dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ + for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ + EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ + EXPECT_EQ(dst_v_c[i], 
dst_v_opt[i]); \ } \ - EXPECT_LE(max_diff, 3); \ free_aligned_buffer_page_end(dst_y_c); \ free_aligned_buffer_page_end(dst_u_c); \ free_aligned_buffer_page_end(dst_v_c); \ @@ -133,25 +125,36 @@ namespace libyuv { free_aligned_buffer_page_end(src_v); \ } -#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) - -TESTPLANARTOP(I420, 2, 2, I420, 2, 2) -TESTPLANARTOP(I422, 2, 1, I420, 2, 2) -TESTPLANARTOP(I444, 1, 1, I420, 2, 2) -TESTPLANARTOP(I420, 2, 2, I422, 2, 1) -TESTPLANARTOP(I420, 2, 2, I444, 1, 1) -TESTPLANARTOP(I420, 2, 2, I420Mirror, 2, 2) -TESTPLANARTOP(I422, 2, 1, I422, 2, 1) -TESTPLANARTOP(I444, 1, 1, I444, 1, 1) +#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ - 4, _Any, +, 0) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 1) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, 
SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) + +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2) +TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1) +TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1) +TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2) +TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2) +TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2) +TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2) +TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2) // Test Android 420 to I420 #define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \ @@ -175,8 +178,8 @@ TESTPLANARTOP(I444, 1, 1, I444, 1, 1) SUBSAMPLE(kHeight, SUBSAMP_Y)); \ align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - uint8* src_u = src_uv + OFF_U; \ - uint8* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \ + uint8_t* src_u = src_uv + OFF_U; \ + uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? 
kSizeUV : OFF_V); \ int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \ for (int i = 0; i < kHeight; ++i) \ for (int j = 0; j < kWidth; ++j) \ @@ -278,6 +281,23 @@ TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2) TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2) TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) +// wrapper to keep API the same +int I400ToNV21(const uint8_t* src_y, + int src_stride_y, + const uint8_t* /* src_u */, + int /* src_stride_u */, + const uint8_t* /* src_v */, + int /* src_stride_v */, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + return I400ToNV21(src_y, src_stride_y, dst_y, dst_stride_y, dst_vu, + dst_stride_vu, width, height); +} + #define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ @@ -291,10 +311,10 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ OFF); \ align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ + align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ + align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ for (int i = 0; i < kHeight; ++i) \ for (int j = 0; j < kWidth; ++j) \ @@ -309,21 +329,21 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) } \ memset(dst_y_c, 1, kWidth* kHeight); \ memset(dst_uv_c, 2, \ - SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ memset(dst_y_opt, 101, kWidth* kHeight); \ memset(dst_uv_opt, 102, \ - 
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ MaskCpuFlags(disable_cpu_flags_); \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \ - dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \ + dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ MaskCpuFlags(benchmark_cpu_info_); \ for (int i = 0; i < benchmark_iterations_; ++i) { \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \ - dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \ + dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ } \ int max_diff = 0; \ for (int i = 0; i < kHeight; ++i) { \ @@ -337,12 +357,12 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) } \ EXPECT_LE(max_diff, 1); \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ int abs_diff = \ abs(static_cast<int>( \ - dst_uv_c[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \ + dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \ static_cast<int>( \ - dst_uv_opt[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \ + dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \ if (abs_diff > max_diff) { \ max_diff = abs_diff; \ } \ @@ -371,6 +391,102 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) TESTPLANARTOBP(I420, 2, 2, NV12, 2, 2) TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2) +TESTPLANARTOBP(I422, 2, 1, NV21, 2, 2) +TESTPLANARTOBP(I444, 1, 1, NV21, 2, 2) +TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2) + +#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, 
SUBSAMP_Y, W1280, N, NEG, \ + OFF) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kHeight = benchmark_height_; \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_uv, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2 * \ + SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ + src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 0 + OFF] = \ + (fastrand() & 0xff); \ + src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 1 + OFF] = \ + (fastrand() & 0xff); \ + } \ + } \ + memset(dst_y_c, 1, kWidth* kHeight); \ + memset(dst_uv_c, 2, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_uv_opt, 102, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y + OFF, kWidth, src_uv + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_c, kWidth, dst_uv_c, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y + OFF, kWidth, src_uv + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_opt, kWidth, dst_uv_opt, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ + } \ + 
int max_diff = 0; \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \ + static_cast<int>(dst_y_opt[i * kWidth + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 1); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ + int abs_diff = \ + abs(static_cast<int>( \ + dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \ + static_cast<int>( \ + dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 1); \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + } + +#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width, _Unaligned, +, 1) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) + +// TODO(fbarchard): Fix msan on this unittest +// TESTBIPLANARTOBP(NV21, 2, 2, NV12, 2, 2) #define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ FMT_PLANAR, SUBSAMP_X, 
SUBSAMP_Y, W1280, N, NEG, OFF, \ @@ -491,108 +607,102 @@ TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2) #define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) -#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - align_buffer_page_end(src_u, kSizeUV + OFF); \ - align_buffer_page_end(src_v, kSizeUV + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y[i + OFF] = (fastrand() & 0xff); \ - } \ - for (int i = 0; i < kSizeUV; ++i) { \ - src_u[i + OFF] = (fastrand() & 0xff); \ - src_v[i + OFF] = (fastrand() & 0xff); \ - } \ - memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ - memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, dst_argb_c + OFF, kStrideB, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, dst_argb_opt + OFF, \ - kStrideB, kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - /* Convert to ARGB so 565 is expanded to bytes that can be compared. 
*/ \ - align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight); \ - align_buffer_page_end(dst_argb32_opt, kWidth* BPP_C* kHeight); \ - memset(dst_argb32_c, 2, kWidth* BPP_C* kHeight); \ - memset(dst_argb32_opt, 102, kWidth* BPP_C* kHeight); \ - FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, dst_argb32_c, kWidth * BPP_C, \ - kWidth, kHeight); \ - FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt, \ - kWidth * BPP_C, kWidth, kHeight); \ - for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \ - int abs_diff = abs(static_cast<int>(dst_argb32_c[i]) - \ - static_cast<int>(dst_argb32_opt[i])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - free_aligned_buffer_page_end(dst_argb32_c); \ - free_aligned_buffer_page_end(dst_argb32_opt); \ +#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + double time0 = get_time(); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, dst_argb_c + OFF, kStrideB, \ + kWidth, NEG kHeight); \ + double time1 = get_time(); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, dst_argb_opt + OFF, \ + kStrideB, kWidth, NEG kHeight); \ + } \ + double time2 = get_time(); \ + printf(" %8d us C - %8d us OPT\n", \ + static_cast<int>((time1 - time0) * 1e6), \ + static_cast<int>((time2 - time1) * 1e6 / benchmark_iterations_)); \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ } -#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - 
YALIGN, DIFF, FMT_C, BPP_C) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, BPP_C) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, \ - BPP_C) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C) - -TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(J420, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(J420, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(H420, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(H420, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1, 9, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1, 17, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1, 9, ARGB, 4) -TESTPLANARTOB(J422, 2, 1, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(J422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1, 1, 
ARGB, 4) -TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1, 1, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1, 0, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1, 0, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1, 0, ARGB, 4) -TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1, 0, ARGB, 4) +#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ - 4, _Any, +, 0) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Unaligned, +, 1) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Invert, -, 0) \ + TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0) + +TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(J420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(J420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(H420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(H420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1) +TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1) +TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1) +TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1) +TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1) +TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1) +TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1) +TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1) +TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1) +TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1) +TESTPLANARTOB(J422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(J422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) +TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) 
+TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) +TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) +TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) +TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1) +TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1) +TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1) #define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN, W1280, DIFF, N, NEG, OFF, ATTEN) \ @@ -663,8 +773,8 @@ TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1, 0, ARGB, 4) TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2) TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2) -#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - W1280, DIFF, N, NEG, OFF) \ +#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \ + BPP_B, W1280, DIFF, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ const int kHeight = benchmark_height_; \ @@ -699,9 +809,9 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2) align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \ memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \ memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \ - FMT_B##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \ + FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \ kHeight); \ - FMT_B##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \ + FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \ kHeight); \ int max_diff = 0; \ for (int i = 0; i < kHeight; ++i) { \ @@ -723,21 +833,27 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2) free_aligned_buffer_page_end(dst_argb32_opt); \ } -#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_ - 4, DIFF, _Any, +, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_, DIFF, _Unaligned, +, 1) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_, DIFF, _Invert, -, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ +#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + DIFF) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_ - 4, DIFF, _Any, +, 0) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, DIFF, _Unaligned, +, 1) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, DIFF, _Invert, -, 0) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ benchmark_width_, DIFF, _Opt, +, 0) -TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2) -TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2) -TESTBIPLANARTOB(NV12, 2, 2, ABGR, 4, 2) -TESTBIPLANARTOB(NV21, 
2, 2, ABGR, 4, 2) -TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) +TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4, 2) +TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4, 2) +TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4, 2) +TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4, 2) +TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3, 2) +TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3, 2) +TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3, 2) +TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3, 2) +TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2, 9) +TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2) #ifdef DO_THREE_PLANES // Do 3 allocations for yuv. conventional but slower. @@ -864,33 +980,27 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ benchmark_width_, DIFF, _Opt, +, 0) +TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4) -#if defined(__arm__) || defined(__aarch64__) -// arm version subsamples by summing 4 pixels then multiplying by matrix with -// 4x smaller coefficients which are rounded to nearest integer. -TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 4) -TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, 4) -#else -TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 0) -TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, 0) -#endif +TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2) +TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2) +TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR) +TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR) +TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15) +TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17) TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4) -TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4) -TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4) +TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2) +TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2) TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4) TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4) +// TODO(fbarchard): Investigate J420 error of 11 on Windows. 
+TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 11) TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5) -// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9. -TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15) -TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17) -TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2) -TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2) -TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2) +TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2) -TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2) TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2) -TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2) -TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2) +TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2) +TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2) #define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \ SUBSAMP_Y, W1280, N, NEG, OFF) \ @@ -964,6 +1074,8 @@ TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2) TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2) TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2) TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) +TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2) +TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2) #define TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B, W1280, DIFF, N, NEG, OFF) \ @@ -1032,15 +1144,9 @@ TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) MaskCpuFlags(benchmark_cpu_info_); \ FMT_A##To##FMT_B(src_argb, kStrideA, dst_argb_opt, kStrideB, kWidth, \ kHeight); \ - int max_diff = 0; \ for (int i = 0; i < kStrideB * kHeightB; ++i) { \ - int abs_diff = abs(static_cast<int>(dst_argb_c[i]) - \ - static_cast<int>(dst_argb_opt[i])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ + EXPECT_NEAR(dst_argb_c[i], dst_argb_opt[i], DIFF); \ } \ - EXPECT_LE(max_diff, DIFF); \ free_aligned_buffer_page_end(src_argb); \ free_aligned_buffer_page_end(dst_argb_c); \ free_aligned_buffer_page_end(dst_argb_opt); \ @@ -1060,37 +1166,47 @@ TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B, DIFF) +// 
TODO(fbarchard): make ARM version of C code that matches NEON. +TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0) +TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0) +TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2) +TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2) TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0) TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0) TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4) +TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4) -TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2) -TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2) +TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4) +TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0) TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0) +TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0) +TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0) +TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0) +TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0) TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0) TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0) TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0) +TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0) 
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, 4) -TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, 4) +TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR) +TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR) TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0) -TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0) -TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0) -TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0) -TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0) -TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0) #define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B, W1280, DIFF, N, NEG, OFF) \ @@ -1240,8 +1356,8 @@ TESTSYM(BGRAToARGB, 4, 4, 1) TESTSYM(ABGRToARGB, 4, 4, 1) TEST_F(LibYUVConvertTest, Test565) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); - SIMD_ALIGNED(uint8 pixels565[256][2]); + SIMD_ALIGNED(uint8_t orig_pixels[256][4]); + SIMD_ALIGNED(uint8_t pixels565[256][2]); for (int i = 0; i < 256; ++i) { for (int j = 0; j < 4; ++j) { @@ -1249,7 +1365,7 @@ TEST_F(LibYUVConvertTest, Test565) { } } ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1); - uint32 checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); + uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); EXPECT_EQ(610919429u, checksum); } @@ -1353,7 +1469,7 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) { TEST_F(LibYUVConvertTest, FuzzJpeg) { // SOI but no EOI. Expect fail. for (int times = 0; times < benchmark_iterations_; ++times) { - const int kSize = fastrand() % 5000 + 2; + const int kSize = fastrand() % 5000 + 3; align_buffer_page_end(orig_pixels, kSize); MemRandomize(orig_pixels, kSize); @@ -1362,79 +1478,619 @@ TEST_F(LibYUVConvertTest, FuzzJpeg) { orig_pixels[1] = 0xd8; // SOI. 
orig_pixels[2] = 0xff; orig_pixels[kSize - 1] = 0xff; - ValidateJpeg(orig_pixels, kSize); // Failure normally expected. + ValidateJpeg(orig_pixels, + kSize); // Failure normally expected. free_aligned_buffer_page_end(orig_pixels); } } -TEST_F(LibYUVConvertTest, MJPGToI420) { - const int kOff = 10; - const int kMinJpeg = 64; - const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg - ? benchmark_width_ * benchmark_height_ - : kMinJpeg; - const int kSize = kImageSize + kOff; - align_buffer_page_end(orig_pixels, kSize); - align_buffer_page_end(dst_y_opt, benchmark_width_ * benchmark_height_); - align_buffer_page_end(dst_u_opt, SUBSAMPLE(benchmark_width_, 2) * - SUBSAMPLE(benchmark_height_, 2)); - align_buffer_page_end(dst_v_opt, SUBSAMPLE(benchmark_width_, 2) * - SUBSAMPLE(benchmark_height_, 2)); +// Test data created in GIMP. In export jpeg, disable +// thumbnails etc, choose a subsampling, and use low quality +// (50) to keep size small. Generated with xxd -i test.jpg +// test 0 is J400 +static const uint8_t kTest0Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xc2, 0x00, 0x0b, 0x08, 0x00, 0x10, + 0x00, 0x20, 0x01, 0x01, 0x11, 0x00, 0xff, 0xc4, 0x00, 0x17, 0x00, 0x01, + 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xda, 0x00, 0x08, 0x01, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x43, 0x7e, 0xa7, 0x97, 0x57, 0xff, 0xc4, + 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, + 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, + 0x02, 0x3b, 0xc0, 0x6f, 0x66, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, + 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, + 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, + 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, + 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x21, 0x65, 0x6e, 0x31, 0x86, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, + 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, + 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x10, 0x35, 0xff, 0xc4, 0x00, 0x1f, 0x10, + 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, + 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x10, 0x0b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x88, 0xab, 0x8b, + 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, + 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, + 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, + 0xd9}; +static const size_t kTest0JpgLen = 421; + +// test 1 is J444 +static const uint8_t kTest1Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 
0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x11, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, + 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01, 0x03, 0xff, 0xda, + 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, 0x01, + 0x40, 0x8f, 0x26, 0xe8, 0xf4, 0xcc, 0xf9, 0x69, 0x2b, 0x1b, 0x2a, 0xcb, + 0xff, 0xc4, 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, + 0x00, 0x03, 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, + 0x01, 0x05, 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, + 0x0d, 0x26, 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x01, 0x00, + 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x10, 0x11, 0x02, 0x12, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x03, 0x01, 0x01, 0x3f, 0x01, 0xf1, 0x00, 0x27, 0x45, 0xbb, 0x31, + 0xaf, 0xff, 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x02, 0x03, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x02, 0x10, 0x11, 0x41, 0x12, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, + 0x01, 0x3f, 0x01, 0xf6, 0x4b, 0x5f, 0x48, 0xb3, 0x69, 0x63, 0x35, 0x72, + 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, + 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, + 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, + 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, + 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, + 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, + 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, + 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, + 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, + 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x26, 0x61, 0xd4, 0xff, + 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, + 0x31, 0x41, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, + 0x10, 0x54, 0xa8, 0xbf, 0x50, 0x87, 0xb0, 0x9d, 0x8b, 0xc4, 0x6a, 0x26, + 0x6b, 0x2a, 0x9c, 0x1f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x11, 0x21, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, + 0x01, 0x01, 0x3f, 0x10, 0x70, 0xe1, 0x3e, 0xd1, 0x8e, 0x0d, 0xe1, 0xb5, + 0xd5, 0x91, 0x76, 0x43, 0x82, 0x45, 0x4c, 0x7b, 0x7f, 0xff, 0xc4, 0x00, + 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, + 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, + 0x00, 0x01, 0x3f, 
0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, + 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, + 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, + 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, + 0xd4, 0xff, 0xd9}; +static const size_t kTest1JpgLen = 735; + +// test 2 is J420 +static const uint8_t kTest2Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x01, 0x02, 0x04, 0xff, + 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x02, 0xff, + 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, + 0x01, 0x20, 0xe7, 0x28, 0xa3, 0x0b, 0x2e, 0x2d, 0xcf, 0xff, 0xc4, 0x00, + 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 
0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, 0x10, + 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, 0x02, + 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, 0x62, + 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x00, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, + 0x01, 0xc8, 0x53, 0xff, 0xc4, 0x00, 0x16, 0x11, 0x01, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x32, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, + 0x01, 0xd2, 0xc7, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, + 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, + 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, + 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, + 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, + 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, + 0x03, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x13, 0x5f, + 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, + 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x0e, + 0xa1, 0x3a, 0x76, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x21, 0x11, 0xff, 0xda, 0x00, 
0x08, 0x01, 0x02, 0x01, 0x01, + 0x3f, 0x10, 0x57, 0x0b, 0x08, 0x70, 0xdb, 0xff, 0xc4, 0x00, 0x1f, 0x10, + 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, + 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, + 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, + 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, + 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, + 0xd9}; +static const size_t kTest2JpgLen = 685; + +// test 3 is J422 +static const uint8_t kTest3Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x21, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, + 0x00, 
0x17, 0x01, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x00, 0xff, + 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, + 0x01, 0x43, 0x8d, 0x1f, 0xa2, 0xb3, 0xca, 0x1b, 0x57, 0x0f, 0xff, 0xc4, + 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, + 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, + 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, + 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x02, 0x10, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, + 0x01, 0x01, 0x3f, 0x01, 0x51, 0xce, 0x8c, 0x75, 0xff, 0xc4, 0x00, 0x18, + 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x61, 0x21, 0xff, 0xda, + 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xa6, 0xd9, 0x2f, 0x84, + 0xe8, 0xf0, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, + 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, + 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, + 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, + 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, + 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, + 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, + 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, + 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x2e, 0x45, 0xff, + 0xc4, 
0x00, 0x18, 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, + 0x31, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x53, + 0x50, 0xba, 0x54, 0xc1, 0x67, 0x4f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, + 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, 0x00, 0x10, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x02, 0x01, 0x01, 0x3f, 0x10, 0x18, 0x81, 0x5c, 0x04, 0x1a, 0xca, + 0x91, 0xbf, 0xff, 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, + 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, + 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, + 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, + 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, + 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; +static const size_t kTest3JpgLen = 704; + +// test 4 is J422 vertical - not supported +static const uint8_t kTest4Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 
0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x12, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x05, 0x01, 0x02, 0x03, 0xff, + 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0xff, + 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, + 0x01, 0xd2, 0x98, 0xe9, 0x03, 0x0c, 0x00, 0x46, 0x21, 0xd9, 0xff, 0xc4, + 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, + 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, + 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, + 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x01, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, + 0x3f, 0x01, 0x98, 0xb1, 0xbd, 0x47, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, + 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xb6, 0x35, 0xa2, 0xe1, 0x47, 0xff, + 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x21, 0x02, + 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, 0x08, 0x01, + 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, 0xed, 0xf9, + 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, 0x10, 0x01, + 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, 0x81, 0xf0, + 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, 0x75, 0x6e, + 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, 0xf3, 0xde, + 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, 0x5d, 0x7a, + 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x24, 0xaf, 0xff, 0xc4, 0x00, 0x19, + 0x11, 0x00, 0x03, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x51, 0x21, 0x31, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x59, 0x11, 0xca, + 0x42, 0x60, 0x9f, 0x69, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x11, 0x21, 0x31, 0x61, 0xff, 0xda, 0x00, 0x08, 0x01, + 0x02, 0x01, 0x01, 0x3f, 0x10, 0xb0, 0xd7, 0x27, 0x51, 0xb6, 0x41, 0xff, + 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, + 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, + 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, + 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, + 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, + 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; +static const size_t kTest4JpgLen = 701; + +TEST_F(LibYUVConvertTest, TestMJPGSize) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + printf("test jpeg size %d x %d\n", width, height); +} - // EOI, SOI to make MJPG appear valid. - memset(orig_pixels, 0, kSize); - orig_pixels[0] = 0xff; - orig_pixels[1] = 0xd8; // SOI. 
- orig_pixels[2] = 0xff; - orig_pixels[kSize - kOff + 0] = 0xff; - orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. +TEST_F(LibYUVConvertTest, TestMJPGToI420) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_u, half_width * half_height); + align_buffer_page_end(dst_v, half_width * half_height); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_u, half_width, + dst_v, half_width, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); - for (int times = 0; times < benchmark_iterations_; ++times) { - int ret = - MJPGToI420(orig_pixels, kSize, dst_y_opt, benchmark_width_, dst_u_opt, - SUBSAMPLE(benchmark_width_, 2), dst_v_opt, - SUBSAMPLE(benchmark_width_, 2), benchmark_width_, - benchmark_height_, benchmark_width_, benchmark_height_); - // Expect failure because image is not really valid. - EXPECT_EQ(1, ret); - } - - free_aligned_buffer_page_end(dst_y_opt); - free_aligned_buffer_page_end(dst_u_opt); - free_aligned_buffer_page_end(dst_v_opt); - free_aligned_buffer_page_end(orig_pixels); + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_u_hash = HashDjb2(dst_u, half_width * half_height, 5381); + uint32_t dst_v_hash = HashDjb2(dst_v, half_width * half_height, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_u_hash, 2501859930u); + EXPECT_EQ(dst_v_hash, 2126459123u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); } -TEST_F(LibYUVConvertTest, MJPGToARGB) { - const int kOff = 10; - const int kMinJpeg = 64; - const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg - ? benchmark_width_ * benchmark_height_ - : kMinJpeg; - const int kSize = kImageSize + kOff; - align_buffer_page_end(orig_pixels, kSize); - align_buffer_page_end(dst_argb_opt, benchmark_width_ * benchmark_height_ * 4); +TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); - // EOI, SOI to make MJPG appear valid. - memset(orig_pixels, 0, kSize); - orig_pixels[0] = 0xff; - orig_pixels[1] = 0xd8; // SOI. - orig_pixels[2] = 0xff; - orig_pixels[kSize - kOff + 0] = 0xff; - orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); - for (int times = 0; times < benchmark_iterations_; ++times) { - int ret = MJPGToARGB(orig_pixels, kSize, dst_argb_opt, benchmark_width_ * 4, - benchmark_width_, benchmark_height_, benchmark_width_, - benchmark_height_); - // Expect failure because image is not really valid. 
- EXPECT_EQ(1, ret); + // Convert to NV21 + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_vu, + half_width * 2, width, height, width, height); } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert to I420 + align_buffer_page_end(dst2_y, width * height); + align_buffer_page_end(dst2_u, half_width * half_height); + align_buffer_page_end(dst2_v, half_width * half_height); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width, + dst2_v, half_width, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); - free_aligned_buffer_page_end(dst_argb_opt); - free_aligned_buffer_page_end(orig_pixels); + // Convert I420 to NV21 + align_buffer_page_end(dst3_y, width * height); + align_buffer_page_end(dst3_vu, half_width * half_height * 2); + + I420ToNV21(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y, + width, dst3_vu, half_width * 2, width, height); + + for (int i = 0; i < width * height; ++i) { + EXPECT_EQ(dst_y[i], dst3_y[i]); + } + for (int i = 0; i < half_width * half_height * 2; ++i) { + EXPECT_EQ(dst_vu[i], dst3_vu[i]); + EXPECT_EQ(dst_vu[i], dst3_vu[i]); + } + + free_aligned_buffer_page_end(dst3_y); + free_aligned_buffer_page_end(dst3_vu); + + free_aligned_buffer_page_end(dst2_y); + free_aligned_buffer_page_end(dst2_u); + free_aligned_buffer_page_end(dst2_v); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + 
benchmark_height_ / (width * height); + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. + uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 1069662856u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); } +TEST_F(LibYUVConvertTest, TestMJPGToNV21_422) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 3543430771u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 330644005u); + EXPECT_EQ(dst_uv_hash, 135214341u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. + uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 506143297u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToARGB) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + + align_buffer_page_end(dst_argb, width * height * 4); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToARGB(kTest3Jpg, kTest3JpgLen, dst_argb, width * 4, width, + height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381); + EXPECT_EQ(dst_argb_hash, 2355976473u); + + free_aligned_buffer_page_end(dst_argb); +} + +static int ShowJPegInfo(const uint8_t* sample, size_t sample_size) { + MJpegDecoder mjpeg_decoder; + LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); + + int width = mjpeg_decoder.GetWidth(); + int height = mjpeg_decoder.GetHeight(); + + // YUV420 + if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 2 && + mjpeg_decoder.GetHorizSampFactor(0) == 2 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + printf("JPeg is J420, %dx%d %d bytes\n", width, height, + static_cast<int>(sample_size)); + // YUV422 + } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 2 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + printf("JPeg is J422, %dx%d %d bytes\n", width, height, + static_cast<int>(sample_size)); + // YUV444 + } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 1 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + printf("JPeg is J444, %dx%d %d bytes\n", width, height, + static_cast<int>(sample_size)); + // YUV400 + } else if (mjpeg_decoder.GetColorSpace() == + MJpegDecoder::kColorSpaceGrayscale 
&& + mjpeg_decoder.GetNumComponents() == 1 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 1) { + printf("JPeg is J400, %dx%d %d bytes\n", width, height, + static_cast<int>(sample_size)); + } else { + // Unknown colorspace. + printf("JPeg is Unknown colorspace.\n"); + } + mjpeg_decoder.UnloadFrame(); + return ret; +} + +TEST_F(LibYUVConvertTest, TestMJPGInfo) { + EXPECT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg, + kTest4JpgLen)); // Valid but unsupported. +} #endif // HAVE_JPEG TEST_F(LibYUVConvertTest, NV12Crop) { @@ -1450,7 +2106,7 @@ TEST_F(LibYUVConvertTest, NV12Crop) { const int sample_size = kWidth * kHeight + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; align_buffer_page_end(src_y, sample_size); - uint8* src_uv = src_y + kWidth * kHeight; + uint8_t* src_uv = src_y + kWidth * kHeight; align_buffer_page_end(dst_y, kDestWidth * kDestHeight); align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * @@ -1517,14 +2173,86 @@ TEST_F(LibYUVConvertTest, NV12Crop) { free_aligned_buffer_page_end(src_y); } +TEST_F(LibYUVConvertTest, I420CropOddY) { + const int SUBSAMP_X = 2; + const int SUBSAMP_Y = 2; + const int kWidth = benchmark_width_; + const int kHeight = benchmark_height_; + const int crop_y = 1; + const int kDestWidth = benchmark_width_; + const int kDestHeight = benchmark_height_ - crop_y * 2; + const int kStrideU = SUBSAMPLE(kWidth, SUBSAMP_X); + const int kStrideV = SUBSAMPLE(kWidth, SUBSAMP_X); + const int sample_size = kWidth * kHeight + + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y) + + kStrideV * SUBSAMPLE(kHeight, SUBSAMP_Y); + align_buffer_page_end(src_y, sample_size); + uint8_t* src_u = src_y + kWidth * kHeight; + uint8_t* src_v = src_u + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y); + + 
align_buffer_page_end(dst_y, kDestWidth * kDestHeight); + align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + for (int i = 0; i < kHeight * kWidth; ++i) { + src_y[i] = (fastrand() & 0xff); + } + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideU; ++i) { + src_u[i] = (fastrand() & 0xff); + } + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideV; ++i) { + src_v[i] = (fastrand() & 0xff); + } + memset(dst_y, 1, kDestWidth * kDestHeight); + memset(dst_u, 2, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_v, 3, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + MaskCpuFlags(benchmark_cpu_info_); + for (int i = 0; i < benchmark_iterations_; ++i) { + ConvertToI420(src_y, sample_size, dst_y, kDestWidth, dst_u, + SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, + SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight, + kDestWidth, kDestHeight, libyuv::kRotate0, + libyuv::FOURCC_I420); + } + + for (int i = 0; i < kDestHeight; ++i) { + for (int j = 0; j < kDestWidth; ++j) { + EXPECT_EQ(src_y[crop_y * kWidth + i * kWidth + j], + dst_y[i * kDestWidth + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j], + dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j], + dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); + free_aligned_buffer_page_end(src_y); +} + TEST_F(LibYUVConvertTest, TestYToARGB) { - uint8 
y[32]; - uint8 expectedg[32]; + uint8_t y[32]; + uint8_t expectedg[32]; for (int i = 0; i < 32; ++i) { y[i] = i * 5 + 17; expectedg[i] = static_cast<int>((y[i] - 16) * 1.164f + 0.5f); } - uint8 argb[32 * 4]; + uint8_t argb[32 * 4]; YToARGB(y, 0, argb, 0, 32, 1); for (int i = 0; i < 32; ++i) { @@ -1536,7 +2264,7 @@ TEST_F(LibYUVConvertTest, TestYToARGB) { } } -static const uint8 kNoDither4x4[16] = { +static const uint8_t kNoDither4x4[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; @@ -1563,7 +2291,7 @@ TEST_F(LibYUVConvertTest, TestNoDither) { } // Ordered 4x4 dither for 888 to 565. Values from 0 to 7. -static const uint8 kDither565_4x4[16] = { +static const uint8_t kDither565_4x4[16] = { 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2, }; @@ -1734,6 +2462,8 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4) TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12) TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12) +// Transitive tests. A to B to C is same as A to C. + #define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ W1280, N, NEG, OFF, FMT_C, BPP_C) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \ @@ -1806,10 +2536,15 @@ TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3) TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, RAW, 3) TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RAW, 3) TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, ARGB, 4) +TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, ARGB, 4) +TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3) +TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3) +TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3) +TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4) TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2) TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2) TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2) -TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2) +TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2) TESTPLANARTOE(J422, 2, 1, ARGB, 1, 4, ARGB, 4) TESTPLANARTOE(J422, 2, 1, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(H422, 2, 1, ARGB, 
1, 4, ARGB, 4) @@ -1894,6 +2629,64 @@ TESTPLANARTOE(I422, 2, 1, UYVY, 2, 4, ARGB, 4) TESTQPLANARTOE(I420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +#define TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, W1280, N, NEG, \ + OFF, FMT_C, BPP_C) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_##FMT_C##N) { \ + const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kHeight = benchmark_height_; \ + const int kStrideA = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ + const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ + align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \ + align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ + MemRandomize(src_argb_a + OFF, kStrideA * kHeight); \ + memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB, \ + kWidth, NEG kHeight); \ + } \ + /* Convert to a 3rd format in 1 step and 2 steps and compare */ \ + const int kStrideC = kWidth * BPP_C; \ + align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \ + align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ + memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ + memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ + FMT_A##To##FMT_C(src_argb_a + OFF, kStrideA, dst_argb_c + OFF, kStrideC, \ + kWidth, NEG kHeight); \ + /* Convert B to C */ \ + FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, dst_argb_bc + OFF, kStrideC, \ + kWidth, kHeight); \ + for (int i = 0; i < kStrideC * kHeight; i += 4) { \ + EXPECT_EQ(dst_argb_c[i + OFF + 0], dst_argb_bc[i + OFF + 0]); \ + EXPECT_EQ(dst_argb_c[i + OFF + 1], dst_argb_bc[i + OFF + 1]); \ + EXPECT_EQ(dst_argb_c[i + OFF + 2], dst_argb_bc[i + OFF + 2]); \ + EXPECT_NEAR(dst_argb_c[i + OFF + 3], dst_argb_bc[i + OFF + 3], 64); \ + } \ + free_aligned_buffer_page_end(src_argb_a); \ + free_aligned_buffer_page_end(dst_argb_b); \ + 
free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_bc); \ + } + +#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, \ + benchmark_width_ - 4, _Any, +, 0, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Unaligned, +, 1, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Invert, -, 0, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Opt, +, 0, FMT_C, BPP_C) + +// Caveat: Destination needs to be 4 bytes +TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4) +TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4) +TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4) +TESTPLANETOE(AR30, 1, 4, ABGR, 1, 4, ARGB, 4) +TESTPLANETOE(ARGB, 1, 4, AB30, 1, 4, ARGB, 4) +TESTPLANETOE(ABGR, 1, 4, AB30, 1, 4, ABGR, 4) +TESTPLANETOE(AB30, 1, 4, ARGB, 1, 4, ABGR, 4) +TESTPLANETOE(AB30, 1, 4, ABGR, 1, 4, ARGB, 4) + TEST_F(LibYUVConvertTest, RotateWithARGBSource) { // 2x2 frames uint32_t src[4]; @@ -1929,4 +2722,502 @@ TEST_F(LibYUVConvertTest, RotateWithARGBSource) { EXPECT_EQ(dst[3], src[1]); } +#ifdef HAS_ARGBTOAR30ROW_AVX2 +TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) { + // ARGBToAR30Row_AVX2 expects a multiple of 8 pixels. 
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7; + align_buffer_page_end(src, kPixels * 4); + align_buffer_page_end(dst_opt, kPixels * 4); + align_buffer_page_end(dst_c, kPixels * 4); + MemRandomize(src, kPixels * 4); + memset(dst_opt, 0, kPixels * 4); + memset(dst_c, 1, kPixels * 4); + + ARGBToAR30Row_C(src, dst_c, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + ARGBToAR30Row_AVX2(src, dst_opt, kPixels); + } else if (has_ssse3) { + ARGBToAR30Row_SSSE3(src, dst_opt, kPixels); + } else { + ARGBToAR30Row_C(src, dst_opt, kPixels); + } + } + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_opt[i], dst_c[i]); + } + + free_aligned_buffer_page_end(src); + free_aligned_buffer_page_end(dst_opt); + free_aligned_buffer_page_end(dst_c); +} +#endif // HAS_ARGBTOAR30ROW_AVX2 + +#ifdef HAS_ABGRTOAR30ROW_AVX2 +TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { + // ABGRToAR30Row_AVX2 expects a multiple of 8 pixels. 
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7; + align_buffer_page_end(src, kPixels * 4); + align_buffer_page_end(dst_opt, kPixels * 4); + align_buffer_page_end(dst_c, kPixels * 4); + MemRandomize(src, kPixels * 4); + memset(dst_opt, 0, kPixels * 4); + memset(dst_c, 1, kPixels * 4); + + ABGRToAR30Row_C(src, dst_c, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + ABGRToAR30Row_AVX2(src, dst_opt, kPixels); + } else if (has_ssse3) { + ABGRToAR30Row_SSSE3(src, dst_opt, kPixels); + } else { + ABGRToAR30Row_C(src, dst_opt, kPixels); + } + } + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_opt[i], dst_c[i]); + } + + free_aligned_buffer_page_end(src); + free_aligned_buffer_page_end(dst_opt); + free_aligned_buffer_page_end(dst_c); +} +#endif // HAS_ABGRTOAR30ROW_AVX2 + +// TODO(fbarchard): Fix clamping issue affected by U channel. +#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + ALIGN, YALIGN, W1280, DIFF, N, NEG, SOFF, DOFF) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + const int kBpc = 2; \ + align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \ + align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & 0x3ff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & 0x3ff); \ + reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & 0x3ff); \ + } \ + memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B( \ + reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \ + reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \ + reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \ + dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B( \ + reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \ + reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \ + reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \ + dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + int abs_diff = abs(static_cast<int>(dst_argb_c[i + DOFF]) - \ + static_cast<int>(dst_argb_opt[i + DOFF])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + EXPECT_LE(max_diff, DIFF); \ + free_aligned_buffer_page_end(src_y); \ + 
free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, DIFF) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, 0) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, 1) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Invert, -, 0, 0) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Opt, +, 0, 0) + +TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 2) +TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 2) +TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 2) +TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 2) +TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 2) +TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 2) +TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2) +TESTPLANAR16TOB(H010, 2, 2, AB30, 4, 4, 1, 2) + +static int Clamp(int y) { + if (y < 0) { + y = 0; + } + if (y > 255) { + y = 255; + } + return y; +} + +static int Clamp10(int y) { + if (y < 0) { + y = 0; + } + if (y > 1023) { + y = 1023; + } + return y; +} + +// Test 8 bit YUV to 8 bit RGB +TEST_F(LibYUVConvertTest, TestH420ToARGB) { + const int kSize = 256; + int histogram_b[256]; + int histogram_g[256]; + int histogram_r[256]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2); + align_buffer_page_end(argb_pixels, kSize * 4); + uint8_t* orig_y = orig_yuv; + uint8_t* orig_u = orig_y + kSize; + uint8_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } 
+ for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 128; // 128 is 0. + orig_v[i] = 128; + } + + H420ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b = argb_pixels[i * 4 + 0]; + int g = argb_pixels[i * 4 + 1]; + int r = argb_pixels[i * 4 + 2]; + int a = argb_pixels[i * 4 + 3]; + ++histogram_b[b]; + ++histogram_g[g]; + ++histogram_r[r]; + int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f)); + EXPECT_NEAR(b, expected_y, 1); + EXPECT_NEAR(g, expected_y, 1); + EXPECT_NEAR(r, expected_y, 1); + EXPECT_EQ(a, 255); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(argb_pixels); +} + +// Test 10 bit YUV to 8 bit RGB +TEST_F(LibYUVConvertTest, TestH010ToARGB) { + const int kSize = 1024; + int histogram_b[1024]; + int histogram_g[1024]; + int histogram_r[1024]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); + align_buffer_page_end(argb_pixels, kSize * 4); + uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv); + uint16_t* orig_u = orig_y + kSize; + uint16_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 512; // 512 is 0. 
+ orig_v[i] = 512; + } + + H010ToARGB(orig_y, 0, orig_u, 0, orig_v, 0, argb_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b = argb_pixels[i * 4 + 0]; + int g = argb_pixels[i * 4 + 1]; + int r = argb_pixels[i * 4 + 2]; + int a = argb_pixels[i * 4 + 3]; + ++histogram_b[b]; + ++histogram_g[g]; + ++histogram_r[r]; + int expected_y = Clamp(static_cast<int>((i - 64) * 1.164f / 4)); + EXPECT_NEAR(b, expected_y, 1); + EXPECT_NEAR(g, expected_y, 1); + EXPECT_NEAR(r, expected_y, 1); + EXPECT_EQ(a, 255); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(argb_pixels); +} + +// Test 10 bit YUV to 10 bit RGB +// Caveat: Result is near due to float rounding in expected +// result. +TEST_F(LibYUVConvertTest, TestH010ToAR30) { + const int kSize = 1024; + int histogram_b[1024]; + int histogram_g[1024]; + int histogram_r[1024]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + + align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); + align_buffer_page_end(ar30_pixels, kSize * 4); + uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv); + uint16_t* orig_u = orig_y + kSize; + uint16_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 512; // 512 is 0. 
+ orig_v[i] = 512; + } + + H010ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023; + int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023; + int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023; + int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3; + ++histogram_b[b10]; + ++histogram_g[g10]; + ++histogram_r[r10]; + int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f)); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(ar30_pixels); +} + +// Test 10 bit YUV to 10 bit RGB +// Caveat: Result is near due to float rounding in expected +// result. +TEST_F(LibYUVConvertTest, TestH010ToAB30) { + const int kSize = 1024; + int histogram_b[1024]; + int histogram_g[1024]; + int histogram_r[1024]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + + align_buffer_page_end(orig_yuv, kSize * 2 + kSize / 2 * 2 * 2); + align_buffer_page_end(ab30_pixels, kSize * 4); + uint16_t* orig_y = reinterpret_cast<uint16_t*>(orig_yuv); + uint16_t* orig_u = orig_y + kSize; + uint16_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 512; // 512 is 0. 
+ orig_v[i] = 512; + } + + H010ToAB30(orig_y, 0, orig_u, 0, orig_v, 0, ab30_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int r10 = reinterpret_cast<uint32_t*>(ab30_pixels)[i] & 1023; + int g10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 10) & 1023; + int b10 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 20) & 1023; + int a2 = (reinterpret_cast<uint32_t*>(ab30_pixels)[i] >> 30) & 3; + ++histogram_b[b10]; + ++histogram_g[g10]; + ++histogram_r[r10]; + int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f)); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(ab30_pixels); +} + +// Test 8 bit YUV to 10 bit RGB +TEST_F(LibYUVConvertTest, TestH420ToAR30) { + const int kSize = 256; + const int kHistSize = 1024; + int histogram_b[kHistSize]; + int histogram_g[kHistSize]; + int histogram_r[kHistSize]; + memset(histogram_b, 0, sizeof(histogram_b)); + memset(histogram_g, 0, sizeof(histogram_g)); + memset(histogram_r, 0, sizeof(histogram_r)); + align_buffer_page_end(orig_yuv, kSize + kSize / 2 * 2); + align_buffer_page_end(ar30_pixels, kSize * 4); + uint8_t* orig_y = orig_yuv; + uint8_t* orig_u = orig_y + kSize; + uint8_t* orig_v = orig_u + kSize / 2; + + // Test grey scale + for (int i = 0; i < kSize; ++i) { + orig_y[i] = i; + } + for (int i = 0; i < kSize / 2; ++i) { + orig_u[i] = 128; // 128 is 0. 
+ orig_v[i] = 128; + } + + H420ToAR30(orig_y, 0, orig_u, 0, orig_v, 0, ar30_pixels, 0, kSize, 1); + + for (int i = 0; i < kSize; ++i) { + int b10 = reinterpret_cast<uint32_t*>(ar30_pixels)[i] & 1023; + int g10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 10) & 1023; + int r10 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 20) & 1023; + int a2 = (reinterpret_cast<uint32_t*>(ar30_pixels)[i] >> 30) & 3; + ++histogram_b[b10]; + ++histogram_g[g10]; + ++histogram_r[r10]; + int expected_y = Clamp10(static_cast<int>((i - 16) * 1.164f * 4.f)); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); + } + + int count_b = 0; + int count_g = 0; + int count_r = 0; + for (int i = 0; i < kHistSize; ++i) { + if (histogram_b[i]) { + ++count_b; + } + if (histogram_g[i]) { + ++count_g; + } + if (histogram_r[i]) { + ++count_r; + } + } + printf("uniques: B %d, G, %d, R %d\n", count_b, count_g, count_r); + + free_aligned_buffer_page_end(orig_yuv); + free_aligned_buffer_page_end(ar30_pixels); +} + +// Test RGB24 to ARGB and back to RGB24 +TEST_F(LibYUVConvertTest, TestARGBToRGB24) { + const int kSize = 256; + align_buffer_page_end(orig_rgb24, kSize * 3); + align_buffer_page_end(argb_pixels, kSize * 4); + align_buffer_page_end(dest_rgb24, kSize * 3); + + // Test grey scale + for (int i = 0; i < kSize * 3; ++i) { + orig_rgb24[i] = i; + } + + RGB24ToARGB(orig_rgb24, 0, argb_pixels, 0, kSize, 1); + ARGBToRGB24(argb_pixels, 0, dest_rgb24, 0, kSize, 1); + + for (int i = 0; i < kSize * 3; ++i) { + EXPECT_EQ(orig_rgb24[i], dest_rgb24[i]); + } + + free_aligned_buffer_page_end(orig_rgb24); + free_aligned_buffer_page_end(argb_pixels); + free_aligned_buffer_page_end(dest_rgb24); +} + } // namespace libyuv diff --git a/files/unit_test/cpu_test.cc b/files/unit_test/cpu_test.cc index 048ed31a..a7991d2b 100644 --- a/files/unit_test/cpu_test.cc +++ b/files/unit_test/cpu_test.cc @@ -20,39 +20,56 @@ namespace libyuv { 
TEST_F(LibYUVBaseTest, TestCpuHas) { int cpu_flags = TestCpuFlag(-1); - printf("Cpu Flags %x\n", cpu_flags); + printf("Cpu Flags %d\n", cpu_flags); +#if defined(__arm__) || defined(__aarch64__) int has_arm = TestCpuFlag(kCpuHasARM); - printf("Has ARM %x\n", has_arm); + printf("Has ARM %d\n", has_arm); int has_neon = TestCpuFlag(kCpuHasNEON); - printf("Has NEON %x\n", has_neon); + printf("Has NEON %d\n", has_neon); +#endif int has_x86 = TestCpuFlag(kCpuHasX86); - printf("Has X86 %x\n", has_x86); int has_sse2 = TestCpuFlag(kCpuHasSSE2); - printf("Has SSE2 %x\n", has_sse2); int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); - printf("Has SSSE3 %x\n", has_ssse3); int has_sse41 = TestCpuFlag(kCpuHasSSE41); - printf("Has SSE4.1 %x\n", has_sse41); int has_sse42 = TestCpuFlag(kCpuHasSSE42); - printf("Has SSE4.2 %x\n", has_sse42); int has_avx = TestCpuFlag(kCpuHasAVX); - printf("Has AVX %x\n", has_avx); int has_avx2 = TestCpuFlag(kCpuHasAVX2); - printf("Has AVX2 %x\n", has_avx2); int has_erms = TestCpuFlag(kCpuHasERMS); - printf("Has ERMS %x\n", has_erms); int has_fma3 = TestCpuFlag(kCpuHasFMA3); - printf("Has FMA3 %x\n", has_fma3); - int has_avx3 = TestCpuFlag(kCpuHasAVX3); - printf("Has AVX3 %x\n", has_avx3); int has_f16c = TestCpuFlag(kCpuHasF16C); - printf("Has F16C %x\n", has_f16c); + int has_gfni = TestCpuFlag(kCpuHasGFNI); + int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW); + int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL); + int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI); + int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2); + int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG); + int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ); + printf("Has X86 %d\n", has_x86); + printf("Has SSE2 %d\n", has_sse2); + printf("Has SSSE3 %d\n", has_ssse3); + printf("Has SSE41 %d\n", has_sse41); + printf("Has SSE42 %d\n", has_sse42); + printf("Has AVX %d\n", has_avx); + printf("Has AVX2 %d\n", has_avx2); + printf("Has ERMS %d\n", has_erms); + printf("Has FMA3 %d\n", 
has_fma3); + printf("Has F16C %d\n", has_f16c); + printf("Has GFNI %d\n", has_gfni); + printf("Has AVX512BW %d\n", has_avx512bw); + printf("Has AVX512VL %d\n", has_avx512vl); + printf("Has AVX512VBMI %d\n", has_avx512vbmi); + printf("Has AVX512VBMI2 %d\n", has_avx512vbmi2); + printf("Has AVX512VBITALG %d\n", has_avx512vbitalg); + printf("Has AVX512VPOPCNTDQ %d\n", has_avx512vpopcntdq); + +#if defined(__mips__) int has_mips = TestCpuFlag(kCpuHasMIPS); - printf("Has MIPS %x\n", has_mips); - int has_dspr2 = TestCpuFlag(kCpuHasDSPR2); - printf("Has DSPR2 %x\n", has_dspr2); + printf("Has MIPS %d\n", has_mips); int has_msa = TestCpuFlag(kCpuHasMSA); - printf("Has MSA %x\n", has_msa); + printf("Has MSA %d\n", has_msa); + int has_mmi = TestCpuFlag(kCpuHasMMI); + printf("Has MMI %d\n", has_mmi); +#endif } TEST_F(LibYUVBaseTest, TestCpuCompilerEnabled) { @@ -83,7 +100,7 @@ TEST_F(LibYUVBaseTest, TestCpuCompilerEnabled) { TEST_F(LibYUVBaseTest, TestCpuId) { int has_x86 = TestCpuFlag(kCpuHasX86); if (has_x86) { - uint32 cpu_info[4]; + int cpu_info[4]; // Vendor ID: // AuthenticAMD AMD processor // CentaurHauls Centaur processor @@ -130,6 +147,8 @@ static int FileExists(const char* file_name) { TEST_F(LibYUVBaseTest, TestLinuxNeon) { if (FileExists("../../unit_test/testdata/arm_v7.txt")) { + printf("Note: testing to load \"../../unit_test/testdata/arm_v7.txt\"\n"); + EXPECT_EQ(0, ArmCpuCaps("../../unit_test/testdata/arm_v7.txt")); EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/tegra3.txt")); EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/juno.txt")); @@ -141,4 +160,27 @@ TEST_F(LibYUVBaseTest, TestLinuxNeon) { #endif } +TEST_F(LibYUVBaseTest, TestSetCpuFlags) { + // Reset any masked flags that may have been set so auto init is enabled. + MaskCpuFlags(0); + + int original_cpu_flags = TestCpuFlag(-1); + + // Test setting different CPU configurations. 
+ int cpu_flags = kCpuHasARM | kCpuHasNEON | kCpuInitialized; + SetCpuFlags(cpu_flags); + EXPECT_EQ(cpu_flags, TestCpuFlag(-1)); + + cpu_flags = kCpuHasX86 | kCpuInitialized; + SetCpuFlags(cpu_flags); + EXPECT_EQ(cpu_flags, TestCpuFlag(-1)); + + // Test that setting 0 turns auto-init back on. + SetCpuFlags(0); + EXPECT_EQ(original_cpu_flags, TestCpuFlag(-1)); + + // Restore the CPU flag mask. + MaskCpuFlags(benchmark_cpu_info_); +} + } // namespace libyuv diff --git a/files/unit_test/cpu_thread_test.cc b/files/unit_test/cpu_thread_test.cc new file mode 100644 index 00000000..59061b98 --- /dev/null +++ b/files/unit_test/cpu_thread_test.cc @@ -0,0 +1,63 @@ +/* + * Copyright 2017 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <gtest/gtest.h> + +#include "libyuv/cpu_id.h" + +#if defined(__clang__) +#if __has_include(<pthread.h>) +#define LIBYUV_HAVE_PTHREAD 1 +#endif +#elif defined(__linux__) +#define LIBYUV_HAVE_PTHREAD 1 +#endif + +#ifdef LIBYUV_HAVE_PTHREAD +#include <pthread.h> +#endif + +namespace libyuv { + +#ifdef LIBYUV_HAVE_PTHREAD +void* ThreadMain(void* arg) { + int* flags = static_cast<int*>(arg); + + *flags = TestCpuFlag(kCpuHasSSSE3); + return nullptr; +} +#endif // LIBYUV_HAVE_PTHREAD + +// Call TestCpuFlag() from two threads. ThreadSanitizer should not report any +// data race. +TEST(LibYUVCpuThreadTest, TestCpuFlagMultipleThreads) { +#ifdef LIBYUV_HAVE_PTHREAD + int cpu_flags1; + int cpu_flags2; + int ret; + pthread_t thread1; + pthread_t thread2; + + MaskCpuFlags(0); // Reset to 0 to allow auto detect. 
+ ret = pthread_create(&thread1, nullptr, ThreadMain, &cpu_flags1); + ASSERT_EQ(ret, 0); + ret = pthread_create(&thread2, nullptr, ThreadMain, &cpu_flags2); + ASSERT_EQ(ret, 0); + ret = pthread_join(thread1, nullptr); + EXPECT_EQ(ret, 0); + ret = pthread_join(thread2, nullptr); + EXPECT_EQ(ret, 0); + EXPECT_EQ(cpu_flags1, cpu_flags2); +#else + printf("pthread unavailable; Test skipped."); +#endif // LIBYUV_HAVE_PTHREAD +} + +} // namespace libyuv diff --git a/files/unit_test/math_test.cc b/files/unit_test/math_test.cc index 2b4b57b1..0abbad51 100644 --- a/files/unit_test/math_test.cc +++ b/files/unit_test/math_test.cc @@ -65,8 +65,8 @@ TEST_F(LibYUVBaseTest, TestFixedDiv) { } EXPECT_EQ(123 * 65536, libyuv::FixedDiv(123, 1)); - MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num)); - MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div)); + MemRandomize(reinterpret_cast<uint8_t*>(&num[0]), sizeof(num)); + MemRandomize(reinterpret_cast<uint8_t*>(&div[0]), sizeof(div)); for (int j = 0; j < 1280; ++j) { if (div[j] == 0) { div[j] = 1280; @@ -90,8 +90,8 @@ TEST_F(LibYUVBaseTest, TestFixedDiv_Opt) { int result_opt[1280]; int result_c[1280]; - MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num)); - MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div)); + MemRandomize(reinterpret_cast<uint8_t*>(&num[0]), sizeof(num)); + MemRandomize(reinterpret_cast<uint8_t*>(&div[0]), sizeof(div)); for (int j = 0; j < 1280; ++j) { num[j] &= 4095; // Make numerator smaller. div[j] &= 4095; // Make divisor smaller. @@ -124,8 +124,8 @@ TEST_F(LibYUVBaseTest, TestFixedDiv1_Opt) { int result_opt[1280]; int result_c[1280]; - MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num)); - MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div)); + MemRandomize(reinterpret_cast<uint8_t*>(&num[0]), sizeof(num)); + MemRandomize(reinterpret_cast<uint8_t*>(&div[0]), sizeof(div)); for (int j = 0; j < 1280; ++j) { num[j] &= 4095; // Make numerator smaller. 
div[j] &= 4095; // Make divisor smaller. diff --git a/files/unit_test/planar_test.cc b/files/unit_test/planar_test.cc index 28d557a5..70f8966e 100644 --- a/files/unit_test/planar_test.cc +++ b/files/unit_test/planar_test.cc @@ -8,9 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <math.h> #include <stdlib.h> #include <time.h> +// row.h defines SIMD_ALIGNED, overriding unit_test.h +#include "libyuv/row.h" /* For ScaleSumSamples_Neon */ + #include "../unit_test/unit_test.h" #include "libyuv/compare.h" #include "libyuv/convert.h" @@ -248,8 +252,8 @@ TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) { } TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { - SIMD_ALIGNED(uint8 orig_pixels[16][16][4]); - SIMD_ALIGNED(int32 added_pixels[16][16][4]); + SIMD_ALIGNED(uint8_t orig_pixels[16][16][4]); + SIMD_ALIGNED(int32_t added_pixels[16][16][4]); for (int y = 0; y < 16; ++y) { for (int x = 0; x < 16; ++x) { @@ -274,7 +278,7 @@ TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { } TEST_F(LibYUVPlanarTest, TestARGBGray) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); // Test blue @@ -345,8 +349,8 @@ TEST_F(LibYUVPlanarTest, TestARGBGray) { } TEST_F(LibYUVPlanarTest, TestARGBGrayTo) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 gray_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t gray_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); // Test blue @@ -417,7 +421,7 @@ TEST_F(LibYUVPlanarTest, TestARGBGrayTo) { } TEST_F(LibYUVPlanarTest, TestARGBSepia) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); // Test blue @@ -489,12 +493,12 @@ TEST_F(LibYUVPlanarTest, TestARGBSepia) { } TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 
dst_pixels_opt[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]); // Matrix for Sepia. - SIMD_ALIGNED(static const int8 kRGBToSepia[]) = { + SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = { 17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0, 24 / 2, 98 / 2, 50 / 2, 0, 0, 0, 0, 64, // Copy alpha. }; @@ -565,10 +569,10 @@ TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) { } TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); // Matrix for Sepia. - SIMD_ALIGNED(static const int8 kRGBToSepia[]) = { + SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = { 17, 68, 35, 0, 22, 88, 45, 0, 24, 98, 50, 0, 0, 0, 0, 0, // Unused but makes matrix 16 bytes. }; @@ -625,11 +629,11 @@ TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) { } TEST_F(LibYUVPlanarTest, TestARGBColorTable) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); // Matrix for Sepia. - static const uint8 kARGBTable[256 * 4] = { + static const uint8_t kARGBTable[256 * 4] = { 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u, }; @@ -681,11 +685,11 @@ TEST_F(LibYUVPlanarTest, TestARGBColorTable) { // Same as TestARGBColorTable except alpha does not change. TEST_F(LibYUVPlanarTest, TestRGBColorTable) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); // Matrix for Sepia. 
- static const uint8 kARGBTable[256 * 4] = { + static const uint8_t kARGBTable[256 * 4] = { 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u, }; @@ -736,7 +740,7 @@ TEST_F(LibYUVPlanarTest, TestRGBColorTable) { } TEST_F(LibYUVPlanarTest, TestARGBQuantize) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -760,8 +764,8 @@ TEST_F(LibYUVPlanarTest, TestARGBQuantize) { } TEST_F(LibYUVPlanarTest, TestARGBMirror) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels[1280][4]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -783,8 +787,8 @@ TEST_F(LibYUVPlanarTest, TestARGBMirror) { } TEST_F(LibYUVPlanarTest, TestShade) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 shade_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t shade_pixels[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); orig_pixels[0][0] = 10u; @@ -841,9 +845,9 @@ TEST_F(LibYUVPlanarTest, TestShade) { } TEST_F(LibYUVPlanarTest, TestARGBInterpolate) { - SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); - SIMD_ALIGNED(uint8 orig_pixels_1[1280][4]); - SIMD_ALIGNED(uint8 interpolate_pixels[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels_1[1280][4]); + SIMD_ALIGNED(uint8_t interpolate_pixels[1280][4]); memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); @@ -922,9 +926,9 @@ TEST_F(LibYUVPlanarTest, TestARGBInterpolate) { } TEST_F(LibYUVPlanarTest, TestInterpolatePlane) { - SIMD_ALIGNED(uint8 orig_pixels_0[1280]); - SIMD_ALIGNED(uint8 orig_pixels_1[1280]); - SIMD_ALIGNED(uint8 interpolate_pixels[1280]); + SIMD_ALIGNED(uint8_t orig_pixels_0[1280]); + SIMD_ALIGNED(uint8_t orig_pixels_1[1280]); + SIMD_ALIGNED(uint8_t 
interpolate_pixels[1280]); memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); @@ -1188,7 +1192,6 @@ static void TestBlendPlane(int width, free_aligned_buffer_page_end(src_argb_alpha); free_aligned_buffer_page_end(dst_argb_c); free_aligned_buffer_page_end(dst_argb_opt); - return; } TEST_F(LibYUVPlanarTest, BlendPlane_Opt) { @@ -1282,7 +1285,6 @@ static void TestI420Blend(int width, free_aligned_buffer_page_end(dst_y_opt); free_aligned_buffer_page_end(dst_u_opt); free_aligned_buffer_page_end(dst_v_opt); - return; } TEST_F(LibYUVPlanarTest, I420Blend_Opt) { @@ -1305,8 +1307,8 @@ TEST_F(LibYUVPlanarTest, I420Blend_Invert) { } TEST_F(LibYUVPlanarTest, TestAffine) { - SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); - SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]); + SIMD_ALIGNED(uint8_t interpolate_pixels_C[1280][4]); for (int i = 0; i < 1280; ++i) { for (int j = 0; j < 4; ++j) { @@ -1323,7 +1325,7 @@ TEST_F(LibYUVPlanarTest, TestAffine) { EXPECT_EQ(191u, interpolate_pixels_C[255][3]); #if defined(HAS_ARGBAFFINEROW_SSE2) - SIMD_ALIGNED(uint8 interpolate_pixels_Opt[1280][4]); + SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]); ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0], uv_step, 1280); EXPECT_EQ(0, memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4)); @@ -1363,7 +1365,7 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) { // Fill destination buffers with random data. 
for (i = 0; i < y_plane_size; ++i) { - uint8 random_number = fastrand() & 0x7f; + uint8_t random_number = fastrand() & 0x7f; dst_c[i] = random_number; dst_opt[i] = dst_c[i]; } @@ -1386,8 +1388,9 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) { } for (i = 0; i < y_plane_size; ++i) { - if (dst_c[i] != dst_opt[i]) + if (dst_c[i] != dst_opt[i]) { ++err; + } } free_aligned_buffer_page_end(orig_y); @@ -1863,12 +1866,12 @@ static int TestBlur(int width, MaskCpuFlags(disable_cpu_flags); ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride, - reinterpret_cast<int32*>(dst_cumsum), width * 4, width, + reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width, invert * height, radius); MaskCpuFlags(benchmark_cpu_info); for (int i = 0; i < benchmark_iterations; ++i) { ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride, - reinterpret_cast<int32*>(dst_cumsum), width * 4, width, + reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width, invert * height, radius); } int max_diff = 0; @@ -1945,9 +1948,9 @@ TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) { } TEST_F(LibYUVPlanarTest, TestARGBPolynomial) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = { @@ -2042,37 +2045,38 @@ int TestHalfFloatPlane(int benchmark_width, const int y_plane_size = benchmark_width * benchmark_height * 2; align_buffer_page_end(orig_y, y_plane_size * 3); - uint8* dst_opt = orig_y + y_plane_size; - uint8* dst_c = orig_y + y_plane_size * 2; + uint8_t* dst_opt = orig_y + y_plane_size; + uint8_t* dst_c = orig_y + y_plane_size * 2; MemRandomize(orig_y, y_plane_size); memset(dst_c, 0, y_plane_size); memset(dst_opt, 1, y_plane_size); for (i = 0; i < y_plane_size / 2; ++i) { - 
reinterpret_cast<uint16*>(orig_y)[i] &= mask; + reinterpret_cast<uint16_t*>(orig_y)[i] &= mask; } // Disable all optimizations. MaskCpuFlags(disable_cpu_flags); for (j = 0; j < benchmark_iterations; j++) { - HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, - reinterpret_cast<uint16*>(dst_c), benchmark_width * 2, scale, - benchmark_width, benchmark_height); + HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2, + reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2, + scale, benchmark_width, benchmark_height); } // Enable optimizations. MaskCpuFlags(benchmark_cpu_info); for (j = 0; j < benchmark_iterations; j++) { - HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, - reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2, + HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2, + reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2, scale, benchmark_width, benchmark_height); } int max_diff = 0; for (i = 0; i < y_plane_size / 2; ++i) { - int abs_diff = abs(static_cast<int>(reinterpret_cast<uint16*>(dst_c)[i]) - - static_cast<int>(reinterpret_cast<uint16*>(dst_opt)[i])); + int abs_diff = + abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) - + static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i])); if (abs_diff > max_diff) { max_diff = abs_diff; } @@ -2164,10 +2168,56 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) { EXPECT_LE(diff, 1); } +float TestByteToFloat(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + float scale) { + int i, j; + const int y_plane_size = benchmark_width * benchmark_height; + + align_buffer_page_end(orig_y, y_plane_size * (1 + 4 + 4)); + float* dst_opt = reinterpret_cast<float*>(orig_y + y_plane_size); + float* dst_c = reinterpret_cast<float*>(orig_y + y_plane_size * 5); + + MemRandomize(orig_y, y_plane_size); + memset(dst_c, 0, y_plane_size * 4); + 
memset(dst_opt, 1, y_plane_size * 4); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags); + ByteToFloat(orig_y, dst_c, scale, y_plane_size); + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info); + for (j = 0; j < benchmark_iterations; j++) { + ByteToFloat(orig_y, dst_opt, scale, y_plane_size); + } + + float max_diff = 0; + for (i = 0; i < y_plane_size; ++i) { + float abs_diff = fabs(dst_c[i] - dst_opt[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(orig_y); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestByteToFloat) { + float diff = TestByteToFloat(benchmark_width_, benchmark_height_, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_, 1.0f); + EXPECT_EQ(0.f, diff); +} + TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { - SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); + SIMD_ALIGNED(uint8_t orig_pixels[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); align_buffer_page_end(lumacolortable, 32768); @@ -2339,7 +2389,7 @@ static int TestARGBRect(int width, } const int kStride = width * bpp; const int kSize = kStride * height; - const uint32 v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff); + const uint32_t v32 = fastrand() & (bpp == 4 ? 
0xffffffff : 0xff); align_buffer_page_end(dst_argb_c, kSize + off); align_buffer_page_end(dst_argb_opt, kSize + off); @@ -2518,4 +2568,805 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) { free_aligned_buffer_page_end(dst_pixels_c); } +TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 3); + align_buffer_page_end(tmp_pixels_r, kPixels); + align_buffer_page_end(tmp_pixels_g, kPixels); + align_buffer_page_end(tmp_pixels_b, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 3); + align_buffer_page_end(dst_pixels_c, kPixels * 3); + + MemRandomize(src_pixels, kPixels * 3); + MemRandomize(tmp_pixels_r, kPixels); + MemRandomize(tmp_pixels_g, kPixels); + MemRandomize(tmp_pixels_b, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 3); + MemRandomize(dst_pixels_c, kPixels * 3); + + MaskCpuFlags(disable_cpu_flags_); + SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, benchmark_width_, benchmark_height_); + MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, dst_pixels_c, + benchmark_width_ * 3, benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, benchmark_width_, benchmark_height_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, + benchmark_width_, tmp_pixels_b, benchmark_width_, + dst_pixels_opt, benchmark_width_ * 3, benchmark_width_, + benchmark_height_); + } + + for (int i = 0; i < kPixels * 3; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(tmp_pixels_r); + 
free_aligned_buffer_page_end(tmp_pixels_g); + free_aligned_buffer_page_end(tmp_pixels_b); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels, kPixels * 3); + align_buffer_page_end(tmp_pixels_r, kPixels); + align_buffer_page_end(tmp_pixels_g, kPixels); + align_buffer_page_end(tmp_pixels_b, kPixels); + align_buffer_page_end(dst_pixels_opt, kPixels * 3); + align_buffer_page_end(dst_pixels_c, kPixels * 3); + + MemRandomize(src_pixels, kPixels * 3); + MemRandomize(tmp_pixels_r, kPixels); + MemRandomize(tmp_pixels_g, kPixels); + MemRandomize(tmp_pixels_b, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 3); + MemRandomize(dst_pixels_c, kPixels * 3); + + MaskCpuFlags(disable_cpu_flags_); + SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b, + benchmark_width_, benchmark_width_, benchmark_height_); + MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, dst_pixels_c, + benchmark_width_ * 3, benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r, + benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, benchmark_width_, + benchmark_height_); + } + MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_, + tmp_pixels_b, benchmark_width_, dst_pixels_opt, + benchmark_width_ * 3, benchmark_width_, benchmark_height_); + + for (int i = 0; i < kPixels * 3; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(tmp_pixels_r); + free_aligned_buffer_page_end(tmp_pixels_g); + 
free_aligned_buffer_page_end(tmp_pixels_b); + free_aligned_buffer_page_end(dst_pixels_opt); + free_aligned_buffer_page_end(dst_pixels_c); +} + +// TODO(fbarchard): improve test for platforms and cpu detect +#ifdef HAS_MERGEUVROW_16_AVX2 +TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_u, kPixels * 2); + align_buffer_page_end(src_pixels_v, kPixels * 2); + align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2); + align_buffer_page_end(dst_pixels_uv_c, kPixels * 2 * 2); + + MemRandomize(src_pixels_u, kPixels * 2); + MemRandomize(src_pixels_v, kPixels * 2); + memset(dst_pixels_uv_opt, 0, kPixels * 2 * 2); + memset(dst_pixels_uv_c, 1, kPixels * 2 * 2); + + MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u), + reinterpret_cast<const uint16_t*>(src_pixels_v), + reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 64, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + MergeUVRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_u), + reinterpret_cast<const uint16_t*>(src_pixels_v), + reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64, + kPixels); + } else { + MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u), + reinterpret_cast<const uint16_t*>(src_pixels_v), + reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64, + kPixels); + } + } + + for (int i = 0; i < kPixels * 2 * 2; ++i) { + EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_u); + free_aligned_buffer_page_end(src_pixels_v); + free_aligned_buffer_page_end(dst_pixels_uv_opt); + free_aligned_buffer_page_end(dst_pixels_uv_c); +} +#endif + +// TODO(fbarchard): Improve test for more platforms. 
+#ifdef HAS_MULTIPLYROW_16_AVX2 +TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels * 2); + align_buffer_page_end(dst_pixels_y_c, kPixels * 2); + + MemRandomize(src_pixels_y, kPixels * 2); + memset(dst_pixels_y_opt, 0, kPixels * 2); + memset(dst_pixels_y_c, 1, kPixels * 2); + + MultiplyRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_y), + reinterpret_cast<uint16_t*>(dst_pixels_y_c), 64, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + MultiplyRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_y), + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 64, + kPixels); + } else { + MultiplyRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_y), + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 64, + kPixels); + } + } + + for (int i = 0; i < kPixels * 2; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} +#endif // HAS_MULTIPLYROW_16_AVX2 + +TEST_F(LibYUVPlanarTest, Convert16To8Plane) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels); + align_buffer_page_end(dst_pixels_y_c, kPixels); + + MemRandomize(src_pixels_y, kPixels * 2); + memset(dst_pixels_y_opt, 0, kPixels); + memset(dst_pixels_y_c, 1, kPixels); + + MaskCpuFlags(disable_cpu_flags_); + Convert16To8Plane(reinterpret_cast<const uint16_t*>(src_pixels_y), + benchmark_width_, dst_pixels_y_c, benchmark_width_, 16384, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + Convert16To8Plane(reinterpret_cast<const 
uint16_t*>(src_pixels_y), + benchmark_width_, dst_pixels_y_opt, benchmark_width_, + 16384, benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} + +// TODO(fbarchard): Improve test for more platforms. +#ifdef HAS_CONVERT16TO8ROW_AVX2 +TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) { + // AVX2 does multiple of 32, so round count up + const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels); + align_buffer_page_end(dst_pixels_y_c, kPixels); + + MemRandomize(src_pixels_y, kPixels * 2); + // clamp source range to 10 bits. + for (int i = 0; i < kPixels; ++i) { + reinterpret_cast<uint16_t*>(src_pixels_y)[i] &= 1023; + } + + memset(dst_pixels_y_opt, 0, kPixels); + memset(dst_pixels_y_c, 1, kPixels); + + Convert16To8Row_C(reinterpret_cast<const uint16_t*>(src_pixels_y), + dst_pixels_y_c, 16384, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + Convert16To8Row_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_y), + dst_pixels_y_opt, 16384, kPixels); + } else if (has_ssse3) { + Convert16To8Row_SSSE3(reinterpret_cast<const uint16_t*>(src_pixels_y), + dst_pixels_y_opt, 16384, kPixels); + } else { + Convert16To8Row_C(reinterpret_cast<const uint16_t*>(src_pixels_y), + dst_pixels_y_opt, 16384, kPixels); + } + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} +#endif // HAS_CONVERT16TO8ROW_AVX2 + +TEST_F(LibYUVPlanarTest, 
Convert8To16Plane) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_y, kPixels); + align_buffer_page_end(dst_pixels_y_opt, kPixels * 2); + align_buffer_page_end(dst_pixels_y_c, kPixels * 2); + + MemRandomize(src_pixels_y, kPixels); + memset(dst_pixels_y_opt, 0, kPixels * 2); + memset(dst_pixels_y_c, 1, kPixels * 2); + + MaskCpuFlags(disable_cpu_flags_); + Convert8To16Plane(src_pixels_y, benchmark_width_, + reinterpret_cast<uint16_t*>(dst_pixels_y_c), + benchmark_width_, 1024, benchmark_width_, + benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + Convert8To16Plane(src_pixels_y, benchmark_width_, + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), + benchmark_width_, 1024, benchmark_width_, + benchmark_height_); + } + + for (int i = 0; i < kPixels * 2; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} + +// TODO(fbarchard): Improve test for more platforms. 
+#ifdef HAS_CONVERT8TO16ROW_AVX2 +TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) { + const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31; + align_buffer_page_end(src_pixels_y, kPixels); + align_buffer_page_end(dst_pixels_y_opt, kPixels * 2); + align_buffer_page_end(dst_pixels_y_c, kPixels * 2); + + MemRandomize(src_pixels_y, kPixels); + memset(dst_pixels_y_opt, 0, kPixels * 2); + memset(dst_pixels_y_c, 1, kPixels * 2); + + Convert8To16Row_C(src_pixels_y, reinterpret_cast<uint16_t*>(dst_pixels_y_c), + 1024, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + int has_sse2 = TestCpuFlag(kCpuHasSSE2); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + Convert8To16Row_AVX2(src_pixels_y, + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024, + kPixels); + } else if (has_sse2) { + Convert8To16Row_SSE2(src_pixels_y, + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024, + kPixels); + } else { + Convert8To16Row_C(src_pixels_y, + reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024, + kPixels); + } + } + + for (int i = 0; i < kPixels * 2; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} +#endif // HAS_CONVERT8TO16ROW_AVX2 + +float TestScaleMaxSamples(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + float scale, + bool opt) { + int i, j; + float max_c, max_opt = 0.f; + // NEON does multiple of 8, so round count up + const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; + align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48); + uint8_t* dst_c = orig_y + kPixels * 4 + 16; + uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32; + + // Randomize works but may contain some denormals affecting performance. + // MemRandomize(orig_y, kPixels * 4); + // large values are problematic. audio is really -1 to 1. 
+ for (i = 0; i < kPixels; ++i) { + (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f); + } + memset(dst_c, 0, kPixels * 4); + memset(dst_opt, 1, kPixels * 4); + + max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_c), scale, kPixels); + + for (j = 0; j < benchmark_iterations; j++) { + if (opt) { +#ifdef HAS_SCALESUMSAMPLES_NEON + max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, + kPixels); +#else + max_opt = + ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); +#endif + } else { + max_opt = + ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); + } + } + + float max_diff = FAbs(max_opt - max_c); + for (i = 0; i < kPixels; ++i) { + float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) - + (reinterpret_cast<float*>(dst_opt)[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(orig_y); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) { + float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, false); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) { + float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, true); + EXPECT_EQ(0, diff); +} + +float TestScaleSumSamples(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + float scale, + bool opt) { + int i, j; + float sum_c, sum_opt = 0.f; + // NEON does multiple of 8, so round count up + const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; + align_buffer_page_end(orig_y, kPixels * 4 * 3); + uint8_t* dst_c = orig_y + kPixels * 4; + uint8_t* dst_opt = orig_y + kPixels * 4 * 2; + + // Randomize works but may contain some denormals affecting performance. 
+ // MemRandomize(orig_y, kPixels * 4); + // large values are problematic. audio is really -1 to 1. + for (i = 0; i < kPixels; ++i) { + (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f); + } + memset(dst_c, 0, kPixels * 4); + memset(dst_opt, 1, kPixels * 4); + + sum_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_c), scale, kPixels); + + for (j = 0; j < benchmark_iterations; j++) { + if (opt) { +#ifdef HAS_SCALESUMSAMPLES_NEON + sum_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, + kPixels); +#else + sum_opt = + ScaleSumSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); +#endif + } else { + sum_opt = + ScaleSumSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); + } + } + + float mse_opt = sum_opt / kPixels * 4; + float mse_c = sum_c / kPixels * 4; + float mse_error = FAbs(mse_opt - mse_c) / mse_c; + + // If the sum of a float is more than 4 million, small adds are round down on + // float and produce different results with vectorized sum vs scalar sum. + // Ignore the difference if the sum is large. 
+ float max_diff = 0.f; + if (mse_error > 0.0001 && sum_c < 4000000) { // allow .01% difference of mse + max_diff = mse_error; + } + + for (i = 0; i < kPixels; ++i) { + float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) - + (reinterpret_cast<float*>(dst_opt)[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(orig_y); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) { + float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, false); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) { + float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, true); + EXPECT_EQ(0, diff); +} + +float TestScaleSamples(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + float scale, + bool opt) { + int i, j; + // NEON does multiple of 8, so round count up + const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; + align_buffer_page_end(orig_y, kPixels * 4 * 3); + uint8_t* dst_c = orig_y + kPixels * 4; + uint8_t* dst_opt = orig_y + kPixels * 4 * 2; + + // Randomize works but may contain some denormals affecting performance. + // MemRandomize(orig_y, kPixels * 4); + // large values are problematic. audio is really -1 to 1. 
+ for (i = 0; i < kPixels; ++i) { + (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f); + } + memset(dst_c, 0, kPixels * 4); + memset(dst_opt, 1, kPixels * 4); + + ScaleSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_c), scale, kPixels); + + for (j = 0; j < benchmark_iterations; j++) { + if (opt) { +#ifdef HAS_SCALESUMSAMPLES_NEON + ScaleSamples_NEON(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); +#else + ScaleSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); +#endif + } else { + ScaleSamples_C(reinterpret_cast<float*>(orig_y), + reinterpret_cast<float*>(dst_opt), scale, kPixels); + } + } + + float max_diff = 0.f; + for (i = 0; i < kPixels; ++i) { + float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) - + (reinterpret_cast<float*>(dst_opt)[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(orig_y); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestScaleSamples_C) { + float diff = TestScaleSamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, false); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) { + float diff = TestScaleSamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, true); + EXPECT_EQ(0, diff); +} + +float TestCopySamples(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + bool opt) { + int i, j; + // NEON does multiple of 16 floats, so round count up + const int kPixels = (benchmark_width * benchmark_height + 15) & ~15; + align_buffer_page_end(orig_y, kPixels * 4 * 3); + uint8_t* dst_c = orig_y + kPixels * 4; + uint8_t* dst_opt = orig_y + kPixels * 4 * 2; + + // Randomize works but may contain some denormals affecting performance. + // MemRandomize(orig_y, kPixels * 4); + // large values are problematic. audio is really -1 to 1. 
+ for (i = 0; i < kPixels; ++i) { + (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f); + } + memset(dst_c, 0, kPixels * 4); + memset(dst_opt, 1, kPixels * 4); + + memcpy(reinterpret_cast<void*>(dst_c), reinterpret_cast<void*>(orig_y), + kPixels * 4); + + for (j = 0; j < benchmark_iterations; j++) { + if (opt) { +#ifdef HAS_COPYROW_NEON + CopyRow_NEON(orig_y, dst_opt, kPixels * 4); +#else + CopyRow_C(orig_y, dst_opt, kPixels * 4); +#endif + } else { + CopyRow_C(orig_y, dst_opt, kPixels * 4); + } + } + + float max_diff = 0.f; + for (i = 0; i < kPixels; ++i) { + float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) - + (reinterpret_cast<float*>(dst_opt)[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(orig_y); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestCopySamples_C) { + float diff = TestCopySamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, false); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) { + float diff = TestCopySamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, true); + EXPECT_EQ(0, diff); +} + +extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width); +extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width); + +TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) { + SIMD_ALIGNED(uint32_t orig_pixels[640 + 4]); + SIMD_ALIGNED(uint16_t dst_pixels_c[640]); + SIMD_ALIGNED(uint16_t dst_pixels_opt[640]); + + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); + + for (int i = 0; i < 640 + 4; ++i) { + orig_pixels[i] = i * 256; + } + GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640); + for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && \ + (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) + int has_neon = 
TestCpuFlag(kCpuHasNEON); + if (has_neon) { + GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640); + } else { + GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 640); + } +#else + GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 640); +#endif + } + + for (int i = 0; i < 640; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + EXPECT_EQ(dst_pixels_c[0], + static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1)); + EXPECT_EQ(dst_pixels_c[639], static_cast<uint16_t>(10256)); +} + +extern "C" void GaussCol_NEON(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, + int width); + +extern "C" void GaussCol_C(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, + int width); + +TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) { + SIMD_ALIGNED(uint16_t orig_pixels[640 * 5]); + SIMD_ALIGNED(uint32_t dst_pixels_c[640]); + SIMD_ALIGNED(uint32_t dst_pixels_opt[640]); + + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); + + for (int i = 0; i < 640 * 5; ++i) { + orig_pixels[i] = i; + } + GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2], + &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0], + 640); + for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && \ + (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2], + &orig_pixels[640 * 3], &orig_pixels[640 * 4], + &dst_pixels_opt[0], 640); + } else { + GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2], + &orig_pixels[640 * 3], &orig_pixels[640 * 4], + &dst_pixels_opt[0], 640); + } +#else + GaussCol_C(&orig_pixels[0], 
&orig_pixels[640], &orig_pixels[640 * 2], + &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_opt[0], + 640); +#endif + } + + for (int i = 0; i < 640; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + EXPECT_EQ(dst_pixels_c[0], + static_cast<uint32_t>(0 * 1 + 640 * 4 + 640 * 2 * 6 + 640 * 3 * 4 + + 640 * 4 * 1)); + EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704)); +} + +float TestFloatDivToByte(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + float scale, + bool opt) { + int i, j; + // NEON does multiple of 8, so round count up + const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; + align_buffer_page_end(src_weights, kPixels * 4); + align_buffer_page_end(src_values, kPixels * 4); + align_buffer_page_end(dst_out_c, kPixels); + align_buffer_page_end(dst_out_opt, kPixels); + align_buffer_page_end(dst_mask_c, kPixels); + align_buffer_page_end(dst_mask_opt, kPixels); + + // Randomize works but may contain some denormals affecting performance. + // MemRandomize(orig_y, kPixels * 4); + // large values are problematic. audio is really -1 to 1. 
+ for (i = 0; i < kPixels; ++i) { + (reinterpret_cast<float*>(src_weights))[i] = scale; + (reinterpret_cast<float*>(src_values))[i] = + sinf(static_cast<float>(i) * 0.1f); + } + memset(dst_out_c, 0, kPixels); + memset(dst_out_opt, 1, kPixels); + memset(dst_mask_c, 2, kPixels); + memset(dst_mask_opt, 3, kPixels); + + FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights), + reinterpret_cast<float*>(src_values), dst_out_c, + dst_mask_c, kPixels); + + for (j = 0; j < benchmark_iterations; j++) { + if (opt) { +#ifdef HAS_FLOATDIVTOBYTEROW_NEON + FloatDivToByteRow_NEON(reinterpret_cast<float*>(src_weights), + reinterpret_cast<float*>(src_values), dst_out_opt, + dst_mask_opt, kPixels); +#else + FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights), + reinterpret_cast<float*>(src_values), dst_out_opt, + dst_mask_opt, kPixels); +#endif + } else { + FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights), + reinterpret_cast<float*>(src_values), dst_out_opt, + dst_mask_opt, kPixels); + } + } + + uint8_t max_diff = 0; + for (i = 0; i < kPixels; ++i) { + uint8_t abs_diff = abs(dst_out_c[i] - dst_out_opt[i]) + + abs(dst_mask_c[i] - dst_mask_opt[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(src_weights); + free_aligned_buffer_page_end(src_values); + free_aligned_buffer_page_end(dst_out_c); + free_aligned_buffer_page_end(dst_out_opt); + free_aligned_buffer_page_end(dst_mask_c); + free_aligned_buffer_page_end(dst_mask_opt); + + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestFloatDivToByte_C) { + float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, false); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, TestFloatDivToByte_Opt) { + float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, true); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, UVToVURow) { + const int kPixels = benchmark_width_ * benchmark_height_; + 
align_buffer_page_end(src_pixels_vu, kPixels * 2); + align_buffer_page_end(dst_pixels_uv, kPixels * 2); + + MemRandomize(src_pixels_vu, kPixels * 2); + memset(dst_pixels_uv, 1, kPixels * 2); + + UVToVURow_C(src_pixels_vu, dst_pixels_uv, kPixels); + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]); + EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]); + } + + free_aligned_buffer_page_end(src_pixels_vu); + free_aligned_buffer_page_end(dst_pixels_uv); +} + } // namespace libyuv diff --git a/files/unit_test/rotate_test.cc b/files/unit_test/rotate_test.cc index d04b96e9..61941e63 100644 --- a/files/unit_test/rotate_test.cc +++ b/files/unit_test/rotate_test.cc @@ -135,6 +135,123 @@ TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) { benchmark_cpu_info_); } +static void I444TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i444_y_size = src_width * Abs(src_height); + int src_i444_uv_size = src_width * Abs(src_height); + int src_i444_size = src_i444_y_size + src_i444_uv_size * 2; + align_buffer_page_end(src_i444, src_i444_size); + for (int i = 0; i < src_i444_size; ++i) { + src_i444[i] = fastrand() & 0xff; + } + + int dst_i444_y_size = dst_width * dst_height; + int dst_i444_uv_size = dst_width * dst_height; + int dst_i444_size = dst_i444_y_size + dst_i444_uv_size * 2; + align_buffer_page_end(dst_i444_c, dst_i444_size); + align_buffer_page_end(dst_i444_opt, dst_i444_size); + memset(dst_i444_c, 2, dst_i444_size); + memset(dst_i444_opt, 3, dst_i444_size); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. 
+ I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width, + src_i444 + src_i444_y_size + src_i444_uv_size, src_width, + dst_i444_c, dst_width, dst_i444_c + dst_i444_y_size, dst_width, + dst_i444_c + dst_i444_y_size + dst_i444_uv_size, dst_width, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width, + src_i444 + src_i444_y_size + src_i444_uv_size, src_width, + dst_i444_opt, dst_width, dst_i444_opt + dst_i444_y_size, + dst_width, dst_i444_opt + dst_i444_y_size + dst_i444_uv_size, + dst_width, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i444_size; ++i) { + EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]); + } + + free_aligned_buffer_page_end(dst_i444_c); + free_aligned_buffer_page_end(dst_i444_opt); + free_aligned_buffer_page_end(src_i444); +} + +TEST_F(LibYUVRotateTest, I444Rotate0_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I444Rotate90_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I444Rotate180_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I444Rotate270_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +// TODO(fbarchard): Remove odd width tests. 
+// Odd width tests work but disabled because they use C code and can be +// tested by passing an odd width command line or environment variable. +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate0_Odd) { + I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_width_ - 3, benchmark_height_ - 1, kRotate0, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate90_Odd) { + I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_height_ - 1, benchmark_width_ - 3, kRotate90, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate180_Odd) { + I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_width_ - 3, benchmark_height_ - 1, kRotate180, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate270_Odd) { + I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_height_ - 1, benchmark_width_ - 3, kRotate270, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + static void NV12TestRotate(int src_width, int src_height, int dst_width, diff --git a/files/unit_test/scale_argb_test.cc b/files/unit_test/scale_argb_test.cc index d11aec20..94aef60e 100644 --- a/files/unit_test/scale_argb_test.cc +++ b/files/unit_test/scale_argb_test.cc @@ -37,7 +37,7 @@ static int ARGBTestFilter(int src_width, int i, j; const int b = 0; // 128 to test for padding/stride. 
- int64 src_argb_plane_size = + int64_t src_argb_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4LL; int src_stride_argb = (b * 2 + Abs(src_width)) * 4; @@ -48,7 +48,8 @@ static int ARGBTestFilter(int src_width, } MemRandomize(src_argb, src_argb_plane_size); - int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4LL; + int64_t dst_argb_plane_size = + (dst_width + b * 2) * (dst_height + b * 2) * 4LL; int dst_stride_argb = (b * 2 + dst_width) * 4; align_buffer_page_end(dst_argb_c, dst_argb_plane_size); @@ -116,11 +117,11 @@ static int ARGBTestFilter(int src_width, static const int kTileX = 8; static const int kTileY = 8; -static int TileARGBScale(const uint8* src_argb, +static int TileARGBScale(const uint8_t* src_argb, int src_stride_argb, int src_width, int src_height, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, int dst_width, int dst_height, @@ -157,7 +158,7 @@ static int ARGBClipTestFilter(int src_width, } const int b = 128; - int64 src_argb_plane_size = + int64_t src_argb_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4; int src_stride_argb = (b * 2 + Abs(src_width)) * 4; @@ -168,7 +169,7 @@ static int ARGBClipTestFilter(int src_width, } memset(src_argb, 1, src_argb_plane_size); - int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; + int64_t dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; int dst_stride_argb = (b * 2 + dst_width) * 4; int i, j; @@ -302,27 +303,28 @@ TEST_FACTOR(3, 1, 3) TEST_SCALETO(ARGBScale, 1, 1) TEST_SCALETO(ARGBScale, 320, 240) -TEST_SCALETO(ARGBScale, 352, 288) TEST_SCALETO(ARGBScale, 569, 480) TEST_SCALETO(ARGBScale, 640, 360) TEST_SCALETO(ARGBScale, 1280, 720) +TEST_SCALETO(ARGBScale, 1920, 1080) #undef TEST_SCALETO1 #undef TEST_SCALETO // Scale with YUV conversion to ARGB and clipping. +// TODO(fbarchard): Add fourcc support. All 4 ARGB formats is easy to support. 
LIBYUV_API -int YUVToARGBScaleReference2(const uint8* src_y, +int YUVToARGBScaleReference2(const uint8_t* src_y, int src_stride_y, - const uint8* src_u, + const uint8_t* src_u, int src_stride_u, - const uint8* src_v, + const uint8_t* src_v, int src_stride_v, - uint32 /* src_fourcc */, // TODO: Add support. + uint32_t /* src_fourcc */, int src_width, int src_height, - uint8* dst_argb, + uint8_t* dst_argb, int dst_stride_argb, - uint32 /* dst_fourcc */, // TODO: Add support. + uint32_t /* dst_fourcc */, int dst_width, int dst_height, int clip_x, @@ -330,7 +332,8 @@ int YUVToARGBScaleReference2(const uint8* src_y, int clip_width, int clip_height, enum FilterMode filtering) { - uint8* argb_buffer = static_cast<uint8*>(malloc(src_width * src_height * 4)); + uint8_t* argb_buffer = + static_cast<uint8_t*>(malloc(src_width * src_height * 4)); int r; I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, argb_buffer, src_width * 4, src_width, src_height); @@ -342,7 +345,12 @@ int YUVToARGBScaleReference2(const uint8* src_y, return r; } -static void FillRamp(uint8* buf, int width, int height, int v, int dx, int dy) { +static void FillRamp(uint8_t* buf, + int width, + int height, + int v, + int dx, + int dy) { int rv = v; for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { @@ -369,8 +377,8 @@ static int YUVToARGBTestFilter(int src_width, int dst_height, FilterMode f, int benchmark_iterations) { - int64 src_y_plane_size = Abs(src_width) * Abs(src_height); - int64 src_uv_plane_size = + int64_t src_y_plane_size = Abs(src_width) * Abs(src_height); + int64_t src_uv_plane_size = ((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2); int src_stride_y = Abs(src_width); int src_stride_uv = (Abs(src_width) + 1) / 2; @@ -379,7 +387,7 @@ static int YUVToARGBTestFilter(int src_width, align_buffer_page_end(src_u, src_uv_plane_size); align_buffer_page_end(src_v, src_uv_plane_size); - int64 dst_argb_plane_size = (dst_width) * (dst_height)*4LL; + int64_t 
dst_argb_plane_size = (dst_width) * (dst_height)*4LL; int dst_stride_argb = (dst_width)*4; align_buffer_page_end(dst_argb_c, dst_argb_plane_size); align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); diff --git a/files/unit_test/scale_test.cc b/files/unit_test/scale_test.cc index 0b4ec30b..811b2d04 100644 --- a/files/unit_test/scale_test.cc +++ b/files/unit_test/scale_test.cc @@ -14,6 +14,7 @@ #include "../unit_test/unit_test.h" #include "libyuv/cpu_id.h" #include "libyuv/scale.h" +#include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C #define STRINGIZE(line) #line #define FILELINESTR(file, line) file ":" STRINGIZE(line) @@ -21,32 +22,32 @@ namespace libyuv { // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. -static int TestFilter(int src_width, - int src_height, - int dst_width, - int dst_height, - FilterMode f, - int benchmark_iterations, - int disable_cpu_flags, - int benchmark_cpu_info) { +static int I420TestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { if (!SizeValid(src_width, src_height, dst_width, dst_height)) { return 0; } int i, j; - const int b = 0; // 128 to test for padding/stride. 
int src_width_uv = (Abs(src_width) + 1) >> 1; int src_height_uv = (Abs(src_height) + 1) >> 1; - int64 src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2); - int64 src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2); + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); - int src_stride_y = b * 2 + Abs(src_width); - int src_stride_uv = b * 2 + src_width_uv; + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; - align_buffer_page_end(src_y, src_y_plane_size) - align_buffer_page_end(src_u, src_uv_plane_size) align_buffer_page_end( - src_v, src_uv_plane_size) if (!src_y || !src_u || !src_v) { + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + if (!src_y || !src_u || !src_v) { printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); return 0; } @@ -57,60 +58,51 @@ static int TestFilter(int src_width, int dst_width_uv = (dst_width + 1) >> 1; int dst_height_uv = (dst_height + 1) >> 1; - int64 dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2); - int64 dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2); - - int dst_stride_y = b * 2 + dst_width; - int dst_stride_uv = b * 2 + dst_width_uv; - - align_buffer_page_end(dst_y_c, dst_y_plane_size) - align_buffer_page_end(dst_u_c, dst_uv_plane_size) - align_buffer_page_end(dst_v_c, dst_uv_plane_size) - align_buffer_page_end(dst_y_opt, dst_y_plane_size) - align_buffer_page_end(dst_u_opt, dst_uv_plane_size) - align_buffer_page_end( - dst_v_opt, - dst_uv_plane_size) if (!dst_y_c || !dst_u_c || - !dst_v_c || !dst_y_opt || - !dst_u_opt || !dst_v_opt) { + int64_t dst_y_plane_size = (dst_width) * (dst_height); + int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + 
align_buffer_page_end(dst_y_c, dst_y_plane_size); + align_buffer_page_end(dst_u_c, dst_uv_plane_size); + align_buffer_page_end(dst_v_c, dst_uv_plane_size); + align_buffer_page_end(dst_y_opt, dst_y_plane_size); + align_buffer_page_end(dst_u_opt, dst_uv_plane_size); + align_buffer_page_end(dst_v_opt, dst_uv_plane_size); + if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt || + !dst_v_opt) { printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); return 0; } MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. double c_time = get_time(); - I420Scale(src_y + (src_stride_y * b) + b, src_stride_y, - src_u + (src_stride_uv * b) + b, src_stride_uv, - src_v + (src_stride_uv * b) + b, src_stride_uv, src_width, - src_height, dst_y_c + (dst_stride_y * b) + b, dst_stride_y, - dst_u_c + (dst_stride_uv * b) + b, dst_stride_uv, - dst_v_c + (dst_stride_uv * b) + b, dst_stride_uv, dst_width, - dst_height, f); + I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_c, dst_stride_y, dst_u_c, + dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f); c_time = (get_time() - c_time); MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. 
double opt_time = get_time(); for (i = 0; i < benchmark_iterations; ++i) { - I420Scale(src_y + (src_stride_y * b) + b, src_stride_y, - src_u + (src_stride_uv * b) + b, src_stride_uv, - src_v + (src_stride_uv * b) + b, src_stride_uv, src_width, - src_height, dst_y_opt + (dst_stride_y * b) + b, dst_stride_y, - dst_u_opt + (dst_stride_uv * b) + b, dst_stride_uv, - dst_v_opt + (dst_stride_uv * b) + b, dst_stride_uv, dst_width, - dst_height, f); + I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt, + dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height, + f); } opt_time = (get_time() - opt_time) / benchmark_iterations; - // Report performance of C vs OPT + // Report performance of C vs OPT. printf("filter %d - %8d us C - %8d us OPT\n", f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); // C version may be a little off from the optimized. Order of // operations may introduce rounding somewhere. So do a difference - // of the buffers and look to see that the max difference isn't - // over 2. + // of the buffers and look to see that the max difference is not + // over 3. 
int max_diff = 0; - for (i = b; i < (dst_height + b); ++i) { - for (j = b; j < (dst_width + b); ++j) { + for (i = 0; i < (dst_height); ++i) { + for (j = 0; j < (dst_width); ++j) { int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] - dst_y_opt[(i * dst_stride_y) + j]); if (abs_diff > max_diff) { @@ -119,8 +111,8 @@ static int TestFilter(int src_width, } } - for (i = b; i < (dst_height_uv + b); ++i) { - for (j = b; j < (dst_width_uv + b); ++j) { + for (i = 0; i < (dst_height_uv); ++i) { + for (j = 0; j < (dst_width_uv); ++j) { int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] - dst_u_opt[(i * dst_stride_uv) + j]); if (abs_diff > max_diff) { @@ -134,170 +126,408 @@ static int TestFilter(int src_width, } } - free_aligned_buffer_page_end(dst_y_c) free_aligned_buffer_page_end(dst_u_c) - free_aligned_buffer_page_end(dst_v_c) - free_aligned_buffer_page_end(dst_y_opt) - free_aligned_buffer_page_end(dst_u_opt) - free_aligned_buffer_page_end(dst_v_opt) - - free_aligned_buffer_page_end(src_y) - free_aligned_buffer_page_end(src_u) - free_aligned_buffer_page_end(src_v) - - return max_diff; + free_aligned_buffer_page_end(dst_y_c); + free_aligned_buffer_page_end(dst_u_c); + free_aligned_buffer_page_end(dst_v_c); + free_aligned_buffer_page_end(dst_y_opt); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_opt); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + + return max_diff; } // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference. // 0 = exact. 
-static int TestFilter_16(int src_width, - int src_height, - int dst_width, - int dst_height, - FilterMode f, - int benchmark_iterations) { +static int I420TestFilter_16(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { if (!SizeValid(src_width, src_height, dst_width, dst_height)) { return 0; } - int i, j; - const int b = 0; // 128 to test for padding/stride. + int i; int src_width_uv = (Abs(src_width) + 1) >> 1; int src_height_uv = (Abs(src_height) + 1) >> 1; - int64 src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2); - int64 src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2); + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); - int src_stride_y = b * 2 + Abs(src_width); - int src_stride_uv = b * 2 + src_width_uv; + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; - align_buffer_page_end(src_y, src_y_plane_size) align_buffer_page_end( - src_u, src_uv_plane_size) align_buffer_page_end(src_v, src_uv_plane_size) - align_buffer_page_end(src_y_16, src_y_plane_size * 2) - align_buffer_page_end(src_u_16, src_uv_plane_size * 2) - align_buffer_page_end(src_v_16, src_uv_plane_size * 2) - uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16); - uint16* p_src_u_16 = reinterpret_cast<uint16*>(src_u_16); - uint16* p_src_v_16 = reinterpret_cast<uint16*>(src_v_16); + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + align_buffer_page_end(src_y_16, src_y_plane_size * 2); + align_buffer_page_end(src_u_16, src_uv_plane_size * 2); + align_buffer_page_end(src_v_16, src_uv_plane_size * 2); + if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) { + printf("Skipped. 
Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); + uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16); + uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16); MemRandomize(src_y, src_y_plane_size); MemRandomize(src_u, src_uv_plane_size); MemRandomize(src_v, src_uv_plane_size); - for (i = b; i < src_height + b; ++i) { - for (j = b; j < src_width + b; ++j) { - p_src_y_16[(i * src_stride_y) + j] = src_y[(i * src_stride_y) + j]; - } + for (i = 0; i < src_y_plane_size; ++i) { + p_src_y_16[i] = src_y[i]; } - - for (i = b; i < (src_height_uv + b); ++i) { - for (j = b; j < (src_width_uv + b); ++j) { - p_src_u_16[(i * src_stride_uv) + j] = src_u[(i * src_stride_uv) + j]; - p_src_v_16[(i * src_stride_uv) + j] = src_v[(i * src_stride_uv) + j]; - } + for (i = 0; i < src_uv_plane_size; ++i) { + p_src_u_16[i] = src_u[i]; + p_src_v_16[i] = src_v[i]; } int dst_width_uv = (dst_width + 1) >> 1; int dst_height_uv = (dst_height + 1) >> 1; - int dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2); - int dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2); + int dst_y_plane_size = (dst_width) * (dst_height); + int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; - int dst_stride_y = b * 2 + dst_width; - int dst_stride_uv = b * 2 + dst_width_uv; + align_buffer_page_end(dst_y_8, dst_y_plane_size); + align_buffer_page_end(dst_u_8, dst_uv_plane_size); + align_buffer_page_end(dst_v_8, dst_uv_plane_size); + align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); + align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2); + align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2); - align_buffer_page_end(dst_y_8, dst_y_plane_size) - align_buffer_page_end(dst_u_8, dst_uv_plane_size) - align_buffer_page_end(dst_v_8, dst_uv_plane_size) - align_buffer_page_end(dst_y_16, dst_y_plane_size * 2) - 
align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2) - align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2) + uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); + uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16); + uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16); - uint16* p_dst_y_16 = - reinterpret_cast<uint16*>(dst_y_16); - uint16* p_dst_u_16 = reinterpret_cast<uint16*>(dst_u_16); - uint16* p_dst_v_16 = reinterpret_cast<uint16*>(dst_v_16); + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_8, dst_stride_y, dst_u_8, + dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (i = 0; i < benchmark_iterations; ++i) { + I420Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv, + p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16, + dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16, + dst_stride_uv, dst_width, dst_height, f); + } + + // Expect an exact match. 
+ int max_diff = 0; + for (i = 0; i < dst_y_plane_size; ++i) { + int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + for (i = 0; i < dst_uv_plane_size; ++i) { + int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(dst_y_8); + free_aligned_buffer_page_end(dst_u_8); + free_aligned_buffer_page_end(dst_v_8); + free_aligned_buffer_page_end(dst_y_16); + free_aligned_buffer_page_end(dst_u_16); + free_aligned_buffer_page_end(dst_v_16); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + free_aligned_buffer_page_end(src_y_16); + free_aligned_buffer_page_end(src_u_16); + free_aligned_buffer_page_end(src_v_16); + + return max_diff; +} - I420Scale(src_y + (src_stride_y * b) + b, src_stride_y, - src_u + (src_stride_uv * b) + b, src_stride_uv, - src_v + (src_stride_uv * b) + b, src_stride_uv, src_width, - src_height, dst_y_8 + (dst_stride_y * b) + b, dst_stride_y, - dst_u_8 + (dst_stride_uv * b) + b, dst_stride_uv, - dst_v_8 + (dst_stride_uv * b) + b, dst_stride_uv, dst_width, - dst_height, f); +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. 
+static int I444TestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i, j; + int src_width_uv = Abs(src_width); + int src_height_uv = Abs(src_height); + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + if (!src_y || !src_u || !src_v) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + int dst_width_uv = dst_width; + int dst_height_uv = dst_height; + + int64_t dst_y_plane_size = (dst_width) * (dst_height); + int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + align_buffer_page_end(dst_y_c, dst_y_plane_size); + align_buffer_page_end(dst_u_c, dst_uv_plane_size); + align_buffer_page_end(dst_v_c, dst_uv_plane_size); + align_buffer_page_end(dst_y_opt, dst_y_plane_size); + align_buffer_page_end(dst_u_opt, dst_uv_plane_size); + align_buffer_page_end(dst_v_opt, dst_uv_plane_size); + if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt || + !dst_v_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. 
+ double c_time = get_time(); + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_c, dst_stride_y, dst_u_c, + dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f); + c_time = (get_time() - c_time); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + double opt_time = get_time(); for (i = 0; i < benchmark_iterations; ++i) { - I420Scale_16(p_src_y_16 + (src_stride_y * b) + b, src_stride_y, - p_src_u_16 + (src_stride_uv * b) + b, src_stride_uv, - p_src_v_16 + (src_stride_uv * b) + b, src_stride_uv, src_width, - src_height, p_dst_y_16 + (dst_stride_y * b) + b, dst_stride_y, - p_dst_u_16 + (dst_stride_uv * b) + b, dst_stride_uv, - p_dst_v_16 + (dst_stride_uv * b) + b, dst_stride_uv, dst_width, - dst_height, f); + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt, + dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height, + f); } + opt_time = (get_time() - opt_time) / benchmark_iterations; + // Report performance of C vs OPT. + printf("filter %d - %8d us C - %8d us OPT\n", f, + static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); - // Expect an exact match + // C version may be a little off from the optimized. Order of + // operations may introduce rounding somewhere. So do a difference + // of the buffers and look to see that the max difference is not + // over 3. 
int max_diff = 0; - for (i = b; i < (dst_height + b); ++i) { - for (j = b; j < (dst_width + b); ++j) { - int abs_diff = Abs(dst_y_8[(i * dst_stride_y) + j] - - p_dst_y_16[(i * dst_stride_y) + j]); + for (i = 0; i < (dst_height); ++i) { + for (j = 0; j < (dst_width); ++j) { + int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] - + dst_y_opt[(i * dst_stride_y) + j]); if (abs_diff > max_diff) { max_diff = abs_diff; } } } - for (i = b; i < (dst_height_uv + b); ++i) { - for (j = b; j < (dst_width_uv + b); ++j) { - int abs_diff = Abs(dst_u_8[(i * dst_stride_uv) + j] - - p_dst_u_16[(i * dst_stride_uv) + j]); + for (i = 0; i < (dst_height_uv); ++i) { + for (j = 0; j < (dst_width_uv); ++j) { + int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] - + dst_u_opt[(i * dst_stride_uv) + j]); if (abs_diff > max_diff) { max_diff = abs_diff; } - abs_diff = Abs(dst_v_8[(i * dst_stride_uv) + j] - - p_dst_v_16[(i * dst_stride_uv) + j]); + abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] - + dst_v_opt[(i * dst_stride_uv) + j]); if (abs_diff > max_diff) { max_diff = abs_diff; } } } - free_aligned_buffer_page_end(dst_y_8) free_aligned_buffer_page_end(dst_u_8) - free_aligned_buffer_page_end(dst_v_8) - free_aligned_buffer_page_end(dst_y_16) - free_aligned_buffer_page_end(dst_u_16) - free_aligned_buffer_page_end(dst_v_16) + free_aligned_buffer_page_end(dst_y_c); + free_aligned_buffer_page_end(dst_u_c); + free_aligned_buffer_page_end(dst_v_c); + free_aligned_buffer_page_end(dst_y_opt); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_opt); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + + return max_diff; +} + +// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference. +// 0 = exact. 
+static int I444TestFilter_16(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i; + int src_width_uv = Abs(src_width); + int src_height_uv = Abs(src_height); + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; - free_aligned_buffer_page_end(src_y) - free_aligned_buffer_page_end(src_u) - free_aligned_buffer_page_end(src_v) - free_aligned_buffer_page_end(src_y_16) - free_aligned_buffer_page_end(src_u_16) - free_aligned_buffer_page_end(src_v_16) + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + align_buffer_page_end(src_y_16, src_y_plane_size * 2); + align_buffer_page_end(src_u_16, src_uv_plane_size * 2); + align_buffer_page_end(src_v_16, src_uv_plane_size * 2); + if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) { + printf("Skipped. 
Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); + uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16); + uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16); + + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + for (i = 0; i < src_y_plane_size; ++i) { + p_src_y_16[i] = src_y[i]; + } + for (i = 0; i < src_uv_plane_size; ++i) { + p_src_u_16[i] = src_u[i]; + p_src_v_16[i] = src_v[i]; + } + + int dst_width_uv = dst_width; + int dst_height_uv = dst_height; + + int dst_y_plane_size = (dst_width) * (dst_height); + int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + align_buffer_page_end(dst_y_8, dst_y_plane_size); + align_buffer_page_end(dst_u_8, dst_uv_plane_size); + align_buffer_page_end(dst_v_8, dst_uv_plane_size); + align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); + align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2); + align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2); + + uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); + uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16); + uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_8, dst_stride_y, dst_u_8, + dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. 
+ for (i = 0; i < benchmark_iterations; ++i) { + I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv, + p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16, + dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16, + dst_stride_uv, dst_width, dst_height, f); + } + + // Expect an exact match. + int max_diff = 0; + for (i = 0; i < dst_y_plane_size; ++i) { + int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + for (i = 0; i < dst_uv_plane_size; ++i) { + int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } - return max_diff; + free_aligned_buffer_page_end(dst_y_8); + free_aligned_buffer_page_end(dst_u_8); + free_aligned_buffer_page_end(dst_v_8); + free_aligned_buffer_page_end(dst_y_16); + free_aligned_buffer_page_end(dst_u_16); + free_aligned_buffer_page_end(dst_v_16); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + free_aligned_buffer_page_end(src_y_16); + free_aligned_buffer_page_end(src_u_16); + free_aligned_buffer_page_end(src_v_16); + + return max_diff; } // The following adjustments in dimensions ensure the scale factor will be // exactly achieved. -// 2 is chroma subsample +// 2 is chroma subsample. 
#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2) #define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2) #define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ - TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) { \ - int diff = TestFilter( \ + TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \ + int diff = I420TestFilter( \ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } \ - TEST_F(LibYUVScaleTest, DISABLED_ScaleDownBy##name##_##filter##_16) { \ - int diff = TestFilter_16( \ + TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \ + int diff = I444TestFilter( \ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ - kFilter##filter, benchmark_iterations_); \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \ + int diff = I420TestFilter_16( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \ + int diff = I444TestFilter_16( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } @@ -321,47 +551,354 @@ TEST_FACTOR(3, 1, 3, 0) #undef DX #define 
TEST_SCALETO1(name, width, height, filter, max_diff) \ - TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \ - int diff = TestFilter(benchmark_width_, benchmark_height_, width, height, \ - kFilter##filter, benchmark_iterations_, \ - disable_cpu_flags_, benchmark_cpu_info_); \ + TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \ + int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \ + height, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \ + int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \ + height, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \ + int diff = I420TestFilter_16( \ + benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \ + int diff = I444TestFilter_16( \ + benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \ + int diff = I420TestFilter(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } \ - TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \ - int diff = TestFilter(width, height, Abs(benchmark_width_), \ - Abs(benchmark_height_), kFilter##filter, \ - benchmark_iterations_, disable_cpu_flags_, \ - benchmark_cpu_info_); \ + 
TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \ + int diff = I444TestFilter(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, \ - DISABLED_##name##To##width##x##height##_##filter##_16) { \ - int diff = TestFilter_16(benchmark_width_, benchmark_height_, width, \ - height, kFilter##filter, benchmark_iterations_); \ + I420##name##From##width##x##height##_##filter##_16) { \ + int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, \ - DISABLED_##name##From##width##x##height##_##filter##_16) { \ - int diff = TestFilter_16(width, height, Abs(benchmark_width_), \ - Abs(benchmark_height_), kFilter##filter, \ - benchmark_iterations_); \ + I444##name##From##width##x##height##_##filter##_16) { \ + int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } // Test scale to a specified size with all 4 filters. 
#define TEST_SCALETO(name, width, height) \ TEST_SCALETO1(name, width, height, None, 0) \ - TEST_SCALETO1(name, width, height, Linear, 0) \ - TEST_SCALETO1(name, width, height, Bilinear, 0) \ - TEST_SCALETO1(name, width, height, Box, 0) + TEST_SCALETO1(name, width, height, Linear, 3) \ + TEST_SCALETO1(name, width, height, Bilinear, 3) \ + TEST_SCALETO1(name, width, height, Box, 3) TEST_SCALETO(Scale, 1, 1) TEST_SCALETO(Scale, 320, 240) -TEST_SCALETO(Scale, 352, 288) TEST_SCALETO(Scale, 569, 480) TEST_SCALETO(Scale, 640, 360) TEST_SCALETO(Scale, 1280, 720) +TEST_SCALETO(Scale, 1920, 1080) #undef TEST_SCALETO1 #undef TEST_SCALETO +#ifdef HAS_SCALEROWDOWN2_SSSE3 +TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { + SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[64]); + SIMD_ALIGNED(uint8_t dst_pixels_c[64]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt)); + memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); + + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + if (!has_ssse3) { + printf("Warning SSSE3 not detected; Skipping test.\n"); + } else { + // TL. + orig_pixels[0] = 255u; + orig_pixels[1] = 0u; + orig_pixels[128 + 0] = 0u; + orig_pixels[128 + 1] = 0u; + // TR. + orig_pixels[2] = 0u; + orig_pixels[3] = 100u; + orig_pixels[128 + 2] = 0u; + orig_pixels[128 + 3] = 0u; + // BL. + orig_pixels[4] = 0u; + orig_pixels[5] = 0u; + orig_pixels[128 + 4] = 50u; + orig_pixels[128 + 5] = 0u; + // BR. + orig_pixels[6] = 0u; + orig_pixels[7] = 0u; + orig_pixels[128 + 6] = 0u; + orig_pixels[128 + 7] = 20u; + // Odd. + orig_pixels[126] = 4u; + orig_pixels[127] = 255u; + orig_pixels[128 + 126] = 16u; + orig_pixels[128 + 127] = 255u; + + // Test regular half size. 
+ ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(133u, dst_pixels_c[63]); + + // Test Odd width version - Last pixel is just 1 horizontal pixel. + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(10u, dst_pixels_c[63]); + + // Test one pixel less, should skip the last pixel. + memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(0u, dst_pixels_c[63]); + + // Test regular half size SSSE3. + ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); + + EXPECT_EQ(64u, dst_pixels_opt[0]); + EXPECT_EQ(25u, dst_pixels_opt[1]); + EXPECT_EQ(13u, dst_pixels_opt[2]); + EXPECT_EQ(5u, dst_pixels_opt[3]); + EXPECT_EQ(0u, dst_pixels_opt[4]); + EXPECT_EQ(133u, dst_pixels_opt[63]); + + // Compare C and SSSE3 match. 
+ ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); + ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); + for (int i = 0; i < 64; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + } +} +#endif // HAS_SCALEROWDOWN2_SSSE3 + +extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +extern "C" void ScaleRowUp2_16_MMI(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); + +TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) { + SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun. + SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); + SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); + + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt)); + memset(dst_pixels_c, 2, sizeof(dst_pixels_c)); + + for (int i = 0; i < 640 * 2 + 1; ++i) { + orig_pixels[i] = i; + } + ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); + } else { + ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); + } +#elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) + int has_mmi = TestCpuFlag(kCpuHasMMI); + if (has_mmi) { + ScaleRowUp2_16_MMI(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); + } else { + ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); + } +#else + ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); +#endif + } + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16); + 
EXPECT_EQ(dst_pixels_c[1279], 800); +} + +extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); + +TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) { + SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]); + SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); + SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); + + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); + + for (int i = 0; i < 2560 * 2; ++i) { + orig_pixels[i] = i; + } + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); + } else { + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); + } +#else + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); +#endif + } + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4); + EXPECT_EQ(dst_pixels_c[1279], 3839); +} + +// Test scaling plane with 8 bit C vs 16 bit C and return maximum pixel +// difference. +// 0 = exact. 
+static int TestPlaneFilter_16(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i; + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int src_stride_y = Abs(src_width); + int dst_y_plane_size = dst_width * dst_height; + int dst_stride_y = dst_width; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_y_16, src_y_plane_size * 2); + align_buffer_page_end(dst_y_8, dst_y_plane_size); + align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); + uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); + uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); + + MemRandomize(src_y, src_y_plane_size); + memset(dst_y_8, 0, dst_y_plane_size); + memset(dst_y_16, 1, dst_y_plane_size * 2); + + for (i = 0; i < src_y_plane_size; ++i) { + p_src_y_16[i] = src_y[i] & 255; + } + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y, + dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + + for (i = 0; i < benchmark_iterations; ++i) { + ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16, + dst_stride_y, dst_width, dst_height, f); + } + + // Expect an exact match. + int max_diff = 0; + for (i = 0; i < dst_y_plane_size; ++i) { + int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(dst_y_8); + free_aligned_buffer_page_end(dst_y_16); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_y_16); + + return max_diff; +} + +// The following adjustments in dimensions ensure the scale factor will be +// exactly achieved. +// 2 is chroma subsample. 
+#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2) +#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2) + +#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ + TEST_F(LibYUVScaleTest, ScalePlaneDownBy##name##_##filter##_16) { \ + int diff = TestPlaneFilter_16( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but +// filtering is different fixed point implementations for SSSE3, Neon and C. +#define TEST_FACTOR(name, nom, denom, boxdiff) \ + TEST_FACTOR1(name, None, nom, denom, 0) \ + TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \ + TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \ + TEST_FACTOR1(name, Box, nom, denom, boxdiff) + +TEST_FACTOR(2, 1, 2, 0) +TEST_FACTOR(4, 1, 4, 0) +TEST_FACTOR(8, 1, 8, 0) +TEST_FACTOR(3by4, 3, 4, 1) +TEST_FACTOR(3by8, 3, 8, 1) +TEST_FACTOR(3, 1, 3, 0) +#undef TEST_FACTOR1 +#undef TEST_FACTOR +#undef SX +#undef DX } // namespace libyuv diff --git a/files/unit_test/testdata/juno.txt b/files/unit_test/testdata/juno.txt index c275be74..dd465272 100644 --- a/files/unit_test/testdata/juno.txt +++ b/files/unit_test/testdata/juno.txt @@ -1,15 +1,15 @@ -Processor : AArch64 Processor rev 0 (aarch64)
-processor : 0
-processor : 1
-processor : 2
-processor : 3
-processor : 4
-processor : 5
-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
-CPU implementer : 0x41
-CPU architecture: AArch64
-CPU variant : 0x0
-CPU part : 0xd07
-CPU revision : 0
-
-Hardware : Juno
+Processor : AArch64 Processor rev 0 (aarch64) +processor : 0 +processor : 1 +processor : 2 +processor : 3 +processor : 4 +processor : 5 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 +CPU implementer : 0x41 +CPU architecture: AArch64 +CPU variant : 0x0 +CPU part : 0xd07 +CPU revision : 0 + +Hardware : Juno diff --git a/files/unit_test/testdata/test0.jpg b/files/unit_test/testdata/test0.jpg Binary files differnew file mode 100644 index 00000000..f4461a81 --- /dev/null +++ b/files/unit_test/testdata/test0.jpg diff --git a/files/unit_test/testdata/test1.jpg b/files/unit_test/testdata/test1.jpg Binary files differnew file mode 100644 index 00000000..a0210e9d --- /dev/null +++ b/files/unit_test/testdata/test1.jpg diff --git a/files/unit_test/testdata/test2.jpg b/files/unit_test/testdata/test2.jpg Binary files differnew file mode 100644 index 00000000..816ca767 --- /dev/null +++ b/files/unit_test/testdata/test2.jpg diff --git a/files/unit_test/testdata/test3.jpg b/files/unit_test/testdata/test3.jpg Binary files differnew file mode 100644 index 00000000..792d91dc --- /dev/null +++ b/files/unit_test/testdata/test3.jpg diff --git a/files/unit_test/testdata/test4.jpg b/files/unit_test/testdata/test4.jpg Binary files differnew file mode 100644 index 00000000..1ef41668 --- /dev/null +++ b/files/unit_test/testdata/test4.jpg diff --git a/files/unit_test/unit_test.cc b/files/unit_test/unit_test.cc index 55297e36..a1ae7ea3 100644 --- a/files/unit_test/unit_test.cc +++ b/files/unit_test/unit_test.cc @@ -17,10 +17,7 @@ #ifdef LIBYUV_USE_GFLAGS #include "gflags/gflags.h" #endif - -// Change this to 1000 for benchmarking. -// TODO(fbarchard): Add command line parsing to pass this as option. -#define BENCHMARK_ITERATIONS 1 +#include "libyuv/cpu_id.h" unsigned int fastrand_seed = 0xfb; @@ -34,19 +31,112 @@ DEFINE_int32(libyuv_cpu_info, "cpu flags for benchmark code. 1 = C, -1 = SIMD"); #else // Disable command line parameters if gflags disabled. 
-static const int32 FLAGS_libyuv_width = 0; -static const int32 FLAGS_libyuv_height = 0; -static const int32 FLAGS_libyuv_repeat = 0; -static const int32 FLAGS_libyuv_flags = 0; -static const int32 FLAGS_libyuv_cpu_info = 0; +static const int32_t FLAGS_libyuv_width = 0; +static const int32_t FLAGS_libyuv_height = 0; +static const int32_t FLAGS_libyuv_repeat = 0; +static const int32_t FLAGS_libyuv_flags = 0; +static const int32_t FLAGS_libyuv_cpu_info = 0; +#endif + +// Test environment variable for disabling CPU features. Any non-zero value +// to disable. Zero ignored to make it easy to set the variable on/off. +#if !defined(__native_client__) && !defined(_M_ARM) +static LIBYUV_BOOL TestEnv(const char* name) { + const char* var = getenv(name); + if (var) { + if (var[0] != '0') { + return LIBYUV_TRUE; + } + } + return LIBYUV_FALSE; +} +#else // nacl does not support getenv(). +static LIBYUV_BOOL TestEnv(const char*) { + return LIBYUV_FALSE; +} #endif +int TestCpuEnv(int cpu_info) { +#if defined(__arm__) || defined(__aarch64__) + if (TestEnv("LIBYUV_DISABLE_NEON")) { + cpu_info &= ~libyuv::kCpuHasNEON; + } +#endif +#if defined(__mips__) && defined(__linux__) + if (TestEnv("LIBYUV_DISABLE_MSA")) { + cpu_info &= ~libyuv::kCpuHasMSA; + } + if (TestEnv("LIBYUV_DISABLE_MMI")) { + cpu_info &= ~libyuv::kCpuHasMMI; + } +#endif +#if !defined(__pnacl__) && !defined(__CLR_VER) && \ + (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ + defined(_M_IX86)) + if (TestEnv("LIBYUV_DISABLE_X86")) { + cpu_info &= ~libyuv::kCpuHasX86; + } + if (TestEnv("LIBYUV_DISABLE_SSE2")) { + cpu_info &= ~libyuv::kCpuHasSSE2; + } + if (TestEnv("LIBYUV_DISABLE_SSSE3")) { + cpu_info &= ~libyuv::kCpuHasSSSE3; + } + if (TestEnv("LIBYUV_DISABLE_SSE41")) { + cpu_info &= ~libyuv::kCpuHasSSE41; + } + if (TestEnv("LIBYUV_DISABLE_SSE42")) { + cpu_info &= ~libyuv::kCpuHasSSE42; + } + if (TestEnv("LIBYUV_DISABLE_AVX")) { + cpu_info &= ~libyuv::kCpuHasAVX; + } + if 
(TestEnv("LIBYUV_DISABLE_AVX2")) { + cpu_info &= ~libyuv::kCpuHasAVX2; + } + if (TestEnv("LIBYUV_DISABLE_ERMS")) { + cpu_info &= ~libyuv::kCpuHasERMS; + } + if (TestEnv("LIBYUV_DISABLE_FMA3")) { + cpu_info &= ~libyuv::kCpuHasFMA3; + } + if (TestEnv("LIBYUV_DISABLE_F16C")) { + cpu_info &= ~libyuv::kCpuHasF16C; + } + if (TestEnv("LIBYUV_DISABLE_AVX512BW")) { + cpu_info &= ~libyuv::kCpuHasAVX512BW; + } + if (TestEnv("LIBYUV_DISABLE_AVX512VL")) { + cpu_info &= ~libyuv::kCpuHasAVX512VL; + } + if (TestEnv("LIBYUV_DISABLE_AVX512VBMI")) { + cpu_info &= ~libyuv::kCpuHasAVX512VBMI; + } + if (TestEnv("LIBYUV_DISABLE_AVX512VBMI2")) { + cpu_info &= ~libyuv::kCpuHasAVX512VBMI2; + } + if (TestEnv("LIBYUV_DISABLE_AVX512VBITALG")) { + cpu_info &= ~libyuv::kCpuHasAVX512VBITALG; + } + if (TestEnv("LIBYUV_DISABLE_AVX512VPOPCNTDQ")) { + cpu_info &= ~libyuv::kCpuHasAVX512VPOPCNTDQ; + } + if (TestEnv("LIBYUV_DISABLE_GFNI")) { + cpu_info &= ~libyuv::kCpuHasGFNI; + } +#endif + if (TestEnv("LIBYUV_DISABLE_ASM")) { + cpu_info = libyuv::kCpuInitialized; + } + return cpu_info; +} + // For quicker unittests, default is 128 x 72. But when benchmarking, // default to 720p. Allow size to specify. // Set flags to -1 for benchmarking to avoid slower C code. 
LibYUVConvertTest::LibYUVConvertTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -90,12 +180,9 @@ LibYUVConvertTest::LibYUVConvertTest() if (FLAGS_libyuv_cpu_info) { benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } - benchmark_pixels_div256_ = - static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * - static_cast<double>(Abs(benchmark_height_)) * - static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) * @@ -105,7 +192,7 @@ LibYUVConvertTest::LibYUVConvertTest() } LibYUVColorTest::LibYUVColorTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -149,12 +236,9 @@ LibYUVColorTest::LibYUVColorTest() if (FLAGS_libyuv_cpu_info) { benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } - benchmark_pixels_div256_ = - static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * - static_cast<double>(Abs(benchmark_height_)) * - static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) * @@ -164,7 +248,7 @@ LibYUVColorTest::LibYUVColorTest() } LibYUVScaleTest::LibYUVScaleTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -208,12 +292,9 @@ 
LibYUVScaleTest::LibYUVScaleTest() if (FLAGS_libyuv_cpu_info) { benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } - benchmark_pixels_div256_ = - static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * - static_cast<double>(Abs(benchmark_height_)) * - static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) * @@ -223,7 +304,7 @@ LibYUVScaleTest::LibYUVScaleTest() } LibYUVRotateTest::LibYUVRotateTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -267,12 +348,9 @@ LibYUVRotateTest::LibYUVRotateTest() if (FLAGS_libyuv_cpu_info) { benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } - benchmark_pixels_div256_ = - static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * - static_cast<double>(Abs(benchmark_height_)) * - static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) * @@ -282,7 +360,7 @@ LibYUVRotateTest::LibYUVRotateTest() } LibYUVPlanarTest::LibYUVPlanarTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -326,12 +404,9 @@ LibYUVPlanarTest::LibYUVPlanarTest() if (FLAGS_libyuv_cpu_info) { benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } - benchmark_pixels_div256_ = - static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * - 
static_cast<double>(Abs(benchmark_height_)) * - static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) * @@ -341,7 +416,7 @@ LibYUVPlanarTest::LibYUVPlanarTest() } LibYUVBaseTest::LibYUVBaseTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -385,12 +460,65 @@ LibYUVBaseTest::LibYUVBaseTest() if (FLAGS_libyuv_cpu_info) { benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } - benchmark_pixels_div256_ = + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); + benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) * static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); + 1279.0) / + 1280.0); +} + +LibYUVCompareTest::LibYUVCompareTest() + : benchmark_iterations_(1), + benchmark_width_(128), + benchmark_height_(72), + disable_cpu_flags_(1), + benchmark_cpu_info_(-1) { + const char* repeat = getenv("LIBYUV_REPEAT"); + if (repeat) { + benchmark_iterations_ = atoi(repeat); // NOLINT + } + if (FLAGS_libyuv_repeat) { + benchmark_iterations_ = FLAGS_libyuv_repeat; + } + if (benchmark_iterations_ > 1) { + benchmark_width_ = 1280; + benchmark_height_ = 720; + } + const char* width = getenv("LIBYUV_WIDTH"); + if (width) { + benchmark_width_ = atoi(width); // NOLINT + } + if (FLAGS_libyuv_width) { + benchmark_width_ = FLAGS_libyuv_width; + } + const char* height = getenv("LIBYUV_HEIGHT"); + if (height) { + benchmark_height_ = atoi(height); // NOLINT + } + if (FLAGS_libyuv_height) { + 
benchmark_height_ = FLAGS_libyuv_height; + } + const char* cpu_flags = getenv("LIBYUV_FLAGS"); + if (cpu_flags) { + disable_cpu_flags_ = atoi(cpu_flags); // NOLINT + } + if (FLAGS_libyuv_flags) { + disable_cpu_flags_ = FLAGS_libyuv_flags; + } + const char* cpu_info = getenv("LIBYUV_CPU_INFO"); + if (cpu_info) { + benchmark_cpu_info_ = atoi(cpu_flags); // NOLINT + } + if (FLAGS_libyuv_cpu_info) { + benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; + } + disable_cpu_flags_ = TestCpuEnv(disable_cpu_flags_); + benchmark_cpu_info_ = TestCpuEnv(benchmark_cpu_info_); + libyuv::MaskCpuFlags(benchmark_cpu_info_); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) * diff --git a/files/unit_test/unit_test.h b/files/unit_test/unit_test.h index f7d60a76..87907fa1 100644 --- a/files/unit_test/unit_test.h +++ b/files/unit_test/unit_test.h @@ -36,6 +36,9 @@ static __inline int Abs(int v) { return v >= 0 ? v : -v; } +static __inline float FAbs(float v) { + return v >= 0 ? 
v : -v; +} #define OFFBY 0 // Scaling uses 16.16 fixed point to step thru the source image, so a @@ -66,17 +69,15 @@ static inline bool SizeValid(int src_width, return true; } -#define align_buffer_page_end(var, size) \ - uint8* var; \ - uint8* var##_mem; \ - var##_mem = reinterpret_cast<uint8*>(malloc(((size) + 4095 + 63) & ~4095)); \ - var = (uint8*)((intptr_t)(var##_mem + (((size) + 4095 + 63) & ~4095) - \ - (size)) & \ - ~63); +#define align_buffer_page_end(var, size) \ + uint8_t* var##_mem = \ + reinterpret_cast<uint8_t*>(malloc(((size) + 4095 + 63) & ~4095)); \ + uint8_t* var = reinterpret_cast<uint8_t*>( \ + (intptr_t)(var##_mem + (((size) + 4095 + 63) & ~4095) - (size)) & ~63) #define free_aligned_buffer_page_end(var) \ free(var##_mem); \ - var = 0; + var = 0 #ifdef WIN32 static inline double get_time() { @@ -110,10 +111,10 @@ inline int fastrand() { return static_cast<int>((fastrand_seed >> 16) & 0xffff); } -static inline void MemRandomize(uint8* dst, int64 len) { - int64 i; +static inline void MemRandomize(uint8_t* dst, int64_t len) { + int64_t i; for (i = 0; i < len - 1; i += 2) { - *reinterpret_cast<uint16*>(dst) = fastrand(); + *reinterpret_cast<uint16_t*>(dst) = fastrand(); dst += 2; } for (; i < len; ++i) { @@ -125,10 +126,9 @@ class LibYUVColorTest : public ::testing::Test { protected: LibYUVColorTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. - int benchmark_pixels_div256_; // Total pixels to benchmark / 256. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. 
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -138,10 +138,9 @@ class LibYUVConvertTest : public ::testing::Test { protected: LibYUVConvertTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. - int benchmark_pixels_div256_; // Total pixels to benchmark / 256. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -151,10 +150,9 @@ class LibYUVScaleTest : public ::testing::Test { protected: LibYUVScaleTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. - int benchmark_pixels_div256_; // Total pixels to benchmark / 256. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -164,10 +162,9 @@ class LibYUVRotateTest : public ::testing::Test { protected: LibYUVRotateTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. 
- int benchmark_pixels_div256_; // Total pixels to benchmark / 256. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -177,10 +174,9 @@ class LibYUVPlanarTest : public ::testing::Test { protected: LibYUVPlanarTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. - int benchmark_pixels_div256_; // Total pixels to benchmark / 256. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -190,10 +186,21 @@ class LibYUVBaseTest : public ::testing::Test { protected: LibYUVBaseTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. - int benchmark_pixels_div256_; // Total pixels to benchmark / 256. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. + int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. 
+ int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. +}; + +class LibYUVCompareTest : public ::testing::Test { + protected: + LibYUVCompareTest(); + + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. diff --git a/files/unit_test/video_common_test.cc b/files/unit_test/video_common_test.cc index f16b6772..a84206a2 100644 --- a/files/unit_test/video_common_test.cc +++ b/files/unit_test/video_common_test.cc @@ -18,15 +18,12 @@ namespace libyuv { // Tests FourCC codes in video common, which are used for ConvertToI420(). -static bool TestValidChar(uint32 onecc) { - if ((onecc >= '0' && onecc <= '9') || (onecc >= 'A' && onecc <= 'Z') || - (onecc >= 'a' && onecc <= 'z') || (onecc == ' ') || (onecc == 0xff)) { - return true; - } - return false; +static bool TestValidChar(uint32_t onecc) { + return (onecc >= '0' && onecc <= '9') || (onecc >= 'A' && onecc <= 'Z') || + (onecc >= 'a' && onecc <= 'z') || (onecc == ' ') || (onecc == 0xff); } -static bool TestValidFourCC(uint32 fourcc, int bpp) { +static bool TestValidFourCC(uint32_t fourcc, int bpp) { if (!TestValidChar(fourcc & 0xff) || !TestValidChar((fourcc >> 8) & 0xff) || !TestValidChar((fourcc >> 16) & 0xff) || !TestValidChar((fourcc >> 24) & 0xff)) { @@ -39,23 +36,23 @@ static bool TestValidFourCC(uint32 fourcc, int bpp) { } TEST_F(LibYUVBaseTest, TestCanonicalFourCC) { - EXPECT_EQ(static_cast<uint32>(FOURCC_I420), CanonicalFourCC(FOURCC_IYUV)); - EXPECT_EQ(static_cast<uint32>(FOURCC_I420), CanonicalFourCC(FOURCC_YU12)); - EXPECT_EQ(static_cast<uint32>(FOURCC_I422), CanonicalFourCC(FOURCC_YU16)); - EXPECT_EQ(static_cast<uint32>(FOURCC_I444), 
CanonicalFourCC(FOURCC_YU24)); - EXPECT_EQ(static_cast<uint32>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUYV)); - EXPECT_EQ(static_cast<uint32>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUVS)); - EXPECT_EQ(static_cast<uint32>(FOURCC_UYVY), CanonicalFourCC(FOURCC_HDYC)); - EXPECT_EQ(static_cast<uint32>(FOURCC_UYVY), CanonicalFourCC(FOURCC_2VUY)); - EXPECT_EQ(static_cast<uint32>(FOURCC_MJPG), CanonicalFourCC(FOURCC_JPEG)); - EXPECT_EQ(static_cast<uint32>(FOURCC_MJPG), CanonicalFourCC(FOURCC_DMB1)); - EXPECT_EQ(static_cast<uint32>(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3)); - EXPECT_EQ(static_cast<uint32>(FOURCC_24BG), CanonicalFourCC(FOURCC_BGR3)); - EXPECT_EQ(static_cast<uint32>(FOURCC_BGRA), CanonicalFourCC(FOURCC_CM32)); - EXPECT_EQ(static_cast<uint32>(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24)); - EXPECT_EQ(static_cast<uint32>(FOURCC_RGBO), CanonicalFourCC(FOURCC_L555)); - EXPECT_EQ(static_cast<uint32>(FOURCC_RGBP), CanonicalFourCC(FOURCC_L565)); - EXPECT_EQ(static_cast<uint32>(FOURCC_RGBO), CanonicalFourCC(FOURCC_5551)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_I420), CanonicalFourCC(FOURCC_IYUV)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_I420), CanonicalFourCC(FOURCC_YU12)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_I422), CanonicalFourCC(FOURCC_YU16)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_I444), CanonicalFourCC(FOURCC_YU24)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUYV)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUVS)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_UYVY), CanonicalFourCC(FOURCC_HDYC)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_UYVY), CanonicalFourCC(FOURCC_2VUY)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_MJPG), CanonicalFourCC(FOURCC_JPEG)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_MJPG), CanonicalFourCC(FOURCC_DMB1)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_24BG), CanonicalFourCC(FOURCC_BGR3)); + 
EXPECT_EQ(static_cast<uint32_t>(FOURCC_BGRA), CanonicalFourCC(FOURCC_CM32)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_RGBO), CanonicalFourCC(FOURCC_L555)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_RGBP), CanonicalFourCC(FOURCC_L565)); + EXPECT_EQ(static_cast<uint32_t>(FOURCC_RGBO), CanonicalFourCC(FOURCC_5551)); } TEST_F(LibYUVBaseTest, TestFourCC) { @@ -73,12 +70,17 @@ TEST_F(LibYUVBaseTest, TestFourCC) { EXPECT_TRUE(TestValidFourCC(FOURCC_ARGB, FOURCC_BPP_ARGB)); EXPECT_TRUE(TestValidFourCC(FOURCC_BGRA, FOURCC_BPP_BGRA)); EXPECT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR)); + EXPECT_TRUE(TestValidFourCC(FOURCC_AR30, FOURCC_BPP_AR30)); + EXPECT_TRUE(TestValidFourCC(FOURCC_AB30, FOURCC_BPP_AB30)); EXPECT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG)); EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW)); EXPECT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA)); EXPECT_TRUE(TestValidFourCC(FOURCC_RGBP, FOURCC_BPP_RGBP)); EXPECT_TRUE(TestValidFourCC(FOURCC_RGBO, FOURCC_BPP_RGBO)); EXPECT_TRUE(TestValidFourCC(FOURCC_R444, FOURCC_BPP_R444)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H420, FOURCC_BPP_H420)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H422, FOURCC_BPP_H422)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H010, FOURCC_BPP_H010)); EXPECT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG)); EXPECT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12)); EXPECT_TRUE(TestValidFourCC(FOURCC_YV16, FOURCC_BPP_YV16)); |