diff options
author | Frank Barchard <fbarchard@google.com> | 2016-11-07 17:37:23 -0800 |
---|---|---|
committer | Frank Barchard <fbarchard@google.com> | 2016-11-07 17:37:23 -0800 |
commit | e62309f2591d8b87acae5f4560ab9eeed8f91471 (patch) | |
tree | e3315107bf2f96276ecb00e6212ee518de60c489 /util | |
parent | f2c27dafa2950510ba767cd59937ddf5d1974937 (diff) | |
download | libyuv-e62309f2591d8b87acae5f4560ab9eeed8f91471.tar.gz |
clang-format libyuv
BUG=libyuv:654
R=kjellander@chromium.org
Review URL: https://codereview.chromium.org/2469353005 .
Diffstat (limited to 'util')
-rw-r--r-- | util/compare.cc | 10 | ||||
-rw-r--r-- | util/convert.cc | 141 | ||||
-rw-r--r-- | util/psnr.cc | 232 | ||||
-rw-r--r-- | util/psnr_main.cc | 234 | ||||
-rw-r--r-- | util/ssim.cc | 163 | ||||
-rw-r--r-- | util/ssim.h | 6 |
6 files changed, 395 insertions, 391 deletions
diff --git a/util/compare.cc b/util/compare.cc index c36c0fa5..ef0beefa 100644 --- a/util/compare.cc +++ b/util/compare.cc @@ -39,10 +39,12 @@ int main(int argc, char** argv) { int amt2 = 0; do { amt1 = static_cast<int>(fread(buf1, 1, kBlockSize, fin1)); - if (amt1 > 0) hash1 = libyuv::HashDjb2(buf1, amt1, hash1); + if (amt1 > 0) + hash1 = libyuv::HashDjb2(buf1, amt1, hash1); if (fin2) { amt2 = static_cast<int>(fread(buf2, 1, kBlockSize, fin2)); - if (amt2 > 0) hash2 = libyuv::HashDjb2(buf2, amt2, hash2); + if (amt2 > 0) + hash2 = libyuv::HashDjb2(buf2, amt2, hash2); int amt_min = (amt1 < amt2) ? amt1 : amt2; size_min += amt_min; sum_square_err += libyuv::ComputeSumSquareError(buf1, buf2, amt_min); @@ -52,8 +54,8 @@ int main(int argc, char** argv) { printf("hash1 %x", hash1); if (fin2) { printf(", hash2 %x", hash2); - double mse = static_cast<double>(sum_square_err) / - static_cast<double>(size_min); + double mse = + static_cast<double>(sum_square_err) / static_cast<double>(size_min); printf(", mse %.2f", mse); double psnr = libyuv::SumSquareErrorToPsnr(sum_square_err, size_min); printf(", psnr %.2f\n", psnr); diff --git a/util/convert.cc b/util/convert.cc index 5f071416..acaf43ad 100644 --- a/util/convert.cc +++ b/util/convert.cc @@ -29,13 +29,13 @@ bool verbose = false; bool attenuate = false; bool unattenuate = false; int image_width = 0, image_height = 0; // original width and height -int dst_width = 0, dst_height = 0; // new width and height +int dst_width = 0, dst_height = 0; // new width and height int fileindex_org = 0; // argv argument contains the original file name. int fileindex_rec = 0; // argv argument contains the reconstructed file name. -int num_rec = 0; // Number of reconstructed images. -int num_skip_org = 0; // Number of frames to skip in original. -int num_frames = 0; // Number of frames to convert. -int filter = 1; // Bilinear filter for scaling. +int num_rec = 0; // Number of reconstructed images. +int num_skip_org = 0; // Number of frames to skip in original. +int num_frames = 0; // Number of frames to convert. +int filter = 1; // Bilinear filter for scaling. static __inline uint32 Abs(int32 v) { return v >= 0 ? v : -v; @@ -48,8 +48,8 @@ bool ExtractResolutionFromFilename(const char* name, // Isolate the .width_height. section of the filename by searching for a // dot or underscore followed by a digit. for (int i = 0; name[i]; ++i) { - if ((name[i] == '.' || name[i] == '_') && - name[i + 1] >= '0' && name[i + 1] <= '9') { + if ((name[i] == '.' || name[i] == '_') && name[i + 1] >= '0' && + name[i + 1] <= '9') { int n = sscanf(name + i + 1, "%dx%d", width_ptr, height_ptr); // NOLINT if (2 == n) { return true; @@ -59,13 +59,14 @@ bool ExtractResolutionFromFilename(const char* name, return false; } -void PrintHelp(const char * program) { +void PrintHelp(const char* program) { printf("%s [-options] src_argb.raw dst_yuv.raw\n", program); - printf(" -s <width> <height> .... specify source resolution. " - "Optional if name contains\n" - " resolution (ie. " - "name.1920x800_24Hz_P420.yuv)\n" - " Negative value mirrors.\n"); + printf( + " -s <width> <height> .... specify source resolution. " + "Optional if name contains\n" + " resolution (ie. " + "name.1920x800_24Hz_P420.yuv)\n" + " Negative value mirrors.\n"); printf(" -d <width> <height> .... specify destination resolution.\n"); printf(" -f <filter> ............ 0 = point, 1 = bilinear (default).\n"); printf(" -skip <src_argb> ....... Number of frame to skip of src_argb\n"); @@ -78,7 +79,8 @@ void PrintHelp(const char * program) { } void ParseOptions(int argc, const char* argv[]) { - if (argc <= 1) PrintHelp(argv[0]); + if (argc <= 1) + PrintHelp(argv[0]); for (int c = 1; c < argc; ++c) { if (!strcmp(argv[c], "-v")) { verbose = true; @@ -89,17 +91,17 @@ void ParseOptions(int argc, const char* argv[]) { } else if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) { PrintHelp(argv[0]); } else if (!strcmp(argv[c], "-s") && c + 2 < argc) { - image_width = atoi(argv[++c]); // NOLINT - image_height = atoi(argv[++c]); // NOLINT + image_width = atoi(argv[++c]); // NOLINT + image_height = atoi(argv[++c]); // NOLINT } else if (!strcmp(argv[c], "-d") && c + 2 < argc) { - dst_width = atoi(argv[++c]); // NOLINT - dst_height = atoi(argv[++c]); // NOLINT + dst_width = atoi(argv[++c]); // NOLINT + dst_height = atoi(argv[++c]); // NOLINT } else if (!strcmp(argv[c], "-skip") && c + 1 < argc) { - num_skip_org = atoi(argv[++c]); // NOLINT + num_skip_org = atoi(argv[++c]); // NOLINT } else if (!strcmp(argv[c], "-frames") && c + 1 < argc) { - num_frames = atoi(argv[++c]); // NOLINT + num_frames = atoi(argv[++c]); // NOLINT } else if (!strcmp(argv[c], "-f") && c + 1 < argc) { - filter = atoi(argv[++c]); // NOLINT + filter = atoi(argv[++c]); // NOLINT } else if (argv[c][0] == '-') { fprintf(stderr, "Unknown option. %s\n", argv[c]); } else if (fileindex_org == 0) { @@ -127,11 +129,9 @@ void ParseOptions(int argc, const char* argv[]) { int org_width, org_height; int rec_width, rec_height; bool org_res_avail = ExtractResolutionFromFilename(argv[fileindex_org], - &org_width, - &org_height); + &org_width, &org_height); bool rec_res_avail = ExtractResolutionFromFilename(argv[fileindex_rec], - &rec_width, - &rec_height); + &rec_width, &rec_height); if (image_width == 0 || image_height == 0) { if (org_res_avail) { image_width = org_width; @@ -158,10 +158,14 @@ void ParseOptions(int argc, const char* argv[]) { static const int kTileX = 32; static const int kTileY = 32; -static int TileARGBScale(const uint8* src_argb, int src_stride_argb, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, +static int TileARGBScale(const uint8* src_argb, + int src_stride_argb, + int src_width, + int src_height, + uint8* dst_argb, + int dst_stride_argb, + int dst_width, + int dst_height, libyuv::FilterMode filtering) { for (int y = 0; y < dst_height; y += kTileY) { for (int x = 0; x < dst_width; x += kTileX) { @@ -173,11 +177,10 @@ static int TileARGBScale(const uint8* src_argb, int src_stride_argb, if (y + clip_height > dst_height) { clip_height = dst_height - y; } - int r = libyuv::ARGBScaleClip(src_argb, src_stride_argb, - src_width, src_height, - dst_argb, dst_stride_argb, - dst_width, dst_height, - x, y, clip_width, clip_height, filtering); + int r = libyuv::ARGBScaleClip(src_argb, src_stride_argb, src_width, + src_height, dst_argb, dst_stride_argb, + dst_width, dst_height, x, y, clip_width, + clip_height, filtering); if (r) { return r; } @@ -197,8 +200,8 @@ int main(int argc, const char* argv[]) { } // Open all files to convert to - FILE** file_rec = new FILE* [num_rec]; - memset(file_rec, 0, num_rec * sizeof(FILE*)); // NOLINT + FILE** file_rec = new FILE*[num_rec]; + memset(file_rec, 0, num_rec * sizeof(FILE*)); // NOLINT for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { file_rec[cur_rec] = fopen(argv[fileindex_rec + cur_rec], "wb"); if (file_rec[cur_rec] == NULL) { @@ -222,8 +225,8 @@ int main(int argc, const char* argv[]) { // Input is YUV if (org_is_yuv) { const int y_size = Abs(image_width) * Abs(image_height); - const int uv_size = ((Abs(image_width) + 1) / 2) * - ((Abs(image_height) + 1) / 2); + const int uv_size = + ((Abs(image_width) + 1) / 2) * ((Abs(image_height) + 1) / 2); org_size = y_size + 2 * uv_size; // YUV original. } @@ -233,8 +236,8 @@ int main(int argc, const char* argv[]) { const size_t total_size = y_size + 2 * uv_size; #if defined(_MSC_VER) _fseeki64(file_org, - static_cast<__int64>(num_skip_org) * - static_cast<__int64>(org_size), SEEK_SET); + static_cast<__int64>(num_skip_org) * static_cast<__int64>(org_size), + SEEK_SET); #else fseek(file_org, num_skip_org * total_size, SEEK_SET); #endif @@ -256,18 +259,18 @@ int main(int argc, const char* argv[]) { } if (verbose) { - printf("Size: %dx%d to %dx%d\n", image_width, image_height, - dst_width, dst_height); + printf("Size: %dx%d to %dx%d\n", image_width, image_height, dst_width, + dst_height); } int number_of_frames; - for (number_of_frames = 0; ; ++number_of_frames) { + for (number_of_frames = 0;; ++number_of_frames) { if (num_frames && number_of_frames >= num_frames) break; // Load original YUV or ARGB frame. - size_t bytes_org = fread(ch_org, sizeof(uint8), - static_cast<size_t>(org_size), file_org); + size_t bytes_org = + fread(ch_org, sizeof(uint8), static_cast<size_t>(org_size), file_org); if (bytes_org < static_cast<size_t>(org_size)) break; @@ -290,22 +293,17 @@ int main(int argc, const char* argv[]) { int half_src_height = (src_height + 1) / 2; int half_dst_width = (dst_width + 1) / 2; int half_dst_height = (dst_height + 1) / 2; - I420Scale(ch_org, src_width, - ch_org + src_width * src_height, half_src_width, - ch_org + src_width * src_height + - half_src_width * half_src_height, half_src_width, - image_width, image_height, - ch_rec, dst_width, - ch_rec + dst_width * dst_height, half_dst_width, - ch_rec + dst_width * dst_height + - half_dst_width * half_dst_height, half_dst_width, - dst_width, dst_height, - static_cast<libyuv::FilterMode>(filter)); + I420Scale( + ch_org, src_width, ch_org + src_width * src_height, half_src_width, + ch_org + src_width * src_height + half_src_width * half_src_height, + half_src_width, image_width, image_height, ch_rec, dst_width, + ch_rec + dst_width * dst_height, half_dst_width, + ch_rec + dst_width * dst_height + half_dst_width * half_dst_height, + half_dst_width, dst_width, dst_height, + static_cast<libyuv::FilterMode>(filter)); } else { - TileARGBScale(ch_org, Abs(image_width) * 4, - image_width, image_height, - ch_dst, dst_width * 4, - dst_width, dst_height, + TileARGBScale(ch_org, Abs(image_width) * 4, image_width, image_height, + ch_dst, dst_width * 4, dst_width, dst_height, static_cast<libyuv::FilterMode>(filter)); } bool rec_is_yuv = strstr(argv[fileindex_rec + cur_rec], "_P420.") != NULL; @@ -321,25 +319,24 @@ int main(int argc, const char* argv[]) { if (!org_is_yuv && rec_is_yuv) { int half_width = (dst_width + 1) / 2; int half_height = (dst_height + 1) / 2; - libyuv::ARGBToI420(ch_dst, dst_width * 4, - ch_rec, dst_width, - ch_rec + dst_width * dst_height, half_width, - ch_rec + dst_width * dst_height + - half_width * half_height, half_width, - dst_width, dst_height); + libyuv::ARGBToI420( + ch_dst, dst_width * 4, ch_rec, dst_width, + ch_rec + dst_width * dst_height, half_width, + ch_rec + dst_width * dst_height + half_width * half_height, + half_width, dst_width, dst_height); } // Output YUV or ARGB frame. if (rec_is_yuv) { - size_t bytes_rec = fwrite(ch_rec, sizeof(uint8), - static_cast<size_t>(total_size), - file_rec[cur_rec]); + size_t bytes_rec = + fwrite(ch_rec, sizeof(uint8), static_cast<size_t>(total_size), + file_rec[cur_rec]); if (bytes_rec < static_cast<size_t>(total_size)) break; } else { - size_t bytes_rec = fwrite(ch_dst, sizeof(uint8), - static_cast<size_t>(dst_size), - file_rec[cur_rec]); + size_t bytes_rec = + fwrite(ch_dst, sizeof(uint8), static_cast<size_t>(dst_size), + file_rec[cur_rec]); if (bytes_rec < static_cast<size_t>(dst_size)) break; } diff --git a/util/psnr.cc b/util/psnr.cc index 52b04bd5..27f876c0 100644 --- a/util/psnr.cc +++ b/util/psnr.cc @@ -27,7 +27,7 @@ typedef unsigned __int64 uint64; #else // COMPILER_MSVC #if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) typedef unsigned long uint64; // NOLINT -#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) +#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) typedef unsigned long long uint64; // NOLINT #endif // __LP64__ #endif // _MSC_VER @@ -39,85 +39,81 @@ typedef unsigned long long uint64; // NOLINT !defined(__aarch64__) #define HAS_SUMSQUAREERROR_NEON static uint32 SumSquareError_NEON(const uint8* src_a, - const uint8* src_b, int count) { + const uint8* src_b, + int count) { volatile uint32 sse; - asm volatile ( - "vmov.u8 q7, #0 \n" - "vmov.u8 q9, #0 \n" - "vmov.u8 q8, #0 \n" - "vmov.u8 q10, #0 \n" + asm volatile( + "vmov.u8 q7, #0 \n" + "vmov.u8 q9, #0 \n" + "vmov.u8 q8, #0 \n" + "vmov.u8 q10, #0 \n" - "1: \n" - "vld1.u8 {q0}, [%0]! \n" - "vld1.u8 {q1}, [%1]! \n" - "vsubl.u8 q2, d0, d2 \n" - "vsubl.u8 q3, d1, d3 \n" - "vmlal.s16 q7, d4, d4 \n" - "vmlal.s16 q8, d6, d6 \n" - "vmlal.s16 q8, d5, d5 \n" - "vmlal.s16 q10, d7, d7 \n" - "subs %2, %2, #16 \n" - "bhi 1b \n" + "1: \n" + "vld1.u8 {q0}, [%0]! \n" + "vld1.u8 {q1}, [%1]! \n" + "vsubl.u8 q2, d0, d2 \n" + "vsubl.u8 q3, d1, d3 \n" + "vmlal.s16 q7, d4, d4 \n" + "vmlal.s16 q8, d6, d6 \n" + "vmlal.s16 q8, d5, d5 \n" + "vmlal.s16 q10, d7, d7 \n" + "subs %2, %2, #16 \n" + "bhi 1b \n" - "vadd.u32 q7, q7, q8 \n" - "vadd.u32 q9, q9, q10 \n" - "vadd.u32 q10, q7, q9 \n" - "vpaddl.u32 q1, q10 \n" - "vadd.u64 d0, d2, d3 \n" - "vmov.32 %3, d0[0] \n" - : "+r"(src_a), - "+r"(src_b), - "+r"(count), - "=r"(sse) - : - : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10"); + "vadd.u32 q7, q7, q8 \n" + "vadd.u32 q9, q9, q10 \n" + "vadd.u32 q10, q7, q9 \n" + "vpaddl.u32 q1, q10 \n" + "vadd.u64 d0, d2, d3 \n" + "vmov.32 %3, d0[0] \n" + : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse) + : + : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10"); return sse; } #elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #define HAS_SUMSQUAREERROR_NEON static uint32 SumSquareError_NEON(const uint8* src_a, - const uint8* src_b, int count) { + const uint8* src_b, + int count) { volatile uint32 sse; - asm volatile ( - "eor v16.16b, v16.16b, v16.16b \n" - "eor v18.16b, v18.16b, v18.16b \n" - "eor v17.16b, v17.16b, v17.16b \n" - "eor v19.16b, v19.16b, v19.16b \n" + asm volatile( + "eor v16.16b, v16.16b, v16.16b \n" + "eor v18.16b, v18.16b, v18.16b \n" + "eor v17.16b, v17.16b, v17.16b \n" + "eor v19.16b, v19.16b, v19.16b \n" - "1: \n" - "ld1 {v0.16b}, [%0], #16 \n" - "ld1 {v1.16b}, [%1], #16 \n" - "subs %w2, %w2, #16 \n" - "usubl v2.8h, v0.8b, v1.8b \n" - "usubl2 v3.8h, v0.16b, v1.16b \n" - "smlal v16.4s, v2.4h, v2.4h \n" - "smlal v17.4s, v3.4h, v3.4h \n" - "smlal2 v18.4s, v2.8h, v2.8h \n" - "smlal2 v19.4s, v3.8h, v3.8h \n" - "b.gt 1b \n" + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" + "ld1 {v1.16b}, [%1], #16 \n" + "subs %w2, %w2, #16 \n" + "usubl v2.8h, v0.8b, v1.8b \n" + "usubl2 v3.8h, v0.16b, v1.16b \n" + "smlal v16.4s, v2.4h, v2.4h \n" + "smlal v17.4s, v3.4h, v3.4h \n" + "smlal2 v18.4s, v2.8h, v2.8h \n" + "smlal2 v19.4s, v3.8h, v3.8h \n" + "b.gt 1b \n" - "add v16.4s, v16.4s, v17.4s \n" - "add v18.4s, v18.4s, v19.4s \n" - "add v19.4s, v16.4s, v18.4s \n" - "addv s0, v19.4s \n" - "fmov %w3, s0 \n" - : "+r"(src_a), - "+r"(src_b), - "+r"(count), - "=r"(sse) - : - : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); + "add v16.4s, v16.4s, v17.4s \n" + "add v18.4s, v18.4s, v19.4s \n" + "add v19.4s, v16.4s, v18.4s \n" + "addv s0, v19.4s \n" + "fmov %w3, s0 \n" + : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse) + : + : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); return sse; } #elif !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) #define HAS_SUMSQUAREERROR_SSE2 -__declspec(naked) -static uint32 SumSquareError_SSE2(const uint8* /*src_a*/, - const uint8* /*src_b*/, int /*count*/) { +__declspec(naked) static uint32 SumSquareError_SSE2(const uint8* /*src_a*/, + const uint8* /*src_b*/, + int /*count*/) { __asm { - mov eax, [esp + 4] // src_a - mov edx, [esp + 8] // src_b - mov ecx, [esp + 12] // count + mov eax, [esp + 4] // src_a + mov edx, [esp + 8] // src_b + mov ecx, [esp + 12] // count pxor xmm0, xmm0 pxor xmm5, xmm5 sub edx, eax @@ -151,47 +147,49 @@ static uint32 SumSquareError_SSE2(const uint8* /*src_a*/, #elif !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) #define HAS_SUMSQUAREERROR_SSE2 static uint32 SumSquareError_SSE2(const uint8* src_a, - const uint8* src_b, int count) { + const uint8* src_b, + int count) { uint32 sse; - asm volatile ( // NOLINT - "pxor %%xmm0,%%xmm0 \n" - "pxor %%xmm5,%%xmm5 \n" - "sub %0,%1 \n" + asm volatile( // NOLINT + "pxor %%xmm0,%%xmm0 \n" + "pxor %%xmm5,%%xmm5 \n" + "sub %0,%1 \n" - "1: \n" - "movdqu (%0),%%xmm1 \n" - "movdqu (%0,%1,1),%%xmm2 \n" - "lea 0x10(%0),%0 \n" - "movdqu %%xmm1,%%xmm3 \n" - "psubusb %%xmm2,%%xmm1 \n" - "psubusb %%xmm3,%%xmm2 \n" - "por %%xmm2,%%xmm1 \n" - "movdqu %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "punpckhbw %%xmm5,%%xmm2 \n" - "pmaddwd %%xmm1,%%xmm1 \n" - "pmaddwd %%xmm2,%%xmm2 \n" - "paddd %%xmm1,%%xmm0 \n" - "paddd %%xmm2,%%xmm0 \n" - "sub $0x10,%2 \n" - "ja 1b \n" + "1: \n" + "movdqu (%0),%%xmm1 \n" + "movdqu (%0,%1,1),%%xmm2 \n" + "lea 0x10(%0),%0 \n" + "movdqu %%xmm1,%%xmm3 \n" + "psubusb %%xmm2,%%xmm1 \n" + "psubusb %%xmm3,%%xmm2 \n" + "por %%xmm2,%%xmm1 \n" + "movdqu %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpckhbw %%xmm5,%%xmm2 \n" + "pmaddwd %%xmm1,%%xmm1 \n" + "pmaddwd %%xmm2,%%xmm2 \n" + "paddd %%xmm1,%%xmm0 \n" + "paddd %%xmm2,%%xmm0 \n" + "sub $0x10,%2 \n" + "ja 1b \n" - "pshufd $0xee,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "pshufd $0x1,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "movd %%xmm0,%3 \n" + "pshufd $0xee,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "pshufd $0x1,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "movd %%xmm0,%3 \n" - : "+r"(src_a), // %0 - "+r"(src_b), // %1 - "+r"(count), // %2 - "=g"(sse) // %3 - : - : "memory", "cc" + : "+r"(src_a), // %0 + "+r"(src_b), // %1 + "+r"(count), // %2 + "=g"(sse) // %3 + : + : "memory", "cc" #if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" + , + "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" #endif - ); // NOLINT + ); // NOLINT return sse; } #endif // LIBYUV_DISABLE_X86 etc @@ -199,20 +197,22 @@ static uint32 SumSquareError_SSE2(const uint8* src_a, #if defined(HAS_SUMSQUAREERROR_SSE2) #if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__) static __inline void __cpuid(int cpu_info[4], int info_type) { - asm volatile ( // NOLINT - "mov %%ebx, %%edi \n" - "cpuid \n" - "xchg %%edi, %%ebx \n" - : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) - : "a"(info_type)); + asm volatile( // NOLINT + "mov %%ebx, %%edi \n" + "cpuid \n" + "xchg %%edi, %%ebx \n" + : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), + "=d"(cpu_info[3]) + : "a"(info_type)); } // For gcc/clang but not clangcl. -#elif (defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER) +#elif !defined(_MSC_VER) && (defined(__i386__) || defined(__x86_64__)) static __inline void __cpuid(int cpu_info[4], int info_type) { - asm volatile ( // NOLINT - "cpuid \n" - : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) - : "a"(info_type)); + asm volatile( // NOLINT + "cpuid \n" + : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), + "=d"(cpu_info[3]) + : "a"(info_type)); } #endif @@ -229,7 +229,8 @@ static int CpuHasSSE2() { #endif // HAS_SUMSQUAREERROR_SSE2 static uint32 SumSquareError_C(const uint8* src_a, - const uint8* src_b, int count) { + const uint8* src_b, + int count) { uint32 sse = 0u; for (int x = 0; x < count; ++x) { int diff = src_a[x] - src_b[x]; @@ -239,9 +240,10 @@ static uint32 SumSquareError_C(const uint8* src_a, } double ComputeSumSquareError(const uint8* src_a, - const uint8* src_b, int count) { - uint32 (*SumSquareError)(const uint8* src_a, - const uint8* src_b, int count) = SumSquareError_C; + const uint8* src_b, + int count) { + uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = + SumSquareError_C; #if defined(HAS_SUMSQUAREERROR_NEON) SumSquareError = SumSquareError_NEON; #endif @@ -253,7 +255,7 @@ double ComputeSumSquareError(const uint8* src_a, const int kBlockSize = 1 << 15; uint64 sse = 0; #ifdef _OPENMP -#pragma omp parallel for reduction(+: sse) +#pragma omp parallel for reduction(+ : sse) #endif for (int i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { sse += SumSquareError(src_a + i, src_b + i, kBlockSize); diff --git a/util/psnr_main.cc b/util/psnr_main.cc index 0518ab84..01e87774 100644 --- a/util/psnr_main.cc +++ b/util/psnr_main.cc @@ -71,8 +71,8 @@ bool ExtractResolutionFromFilename(const char* name, // Isolate the .width_height. section of the filename by searching for a // dot or underscore followed by a digit. for (int i = 0; name[i]; ++i) { - if ((name[i] == '.' || name[i] == '_') && - name[i + 1] >= '0' && name[i + 1] <= '9') { + if ((name[i] == '.' || name[i] == '_') && name[i + 1] >= '0' && + name[i + 1] <= '9') { int n = sscanf(name + i + 1, "%dx%d", width_ptr, height_ptr); // NOLINT if (2 == n) { return true; @@ -88,7 +88,7 @@ bool ExtractResolutionFromFilename(const char* name, return false; } fseek(file_org, 0, SEEK_END); - size_t total_size = ftell(file_org); + size_t total_size = ftell(file_org); fseek(file_org, 0, SEEK_SET); uint8* const ch_org = new uint8[total_size]; memset(ch_org, 0, total_size); @@ -109,8 +109,10 @@ bool ExtractResolutionFromFilename(const char* name, // This can be useful when comparing codecs that are inconsistant about Y uint8 ScaleY(uint8 y) { int ny = (y - 16) * 256 / 224; - if (ny < 0) ny = 0; - if (ny > 255) ny = 255; + if (ny < 0) + ny = 0; + if (ny > 255) + ny = 255; return static_cast<uint8>(ny); } @@ -119,16 +121,18 @@ double GetMSE(double sse, double size) { return sse / size; } -void PrintHelp(const char * program) { +void PrintHelp(const char* program) { printf("%s [-options] org_seq rec_seq [rec_seq2.. etc]\n", program); #ifdef HAVE_JPEG printf("jpeg or raw YUV 420 supported.\n"); #endif printf("options:\n"); - printf(" -s <width> <height> .... specify YUV size, mandatory if none of the " - "sequences have the\n"); - printf(" resolution embedded in their filename (ie. " - "name.1920x800_24Hz_P420.yuv)\n"); + printf( + " -s <width> <height> .... specify YUV size, mandatory if none of the " + "sequences have the\n"); + printf( + " resolution embedded in their filename (ie. " + "name.1920x800_24Hz_P420.yuv)\n"); printf(" -psnr .................. compute PSNR (default)\n"); printf(" -ssim .................. compute SSIM\n"); printf(" -mse ................... compute MSE\n"); @@ -146,7 +150,8 @@ void PrintHelp(const char * program) { } void ParseOptions(int argc, const char* argv[]) { - if (argc <= 1) PrintHelp(argv[0]); + if (argc <= 1) + PrintHelp(argv[0]); for (int c = 1; c < argc; ++c) { if (!strcmp(argv[c], "-v")) { verbose = true; @@ -168,16 +173,16 @@ void ParseOptions(int argc, const char* argv[]) { } else if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) { PrintHelp(argv[0]); } else if (!strcmp(argv[c], "-s") && c + 2 < argc) { - image_width = atoi(argv[++c]); // NOLINT - image_height = atoi(argv[++c]); // NOLINT + image_width = atoi(argv[++c]); // NOLINT + image_height = atoi(argv[++c]); // NOLINT } else if (!strcmp(argv[c], "-skip") && c + 2 < argc) { - num_skip_org = atoi(argv[++c]); // NOLINT - num_skip_rec = atoi(argv[++c]); // NOLINT + num_skip_org = atoi(argv[++c]); // NOLINT + num_skip_rec = atoi(argv[++c]); // NOLINT } else if (!strcmp(argv[c], "-frames") && c + 1 < argc) { - num_frames = atoi(argv[++c]); // NOLINT + num_frames = atoi(argv[++c]); // NOLINT #ifdef _OPENMP } else if (!strcmp(argv[c], "-t") && c + 1 < argc) { - num_threads = atoi(argv[++c]); // NOLINT + num_threads = atoi(argv[++c]); // NOLINT #endif } else if (argv[c][0] == '-') { fprintf(stderr, "Unknown option. %s\n", argv[c]); @@ -206,11 +211,9 @@ void ParseOptions(int argc, const char* argv[]) { int org_width, org_height; int rec_width, rec_height; bool org_res_avail = ExtractResolutionFromFilename(argv[fileindex_org], - &org_width, - &org_height); + &org_width, &org_height); bool rec_res_avail = ExtractResolutionFromFilename(argv[fileindex_rec], - &rec_width, - &rec_height); + &rec_width, &rec_height); if (org_res_avail) { if (rec_res_avail) { if ((org_width == rec_width) && (org_height == rec_height)) { @@ -234,11 +237,15 @@ void ParseOptions(int argc, const char* argv[]) { } } -bool UpdateMetrics(uint8* ch_org, uint8* ch_rec, - const int y_size, const int uv_size, const size_t total_size, +bool UpdateMetrics(uint8* ch_org, + uint8* ch_rec, + const int y_size, + const int uv_size, + const size_t total_size, int number_of_frames, metric* cur_distortion_psnr, - metric* distorted_frame, bool do_psnr) { + metric* distorted_frame, + bool do_psnr) { const int uv_offset = (do_swap_uv ? uv_size : 0); const uint8* const u_org = ch_org + y_size + uv_offset; const uint8* const u_rec = ch_rec + y_size; @@ -247,11 +254,11 @@ bool UpdateMetrics(uint8* ch_org, uint8* ch_rec, if (do_psnr) { #ifdef HAVE_JPEG double y_err = static_cast<double>( - libyuv::ComputeSumSquareError(ch_org, ch_rec, y_size)); + libyuv::ComputeSumSquareError(ch_org, ch_rec, y_size)); double u_err = static_cast<double>( - libyuv::ComputeSumSquareError(u_org, u_rec, uv_size)); + libyuv::ComputeSumSquareError(u_org, u_rec, uv_size)); double v_err = static_cast<double>( - libyuv::ComputeSumSquareError(v_org, v_rec, uv_size)); + libyuv::ComputeSumSquareError(v_org, v_rec, uv_size)); #else double y_err = ComputeSumSquareError(ch_org, ch_rec, y_size); double u_err = ComputeSumSquareError(u_org, u_rec, uv_size); @@ -265,17 +272,17 @@ bool UpdateMetrics(uint8* ch_org, uint8* ch_rec, distorted_frame->y = ComputePSNR(y_err, static_cast<double>(y_size)); distorted_frame->u = ComputePSNR(u_err, static_cast<double>(uv_size)); distorted_frame->v = ComputePSNR(v_err, static_cast<double>(uv_size)); - distorted_frame->all = ComputePSNR(total_err, - static_cast<double>(total_size)); + distorted_frame->all = + ComputePSNR(total_err, static_cast<double>(total_size)); } else { distorted_frame->y = CalcSSIM(ch_org, ch_rec, image_width, image_height); - distorted_frame->u = CalcSSIM(u_org, u_rec, (image_width + 1) / 2, - (image_height + 1) / 2); - distorted_frame->v = CalcSSIM(v_org, v_rec, (image_width + 1) / 2, - (image_height + 1) / 2); + distorted_frame->u = + CalcSSIM(u_org, u_rec, (image_width + 1) / 2, (image_height + 1) / 2); + distorted_frame->v = + CalcSSIM(v_org, v_rec, (image_width + 1) / 2, (image_height + 1) / 2); distorted_frame->all = - (distorted_frame->y + distorted_frame->u + distorted_frame->v) - / total_size; + (distorted_frame->y + distorted_frame->u + distorted_frame->v) / + total_size; distorted_frame->y /= y_size; distorted_frame->u /= uv_size; distorted_frame->v /= uv_size; @@ -330,8 +337,8 @@ int main(int argc, const char* argv[]) { } // Open all files to compare to - FILE** file_rec = new FILE* [num_rec]; - memset(file_rec, 0, num_rec * sizeof(FILE*)); // NOLINT + FILE** file_rec = new FILE*[num_rec]; + memset(file_rec, 0, num_rec * sizeof(FILE*)); // NOLINT for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { file_rec[cur_rec] = fopen(argv[fileindex_rec + cur_rec], "rb"); if (file_rec[cur_rec] == NULL) { @@ -347,19 +354,18 @@ int main(int argc, const char* argv[]) { const int y_size = image_width * image_height; const int uv_size = ((image_width + 1) / 2) * ((image_height + 1) / 2); - const size_t total_size = y_size + 2 * uv_size; // NOLINT + const size_t total_size = y_size + 2 * uv_size; // NOLINT #if defined(_MSC_VER) - _fseeki64(file_org, - static_cast<__int64>(num_skip_org) * - static_cast<__int64>(total_size), SEEK_SET); + _fseeki64(file_org, static_cast<__int64>(num_skip_org) * + static_cast<__int64>(total_size), + SEEK_SET); #else fseek(file_org, num_skip_org * total_size, SEEK_SET); #endif for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { #if defined(_MSC_VER) - _fseeki64(file_rec[cur_rec], - static_cast<__int64>(num_skip_rec) * - static_cast<__int64>(total_size), + _fseeki64(file_rec[cur_rec], static_cast<__int64>(num_skip_rec) * + static_cast<__int64>(total_size), SEEK_SET); #else fseek(file_rec[cur_rec], num_skip_rec * total_size, SEEK_SET); @@ -420,7 +426,7 @@ int main(int argc, const char* argv[]) { } int number_of_frames; - for (number_of_frames = 0; ; ++number_of_frames) { + for (number_of_frames = 0;; ++number_of_frames) { if (num_frames && number_of_frames >= num_frames) break; @@ -432,17 +438,11 @@ int main(int argc, const char* argv[]) { memcpy(ch_jpeg, ch_org, bytes_org); memset(ch_org, 0, total_size); - if (0 != libyuv::MJPGToI420(ch_jpeg, bytes_org, - ch_org, - image_width, - ch_org + y_size, - (image_width + 1) / 2, + if (0 != libyuv::MJPGToI420(ch_jpeg, bytes_org, ch_org, image_width, + ch_org + y_size, (image_width + 1) / 2, ch_org + y_size + uv_size, - (image_width + 1) / 2, - image_width, - image_height, - image_width, - image_height)) { + (image_width + 1) / 2, image_width, + image_height, image_width, image_height)) { delete[] ch_jpeg; break; } @@ -453,8 +453,8 @@ int main(int argc, const char* argv[]) { } for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { - size_t bytes_rec = fread(ch_rec, sizeof(uint8), - total_size, file_rec[cur_rec]); + size_t bytes_rec = + fread(ch_rec, sizeof(uint8), total_size, file_rec[cur_rec]); if (bytes_rec < total_size) { #ifdef HAVE_JPEG // Try parsing file as a jpeg. @@ -462,17 +462,11 @@ int main(int argc, const char* argv[]) { memcpy(ch_jpeg, ch_rec, bytes_rec); memset(ch_rec, 0, total_size); - if (0 != libyuv::MJPGToI420(ch_jpeg, bytes_rec, - ch_rec, - image_width, - ch_rec + y_size, - (image_width + 1) / 2, + if (0 != libyuv::MJPGToI420(ch_jpeg, bytes_rec, ch_rec, image_width, + ch_rec + y_size, (image_width + 1) / 2, ch_rec + y_size + uv_size, - (image_width + 1) / 2, - image_width, - image_height, - image_width, - image_height)) { + (image_width + 1) / 2, image_width, + image_height, image_width, image_height)) { delete[] ch_jpeg; break; } @@ -488,10 +482,8 @@ int main(int argc, const char* argv[]) { if (do_psnr) { metric distorted_frame; metric* cur_distortion_psnr = &distortion_psnr[cur_rec]; - bool ismin = UpdateMetrics(ch_org, ch_rec, - y_size, uv_size, total_size, - number_of_frames, - cur_distortion_psnr, + bool ismin = UpdateMetrics(ch_org, ch_rec, y_size, uv_size, total_size, + number_of_frames, cur_distortion_psnr, &distorted_frame, true); if (verbose) { printf("\t%10.6f", distorted_frame.y); @@ -504,10 +496,8 @@ int main(int argc, const char* argv[]) { if (do_ssim) { metric distorted_frame; metric* cur_distortion_ssim = &distortion_ssim[cur_rec]; - bool ismin = UpdateMetrics(ch_org, ch_rec, - y_size, uv_size, total_size, - number_of_frames, - cur_distortion_ssim, + bool ismin = UpdateMetrics(ch_org, ch_rec, y_size, uv_size, total_size, + number_of_frames, cur_distortion_ssim, &distorted_frame, false); if (verbose) { printf("\t%10.6f", distorted_frame.y); @@ -543,24 +533,20 @@ int main(int argc, const char* argv[]) { } if (do_psnr) { - const double global_psnr_y = ComputePSNR( - cur_distortion_psnr->global_y, - static_cast<double>(y_size) * number_of_frames); - const double global_psnr_u = ComputePSNR( - cur_distortion_psnr->global_u, - static_cast<double>(uv_size) * number_of_frames); - const double global_psnr_v = ComputePSNR( - cur_distortion_psnr->global_v, - static_cast<double>(uv_size) * number_of_frames); - const double global_psnr_all = ComputePSNR( - cur_distortion_psnr->global_all, - static_cast<double>(total_size) * number_of_frames); - printf("Global:\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", - global_psnr_y, - global_psnr_u, - global_psnr_v, - global_psnr_all, - number_of_frames); + const double global_psnr_y = + ComputePSNR(cur_distortion_psnr->global_y, + static_cast<double>(y_size) * number_of_frames); + const double global_psnr_u = + ComputePSNR(cur_distortion_psnr->global_u, + static_cast<double>(uv_size) * number_of_frames); + const double global_psnr_v = + ComputePSNR(cur_distortion_psnr->global_v, + static_cast<double>(uv_size) * number_of_frames); + const double global_psnr_all = + ComputePSNR(cur_distortion_psnr->global_all, + static_cast<double>(total_size) * number_of_frames); + printf("Global:\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", global_psnr_y, + global_psnr_u, global_psnr_v, global_psnr_all, number_of_frames); if (show_name) { printf("\t%s", argv[fileindex_rec + cur_rec]); } @@ -570,20 +556,14 @@ int main(int argc, const char* argv[]) { if (!quiet) { printf("Avg:"); if (do_psnr) { - printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", - cur_distortion_psnr->y, - cur_distortion_psnr->u, - cur_distortion_psnr->v, - cur_distortion_psnr->all, - number_of_frames); + printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", cur_distortion_psnr->y, + cur_distortion_psnr->u, cur_distortion_psnr->v, + cur_distortion_psnr->all, number_of_frames); } if (do_ssim) { - printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", - cur_distortion_ssim->y, - cur_distortion_ssim->u, - cur_distortion_ssim->v, - cur_distortion_ssim->all, - number_of_frames); + printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", cur_distortion_ssim->y, + cur_distortion_ssim->u, cur_distortion_ssim->v, + cur_distortion_ssim->all, number_of_frames); } if (show_name) { printf("\t%s", argv[fileindex_rec + cur_rec]); @@ -594,19 +574,15 @@ int main(int argc, const char* argv[]) { printf("Min:"); if (do_psnr) { printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", - cur_distortion_psnr->min_y, - cur_distortion_psnr->min_u, - cur_distortion_psnr->min_v, - cur_distortion_psnr->min_all, - cur_distortion_psnr->min_frame); + cur_distortion_psnr->min_y, cur_distortion_psnr->min_u, + cur_distortion_psnr->min_v, cur_distortion_psnr->min_all, + cur_distortion_psnr->min_frame); } if (do_ssim) { printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", - cur_distortion_ssim->min_y, - cur_distortion_ssim->min_u, - cur_distortion_ssim->min_v, - cur_distortion_ssim->min_all, - cur_distortion_ssim->min_frame); + cur_distortion_ssim->min_y, cur_distortion_ssim->min_u, + cur_distortion_ssim->min_v, cur_distortion_ssim->min_all, + cur_distortion_ssim->min_frame); } if (show_name) { printf("\t%s", argv[fileindex_rec + cur_rec]); @@ -615,20 +591,20 @@ int main(int argc, const char* argv[]) { } if (do_mse) { - double global_mse_y = GetMSE(cur_distortion_psnr->global_y, - static_cast<double>(y_size) * number_of_frames); - double global_mse_u = GetMSE(cur_distortion_psnr->global_u, - static_cast<double>(uv_size) * number_of_frames); - double global_mse_v = GetMSE(cur_distortion_psnr->global_v, - static_cast<double>(uv_size) * number_of_frames); - double global_mse_all = GetMSE(cur_distortion_psnr->global_all, - static_cast<double>(total_size) * number_of_frames); - printf("MSE:\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", - global_mse_y, - global_mse_u, - global_mse_v, - global_mse_all, - number_of_frames); + double global_mse_y = + GetMSE(cur_distortion_psnr->global_y, + static_cast<double>(y_size) * number_of_frames); + double global_mse_u = + GetMSE(cur_distortion_psnr->global_u, + static_cast<double>(uv_size) * number_of_frames); + double global_mse_v = + GetMSE(cur_distortion_psnr->global_v, + static_cast<double>(uv_size) * number_of_frames); + double global_mse_all = + GetMSE(cur_distortion_psnr->global_all, + static_cast<double>(total_size) * number_of_frames); + printf("MSE:\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", global_mse_y, + global_mse_u, global_mse_v, global_mse_all, number_of_frames); if (show_name) { printf("\t%s", argv[fileindex_rec + cur_rec]); } diff --git a/util/ssim.cc b/util/ssim.cc index 5a6399b7..43e725d8 100644 --- a/util/ssim.cc +++ b/util/ssim.cc @@ -16,11 +16,11 @@ extern "C" { #endif -typedef unsigned int uint32; // NOLINT -typedef unsigned short uint16; // NOLINT +typedef unsigned int uint32; // NOLINT +typedef unsigned short uint16; // NOLINT #if !defined(LIBYUV_DISABLE_X86) && !defined(__SSE2__) && \ - (defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2))) + (defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2))) #define __SSE2__ #endif #if !defined(LIBYUV_DISABLE_X86) && defined(__SSE2__) @@ -38,22 +38,29 @@ enum { KERNEL = 3, KERNEL_SIZE = 2 * KERNEL + 1 }; // The maximum value (11 x 11) must be less than 128 to avoid sign // problems during the calls to _mm_mullo_epi16(). static const int K[KERNEL_SIZE] = { - 1, 3, 7, 11, 7, 3, 1 // ~11 * exp(-0.3 * i * i) + 1, 3, 7, 11, 7, 3, 1 // ~11 * exp(-0.3 * i * i) }; static const double kiW[KERNEL + 1 + 1] = { - 1. / 1089., // 1 / sum(i:0..6, j..6) K[i]*K[j] - 1. / 1089., // 1 / sum(i:0..6, j..6) K[i]*K[j] - 1. / 1056., // 1 / sum(i:0..5, j..6) K[i]*K[j] - 1. / 957., // 1 / sum(i:0..4, j..6) K[i]*K[j] - 1. / 726., // 1 / sum(i:0..3, j..6) K[i]*K[j] + 1. / 1089., // 1 / sum(i:0..6, j..6) K[i]*K[j] + 1. / 1089., // 1 / sum(i:0..6, j..6) K[i]*K[j] + 1. / 1056., // 1 / sum(i:0..5, j..6) K[i]*K[j] + 1. / 957., // 1 / sum(i:0..4, j..6) K[i]*K[j] + 1. / 726., // 1 / sum(i:0..3, j..6) K[i]*K[j] }; #if !defined(LIBYUV_DISABLE_X86) && defined(__SSE2__) -#define PWEIGHT(A, B) static_cast<uint16>(K[(A)] * K[(B)]) // weight product -#define MAKE_WEIGHT(L) \ - { { { PWEIGHT(L, 0), PWEIGHT(L, 1), PWEIGHT(L, 2), PWEIGHT(L, 3), \ - PWEIGHT(L, 4), PWEIGHT(L, 5), PWEIGHT(L, 6), 0 } } } +#define PWEIGHT(A, B) static_cast<uint16>(K[(A)] * K[(B)]) // weight product +#define MAKE_WEIGHT(L) \ + { \ + { \ + { \ + PWEIGHT(L, 0) \ + , PWEIGHT(L, 1), PWEIGHT(L, 2), PWEIGHT(L, 3), PWEIGHT(L, 4), \ + PWEIGHT(L, 5), PWEIGHT(L, 6), 0 \ + } \ + } \ + } // We need this union trick to be able to initialize constant static __m128i // values. We can't call _mm_set_epi16() for static compile-time initialization. @@ -62,32 +69,36 @@ static const struct { uint16 i16_[8]; __m128i m_; } values_; -} W0 = MAKE_WEIGHT(0), - W1 = MAKE_WEIGHT(1), - W2 = MAKE_WEIGHT(2), +} W0 = MAKE_WEIGHT(0), W1 = MAKE_WEIGHT(1), W2 = MAKE_WEIGHT(2), W3 = MAKE_WEIGHT(3); - // ... the rest is symmetric. +// ... the rest is symmetric. #undef MAKE_WEIGHT #undef PWEIGHT #endif // Common final expression for SSIM, once the weighted sums are known. -static double FinalizeSSIM(double iw, double xm, double ym, - double xxm, double xym, double yym) { +static double FinalizeSSIM(double iw, + double xm, + double ym, + double xxm, + double xym, + double yym) { const double iwx = xm * iw; const double iwy = ym * iw; double sxx = xxm * iw - iwx * iwx; double syy = yym * iw - iwy * iwy; // small errors are possible, due to rounding. Clamp to zero. - if (sxx < 0.) sxx = 0.; - if (syy < 0.) syy = 0.; + if (sxx < 0.) + sxx = 0.; + if (syy < 0.) + syy = 0.; const double sxsy = sqrt(sxx * syy); const double sxy = xym * iw - iwx * iwy; static const double C11 = (0.01 * 0.01) * (255 * 255); static const double C22 = (0.03 * 0.03) * (255 * 255); static const double C33 = (0.015 * 0.015) * (255 * 255); const double l = (2. * iwx * iwy + C11) / (iwx * iwx + iwy * iwy + C11); - const double c = (2. * sxsy + C22) / (sxx + syy + C22); + const double c = (2. * sxsy + C22) / (sxx + syy + C22); const double s = (sxy + C33) / (sxsy + C33); return l * c * s; } @@ -98,15 +109,21 @@ static double FinalizeSSIM(double iw, double xm, double ym, // Note: worst case of accumulation is a weight of 33 = 11 + 2 * (7 + 3 + 1) // with a diff of 255, squared. The maximum error is thus 0x4388241, // which fits into 32 bits integers. -double GetSSIM(const uint8 *org, const uint8 *rec, - int xo, int yo, int W, int H, int stride) { +double GetSSIM(const uint8* org, + const uint8* rec, + int xo, + int yo, + int W, + int H, + int stride) { uint32 ws = 0, xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0; org += (yo - KERNEL) * stride; org += (xo - KERNEL); rec += (yo - KERNEL) * stride; rec += (xo - KERNEL); for (int y_ = 0; y_ < KERNEL_SIZE; ++y_, org += stride, rec += stride) { - if (((yo - KERNEL + y_) < 0) || ((yo - KERNEL + y_) >= H)) continue; + if (((yo - KERNEL + y_) < 0) || ((yo - KERNEL + y_) >= H)) + continue; const int Wy = K[y_]; for (int x_ = 0; x_ < KERNEL_SIZE; ++x_) { const int Wxy = Wy * K[x_]; @@ -114,8 +131,8 @@ double GetSSIM(const uint8 *org, const uint8 *rec, const int org_x = org[x_]; const int rec_x = rec[x_]; ws += Wxy; - xm += Wxy * org_x; - ym += Wxy * rec_x; + xm += Wxy * org_x; + ym += Wxy * rec_x; xxm += Wxy * org_x * org_x; xym += Wxy * org_x * rec_x; yym += Wxy * rec_x * rec_x; @@ -125,8 +142,11 @@ double GetSSIM(const uint8 *org, const uint8 *rec, return FinalizeSSIM(1. / ws, xm, ym, xxm, xym, yym); } -double GetSSIMFullKernel(const uint8 *org, const uint8 *rec, - int xo, int yo, int stride, +double GetSSIMFullKernel(const uint8* org, + const uint8* rec, + int xo, + int yo, + int stride, double area_weight) { uint32 xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0; @@ -161,8 +181,8 @@ double GetSSIMFullKernel(const uint8 *org, const uint8 *rec, const int ll2 = rec[dy2 - x]; const int lr2 = rec[dy2 + x]; - xm += Wxy * (ul1 + ur1 + ll1 + lr1); - ym += Wxy * (ul2 + ur2 + ll2 + lr2); + xm += Wxy * (ul1 + ur1 + ll1 + lr1); + ym += Wxy * (ul2 + ur2 + ll2 + lr2); xxm += Wxy * (ul1 * ul1 + ur1 * ur1 + ll1 * ll1 + lr1 * lr1); xym += Wxy * (ul1 * ul2 + ur1 * ur2 + ll1 * ll2 + lr1 * lr2); yym += Wxy * (ul2 * ul2 + ur2 * ur2 + ll2 * ll2 + lr2 * lr2); @@ -189,8 +209,8 @@ double GetSSIMFullKernel(const uint8 *org, const uint8 *rec, const int l2 = rec[-y]; const int r2 = rec[y]; - xm += Wxy * (u1 + d1 + l1 + r1); - ym += Wxy * (u2 + d2 + l2 + r2); + xm += Wxy * (u1 + d1 + l1 + r1); + ym += Wxy * (u2 + d2 + l2 + r2); xxm += Wxy * (u1 * u1 + d1 * d1 + l1 * l1 + r1 * r1); xym += Wxy * (u1 * u2 + d1 * d2 + l1 * l2 + r1 * r2); yym += Wxy * (u2 * u2 + d2 * d2 + l2 * l2 + r2 * r2); @@ -201,13 +221,13 @@ double GetSSIMFullKernel(const uint8 *org, const uint8 *rec, const int s1 = org[0]; const int s2 = rec[0]; - xm += Wxy * s1; - ym += Wxy * s2; + xm += Wxy * s1; + ym += Wxy * s2; xxm += Wxy * s1 * s1; xym += Wxy * s1 * s2; yym += Wxy * s2 * s2; -#else // __SSE2__ +#else // __SSE2__ org += (yo - KERNEL) * stride + (xo - KERNEL); rec += (yo - KERNEL) * stride + (xo - KERNEL); @@ -221,29 +241,31 @@ double GetSSIMFullKernel(const uint8 *org, const uint8 *rec, // Read 8 pixels at line #L, and convert to 16bit, perform weighting // and acccumulate. -#define LOAD_LINE_PAIR(L, WEIGHT) do { \ - const __m128i v0 = \ - _mm_loadl_epi64(reinterpret_cast<const __m128i*>(org + (L) * stride)); \ - const __m128i v1 = \ - _mm_loadl_epi64(reinterpret_cast<const __m128i*>(rec + (L) * stride)); \ - const __m128i w0 = _mm_unpacklo_epi8(v0, zero); \ - const __m128i w1 = _mm_unpacklo_epi8(v1, zero); \ - const __m128i ww0 = _mm_mullo_epi16(w0, (WEIGHT).values_.m_); \ - const __m128i ww1 = _mm_mullo_epi16(w1, (WEIGHT).values_.m_); \ - x = _mm_add_epi32(x, _mm_unpacklo_epi16(ww0, zero)); \ - y = _mm_add_epi32(y, _mm_unpacklo_epi16(ww1, zero)); \ - x = _mm_add_epi32(x, _mm_unpackhi_epi16(ww0, zero)); \ - y = _mm_add_epi32(y, _mm_unpackhi_epi16(ww1, zero)); \ - xx = _mm_add_epi32(xx, _mm_madd_epi16(ww0, w0)); \ - xy = _mm_add_epi32(xy, _mm_madd_epi16(ww0, w1)); \ - yy = _mm_add_epi32(yy, _mm_madd_epi16(ww1, w1)); \ -} while (0) - -#define ADD_AND_STORE_FOUR_EPI32(M, OUT) do { \ - uint32 tmp[4]; \ - _mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), (M)); \ - (OUT) = tmp[3] + tmp[2] + tmp[1] + tmp[0]; \ -} while (0) +#define LOAD_LINE_PAIR(L, WEIGHT) \ + do { \ + const __m128i v0 = \ + _mm_loadl_epi64(reinterpret_cast<const __m128i*>(org + (L)*stride)); \ + const __m128i v1 = \ + _mm_loadl_epi64(reinterpret_cast<const __m128i*>(rec + (L)*stride)); \ + const __m128i w0 = _mm_unpacklo_epi8(v0, zero); \ + const __m128i w1 = _mm_unpacklo_epi8(v1, zero); \ + const __m128i ww0 = _mm_mullo_epi16(w0, (WEIGHT).values_.m_); \ + const __m128i ww1 = _mm_mullo_epi16(w1, (WEIGHT).values_.m_); \ + x = _mm_add_epi32(x, _mm_unpacklo_epi16(ww0, zero)); \ + y = _mm_add_epi32(y, _mm_unpacklo_epi16(ww1, zero)); \ + x = _mm_add_epi32(x, _mm_unpackhi_epi16(ww0, zero)); \ + y = _mm_add_epi32(y, _mm_unpackhi_epi16(ww1, zero)); \ + xx = _mm_add_epi32(xx, _mm_madd_epi16(ww0, w0)); \ + xy = _mm_add_epi32(xy, _mm_madd_epi16(ww0, w1)); \ + yy = _mm_add_epi32(yy, _mm_madd_epi16(ww1, w1)); \ + } while (0) + +#define ADD_AND_STORE_FOUR_EPI32(M, OUT) \ + do { \ + uint32 tmp[4]; \ + _mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), (M)); \ + (OUT) = tmp[3] + tmp[2] + tmp[1] + tmp[0]; \ + } while (0) LOAD_LINE_PAIR(0, W0); LOAD_LINE_PAIR(1, W1); @@ -266,10 +288,14 @@ double GetSSIMFullKernel(const uint8 *org, const uint8 *rec, return FinalizeSSIM(area_weight, xm, ym, xxm, xym, yym); } -static int start_max(int x, int y) { return (x > y) ? x : y; } +static int start_max(int x, int y) { + return (x > y) ? x : y; +} -double CalcSSIM(const uint8 *org, const uint8 *rec, - const int image_width, const int image_height) { +double CalcSSIM(const uint8* org, + const uint8* rec, + const int image_width, + const int image_height) { double SSIM = 0.; const int KERNEL_Y = (image_height < KERNEL) ? image_height : KERNEL; const int KERNEL_X = (image_width < KERNEL) ? image_width : KERNEL; @@ -284,7 +310,7 @@ double CalcSSIM(const uint8 *org, const uint8 *rec, } #ifdef _OPENMP - #pragma omp parallel for reduction(+: SSIM) +#pragma omp parallel for reduction(+ : SSIM) #endif for (int j = KERNEL_Y; j < image_height - KERNEL_Y; ++j) { for (int i = 0; i < KERNEL_X; ++i) { @@ -302,8 +328,8 @@ double CalcSSIM(const uint8 *org, const uint8 *rec, // NOTE: we could use similar method for the left-most pixels too. const int kScratchWidth = 8; const int kScratchStride = kScratchWidth + KERNEL + 1; - uint8 scratch_org[KERNEL_SIZE * kScratchStride] = { 0 }; - uint8 scratch_rec[KERNEL_SIZE * kScratchStride] = { 0 }; + uint8 scratch_org[KERNEL_SIZE * kScratchStride] = {0}; + uint8 scratch_rec[KERNEL_SIZE * kScratchStride] = {0}; for (int k = 0; k < KERNEL_SIZE; ++k) { const int offset = @@ -311,9 +337,9 @@ double CalcSSIM(const uint8 *org, const uint8 *rec, memcpy(scratch_org + k * kScratchStride, org + offset, kScratchWidth); memcpy(scratch_rec + k * kScratchStride, rec + offset, kScratchWidth); } - for (int k = 0; k <= KERNEL_X + 1; ++k) { - SSIM += GetSSIMFullKernel(scratch_org, scratch_rec, - KERNEL + k, KERNEL, kScratchStride, kiW[k]); + for (int k = 0; k <= KERNEL_X + 1; ++k) { + SSIM += GetSSIMFullKernel(scratch_org, scratch_rec, KERNEL + k, KERNEL, + kScratchStride, kiW[k]); } } } @@ -333,4 +359,3 @@ double CalcLSSIM(double ssim) { #ifdef __cplusplus } // extern "C" #endif - diff --git a/util/ssim.h b/util/ssim.h index fb2a0e95..4647f45d 100644 --- a/util/ssim.h +++ b/util/ssim.h @@ -24,8 +24,10 @@ typedef unsigned char uint8; #define UINT8_TYPE_DEFINED #endif -double CalcSSIM(const uint8* org, const uint8* rec, - const int image_width, const int image_height); +double CalcSSIM(const uint8* org, + const uint8* rec, + const int image_width, + const int image_height); double CalcLSSIM(double ssim); |