author | Frank Barchard <fbarchard@google.com> | 2021-02-05 16:14:25 -0800 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2021-02-06 00:26:55 +0000 |
commit | 942c5084482d8592883be66151e0dea502f4cbc0 (patch) | |
tree | e02d37c46fb6e9d06d5165fc86ce483b70928524 | |
parent | 60d37a064bc0307017537ed3091b1b0204213855 (diff) | |
download | libyuv-942c5084482d8592883be66151e0dea502f4cbc0.tar.gz |
BT.2020 Full Range yuvconstants
New color util to compute the constants needed, based on the white point.
[ RUN ] LibYUVColorTest.TestFullYUVV
hist        -2        -1         0         1        2
red          0   1627136  13670144   1479936        0
green   319285   3456836   9243059   3440771   317265
blue         0   1561088  14202112   1014016        0
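The new util/color.cc derives these tables from the colorspace's luma weights rather than hand-entered ratios: given KR and KB, the cross-channel gains for the full-range case follow from R = Y + 2(1-KR)*V and B = Y + 2(1-KB)*U, with the G terms fixed by Y = KR*R + KG*G + KB*B (the same equations the old BT.709 full-range comment spelled out). A minimal sketch of that derivation, quantized to the fixed point the SIMD tables use; the function name and output format are illustrative, not util/color.cc's actual interface:

```c
#include <math.h>
#include <stdio.h>

// Sketch: derive full-range YUV->RGB gains from KR/KB, then quantize to
// round(c * 64), the 6-bit fixed point used by the SIMD constants.
static void PrintFullRangeConstants(const char* name, double kr, double kb) {
  double kg = 1.0 - kr - kb;
  double vr = 2.0 * (1.0 - kr);            // V contribution to R
  double ub = 2.0 * (1.0 - kb);            // U contribution to B
  double ug = 2.0 * kb * (1.0 - kb) / kg;  // U contribution to G
  double vg = 2.0 * kr * (1.0 - kr) / kg;  // V contribution to G
  printf("%-6s UB=%3.0f UG=%3.0f VG=%3.0f VR=%3.0f\n", name,
         round(ub * 64), round(ug * 64), round(vg * 64), round(vr * 64));
}

int main(void) {
  PrintFullRangeConstants("JPEG", 0.2990, 0.1140);   // BT.601 full range
  PrintFullRangeConstants("F709", 0.2126, 0.0722);   // BT.709 full range
  PrintFullRangeConstants("V2020", 0.2627, 0.0593);  // BT.2020 full range
  return 0;
}
```

For V2020 this prints UB=120 UG=11 VG=37 VR=94, matching the new constants added to row_common.cc below.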
Bug: libyuv:877, b/178283356
Change-Id: If432ebfab76b01302fdb416a153c4f26ca0832d6
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2678859
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
-rw-r--r-- | README.chromium | 2
-rw-r--r-- | include/libyuv/convert.h | 2
-rw-r--r-- | include/libyuv/convert_argb.h | 24
-rw-r--r-- | include/libyuv/version.h | 2
-rw-r--r-- | source/convert_from.cc | 2
-rw-r--r-- | source/row_common.cc | 312
-rw-r--r-- | source/scale.cc | 18
-rw-r--r-- | source/scale_any.cc | 2
-rw-r--r-- | source/scale_common.cc | 6
-rw-r--r-- | source/scale_gcc.cc | 661
-rw-r--r-- | source/scale_neon.cc | 6
-rw-r--r-- | source/scale_neon64.cc | 18
-rw-r--r-- | unit_test/color_test.cc | 131
-rw-r--r-- | unit_test/convert_test.cc | 56
-rw-r--r-- | util/color.cc | 118
15 files changed, 761 insertions, 599 deletions
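Two notes help when reading the large row_common.cc hunk below. First, the coefficient macros are re-signed: UB and VR are now defined as positive magnitudes (and YGB is renamed YB), with the negation moved into the per-architecture table macros, so the generated YuvConstants values are numerically unchanged. Second, the chroma gains are round(c * 64) fixed point, while the luma gain YG carries an extra 65536/257 factor; a plausible reading (our inference from the 0x0101 * YG entry in the macros, not stated in the change) is that Y is widened by byte replication, i.e. multiplied by 0x0101 = 257, before the multiply. A sketch of that arithmetic:

```c
#include <math.h>
#include <stdio.h>

// Sketch of the limited-range luma constants defined below. Y appears to
// be widened by replicating the byte (y * 0x0101 == y * 257), hence the
// /257 in YG; YB folds the -16 black-level offset with a +64/2 rounding
// term for a later >>6.
int main(void) {
  int yg = (int)lround(1.164 * 64 * 256 * 256 / 257);  // 18997
  int yb = (int)lround(1.164 * 64 * -16 + 64 / 2);     // -1160
  printf("YG=%d YB=%d\n", yg, yb);

  // Luma term for one pixel, e.g. limited-range white (y = 235):
  int y = 235;
  int luma = ((y * 0x0101 * yg) >> 16) + yb;
  printf("luma(235)=%d, expect %d\n", luma,
         (int)lround((235 - 16) * 1.164 * 64 + 32));
  return 0;
}
```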
diff --git a/README.chromium b/README.chromium index d27d1aa3..bdd05f1f 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1774 +Version: 1775 License: BSD License File: LICENSE diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index 50ffc2f0..137b30f1 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -213,7 +213,7 @@ int I010ToI410(const uint16_t* src_y, // Convert I012 to I412 #define I012ToI412 I010ToI410 -// Convert I212 to I412 +// Convert I210 to I410 LIBYUV_API int I210ToI410(const uint16_t* src_y, int src_stride_y, diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index d9cc5bd2..cf7f923e 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -21,18 +21,20 @@ extern "C" { #endif // Conversion matrix for YUV to RGB -LIBYUV_API extern const struct YuvConstants kYuvI601Constants; // BT.601 -LIBYUV_API extern const struct YuvConstants kYuvJPEGConstants; // JPeg -LIBYUV_API extern const struct YuvConstants kYuvF709Constants; // BT.709 full -LIBYUV_API extern const struct YuvConstants kYuvH709Constants; // BT.709 -LIBYUV_API extern const struct YuvConstants kYuv2020Constants; // BT.2020 +LIBYUV_API extern const struct YuvConstants kYuvI601Constants; // BT.601 +LIBYUV_API extern const struct YuvConstants kYuvJPEGConstants; // JPeg +LIBYUV_API extern const struct YuvConstants kYuvF709Constants; // BT.709 full +LIBYUV_API extern const struct YuvConstants kYuvH709Constants; // BT.709 +LIBYUV_API extern const struct YuvConstants kYuv2020Constants; // BT.2020 +LIBYUV_API extern const struct YuvConstants kYuvV2020Constants; // BT.2020 full // Conversion matrix for YVU to BGR -LIBYUV_API extern const struct YuvConstants kYvuI601Constants; // BT.601 -LIBYUV_API extern const struct YuvConstants kYvuJPEGConstants; // JPeg -LIBYUV_API extern const struct YuvConstants kYvuF709Constants; // BT.709 full -LIBYUV_API extern const struct YuvConstants kYvuH709Constants; // BT.709 -LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020 +LIBYUV_API extern const struct YuvConstants kYvuI601Constants; // BT.601 +LIBYUV_API extern const struct YuvConstants kYvuJPEGConstants; // JPeg +LIBYUV_API extern const struct YuvConstants kYvuF709Constants; // BT.709 full +LIBYUV_API extern const struct YuvConstants kYvuH709Constants; // BT.709 +LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020 +LIBYUV_API extern const struct YuvConstants kYvuV2020Constants; // BT.2020 full // Macros for end swapped destination Matrix conversions. // Swap UV and pass mirrored kYvuJPEGConstants matrix. 
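For callers, the new kYuvV2020Constants/kYvuV2020Constants pair plugs into the existing *Matrix entry points. A minimal sketch (the wrapper name is illustrative; I420ToARGBMatrix is the existing libyuv API):

```cpp
#include "libyuv/convert_argb.h"

// Sketch: convert full-range BT.2020 I420 to ARGB by passing the new
// matrix to the generic entry point. Buffers and strides are assumed
// to be set up by the caller.
int V2020ToARGB(const uint8_t* y, int y_stride,
                const uint8_t* u, int u_stride,
                const uint8_t* v, int v_stride,
                uint8_t* argb, int argb_stride, int width, int height) {
  return libyuv::I420ToARGBMatrix(y, y_stride, u, u_stride, v, v_stride,
                                  argb, argb_stride,
                                  &libyuv::kYuvV2020Constants, width, height);
}
```

ABGR output goes through the kYuvV2020ConstantsVU alias added below, which swaps U/V and passes the mirrored kYvuV2020Constants.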
@@ -42,6 +44,8 @@ LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020 #define kYuvF709ConstantsVU kYvuF709Constants #define kYuvH709ConstantsVU kYvuH709Constants #define kYuv2020ConstantsVU kYvu2020Constants +#define kYuvV2020ConstantsVU kYvuV2020Constants + #define NV12ToABGRMatrix(a, b, c, d, e, f, g, h, i) \ NV21ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i) #define NV21ToABGRMatrix(a, b, c, d, e, f, g, h, i) \ diff --git a/include/libyuv/version.h b/include/libyuv/version.h index ff3c9dec..a57dfa53 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1774 +#define LIBYUV_VERSION 1775 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert_from.cc b/source/convert_from.cc index 6524f969..591e2782 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -30,6 +30,8 @@ static __inline int Abs(int v) { } // I420 To any I4xx YUV format with mirroring. +// TODO(fbarchard): Consider kFilterNone for Y, or CopyPlane + static int I420ToI4xx(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, diff --git a/source/row_common.cc b/source/row_common.cc index c3942cf7..eb889c83 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1330,234 +1330,218 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) { // Macros to create SIMD specific yuv to rgb conversion constants. #if defined(__aarch64__) -#define MAKEYUVCONSTANTS(name, YG, YGB, UB, UG, VG, VR, BB, BG, BR) \ +#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \ + const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \ + {UB, VR, UB, VR, UB, VR, UB, VR}, {UB, VR, UB, VR, UB, VR, UB, VR}, \ + {UG, VG, UG, VG, UG, VG, UG, VG}, {UG, VG, UG, VG, UG, VG, UG, VG}, \ + {BB, BG, BR, YB, 0, 0, 0, 0}, {0x0101 * YG, YG, 0, 0}}; \ + const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \ + {VR, UB, VR, UB, VR, UB, VR, UB}, {VR, UB, VR, UB, VR, UB, VR, UB}, \ + {VG, UG, VG, UG, VG, UG, VG, UG}, {VG, UG, VG, UG, VG, UG, VG, UG}, \ + {BR, BG, BB, YB, 0, 0, 0, 0}, {0x0101 * YG, YG, 0, 0}}; + +#elif defined(__arm__) +#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \ const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \ - {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, \ - {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, \ - {UG, VG, UG, VG, UG, VG, UG, VG}, \ - {UG, VG, UG, VG, UG, VG, UG, VG}, \ - {BB, BG, BR, YGB, 0, 0, 0, 0}, \ + {UB, UB, UB, UB, VR, VR, VR, VR, 0, 0, 0, 0, 0, 0, 0, 0}, \ + {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, \ + {BB, BG, BR, YB, 0, 0, 0, 0}, \ {0x0101 * YG, YG, 0, 0}}; \ const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \ - {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, \ - {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, \ - {VG, UG, VG, UG, VG, UG, VG, UG}, \ - {VG, UG, VG, UG, VG, UG, VG, UG}, \ - {BR, BG, BB, YGB, 0, 0, 0, 0}, \ - {0x0101 * YG, YG, 0, 0}}; - -#elif defined(__arm__) -#define MAKEYUVCONSTANTS(name, YG, YGB, UB, UG, VG, VR, BB, BG, BR) \ - const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \ - {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0}, \ - {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, \ - {BB, BG, BR, YGB, 0, 0, 0, 0}, \ - {0x0101 * YG, YG, 0, 0}}; \ - const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \ - {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0}, \ 
- {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, \ - {BR, BG, BB, YGB, 0, 0, 0, 0}, \ + {VR, VR, VR, VR, UB, UB, UB, UB, 0, 0, 0, 0, 0, 0, 0, 0}, \ + {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, \ + {BR, BG, BB, YB, 0, 0, 0, 0}, \ {0x0101 * YG, YG, 0, 0}}; #else -#define MAKEYUVCONSTANTS(name, YG, YGB, UB, UG, VG, VR, BB, BG, BR) \ - const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \ - {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, \ - UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, \ - {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \ - UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \ - {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, \ - 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, \ - {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \ - {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \ - {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \ - {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \ - {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, \ - YGB, YGB}}; \ - const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \ - {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, \ - VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0}, \ - {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, \ - VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, \ - {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, \ - 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB}, \ - {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \ - {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \ - {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \ - {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \ - {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, \ - YGB, YGB}}; +#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \ + const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \ + {-UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, \ + -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0}, \ + {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \ + UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \ + {0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, \ + 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR}, \ + {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \ + {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \ + {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \ + {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \ + {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}; \ + const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \ + {-VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, \ + -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0}, \ + {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, \ + VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, \ + {0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, \ + 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB}, \ + {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \ + {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \ + {BB, BB, BB, BB, BB, 
BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \ + {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \ + {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}; #endif // TODO(fbarchard): Generate SIMD structures from float matrix. -// BT.601 YUV to RGB reference -// R = (Y - 16) * 1.164 - V * -1.596 -// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 -// B = (Y - 16) * 1.164 - U * -2.018 +// Bias values to round, and subtract 128 from U and V. +#define BB (-UB * 128 + YB) +#define BG (UG * 128 + VG * 128 + YB) +#define BR (-VR * 128 + YB) -// Y contribution to R,G,B. Scale and bias. -#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ -#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ +// BT.601 limited range YUV to RGB reference +// R = (Y - 16) * 1.164 + V * 1.596 +// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 +// B = (Y - 16) * 1.164 + U * 2.018 +// KR = 0.299; KB = 0.114 // U and V contributions to R,G,B. -#define UB -128 /* max(-128, round(-2.018 * 64)) */ -#define UG 25 /* round(0.391 * 64) */ -#define VG 52 /* round(0.813 * 64) */ -#define VR -102 /* round(-1.596 * 64) */ +#define UB 128 /* max(128, round(2.018 * 64)) */ +#define UG 25 /* round(0.391 * 64) */ +#define VG 52 /* round(0.813 * 64) */ +#define VR 102 /* round(1.596 * 64) */ -// Bias values to subtract 16 from Y and 128 from U and V. -#define BB (UB * 128 + YGB) -#define BG (UG * 128 + VG * 128 + YGB) -#define BR (VR * 128 + YGB) +// Y contribution to R,G,B. Scale and bias. +#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ +#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ -MAKEYUVCONSTANTS(I601, YG, YGB, UB, UG, VG, VR, BB, BG, BR) +MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR, BB, BG, BR) -#undef BB -#undef BG -#undef BR -#undef YGB +#undef YG +#undef YB #undef UB #undef UG #undef VG #undef VR -#undef YG -// JPEG YUV to RGB reference -// * R = Y - V * -1.40200 -// * G = Y - U * 0.34414 - V * 0.71414 -// * B = Y - U * -1.77200 - -// Y contribution to R,G,B. Scale and bias. -#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */ -#define YGB 32 /* 64 / 2 */ +// BT.601 full range YUV to RGB reference (aka JPEG) +// * R = Y + V * 1.40200 +// * G = Y - U * 0.34414 - V * 0.71414 +// * B = Y + U * 1.77200 +// KR = 0.299; KB = 0.114 // U and V contributions to R,G,B. -#define UB -113 /* round(-1.77200 * 64) */ -#define UG 22 /* round(0.34414 * 64) */ -#define VG 46 /* round(0.71414 * 64) */ -#define VR -90 /* round(-1.40200 * 64) */ +#define UB 113 /* round(1.77200 * 64) */ +#define UG 22 /* round(0.34414 * 64) */ +#define VG 46 /* round(0.71414 * 64) */ +#define VR 90 /* round(1.40200 * 64) */ -// Bias values to round, and subtract 128 from U and V. -#define BB (UB * 128 + YGB) -#define BG (UG * 128 + VG * 128 + YGB) -#define BR (VR * 128 + YGB) +// Y contribution to R,G,B. Scale and bias. +#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */ +#define YB 32 /* 64 / 2 */ -MAKEYUVCONSTANTS(JPEG, YG, YGB, UB, UG, VG, VR, BB, BG, BR) +MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR, BB, BG, BR) -#undef BB -#undef BG -#undef BR -#undef YGB +#undef YG +#undef YB #undef UB #undef UG #undef VG #undef VR -#undef YG -// BT.709 YUV to RGB reference -// R = (Y - 16) * 1.164 - V * -1.793 -// G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533 -// B = (Y - 16) * 1.164 - U * -2.112 - -// Y contribution to R,G,B. Scale and bias. 
-#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ -#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ +// BT.709 limited range YUV to RGB reference +// R = (Y - 16) * 1.164 + V * 1.793 +// G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533 +// B = (Y - 16) * 1.164 + U * 2.112 +// KR = 0.2126, KB = 0.0722 // TODO(fbarchard): Find way to express 2.112 instead of 2.0. // U and V contributions to R,G,B. -#define UB -128 /* max(-128, round(-2.112 * 64)) */ -#define UG 14 /* round(0.213 * 64) */ -#define VG 34 /* round(0.533 * 64) */ -#define VR -115 /* round(-1.793 * 64) */ +#define UB 128 /* max(128, round(2.112 * 64)) */ +#define UG 14 /* round(0.213 * 64) */ +#define VG 34 /* round(0.533 * 64) */ +#define VR 115 /* round(1.793 * 64) */ -// Bias values to round, and subtract 128 from U and V. -#define BB (UB * 128 + YGB) -#define BG (UG * 128 + VG * 128 + YGB) -#define BR (VR * 128 + YGB) +// Y contribution to R,G,B. Scale and bias. +#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ +#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ -MAKEYUVCONSTANTS(H709, YG, YGB, UB, UG, VG, VR, BB, BG, BR) +MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR, BB, BG, BR) -#undef BB -#undef BG -#undef BR -#undef YGB +#undef YG +#undef YB #undef UB #undef UG #undef VG #undef VR -#undef YG // BT.709 full range YUV to RGB reference -// R = Y - V * -1.5748 -// G = Y - U * 0.18732 - V * 0.46812 -// B = Y - U * -1.8556 -// WR = 0.2126 -// WB = 0.0722 -// WR and WB given, the equations are: -// R = Y + (2 * (1 - WR)) * V; -// G = Y - ((2 * ((WR * (1 - WR) * V) + (WB * (1 - WB) * U))) / (1 - WB - WR)); -// B = Y + (2 * (1 - WB)) * U; - -// Y contribution to R,G,B. Scale and bias. (same as jpeg) -#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */ -#define YGB 32 /* 64 / 2 */ +// R = Y + V * 1.5748 +// G = Y - U * 0.18732 - V * 0.46812 +// B = Y + U * 1.8556 +// KR = 0.2126, KB = 0.0722 // U and V contributions to R,G,B. -#define UB -119 /* round(-1.8556 * 64) */ -#define UG 12 /* round(0.18732 * 64) */ -#define VG 30 /* round(0.46812 * 64) */ -#define VR -101 /* round(-1.5748 * 64) */ +#define UB 119 /* round(1.8556 * 64) */ +#define UG 12 /* round(0.18732 * 64) */ +#define VG 30 /* round(0.46812 * 64) */ +#define VR 101 /* round(1.5748 * 64) */ -// Bias values to round, and subtract 128 from U and V. -#define BB (UB * 128 + YGB) -#define BG (UG * 128 + VG * 128 + YGB) -#define BR (VR * 128 + YGB) +// Y contribution to R,G,B. Scale and bias. (same as jpeg) +#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */ +#define YB 32 /* 64 / 2 */ -MAKEYUVCONSTANTS(F709, YG, YGB, UB, UG, VG, VR, BB, BG, BR) +MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR, BB, BG, BR) -#undef BB -#undef BG -#undef BR -#undef YGB +#undef YG +#undef YB #undef UB #undef UG #undef VG #undef VR -#undef YG - -// BT.2020 YUV to RGB reference -// R = (Y - 16) * 1.164384 - V * -1.67867 -// G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042 -// B = (Y - 16) * 1.164384 - U * -2.14177 -// Y contribution to R,G,B. Scale and bias. -#define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */ -#define YGB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */ +// BT.2020 limited range YUV to RGB reference +// R = (Y - 16) * 1.164384 + V * 1.67867 +// G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042 +// B = (Y - 16) * 1.164384 + U * 2.14177 +// KR = 0.2627; KB = 0.0593 // TODO(fbarchard): Improve accuracy; the B channel is off by 7%. // U and V contributions to R,G,B. 
-#define UB -128 /* max(-128, round(-2.142 * 64)) */ -#define UG 12 /* round(0.187326 * 64) */ -#define VG 42 /* round(0.65042 * 64) */ -#define VR -107 /* round(-1.67867 * 64) */ +#define UB 128 /* max(128, round(2.142 * 64)) */ +#define UG 12 /* round(0.187326 * 64) */ +#define VG 42 /* round(0.65042 * 64) */ +#define VR 107 /* round(1.67867 * 64) */ -// Bias values to round, and subtract 128 from U and V. -#define BB (UB * 128 + YGB) -#define BG (UG * 128 + VG * 128 + YGB) -#define BR (VR * 128 + YGB) +// Y contribution to R,G,B. Scale and bias. +#define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */ +#define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */ -MAKEYUVCONSTANTS(2020, YG, YGB, UB, UG, VG, VR, BB, BG, BR) +MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR, BB, BG, BR) -#undef BB -#undef BG -#undef BR -#undef YGB +#undef YG +#undef YB #undef UB #undef UG #undef VG #undef VR + +// BT.2020 full range YUV to RGB reference +// R = Y + V * 1.474600 +// G = Y - U * 0.164553 - V * 0.571353 +// B = Y + U * 1.881400 +// KR = 0.2627; KB = 0.0593 + +#define UB 120 /* round(1.881400 * 64) */ +#define UG 11 /* round(0.164553 * 64) */ +#define VG 37 /* round(0.571353 * 64) */ +#define VR 94 /* round(1.474600 * 64) */ + +// Y contribution to R,G,B. Scale and bias. (same as jpeg) +#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */ +#define YB 32 /* 64 / 2 */ + +MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR, BB, BG, BR) + #undef YG +#undef YB +#undef UB +#undef UG +#undef VG +#undef VR + +#undef BB +#undef BG +#undef BR #undef MAKEYUVCONSTANTS diff --git a/source/scale.cc b/source/scale.cc index 34c05699..16771cd8 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -1336,7 +1336,7 @@ void ScalePlaneBilinearUp(int src_width, } } -// Scale plane, horizontally 2 times, vertically any time. +// Scale plane, horizontally up by 2 times. // Uses linear filter horizontally, nearest vertically. // This is an optimized version for scaling up a plane to 2 times of // its original width, using linear interpolation. @@ -1356,7 +1356,7 @@ void ScalePlaneUp2_Linear(int src_width, int dy; // This function can only scale up by 2 times horizontally. - assert(src_width * 2 == dst_width || src_width * 2 == dst_width + 1); + assert(src_width == ((dst_width + 1) / 2)); #ifdef HAS_SCALEROWUP2LINEAR_SSE2 if (TestCpuFlag(kCpuHasSSE2)) { @@ -1396,7 +1396,7 @@ void ScalePlaneUp2_Linear(int src_width, } } -// Scale plane, 2 times. +// Scale plane, up by 2 times. // This is an optimized version for scaling up a plane to 2 times of // its original size, using bilinear interpolation. // This is used to scale U and V planes of I420 to I444. @@ -1414,7 +1414,7 @@ void ScalePlaneUp2_Bilinear(int src_width, int x; // This function can only scale up by 2 times. - assert(src_width * 2 == dst_width || src_width * 2 == dst_width + 1); + assert(src_width == ((dst_width + 1) / 2)); assert(src_height * 2 == dst_height || src_height * 2 == dst_height + 1); #ifdef HAS_SCALEROWUP2LINEAR_SSE2 @@ -1449,7 +1449,7 @@ void ScalePlaneUp2_Bilinear(int src_width, for (x = 0; x < src_height - 1; ++x) { Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width); src_ptr += src_stride; - // TODO test performance of writing one row of destination at a time + // TODO: Test performance of writing one row of destination at a time. dst_ptr += 2 * dst_stride; } if (!(dst_height & 1)) { @@ -1458,7 +1458,7 @@ void ScalePlaneUp2_Bilinear(int src_width, } } -// Scale at most 14bit plane, horizontally 2 times. 
+// Scale at most 14 bit plane, horizontally up by 2 times. // This is an optimized version for scaling up a plane to 2 times of // its original width, using linear interpolation. // stride is in count of uint16_t. @@ -1478,7 +1478,7 @@ void ScalePlaneUp2_16_Linear(int src_width, int dy; // This function can only scale up by 2 times horizontally. - assert(src_width * 2 == dst_width || src_width * 2 == dst_width + 1); + assert(src_width == ((dst_width + 1) / 2)); #ifdef HAS_SCALEROWUP2LINEAR_SSE2 if (TestCpuFlag(kCpuHasSSE2)) { @@ -1512,7 +1512,7 @@ void ScalePlaneUp2_16_Linear(int src_width, } } -// Scale at most 12bit plane, up 2 times. +// Scale at most 12 bit plane, up by 2 times. // This is an optimized version for scaling up a plane to 2 times of // its original size, using bilinear interpolation. // stride is in count of uint16_t. @@ -1531,7 +1531,7 @@ void ScalePlaneUp2_16_Bilinear(int src_width, int x; // This function can only scale up by 2 times. - assert(src_width * 2 == dst_width || src_width * 2 == dst_width + 1); + assert(src_width == ((dst_width + 1) / 2)); assert(src_height * 2 == dst_height || src_height * 2 == dst_height + 1); #ifdef HAS_SCALEROWUP2LINEAR_SSE2 diff --git a/source/scale_any.cc b/source/scale_any.cc index 5fd27ae6..79394985 100644 --- a/source/scale_any.cc +++ b/source/scale_any.cc @@ -625,7 +625,7 @@ CANY(ScaleARGBFilterCols_Any_MSA, dst_ptr[dst_width - 1] = src_ptr[(dst_width / 2) - 1]; \ } -// Even the C version need to be wrapped, because boundary pixels have to +// Even the C versions need to be wrapped, because boundary pixels have to // be handled differently SUH2LANY(ScaleRowUp2_Linear_Any_C, diff --git a/source/scale_common.cc b/source/scale_common.cc index f53e2de9..8d41c03d 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -400,7 +400,7 @@ void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr, } } -// sample position: (O is src sample position, X is dst sample position) +// Sample position: (O is src sample position, X is dst sample position) // // v dst_ptr at here v stop at here // X O X X O X X O X X O X X O X @@ -417,7 +417,7 @@ void ScaleRowUp2_Linear_C(const uint8_t* src_ptr, } } -// sample position: (O is src sample position, X is dst sample position) +// Sample position: (O is src sample position, X is dst sample position) // // src_ptr at here // X v X X X X X X X X X @@ -451,7 +451,7 @@ void ScaleRowUp2_Bilinear_C(const uint8_t* src_ptr, } } -// only suitable for at most 14bit range. +// Only suitable for at most 14 bit range. 
void ScaleRowUp2_Linear_16_C(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) { diff --git a/source/scale_gcc.cc b/source/scale_gcc.cc index cfbbba98..db3c9687 100644 --- a/source/scale_gcc.cc +++ b/source/scale_gcc.cc @@ -197,7 +197,6 @@ void ScaleRowDown2_AVX2(const uint8_t* src_ptr, int dst_width) { (void)src_stride; asm volatile( - LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" @@ -485,7 +484,6 @@ void ScaleRowDown34_SSSE3(const uint8_t* src_ptr, "m"(kShuf2) // %2 ); asm volatile( - LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" @@ -532,7 +530,6 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, "m"(kRound34) // %2 ); asm volatile( - LABELALIGN "1: \n" "movdqu (%0),%%xmm6 \n" @@ -599,7 +596,6 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, ); asm volatile( - LABELALIGN "1: \n" "movdqu (%0),%%xmm6 \n" @@ -692,7 +688,6 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, "m"(kScaleAb2) // %3 ); asm volatile( - LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" @@ -736,7 +731,6 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, "m"(kScaleAc33) // %2 ); asm volatile( - LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" @@ -790,7 +784,6 @@ void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) { asm volatile( - "pxor %%xmm0,%%xmm0 \n" // 0 "pcmpeqw %%xmm6,%%xmm6 \n" "psrlw $15,%%xmm6 \n" @@ -847,7 +840,6 @@ void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr, ptrdiff_t dst_stride, int dst_width) { asm volatile( - LABELALIGN "1: \n" "pxor %%xmm0,%%xmm0 \n" // 0 @@ -962,7 +954,6 @@ void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) { asm volatile( - "pxor %%xmm0,%%xmm0 \n" // 0 "pcmpeqw %%xmm6,%%xmm6 \n" "psrlw $15,%%xmm6 \n" @@ -1015,7 +1006,6 @@ void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr, ptrdiff_t dst_stride, int dst_width) { asm volatile( - "pxor %%xmm0,%%xmm0 \n" // 0 "pcmpeqw %%xmm7,%%xmm7 \n" "psrlw $15,%%xmm7 \n" @@ -1124,29 +1114,28 @@ void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) { asm volatile( - - "pcmpeqw %%xmm4,%%xmm4 \n" - "psrlw $15,%%xmm4 \n" - "psllw $1,%%xmm4 \n" // all 2 - "movdqu %3,%%xmm3 \n" + "pcmpeqw %%xmm4,%%xmm4 \n" + "psrlw $15,%%xmm4 \n" + "psllw $1,%%xmm4 \n" // all 2 + "movdqu %3,%%xmm3 \n" LABELALIGN "1: \n" - "movq (%0),%%xmm0 \n" // 01234567 - "movq 1(%0),%%xmm1 \n" // 12345678 - "punpcklwd %%xmm0,%%xmm0 \n" // 0101232345456767 - "punpcklwd %%xmm1,%%xmm1 \n" // 1212343456567878 - "movdqa %%xmm0,%%xmm2 \n" - "punpckhdq %%xmm1,%%xmm2 \n" // 4545565667677878 - "punpckldq %%xmm1,%%xmm0 \n" // 0101121223233434 - "pmaddubsw %%xmm3,%%xmm2 \n" // 3*near+far (hi) - "pmaddubsw %%xmm3,%%xmm0 \n" // 3*near+far (lo) - "paddw %%xmm4,%%xmm0 \n" // 3*near+far+2 (lo) - "paddw %%xmm4,%%xmm2 \n" // 3*near+far+2 (hi) - "psrlw $2,%%xmm0 \n" // 3/4*near+1/4*far (lo) - "psrlw $2,%%xmm2 \n" // 3/4*near+1/4*far (hi) - "vpackuswb %%xmm2,%%xmm0,%%xmm0 \n" - "vmovdqu %%xmm0,(%1) \n" + "movq (%0),%%xmm0 \n" // 01234567 + "movq 1(%0),%%xmm1 \n" // 12345678 + "punpcklwd %%xmm0,%%xmm0 \n" // 0101232345456767 + "punpcklwd %%xmm1,%%xmm1 \n" // 1212343456567878 + "movdqa %%xmm0,%%xmm2 \n" + "punpckhdq %%xmm1,%%xmm2 \n" // 4545565667677878 + "punpckldq %%xmm1,%%xmm0 \n" // 0101121223233434 + "pmaddubsw %%xmm3,%%xmm2 \n" // 3*near+far (hi) + "pmaddubsw %%xmm3,%%xmm0 \n" // 3*near+far (lo) + "paddw %%xmm4,%%xmm0 \n" // 3*near+far+2 (lo) + "paddw %%xmm4,%%xmm2 \n" // 3*near+far+2 (hi) + "psrlw $2,%%xmm0 \n" // 3/4*near+1/4*far (lo) + "psrlw $2,%%xmm2 
\n" // 3/4*near+1/4*far (hi) + "vpackuswb %%xmm2,%%xmm0,%%xmm0 \n" + "vmovdqu %%xmm0,(%1) \n" "lea 0x8(%0),%0 \n" "lea 0x10(%1),%1 \n" // 8 sample to 16 sample @@ -1167,76 +1156,75 @@ void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr, ptrdiff_t dst_stride, int dst_width) { asm volatile( - - "pcmpeqw %%xmm6,%%xmm6 \n" - "psrlw $15,%%xmm6 \n" - "psllw $3,%%xmm6 \n" // all 8 - "movdqu %5,%%xmm7 \n" + "pcmpeqw %%xmm6,%%xmm6 \n" + "psrlw $15,%%xmm6 \n" + "psllw $3,%%xmm6 \n" // all 8 + "movdqu %5,%%xmm7 \n" LABELALIGN "1: \n" - "movq (%0),%%xmm0 \n" // 01234567 - "movq 1(%0),%%xmm1 \n" // 12345678 - "punpcklwd %%xmm0,%%xmm0 \n" // 0101232345456767 - "punpcklwd %%xmm1,%%xmm1 \n" // 1212343456567878 - "movdqa %%xmm0,%%xmm2 \n" - "punpckhdq %%xmm1,%%xmm2 \n" // 4545565667677878 - "punpckldq %%xmm1,%%xmm0 \n" // 0101121223233434 - "pmaddubsw %%xmm7,%%xmm2 \n" // 3*near+far (1, hi) - "pmaddubsw %%xmm7,%%xmm0 \n" // 3*near+far (1, lo) - - "movq (%0,%3),%%xmm1 \n" - "movq 1(%0,%3),%%xmm4 \n" - "punpcklwd %%xmm1,%%xmm1 \n" - "punpcklwd %%xmm4,%%xmm4 \n" - "movdqa %%xmm1,%%xmm3 \n" - "punpckhdq %%xmm4,%%xmm3 \n" - "punpckldq %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm7,%%xmm3 \n" // 3*near+far (2, hi) - "pmaddubsw %%xmm7,%%xmm1 \n" // 3*near+far (2, lo) + "movq (%0),%%xmm0 \n" // 01234567 + "movq 1(%0),%%xmm1 \n" // 12345678 + "punpcklwd %%xmm0,%%xmm0 \n" // 0101232345456767 + "punpcklwd %%xmm1,%%xmm1 \n" // 1212343456567878 + "movdqa %%xmm0,%%xmm2 \n" + "punpckhdq %%xmm1,%%xmm2 \n" // 4545565667677878 + "punpckldq %%xmm1,%%xmm0 \n" // 0101121223233434 + "pmaddubsw %%xmm7,%%xmm2 \n" // 3*near+far (1, hi) + "pmaddubsw %%xmm7,%%xmm0 \n" // 3*near+far (1, lo) + + "movq (%0,%3),%%xmm1 \n" + "movq 1(%0,%3),%%xmm4 \n" + "punpcklwd %%xmm1,%%xmm1 \n" + "punpcklwd %%xmm4,%%xmm4 \n" + "movdqa %%xmm1,%%xmm3 \n" + "punpckhdq %%xmm4,%%xmm3 \n" + "punpckldq %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm7,%%xmm3 \n" // 3*near+far (2, hi) + "pmaddubsw %%xmm7,%%xmm1 \n" // 3*near+far (2, lo) // xmm0 xmm2 // xmm1 xmm3 - "movdqa %%xmm0,%%xmm4 \n" - "movdqa %%xmm1,%%xmm5 \n" - "paddw %%xmm0,%%xmm4 \n" // 6*near+2*far (1, lo) - "paddw %%xmm6,%%xmm5 \n" // 3*near+far+8 (2, lo) - "paddw %%xmm0,%%xmm4 \n" // 9*near+3*far (1, lo) - "paddw %%xmm5,%%xmm4 \n" // 9 3 3 1 + 8 (1, lo) - "psrlw $4,%%xmm4 \n" // ^ div by 16 (1, lo) - - "movdqa %%xmm1,%%xmm5 \n" - "paddw %%xmm1,%%xmm5 \n" // 6*near+2*far (2, lo) - "paddw %%xmm6,%%xmm0 \n" // 3*near+far+8 (1, lo) - "paddw %%xmm1,%%xmm5 \n" // 9*near+3*far (2, lo) - "paddw %%xmm0,%%xmm5 \n" // 9 3 3 1 + 8 (2, lo) - "psrlw $4,%%xmm5 \n" // ^ div by 16 (2, lo) - - "movdqa %%xmm2,%%xmm0 \n" - "movdqa %%xmm3,%%xmm1 \n" - "paddw %%xmm2,%%xmm0 \n" // 6*near+2*far (1, hi) - "paddw %%xmm6,%%xmm1 \n" // 3*near+far+8 (2, hi) - "paddw %%xmm2,%%xmm0 \n" // 9*near+3*far (1, hi) - "paddw %%xmm1,%%xmm0 \n" // 9 3 3 1 + 8 (1, hi) - "psrlw $4,%%xmm0 \n" // ^ div by 16 (1, hi) - - "movdqa %%xmm3,%%xmm1 \n" - "paddw %%xmm3,%%xmm1 \n" // 6*near+2*far (2, hi) - "paddw %%xmm6,%%xmm2 \n" // 3*near+far+8 (1, hi) - "paddw %%xmm3,%%xmm1 \n" // 9*near+3*far (2, hi) - "paddw %%xmm2,%%xmm1 \n" // 9 3 3 1 + 8 (2, hi) - "psrlw $4,%%xmm1 \n" // ^ div by 16 (2, hi) - - "packuswb %%xmm0,%%xmm4 \n" - "movdqu %%xmm4,(%1) \n" // store above - "packuswb %%xmm1,%%xmm5 \n" - "movdqu %%xmm5,(%1,%4) \n" // store below - - "lea 0x8(%0),%0 \n" - "lea 0x10(%1),%1 \n" // 8 sample to 16 sample - "sub $0x10,%2 \n" - "jg 1b \n" + "movdqa %%xmm0,%%xmm4 \n" + "movdqa %%xmm1,%%xmm5 \n" + "paddw %%xmm0,%%xmm4 \n" // 6*near+2*far (1, lo) + "paddw 
%%xmm6,%%xmm5 \n" // 3*near+far+8 (2, lo) + "paddw %%xmm0,%%xmm4 \n" // 9*near+3*far (1, lo) + "paddw %%xmm5,%%xmm4 \n" // 9 3 3 1 + 8 (1, lo) + "psrlw $4,%%xmm4 \n" // ^ div by 16 (1, lo) + + "movdqa %%xmm1,%%xmm5 \n" + "paddw %%xmm1,%%xmm5 \n" // 6*near+2*far (2, lo) + "paddw %%xmm6,%%xmm0 \n" // 3*near+far+8 (1, lo) + "paddw %%xmm1,%%xmm5 \n" // 9*near+3*far (2, lo) + "paddw %%xmm0,%%xmm5 \n" // 9 3 3 1 + 8 (2, lo) + "psrlw $4,%%xmm5 \n" // ^ div by 16 (2, lo) + + "movdqa %%xmm2,%%xmm0 \n" + "movdqa %%xmm3,%%xmm1 \n" + "paddw %%xmm2,%%xmm0 \n" // 6*near+2*far (1, hi) + "paddw %%xmm6,%%xmm1 \n" // 3*near+far+8 (2, hi) + "paddw %%xmm2,%%xmm0 \n" // 9*near+3*far (1, hi) + "paddw %%xmm1,%%xmm0 \n" // 9 3 3 1 + 8 (1, hi) + "psrlw $4,%%xmm0 \n" // ^ div by 16 (1, hi) + + "movdqa %%xmm3,%%xmm1 \n" + "paddw %%xmm3,%%xmm1 \n" // 6*near+2*far (2, hi) + "paddw %%xmm6,%%xmm2 \n" // 3*near+far+8 (1, hi) + "paddw %%xmm3,%%xmm1 \n" // 9*near+3*far (2, hi) + "paddw %%xmm2,%%xmm1 \n" // 9 3 3 1 + 8 (2, hi) + "psrlw $4,%%xmm1 \n" // ^ div by 16 (2, hi) + + "packuswb %%xmm0,%%xmm4 \n" + "movdqu %%xmm4,(%1) \n" // store above + "packuswb %%xmm1,%%xmm5 \n" + "movdqu %%xmm5,(%1,%4) \n" // store below + + "lea 0x8(%0),%0 \n" + "lea 0x10(%1),%1 \n" // 8 sample to 16 sample + "sub $0x10,%2 \n" + "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 @@ -1257,30 +1245,29 @@ void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) { asm volatile( - - "vpcmpeqw %%ymm4,%%ymm4,%%ymm4 \n" - "vpsrlw $15,%%ymm4,%%ymm4 \n" - "vpsllw $1,%%ymm4,%%ymm4 \n" // all 2 - "vmovdqu %3,%%ymm3 \n" + "vpcmpeqw %%ymm4,%%ymm4,%%ymm4 \n" + "vpsrlw $15,%%ymm4,%%ymm4 \n" + "vpsllw $1,%%ymm4,%%ymm4 \n" // all 2 + "vmovdqu %3,%%ymm3 \n" LABELALIGN "1: \n" - "vmovdqu (%0),%%xmm0 \n" // 0123456789ABCDEF - "vmovdqu 1(%0),%%xmm1 \n" // 123456789ABCDEF0 - "vpermq $0b11011000,%%ymm0,%%ymm0 \n" - "vpermq $0b11011000,%%ymm1,%%ymm1 \n" - "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" - "vpunpcklwd %%ymm1,%%ymm1,%%ymm1 \n" - "vpunpckhdq %%ymm1,%%ymm0,%%ymm2 \n" - "vpunpckldq %%ymm1,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm3,%%ymm2,%%ymm1 \n" // 3*near+far (hi) - "vpmaddubsw %%ymm3,%%ymm0,%%ymm0 \n" // 3*near+far (lo) - "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" // 3*near+far+2 (lo) - "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" // 3*near+far+2 (hi) - "vpsrlw $2,%%ymm0,%%ymm0 \n" // 3/4*near+1/4*far (lo) - "vpsrlw $2,%%ymm1,%%ymm1 \n" // 3/4*near+1/4*far (hi) - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0,(%1) \n" + "vmovdqu (%0),%%xmm0 \n" // 0123456789ABCDEF + "vmovdqu 1(%0),%%xmm1 \n" // 123456789ABCDEF0 + "vpermq $0b11011000,%%ymm0,%%ymm0 \n" + "vpermq $0b11011000,%%ymm1,%%ymm1 \n" + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" + "vpunpcklwd %%ymm1,%%ymm1,%%ymm1 \n" + "vpunpckhdq %%ymm1,%%ymm0,%%ymm2 \n" + "vpunpckldq %%ymm1,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm3,%%ymm2,%%ymm1 \n" // 3*near+far (hi) + "vpmaddubsw %%ymm3,%%ymm0,%%ymm0 \n" // 3*near+far (lo) + "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" // 3*near+far+2 (lo) + "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" // 3*near+far+2 (hi) + "vpsrlw $2,%%ymm0,%%ymm0 \n" // 3/4*near+1/4*far (lo) + "vpsrlw $2,%%ymm1,%%ymm1 \n" // 3/4*near+1/4*far (hi) + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" "lea 0x10(%0),%0 \n" "lea 0x20(%1),%1 \n" // 16 sample to 32 sample @@ -1301,72 +1288,71 @@ void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr, ptrdiff_t dst_stride, int dst_width) { asm volatile( - - "vpcmpeqw %%ymm6,%%ymm6,%%ymm6 \n" - "vpsrlw $15,%%ymm6,%%ymm6 \n" - "vpsllw 
$3,%%ymm6,%%ymm6 \n" // all 8 - "vmovdqu %5,%%ymm7 \n" + "vpcmpeqw %%ymm6,%%ymm6,%%ymm6 \n" + "vpsrlw $15,%%ymm6,%%ymm6 \n" + "vpsllw $3,%%ymm6,%%ymm6 \n" // all 8 + "vmovdqu %5,%%ymm7 \n" LABELALIGN "1: \n" - "vmovdqu (%0),%%xmm0 \n" // 0123456789ABCDEF - "vmovdqu 1(%0),%%xmm1 \n" // 123456789ABCDEF0 - "vpermq $0b11011000,%%ymm0,%%ymm0 \n" - "vpermq $0b11011000,%%ymm1,%%ymm1 \n" - "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" - "vpunpcklwd %%ymm1,%%ymm1,%%ymm1 \n" - "vpunpckhdq %%ymm1,%%ymm0,%%ymm2 \n" - "vpunpckldq %%ymm1,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm7,%%ymm2,%%ymm1 \n" // 3*near+far (1, hi) - "vpmaddubsw %%ymm7,%%ymm0,%%ymm0 \n" // 3*near+far (1, lo) - - "vmovdqu (%0,%3),%%xmm2 \n" // 0123456789ABCDEF - "vmovdqu 1(%0,%3),%%xmm3 \n" // 123456789ABCDEF0 - "vpermq $0b11011000,%%ymm2,%%ymm2 \n" - "vpermq $0b11011000,%%ymm3,%%ymm3 \n" - "vpunpcklwd %%ymm2,%%ymm2,%%ymm2 \n" - "vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" - "vpunpckhdq %%ymm3,%%ymm2,%%ymm4 \n" - "vpunpckldq %%ymm3,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm7,%%ymm4,%%ymm3 \n" // 3*near+far (2, hi) - "vpmaddubsw %%ymm7,%%ymm2,%%ymm2 \n" // 3*near+far (2, lo) + "vmovdqu (%0),%%xmm0 \n" // 0123456789ABCDEF + "vmovdqu 1(%0),%%xmm1 \n" // 123456789ABCDEF0 + "vpermq $0b11011000,%%ymm0,%%ymm0 \n" + "vpermq $0b11011000,%%ymm1,%%ymm1 \n" + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" + "vpunpcklwd %%ymm1,%%ymm1,%%ymm1 \n" + "vpunpckhdq %%ymm1,%%ymm0,%%ymm2 \n" + "vpunpckldq %%ymm1,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm7,%%ymm2,%%ymm1 \n" // 3*near+far (1, hi) + "vpmaddubsw %%ymm7,%%ymm0,%%ymm0 \n" // 3*near+far (1, lo) + + "vmovdqu (%0,%3),%%xmm2 \n" // 0123456789ABCDEF + "vmovdqu 1(%0,%3),%%xmm3 \n" // 123456789ABCDEF0 + "vpermq $0b11011000,%%ymm2,%%ymm2 \n" + "vpermq $0b11011000,%%ymm3,%%ymm3 \n" + "vpunpcklwd %%ymm2,%%ymm2,%%ymm2 \n" + "vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" + "vpunpckhdq %%ymm3,%%ymm2,%%ymm4 \n" + "vpunpckldq %%ymm3,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm7,%%ymm4,%%ymm3 \n" // 3*near+far (2, hi) + "vpmaddubsw %%ymm7,%%ymm2,%%ymm2 \n" // 3*near+far (2, lo) // ymm0 ymm1 // ymm2 ymm3 - "vpaddw %%ymm0,%%ymm0,%%ymm4 \n" // 6*near+2*far (1, lo) - "vpaddw %%ymm6,%%ymm2,%%ymm5 \n" // 3*near+far+8 (2, lo) - "vpaddw %%ymm4,%%ymm0,%%ymm4 \n" // 9*near+3*far (1, lo) - "vpaddw %%ymm4,%%ymm5,%%ymm4 \n" // 9 3 3 1 + 8 (1, lo) - "vpsrlw $4,%%ymm4,%%ymm4 \n" // ^ div by 16 (1, lo) - - "vpaddw %%ymm2,%%ymm2,%%ymm5 \n" // 6*near+2*far (2, lo) - "vpaddw %%ymm6,%%ymm0,%%ymm0 \n" // 3*near+far+8 (1, lo) - "vpaddw %%ymm5,%%ymm2,%%ymm5 \n" // 9*near+3*far (2, lo) - "vpaddw %%ymm5,%%ymm0,%%ymm5 \n" // 9 3 3 1 + 8 (2, lo) - "vpsrlw $4,%%ymm5,%%ymm5 \n" // ^ div by 16 (2, lo) - - "vpaddw %%ymm1,%%ymm1,%%ymm0 \n" // 6*near+2*far (1, hi) - "vpaddw %%ymm6,%%ymm3,%%ymm2 \n" // 3*near+far+8 (2, hi) - "vpaddw %%ymm0,%%ymm1,%%ymm0 \n" // 9*near+3*far (1, hi) - "vpaddw %%ymm0,%%ymm2,%%ymm0 \n" // 9 3 3 1 + 8 (1, hi) - "vpsrlw $4,%%ymm0,%%ymm0 \n" // ^ div by 16 (1, hi) - - "vpaddw %%ymm3,%%ymm3,%%ymm2 \n" // 6*near+2*far (2, hi) - "vpaddw %%ymm6,%%ymm1,%%ymm1 \n" // 3*near+far+8 (1, hi) - "vpaddw %%ymm2,%%ymm3,%%ymm2 \n" // 9*near+3*far (2, hi) - "vpaddw %%ymm2,%%ymm1,%%ymm2 \n" // 9 3 3 1 + 8 (2, hi) - "vpsrlw $4,%%ymm2,%%ymm2 \n" // ^ div by 16 (2, hi) - - "vpackuswb %%ymm0,%%ymm4,%%ymm4 \n" - "vmovdqu %%ymm4,(%1) \n" // store above - "vpackuswb %%ymm2,%%ymm5,%%ymm5 \n" - "vmovdqu %%ymm5,(%1,%4) \n" // store below - - "lea 0x10(%0),%0 \n" - "lea 0x20(%1),%1 \n" // 16 sample to 32 sample - "sub $0x20,%2 \n" - "jg 1b \n" + "vpaddw %%ymm0,%%ymm0,%%ymm4 \n" // 6*near+2*far (1, lo) 
+ "vpaddw %%ymm6,%%ymm2,%%ymm5 \n" // 3*near+far+8 (2, lo) + "vpaddw %%ymm4,%%ymm0,%%ymm4 \n" // 9*near+3*far (1, lo) + "vpaddw %%ymm4,%%ymm5,%%ymm4 \n" // 9 3 3 1 + 8 (1, lo) + "vpsrlw $4,%%ymm4,%%ymm4 \n" // ^ div by 16 (1, lo) + + "vpaddw %%ymm2,%%ymm2,%%ymm5 \n" // 6*near+2*far (2, lo) + "vpaddw %%ymm6,%%ymm0,%%ymm0 \n" // 3*near+far+8 (1, lo) + "vpaddw %%ymm5,%%ymm2,%%ymm5 \n" // 9*near+3*far (2, lo) + "vpaddw %%ymm5,%%ymm0,%%ymm5 \n" // 9 3 3 1 + 8 (2, lo) + "vpsrlw $4,%%ymm5,%%ymm5 \n" // ^ div by 16 (2, lo) + + "vpaddw %%ymm1,%%ymm1,%%ymm0 \n" // 6*near+2*far (1, hi) + "vpaddw %%ymm6,%%ymm3,%%ymm2 \n" // 3*near+far+8 (2, hi) + "vpaddw %%ymm0,%%ymm1,%%ymm0 \n" // 9*near+3*far (1, hi) + "vpaddw %%ymm0,%%ymm2,%%ymm0 \n" // 9 3 3 1 + 8 (1, hi) + "vpsrlw $4,%%ymm0,%%ymm0 \n" // ^ div by 16 (1, hi) + + "vpaddw %%ymm3,%%ymm3,%%ymm2 \n" // 6*near+2*far (2, hi) + "vpaddw %%ymm6,%%ymm1,%%ymm1 \n" // 3*near+far+8 (1, hi) + "vpaddw %%ymm2,%%ymm3,%%ymm2 \n" // 9*near+3*far (2, hi) + "vpaddw %%ymm2,%%ymm1,%%ymm2 \n" // 9 3 3 1 + 8 (2, hi) + "vpsrlw $4,%%ymm2,%%ymm2 \n" // ^ div by 16 (2, hi) + + "vpackuswb %%ymm0,%%ymm4,%%ymm4 \n" + "vmovdqu %%ymm4,(%1) \n" // store above + "vpackuswb %%ymm2,%%ymm5,%%ymm5 \n" + "vmovdqu %%ymm5,(%1,%4) \n" // store below + + "lea 0x10(%0),%0 \n" + "lea 0x20(%1),%1 \n" // 16 sample to 32 sample + "sub $0x20,%2 \n" + "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 @@ -1386,35 +1372,34 @@ void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) { asm volatile( - - "vmovdqu %3,%%ymm3 \n" - "vpcmpeqw %%ymm4,%%ymm4,%%ymm4 \n" - "vpsrlw $15,%%ymm4,%%ymm4 \n" - "vpsllw $1,%%ymm4,%%ymm4 \n" // all 2 + "vmovdqu %3,%%ymm3 \n" + "vpcmpeqw %%ymm4,%%ymm4,%%ymm4 \n" + "vpsrlw $15,%%ymm4,%%ymm4 \n" + "vpsllw $1,%%ymm4,%%ymm4 \n" // all 2 LABELALIGN "1: \n" - "vmovdqu (%0),%%xmm0 \n" // 01234567 (16b) - "vmovdqu 2(%0),%%xmm1 \n" // 12345678 (16b) - - "vpermq $0b11011000,%%ymm0,%%ymm0 \n" // 0123000045670000 - "vpermq $0b11011000,%%ymm1,%%ymm1 \n" // 1234000056780000 - - "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" // 0101232345456767 - "vpunpckldq %%ymm1,%%ymm1,%%ymm1 \n" // 1212343456567878 - "vpunpckhqdq %%ymm1,%%ymm0,%%ymm2 \n" // 2323343467677878 - "vpunpcklqdq %%ymm1,%%ymm0,%%ymm1 \n" // 0101121245455656 - "vpmaddwd %%ymm3,%%ymm1,%%ymm0 \n" // 3*near+far (lo) - "vpmaddwd %%ymm3,%%ymm2,%%ymm1 \n" // 3*near+far (hi) - "vpackssdw %%ymm1,%%ymm0,%%ymm0 \n" // 3*near+far - "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" // 3*near+far+2 - "vpsrlw $2,%%ymm0,%%ymm0 \n" // 3/4*near+1/4*far - "vmovdqu %%ymm0,(%1) \n" - - "lea 0x10(%0),%0 \n" - "lea 0x20(%1),%1 \n" // 8 sample to 16 sample - "sub $0x10,%2 \n" - "jg 1b \n" + "vmovdqu (%0),%%xmm0 \n" // 01234567 (16b) + "vmovdqu 2(%0),%%xmm1 \n" // 12345678 (16b) + + "vpermq $0b11011000,%%ymm0,%%ymm0 \n" // 0123000045670000 + "vpermq $0b11011000,%%ymm1,%%ymm1 \n" // 1234000056780000 + + "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" // 0101232345456767 + "vpunpckldq %%ymm1,%%ymm1,%%ymm1 \n" // 1212343456567878 + "vpunpckhqdq %%ymm1,%%ymm0,%%ymm2 \n" // 2323343467677878 + "vpunpcklqdq %%ymm1,%%ymm0,%%ymm1 \n" // 0101121245455656 + "vpmaddwd %%ymm3,%%ymm1,%%ymm0 \n" // 3*near+far (lo) + "vpmaddwd %%ymm3,%%ymm2,%%ymm1 \n" // 3*near+far (hi) + "vpackssdw %%ymm1,%%ymm0,%%ymm0 \n" // 3*near+far + "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" // 3*near+far+2 + "vpsrlw $2,%%ymm0,%%ymm0 \n" // 3/4*near+1/4*far + "vmovdqu %%ymm0,(%1) \n" + + "lea 0x10(%0),%0 \n" + "lea 0x20(%1),%1 \n" // 8 sample to 16 sample + "sub $0x10,%2 \n" + 
"jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 @@ -1427,37 +1412,36 @@ void ScaleRowUp2_Linear_16_AVX2_Full(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) { asm volatile( - - "vmovdqu %3,%%ymm3 \n" - "vpcmpeqd %%ymm4,%%ymm4,%%ymm4 \n" - "vpsrld $31,%%ymm4,%%ymm4 \n" - "vpslld $1,%%ymm4,%%ymm4 \n" // all 2 + "vmovdqu %3,%%ymm3 \n" + "vpcmpeqd %%ymm4,%%ymm4,%%ymm4 \n" + "vpsrld $31,%%ymm4,%%ymm4 \n" + "vpslld $1,%%ymm4,%%ymm4 \n" // all 2 LABELALIGN "1: \n" - "vmovdqu (%0),%%xmm0 \n" // 01234567 (16b) - "vmovdqu 2(%0),%%xmm1 \n" // 12345678 (16b) - - "vpermq $0b11011000,%%ymm0,%%ymm0 \n" // 0123000045670000 - "vpermq $0b11011000,%%ymm1,%%ymm1 \n" // 1234000056780000 - - "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" // 0101232345456767 - "vpunpckldq %%ymm1,%%ymm1,%%ymm1 \n" // 1212343456567878 - "vpunpckhqdq %%ymm1,%%ymm0,%%ymm2 \n" // 2323343467677878 - "vpunpcklqdq %%ymm1,%%ymm0,%%ymm1 \n" // 0101121245455656 - "vpmaddwd %%ymm3,%%ymm1,%%ymm0 \n" // 3*near+far (lo) - "vpmaddwd %%ymm3,%%ymm2,%%ymm1 \n" // 3*near+far (hi) - "vpaddd %%ymm4,%%ymm0,%%ymm0 \n" // 3*near+far+2 (lo) - "vpaddd %%ymm4,%%ymm1,%%ymm1 \n" // 3*near+far+2 (hi) - "vpsrad $2,%%ymm0,%%ymm0 \n" // 3/4*near+1/4*far (lo) - "vpsrad $2,%%ymm1,%%ymm1 \n" // 3/4*near+1/4*far (hi) - "vpackssdw %%ymm1,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0,(%1) \n" - - "lea 0x10(%0),%0 \n" - "lea 0x20(%1),%1 \n" // 8 sample to 16 sample - "sub $0x10,%2 \n" - "jg 1b \n" + "vmovdqu (%0),%%xmm0 \n" // 01234567 (16b) + "vmovdqu 2(%0),%%xmm1 \n" // 12345678 (16b) + + "vpermq $0b11011000,%%ymm0,%%ymm0 \n" // 0123000045670000 + "vpermq $0b11011000,%%ymm1,%%ymm1 \n" // 1234000056780000 + + "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" // 0101232345456767 + "vpunpckldq %%ymm1,%%ymm1,%%ymm1 \n" // 1212343456567878 + "vpunpckhqdq %%ymm1,%%ymm0,%%ymm2 \n" // 2323343467677878 + "vpunpcklqdq %%ymm1,%%ymm0,%%ymm1 \n" // 0101121245455656 + "vpmaddwd %%ymm3,%%ymm1,%%ymm0 \n" // 3*near+far (lo) + "vpmaddwd %%ymm3,%%ymm2,%%ymm1 \n" // 3*near+far (hi) + "vpaddd %%ymm4,%%ymm0,%%ymm0 \n" // 3*near+far+2 (lo) + "vpaddd %%ymm4,%%ymm1,%%ymm1 \n" // 3*near+far+2 (hi) + "vpsrad $2,%%ymm0,%%ymm0 \n" // 3/4*near+1/4*far (lo) + "vpsrad $2,%%ymm1,%%ymm1 \n" // 3/4*near+1/4*far (hi) + "vpackssdw %%ymm1,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + + "lea 0x10(%0),%0 \n" + "lea 0x20(%1),%1 \n" // 8 sample to 16 sample + "sub $0x10,%2 \n" + "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 @@ -1473,57 +1457,56 @@ void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr, ptrdiff_t dst_stride, int dst_width) { asm volatile( - - "vmovdqu %5,%%ymm5 \n" - "vpcmpeqw %%ymm4,%%ymm4,%%ymm4 \n" - "vpsrlw $15,%%ymm4,%%ymm4 \n" - "vpsllw $3,%%ymm4,%%ymm4 \n" // all 8 + "vmovdqu %5,%%ymm5 \n" + "vpcmpeqw %%ymm4,%%ymm4,%%ymm4 \n" + "vpsrlw $15,%%ymm4,%%ymm4 \n" + "vpsllw $3,%%ymm4,%%ymm4 \n" // all 8 LABELALIGN "1: \n" - "vmovdqu (%0),%%xmm0 \n" // 01234567 (16b) - "vmovdqu 2(%0),%%xmm1 \n" // 12345678 (16b) - "vpermq $0b11011000,%%ymm0,%%ymm0 \n" // 0123000045670000 - "vpermq $0b11011000,%%ymm1,%%ymm1 \n" // 1234000056780000 - "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" // 0101232345456767 - "vpunpckldq %%ymm1,%%ymm1,%%ymm1 \n" // 1212343456567878 - "vpunpckhqdq %%ymm1,%%ymm0,%%ymm2 \n" // 2323343467677878 - "vpunpcklqdq %%ymm1,%%ymm0,%%ymm1 \n" // 0101121245455656 - "vpmaddwd %%ymm5,%%ymm1,%%ymm0 \n" // 3*near+far (1, lo) - "vpmaddwd %%ymm5,%%ymm2,%%ymm1 \n" // 3*near+far (1, hi) - "vpackssdw %%ymm1,%%ymm0,%%ymm2 \n" // 3*near+far (1) - - "vmovdqu 
(%0,%3,2),%%xmm0 \n" // 01234567 (16b) - "vmovdqu 2(%0,%3,2),%%xmm1 \n" // 12345678 (16b) - "vpermq $0b11011000,%%ymm0,%%ymm0 \n" // 0123000045670000 - "vpermq $0b11011000,%%ymm1,%%ymm1 \n" // 1234000056780000 - "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" // 0101232345456767 - "vpunpckldq %%ymm1,%%ymm1,%%ymm1 \n" // 1212343456567878 - "vpunpckhqdq %%ymm1,%%ymm0,%%ymm3 \n" // 2323343467677878 - "vpunpcklqdq %%ymm1,%%ymm0,%%ymm1 \n" // 0101121245455656 - "vpmaddwd %%ymm5,%%ymm1,%%ymm0 \n" // 3*near+far (2, lo) - "vpmaddwd %%ymm5,%%ymm3,%%ymm1 \n" // 3*near+far (2, hi) - "vpackssdw %%ymm1,%%ymm0,%%ymm3 \n" // 3*near+far (2) - - "vpaddw %%ymm2,%%ymm2,%%ymm0 \n" // 6*near+2*far (1) - "vpaddw %%ymm4,%%ymm3,%%ymm1 \n" // 3*near+far+8 (2) - "vpaddw %%ymm0,%%ymm2,%%ymm0 \n" // 9*near+3*far (1) - "vpaddw %%ymm0,%%ymm1,%%ymm0 \n" // 9 3 3 1 + 8 (1) - "vpsrlw $4,%%ymm0,%%ymm0 \n" // ^ div by 16 - "vmovdqu %%ymm0,(%1) \n" // store above - - "vpaddw %%ymm3,%%ymm3,%%ymm0 \n" // 6*near+2*far (2) - "vpaddw %%ymm4,%%ymm2,%%ymm1 \n" // 3*near+far+8 (1) - "vpaddw %%ymm0,%%ymm3,%%ymm0 \n" // 9*near+3*far (2) - "vpaddw %%ymm0,%%ymm1,%%ymm0 \n" // 9 3 3 1 + 8 (2) - "vpsrlw $4,%%ymm0,%%ymm0 \n" // ^ div by 16 - "vmovdqu %%ymm0,(%1,%4,2) \n" // store below - - "lea 0x10(%0),%0 \n" - "lea 0x20(%1),%1 \n" // 8 sample to 16 sample - "sub $0x10,%2 \n" - "jg 1b \n" + "vmovdqu (%0),%%xmm0 \n" // 01234567 (16b) + "vmovdqu 2(%0),%%xmm1 \n" // 12345678 (16b) + "vpermq $0b11011000,%%ymm0,%%ymm0 \n" // 0123000045670000 + "vpermq $0b11011000,%%ymm1,%%ymm1 \n" // 1234000056780000 + "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" // 0101232345456767 + "vpunpckldq %%ymm1,%%ymm1,%%ymm1 \n" // 1212343456567878 + "vpunpckhqdq %%ymm1,%%ymm0,%%ymm2 \n" // 2323343467677878 + "vpunpcklqdq %%ymm1,%%ymm0,%%ymm1 \n" // 0101121245455656 + "vpmaddwd %%ymm5,%%ymm1,%%ymm0 \n" // 3*near+far (1, lo) + "vpmaddwd %%ymm5,%%ymm2,%%ymm1 \n" // 3*near+far (1, hi) + "vpackssdw %%ymm1,%%ymm0,%%ymm2 \n" // 3*near+far (1) + + "vmovdqu (%0,%3,2),%%xmm0 \n" // 01234567 (16b) + "vmovdqu 2(%0,%3,2),%%xmm1 \n" // 12345678 (16b) + "vpermq $0b11011000,%%ymm0,%%ymm0 \n" // 0123000045670000 + "vpermq $0b11011000,%%ymm1,%%ymm1 \n" // 1234000056780000 + "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" // 0101232345456767 + "vpunpckldq %%ymm1,%%ymm1,%%ymm1 \n" // 1212343456567878 + "vpunpckhqdq %%ymm1,%%ymm0,%%ymm3 \n" // 2323343467677878 + "vpunpcklqdq %%ymm1,%%ymm0,%%ymm1 \n" // 0101121245455656 + "vpmaddwd %%ymm5,%%ymm1,%%ymm0 \n" // 3*near+far (2, lo) + "vpmaddwd %%ymm5,%%ymm3,%%ymm1 \n" // 3*near+far (2, hi) + "vpackssdw %%ymm1,%%ymm0,%%ymm3 \n" // 3*near+far (2) + + "vpaddw %%ymm2,%%ymm2,%%ymm0 \n" // 6*near+2*far (1) + "vpaddw %%ymm4,%%ymm3,%%ymm1 \n" // 3*near+far+8 (2) + "vpaddw %%ymm0,%%ymm2,%%ymm0 \n" // 9*near+3*far (1) + "vpaddw %%ymm0,%%ymm1,%%ymm0 \n" // 9 3 3 1 + 8 (1) + "vpsrlw $4,%%ymm0,%%ymm0 \n" // ^ div by 16 + "vmovdqu %%ymm0,(%1) \n" // store above + + "vpaddw %%ymm3,%%ymm3,%%ymm0 \n" // 6*near+2*far (2) + "vpaddw %%ymm4,%%ymm2,%%ymm1 \n" // 3*near+far+8 (1) + "vpaddw %%ymm0,%%ymm3,%%ymm0 \n" // 9*near+3*far (2) + "vpaddw %%ymm0,%%ymm1,%%ymm0 \n" // 9 3 3 1 + 8 (2) + "vpsrlw $4,%%ymm0,%%ymm0 \n" // ^ div by 16 + "vmovdqu %%ymm0,(%1,%4,2) \n" // store below + + "lea 0x10(%0),%0 \n" + "lea 0x20(%1),%1 \n" // 8 sample to 16 sample + "sub $0x10,%2 \n" + "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 @@ -1540,70 +1523,69 @@ void ScaleRowUp2_Bilinear_16_AVX2_Full(const uint16_t* src_ptr, ptrdiff_t dst_stride, int dst_width) { asm volatile( - - 
"vmovdqu %5,%%ymm7 \n" - "vpcmpeqd %%ymm6,%%ymm6,%%ymm6 \n" - "vpsrld $31,%%ymm6,%%ymm6 \n" - "vpslld $3,%%ymm6,%%ymm6 \n" // all 8 + "vmovdqu %5,%%ymm7 \n" + "vpcmpeqd %%ymm6,%%ymm6,%%ymm6 \n" + "vpsrld $31,%%ymm6,%%ymm6 \n" + "vpslld $3,%%ymm6,%%ymm6 \n" // all 8 LABELALIGN "1: \n" - "vmovdqu (%0),%%xmm0 \n" // 01234567 (16b) - "vmovdqu 2(%0),%%xmm1 \n" // 12345678 (16b) - "vpermq $0b11011000,%%ymm0,%%ymm0 \n" // 0123000045670000 - "vpermq $0b11011000,%%ymm1,%%ymm1 \n" // 1234000056780000 - "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" // 0101232345456767 - "vpunpckldq %%ymm1,%%ymm1,%%ymm1 \n" // 1212343456567878 - "vpunpckhqdq %%ymm1,%%ymm0,%%ymm2 \n" // 2323343467677878 - "vpunpcklqdq %%ymm1,%%ymm0,%%ymm1 \n" // 0101121245455656 - "vpmaddwd %%ymm7,%%ymm1,%%ymm0 \n" // 3*near+far (1, lo) - "vpmaddwd %%ymm7,%%ymm2,%%ymm1 \n" // 3*near+far (1, hi) - - "vmovdqu (%0,%3,2),%%xmm2 \n" // 01234567 (16b) - "vmovdqu 2(%0,%3,2),%%xmm3 \n" // 12345678 (16b) - "vpermq $0b11011000,%%ymm2,%%ymm2 \n" // 0123000045670000 - "vpermq $0b11011000,%%ymm3,%%ymm3 \n" // 1234000056780000 - "vpunpckldq %%ymm2,%%ymm2,%%ymm2 \n" // 0101232345456767 - "vpunpckldq %%ymm3,%%ymm3,%%ymm3 \n" // 1212343456567878 - "vpunpckhqdq %%ymm3,%%ymm2,%%ymm4 \n" // 2323343467677878 - "vpunpcklqdq %%ymm3,%%ymm2,%%ymm3 \n" // 0101121245455656 - "vpmaddwd %%ymm7,%%ymm3,%%ymm2 \n" // 3*near+far (2, lo) - "vpmaddwd %%ymm7,%%ymm4,%%ymm3 \n" // 3*near+far (2, hi) - - "vpaddd %%ymm0,%%ymm0,%%ymm4 \n" // 6*near+2*far (1, lo) - "vpaddd %%ymm6,%%ymm2,%%ymm5 \n" // 3*near+far+8 (2, lo) - "vpaddd %%ymm4,%%ymm0,%%ymm4 \n" // 9*near+3*far (1, lo) - "vpaddd %%ymm4,%%ymm5,%%ymm4 \n" // 9 3 3 1 + 8 (1, lo) - "vpsrad $4,%%ymm4,%%ymm4 \n" // ^ div by 16 (1, lo) - - "vpaddd %%ymm2,%%ymm2,%%ymm5 \n" // 6*near+2*far (2, lo) - "vpaddd %%ymm6,%%ymm0,%%ymm0 \n" // 3*near+far+8 (1, lo) - "vpaddd %%ymm5,%%ymm2,%%ymm5 \n" // 9*near+3*far (2, lo) - "vpaddd %%ymm5,%%ymm0,%%ymm5 \n" // 9 3 3 1 + 8 (2, lo) - "vpsrad $4,%%ymm5,%%ymm5 \n" // ^ div by 16 (2, lo) - - "vpaddd %%ymm1,%%ymm1,%%ymm0 \n" // 6*near+2*far (1, hi) - "vpaddd %%ymm6,%%ymm3,%%ymm2 \n" // 3*near+far+8 (2, hi) - "vpaddd %%ymm0,%%ymm1,%%ymm0 \n" // 9*near+3*far (1, hi) - "vpaddd %%ymm0,%%ymm2,%%ymm0 \n" // 9 3 3 1 + 8 (1, hi) - "vpsrad $4,%%ymm0,%%ymm0 \n" // ^ div by 16 (1, hi) - - "vpaddd %%ymm3,%%ymm3,%%ymm2 \n" // 6*near+2*far (2, hi) - "vpaddd %%ymm6,%%ymm1,%%ymm1 \n" // 3*near+far+8 (1, hi) - "vpaddd %%ymm2,%%ymm3,%%ymm2 \n" // 9*near+3*far (2, hi) - "vpaddd %%ymm2,%%ymm1,%%ymm2 \n" // 9 3 3 1 + 8 (2, hi) - "vpsrad $4,%%ymm2,%%ymm2 \n" // ^ div by 16 (2, hi) - - "vpackssdw %%ymm0,%%ymm4,%%ymm4 \n" - "vmovdqu %%ymm4,(%1) \n" // store above - "vpackssdw %%ymm2,%%ymm5,%%ymm5 \n" - "vmovdqu %%ymm5,(%1,%4,2) \n" // store below - - "lea 0x10(%0),%0 \n" - "lea 0x20(%1),%1 \n" // 8 sample to 16 sample - "sub $0x10,%2 \n" - "jg 1b \n" + "vmovdqu (%0),%%xmm0 \n" // 01234567 (16b) + "vmovdqu 2(%0),%%xmm1 \n" // 12345678 (16b) + "vpermq $0b11011000,%%ymm0,%%ymm0 \n" // 0123000045670000 + "vpermq $0b11011000,%%ymm1,%%ymm1 \n" // 1234000056780000 + "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" // 0101232345456767 + "vpunpckldq %%ymm1,%%ymm1,%%ymm1 \n" // 1212343456567878 + "vpunpckhqdq %%ymm1,%%ymm0,%%ymm2 \n" // 2323343467677878 + "vpunpcklqdq %%ymm1,%%ymm0,%%ymm1 \n" // 0101121245455656 + "vpmaddwd %%ymm7,%%ymm1,%%ymm0 \n" // 3*near+far (1, lo) + "vpmaddwd %%ymm7,%%ymm2,%%ymm1 \n" // 3*near+far (1, hi) + + "vmovdqu (%0,%3,2),%%xmm2 \n" // 01234567 (16b) + "vmovdqu 2(%0,%3,2),%%xmm3 \n" // 12345678 (16b) + "vpermq 
$0b11011000,%%ymm2,%%ymm2 \n" // 0123000045670000 + "vpermq $0b11011000,%%ymm3,%%ymm3 \n" // 1234000056780000 + "vpunpckldq %%ymm2,%%ymm2,%%ymm2 \n" // 0101232345456767 + "vpunpckldq %%ymm3,%%ymm3,%%ymm3 \n" // 1212343456567878 + "vpunpckhqdq %%ymm3,%%ymm2,%%ymm4 \n" // 2323343467677878 + "vpunpcklqdq %%ymm3,%%ymm2,%%ymm3 \n" // 0101121245455656 + "vpmaddwd %%ymm7,%%ymm3,%%ymm2 \n" // 3*near+far (2, lo) + "vpmaddwd %%ymm7,%%ymm4,%%ymm3 \n" // 3*near+far (2, hi) + + "vpaddd %%ymm0,%%ymm0,%%ymm4 \n" // 6*near+2*far (1, lo) + "vpaddd %%ymm6,%%ymm2,%%ymm5 \n" // 3*near+far+8 (2, lo) + "vpaddd %%ymm4,%%ymm0,%%ymm4 \n" // 9*near+3*far (1, lo) + "vpaddd %%ymm4,%%ymm5,%%ymm4 \n" // 9 3 3 1 + 8 (1, lo) + "vpsrad $4,%%ymm4,%%ymm4 \n" // ^ div by 16 (1, lo) + + "vpaddd %%ymm2,%%ymm2,%%ymm5 \n" // 6*near+2*far (2, lo) + "vpaddd %%ymm6,%%ymm0,%%ymm0 \n" // 3*near+far+8 (1, lo) + "vpaddd %%ymm5,%%ymm2,%%ymm5 \n" // 9*near+3*far (2, lo) + "vpaddd %%ymm5,%%ymm0,%%ymm5 \n" // 9 3 3 1 + 8 (2, lo) + "vpsrad $4,%%ymm5,%%ymm5 \n" // ^ div by 16 (2, lo) + + "vpaddd %%ymm1,%%ymm1,%%ymm0 \n" // 6*near+2*far (1, hi) + "vpaddd %%ymm6,%%ymm3,%%ymm2 \n" // 3*near+far+8 (2, hi) + "vpaddd %%ymm0,%%ymm1,%%ymm0 \n" // 9*near+3*far (1, hi) + "vpaddd %%ymm0,%%ymm2,%%ymm0 \n" // 9 3 3 1 + 8 (1, hi) + "vpsrad $4,%%ymm0,%%ymm0 \n" // ^ div by 16 (1, hi) + + "vpaddd %%ymm3,%%ymm3,%%ymm2 \n" // 6*near+2*far (2, hi) + "vpaddd %%ymm6,%%ymm1,%%ymm1 \n" // 3*near+far+8 (1, hi) + "vpaddd %%ymm2,%%ymm3,%%ymm2 \n" // 9*near+3*far (2, hi) + "vpaddd %%ymm2,%%ymm1,%%ymm2 \n" // 9 3 3 1 + 8 (2, hi) + "vpsrad $4,%%ymm2,%%ymm2 \n" // ^ div by 16 (2, hi) + + "vpackssdw %%ymm0,%%ymm4,%%ymm4 \n" + "vmovdqu %%ymm4,(%1) \n" // store above + "vpackssdw %%ymm2,%%ymm5,%%ymm5 \n" + "vmovdqu %%ymm5,(%1,%4,2) \n" // store below + + "lea 0x10(%0),%0 \n" + "lea 0x20(%1),%1 \n" // 8 sample to 16 sample + "sub $0x10,%2 \n" + "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 @@ -1620,7 +1602,6 @@ void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { asm volatile( - "pxor %%xmm5,%%xmm5 \n" // 16 pixel loop. 
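The scale.cc assert rewrite above is a pure simplification: src_width == (dst_width + 1) / 2 admits exactly dst_width == 2 * src_width or 2 * src_width - 1, the same pairs the old two-clause form allowed. The asm bodies above only changed indentation; the kernels they implement are the ones named in their comments. A scalar model of those kernels (a sketch, not libyuv's actual C fallback; edge pixels are patched up by the Any wrappers):

```c
#include <stdint.h>

// 2x horizontal upsample: 3/4*near + 1/4*far, with +2 for rounding
// before the >>2 (the "3*near+far+2" in the asm comments).
static uint8_t Filter31(int near, int far) {
  return (uint8_t)((3 * near + far + 2) >> 2);
}

// 2x2 bilinear upsample: 9/3/3/1 kernel with +8 rounding before >>4
// (the "9 3 3 1 + 8" / "div by 16" lines in the asm comments).
static uint8_t Filter9331(int nn, int nf, int fn, int ff) {
  return (uint8_t)((9 * nn + 3 * nf + 3 * fn + ff + 8) >> 4);
}

// One output row per source row, interior pixels only.
static void Up2LinearRow(const uint8_t* src, uint8_t* dst, int dst_width) {
  int x;
  for (x = 0; x < dst_width / 2; ++x) {
    dst[2 * x + 0] = Filter31(src[x], src[x + 1]);
    dst[2 * x + 1] = Filter31(src[x + 1], src[x]);
  }
}

// Two output rows (d "above", e "below") from source rows s and t.
static void Up2BilinearRows(const uint8_t* s, const uint8_t* t,
                            uint8_t* d, uint8_t* e, int dst_width) {
  int x;
  for (x = 0; x < dst_width / 2; ++x) {
    d[2 * x + 0] = Filter9331(s[x], s[x + 1], t[x], t[x + 1]);
    d[2 * x + 1] = Filter9331(s[x + 1], s[x], t[x + 1], t[x]);
    e[2 * x + 0] = Filter9331(t[x], t[x + 1], s[x], s[x + 1]);
    e[2 * x + 1] = Filter9331(t[x + 1], t[x], s[x + 1], s[x]);
  }
}
```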
@@ -1653,7 +1634,6 @@ void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { asm volatile( - "vpxor %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN @@ -1776,8 +1756,8 @@ void ScaleFilterCols_SSSE3(uint8_t* dst_ptr, "x"(kFsub80), // %8 "x"(kFadd40) // %9 #else - "m"(kFsub80), // %8 - "m"(kFadd40) // %9 + "m"(kFsub80), // %8 + "m"(kFadd40) // %9 #endif : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); @@ -1793,7 +1773,6 @@ void ScaleColsUp2_SSE2(uint8_t* dst_ptr, (void)x; (void)dx; asm volatile( - LABELALIGN "1: \n" "movdqu (%1),%%xmm0 \n" @@ -1820,7 +1799,6 @@ void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, int dst_width) { (void)src_stride; asm volatile( - LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" @@ -1844,7 +1822,6 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb, int dst_width) { (void)src_stride; asm volatile( - LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" @@ -1870,7 +1847,6 @@ void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int dst_width) { asm volatile( - LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" @@ -2057,7 +2033,6 @@ void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb, (void)x; (void)dx; asm volatile( - LABELALIGN "1: \n" "movdqu (%1),%%xmm0 \n" diff --git a/source/scale_neon.cc b/source/scale_neon.cc index 51061655..e260dc95 100644 --- a/source/scale_neon.cc +++ b/source/scale_neon.cc @@ -509,7 +509,6 @@ void ScaleRowUp2_Linear_NEON(const uint8_t* src_ptr, int dst_width) { const uint8_t* src_temp = src_ptr + 1; asm volatile( - "vmov.u16 q15, #3 \n" "1: \n" @@ -527,7 +526,7 @@ void ScaleRowUp2_Linear_NEON(const uint8_t* src_ptr, "vst2.8 {d0, d1}, [%1]! \n" // store "subs %2, %2, #16 \n" // 8 sample -> 16 sample - "bgt 1b \n" + "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width), // %2 @@ -548,7 +547,6 @@ void ScaleRowUp2_Bilinear_NEON(const uint8_t* src_ptr, const uint8_t* src_temp1 = src_ptr1 + 1; asm volatile( - "vmov.u16 q15, #3 \n" "1: \n" @@ -612,7 +610,6 @@ void ScaleRowUp2_Linear_16_NEON(const uint16_t* src_ptr, int dst_width) { const uint16_t* src_temp = src_ptr + 1; asm volatile( - "vmov.u16 q15, #3 \n" "1: \n" @@ -649,7 +646,6 @@ void ScaleRowUp2_Bilinear_16_NEON(const uint16_t* src_ptr, const uint16_t* src_temp1 = src_ptr1 + 1; asm volatile( - "vmov.u16 q15, #3 \n" "1: \n" diff --git a/source/scale_neon64.cc b/source/scale_neon64.cc index 514dde4c..4b4f2fb1 100644 --- a/source/scale_neon64.cc +++ b/source/scale_neon64.cc @@ -540,7 +540,6 @@ void ScaleRowUp2_Linear_NEON(const uint8_t* src_ptr, int dst_width) { const uint8_t* src_temp = src_ptr + 1; asm volatile( - "movi v31.8b, #3 \n" "1: \n" @@ -580,7 +579,6 @@ void ScaleRowUp2_Bilinear_NEON(const uint8_t* src_ptr, const uint8_t* src_temp1 = src_ptr1 + 1; asm volatile( - "movi v31.8b, #3 \n" "movi v30.8h, #3 \n" @@ -637,7 +635,6 @@ void ScaleRowUp2_Linear_16_NEON(const uint16_t* src_ptr, int dst_width) { const uint16_t* src_temp = src_ptr + 1; asm volatile( - "movi v31.8h, #3 \n" "1: \n" @@ -675,7 +672,6 @@ void ScaleRowUp2_Bilinear_16_NEON(const uint16_t* src_ptr, const uint16_t* src_temp1 = src_ptr1 + 1; asm volatile( - "movi v31.8h, #3 \n" "1: \n" @@ -1317,13 +1313,13 @@ void ScaleUVRowDownEven_NEON(const uint8_t* src_ptr, (void)src_stride; asm volatile( "1: \n" - "ld1 {v0.h}[0], [%0], %6 \n" - "ld1 {v1.h}[0], [%1], %6 \n" - "ld1 {v2.h}[0], [%2], %6 \n" - "ld1 {v3.h}[0], [%3], %6 \n" - "subs %w5, %w5, #4 \n" // 4 pixels per loop. 
- "st4 {v0.h, v1.h, v2.h, v3.h}[0], [%4], #8 \n" - "b.gt 1b \n" + "ld1 {v0.h}[0], [%0], %6 \n" + "ld1 {v1.h}[0], [%1], %6 \n" + "ld1 {v2.h}[0], [%2], %6 \n" + "ld1 {v3.h}[0], [%3], %6 \n" + "subs %w5, %w5, #4 \n" // 4 pixels per loop. + "st4 {v0.h, v1.h, v2.h, v3.h}[0], [%4], #8 \n" + "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(src1_ptr), // %1 "+r"(src2_ptr), // %2 diff --git a/unit_test/color_test.cc b/unit_test/color_test.cc index 95247214..60bdfdd6 100644 --- a/unit_test/color_test.cc +++ b/unit_test/color_test.cc @@ -257,6 +257,32 @@ static void YUVUToRGB(int y, int u, int v, int* r, int* g, int* b) { *r = orig_pixels[2]; } +#define V422ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j) + +static void YUVVToRGB(int y, int u, int v, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); + + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_u[8]); + SIMD_ALIGNED(uint8_t orig_v[8]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + memset(orig_u, u, kHalfPixels); + memset(orig_v, v, kHalfPixels); + + /* YUV converted to ARGB. */ + V422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2, + orig_pixels, kWidth * 4, kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + static void YToRGB(int y, int* r, int* g, int* b) { const int kWidth = 16; const int kHeight = 1; @@ -405,21 +431,21 @@ TEST_F(LibYUVColorTest, TestRoundToByte) { EXPECT_LE(allb, 255); } -// BT.601 YUV to RGB reference +// BT.601 limited range YUV to RGB reference static void YUVToRGBReference(int y, int u, int v, int* r, int* g, int* b) { *r = RoundToByte((y - 16) * 1.164 - (v - 128) * -1.596); *g = RoundToByte((y - 16) * 1.164 - (u - 128) * 0.391 - (v - 128) * 0.813); *b = RoundToByte((y - 16) * 1.164 - (u - 128) * -2.018); } -// JPEG YUV to RGB reference +// BT.601 full range YUV to RGB reference (aka JPEG) static void YUVJToRGBReference(int y, int u, int v, int* r, int* g, int* b) { *r = RoundToByte(y - (v - 128) * -1.40200); *g = RoundToByte(y - (u - 128) * 0.34414 - (v - 128) * 0.71414); *b = RoundToByte(y - (u - 128) * -1.77200); } -// BT.709 YUV to RGB reference +// BT.709 limited range YUV to RGB reference // See also http://www.equasys.de/colorconversion.html static void YUVHToRGBReference(int y, int u, int v, int* r, int* g, int* b) { *r = RoundToByte((y - 16) * 1.164 - (v - 128) * -1.793); @@ -434,7 +460,7 @@ static void YUVFToRGBReference(int y, int u, int v, int* r, int* g, int* b) { *b = RoundToByte(y - (u - 128) * -1.8556); } -// BT.2020 YUV to RGB reference +// BT.2020 limited range YUV to RGB reference static void YUVUToRGBReference(int y, int u, int v, int* r, int* g, int* b) { *r = RoundToByte((y - 16) * 1.164384 - (v - 128) * -1.67867); *g = RoundToByte((y - 16) * 1.164384 - (u - 128) * 0.187326 - @@ -442,6 +468,13 @@ static void YUVUToRGBReference(int y, int u, int v, int* r, int* g, int* b) { *b = RoundToByte((y - 16) * 1.164384 - (u - 128) * -2.14177); } +// BT.2020 full range YUV to RGB reference +static void YUVVToRGBReference(int y, int u, int v, int* r, int* g, int* b) { + *r = RoundToByte(y + (v - 128) * 1.474600); + *g = RoundToByte(y - (u - 128) * 0.164553 - (v - 128) * 0.571353); + *b = RoundToByte(y + (u - 128) * 1.881400); +} + TEST_F(LibYUVColorTest, TestYUV) { int r0, g0, b0, r1, g1, b1; @@ -573,16 +606,12 @@ 
@@ -573,16 +606,12 @@ static void PrintHistogram(int rh[256], int gh[256], int bh[256]) {
 #else
 #define FASTSTEP 5
 #endif
+
+// BT.601 limited range.
 TEST_F(LibYUVColorTest, TestFullYUV) {
-  int rh[256] = {
-      0,
-  };
-  int gh[256] = {
-      0,
-  };
-  int bh[256] = {
-      0,
-  };
+  int rh[256] = { 0, };
+  int gh[256] = { 0, };
+  int bh[256] = { 0, };
   for (int u = 0; u < 256; ++u) {
     for (int v = 0; v < 256; ++v) {
       for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
@@ -602,16 +631,11 @@ TEST_F(LibYUVColorTest, TestFullYUV) {
   PrintHistogram(rh, gh, bh);
 }

+// BT.601 full range.
 TEST_F(LibYUVColorTest, TestFullYUVJ) {
-  int rh[256] = {
-      0,
-  };
-  int gh[256] = {
-      0,
-  };
-  int bh[256] = {
-      0,
-  };
+  int rh[256] = { 0, };
+  int gh[256] = { 0, };
+  int bh[256] = { 0, };
   for (int u = 0; u < 256; ++u) {
     for (int v = 0; v < 256; ++v) {
       for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
@@ -631,16 +655,11 @@ TEST_F(LibYUVColorTest, TestFullYUVJ) {
   PrintHistogram(rh, gh, bh);
 }

+// BT.709 limited range.
 TEST_F(LibYUVColorTest, TestFullYUVH) {
-  int rh[256] = {
-      0,
-  };
-  int gh[256] = {
-      0,
-  };
-  int bh[256] = {
-      0,
-  };
+  int rh[256] = { 0, };
+  int gh[256] = { 0, };
+  int bh[256] = { 0, };
   for (int u = 0; u < 256; ++u) {
     for (int v = 0; v < 256; ++v) {
       for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
@@ -661,16 +680,11 @@ TEST_F(LibYUVColorTest, TestFullYUVH) {
   PrintHistogram(rh, gh, bh);
 }

+// BT.709 full range.
 TEST_F(LibYUVColorTest, TestFullYUVF) {
-  int rh[256] = {
-      0,
-  };
-  int gh[256] = {
-      0,
-  };
-  int bh[256] = {
-      0,
-  };
+  int rh[256] = { 0, };
+  int gh[256] = { 0, };
+  int bh[256] = { 0, };
   for (int u = 0; u < 256; ++u) {
     for (int v = 0; v < 256; ++v) {
       for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
@@ -690,16 +704,11 @@ TEST_F(LibYUVColorTest, TestFullYUVF) {
   PrintHistogram(rh, gh, bh);
 }

+// BT.2020 limited range.
 TEST_F(LibYUVColorTest, TestFullYUVU) {
-  int rh[256] = {
-      0,
-  };
-  int gh[256] = {
-      0,
-  };
-  int bh[256] = {
-      0,
-  };
+  int rh[256] = { 0, };
+  int gh[256] = { 0, };
+  int bh[256] = { 0, };
   for (int u = 0; u < 256; ++u) {
     for (int v = 0; v < 256; ++v) {
       for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
@@ -719,6 +728,30 @@ TEST_F(LibYUVColorTest, TestFullYUVU) {
   }
   PrintHistogram(rh, gh, bh);
 }
+
+// BT.2020 full range.
+TEST_F(LibYUVColorTest, TestFullYUVV) {
+  int rh[256] = { 0, };
+  int gh[256] = { 0, };
+  int bh[256] = { 0, };
+  for (int u = 0; u < 256; ++u) {
+    for (int v = 0; v < 256; ++v) {
+      for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
+        int r0, g0, b0, r1, g1, b1;
+        int y = RANDOM256(y2);
+        YUVVToRGBReference(y, u, v, &r0, &g0, &b0);
+        YUVVToRGB(y, u, v, &r1, &g1, &b1);
+        EXPECT_NEAR(r0, r1, ERROR_R);
+        EXPECT_NEAR(g0, g1, 2);
+        EXPECT_NEAR(b0, b1, ERROR_B);
+        ++rh[r1 - r0 + 128];
+        ++gh[g1 - g0 + 128];
+        ++bh[b1 - b0 + 128];
+      }
+    }
+  }
+  PrintHistogram(rh, gh, bh);
+}
 #undef FASTSTEP

 TEST_F(LibYUVColorTest, TestGreyYUVJ) {
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index c180811a..20703200 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -558,7 +558,7 @@ TESTBIPLANARTOBP(NV12, 2, 2, NV12Mirror, 2, 2)
 TESTBIPLANARTOP(NV12, 2, 2, I420, 2, 2)
 TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2)

-// Provide matrix wrappers
+// Provide matrix wrappers for full range bt.709
 #define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
   I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j)
 #define F420ToARGB(a, b, c, d, e, f, g, h, i, j) \
   I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j)
@@ -572,6 +572,20 @@ TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2)
 #define F444ToARGB(a, b, c, d, e, f, g, h, i, j) \
   I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j)

+// Provide matrix wrappers for full range bt.2020
+#define V420ToABGR(a, b, c, d, e, f, g, h, i, j) \
+  I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j)
+#define V420ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
+#define V422ToABGR(a, b, c, d, e, f, g, h, i, j) \
+  I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j)
+#define V422ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
+#define V444ToABGR(a, b, c, d, e, f, g, h, i, j) \
+  I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuV2020Constants, i, j)
+#define V444ToARGB(a, b, c, d, e, f, g, h, i, j) \
+  I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j)
+
 #define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))

 #define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
@@ -643,6 +657,8 @@ TESTPLANARTOB(H420, 2, 2, ARGB, 4, 4, 1)
 TESTPLANARTOB(H420, 2, 2, ABGR, 4, 4, 1)
 TESTPLANARTOB(U420, 2, 2, ARGB, 4, 4, 1)
 TESTPLANARTOB(U420, 2, 2, ABGR, 4, 4, 1)
+TESTPLANARTOB(V420, 2, 2, ARGB, 4, 4, 1)
+TESTPLANARTOB(V420, 2, 2, ABGR, 4, 4, 1)
 TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1)
 TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1)
 TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1)
@@ -667,6 +683,8 @@ TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1)
 TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1)
 TESTPLANARTOB(U422, 2, 1, ARGB, 4, 4, 1)
 TESTPLANARTOB(U422, 2, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1)
 TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
 TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
 TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
@@ -677,6 +695,8 @@ TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1)
 TESTPLANARTOB(H444, 1, 1, ABGR, 4, 4, 1)
 TESTPLANARTOB(U444, 1, 1, ARGB, 4, 4, 1)
 TESTPLANARTOB(U444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(V444, 1, 1, ARGB, 4, 4, 1)
+TESTPLANARTOB(V444, 1, 1, ABGR, 4, 4, 1)
 TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1)
 TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1)
 TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
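Each V-prefixed wrapper above simply binds an existing matrix conversion to kYuvV2020Constants (or the kYvuV2020Constants mirror, with U/V swapped, for ABGR output), so applications get full range BT.2020 with a single call. A minimal sketch of the same call outside the test harness, assuming contiguous I420 planes of the stated dimensions (names here are illustrative):

// Minimal sketch: full range BT.2020 I420 to ARGB via libyuv's matrix API,
// the call the V420ToARGB test wrapper expands to. Returns true on success.
#include "libyuv/convert_argb.h"

bool V420ToARGBExample(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                       uint8_t* argb, int width, int height) {
  int half_width = (width + 1) / 2;
  return libyuv::I420ToARGBMatrix(y, width,         // Y plane, stride
                                  u, half_width,    // U plane, stride
                                  v, half_width,    // V plane, stride
                                  argb, width * 4,  // dest, stride
                                  &libyuv::kYuvV2020Constants,
                                  width, height) == 0;
}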
@@ -772,6 +792,12 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
 #define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
   I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                         l, m)
+#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
 #define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
   I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                         l, m)
@@ -796,6 +822,12 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
 #define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
   I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                         l, m)
+#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+  I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+  I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
 #define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
   I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                         l, m)
@@ -820,6 +852,12 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
 #define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
   I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                         l, m)
+#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+  I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)
+#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+  I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+                        l, m)

 TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
 TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1)
@@ -829,6 +867,8 @@ TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1)
 TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1)
 TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1)
 TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1)
 TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
 TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1)
 TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1)
@@ -837,6 +877,8 @@ TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1)
 TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1)
 TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1)
 TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1)
 TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
 TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1)
 TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1)
@@ -845,6 +887,8 @@ TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1)
 TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1)
 TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1)
 TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1)

 #define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \
                          BPP_B, W1280, N, NEG, OFF) \
@@ -2771,6 +2815,8 @@ TESTPLANARTOE(H422, 2, 1, ARGB, 1, 4, ARGB, 4)
 TESTPLANARTOE(H422, 2, 1, ABGR, 1, 4, ARGB, 4)
 TESTPLANARTOE(U422, 2, 1, ARGB, 1, 4, ARGB, 4)
 TESTPLANARTOE(U422, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(V422, 2, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(V422, 2, 1, ABGR, 1, 4, ARGB, 4)
 TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4)
 TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4)
 TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ABGR, 4)
@@ -2781,6 +2827,8 @@ TESTPLANARTOE(H444, 1, 1, ARGB, 1, 4, ARGB, 4)
 TESTPLANARTOE(H444, 1, 1, ABGR, 1, 4, ARGB, 4)
 TESTPLANARTOE(U444, 1, 1, ARGB, 1, 4, ARGB, 4)
 TESTPLANARTOE(U444, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTPLANARTOE(V444, 1, 1, ARGB, 1, 4, ARGB, 4)
+TESTPLANARTOE(V444, 1, 1, ABGR, 1, 4, ARGB, 4)
 TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4)
 TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4)
 TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4)
@@ -2862,6 +2910,8 @@ TESTQPLANARTOE(F420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
 TESTQPLANARTOE(F420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
 TESTQPLANARTOE(U420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
 TESTQPLANARTOE(U420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(V420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(V420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
 TESTQPLANARTOE(I422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
 TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
 TESTQPLANARTOE(J422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
@@ -2872,6 +2922,8 @@ TESTQPLANARTOE(H422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
 TESTQPLANARTOE(H422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
 TESTQPLANARTOE(U422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
 TESTQPLANARTOE(U422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(V422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(V422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4)
 TESTQPLANARTOE(I444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
 TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
 TESTQPLANARTOE(J444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
@@ -2880,6 +2932,8 @@ TESTQPLANARTOE(H444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
 TESTQPLANARTOE(H444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
 TESTQPLANARTOE(U444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
 TESTQPLANARTOE(U444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
+TESTQPLANARTOE(V444Alpha, 1, 1, ARGB, 1, 4, ABGR, 4)
+TESTQPLANARTOE(V444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)

 #define TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, W1280, N, NEG, \
                       OFF, FMT_C, BPP_C) \
diff --git a/util/color.cc b/util/color.cc
new file mode 100644
index 00000000..2333276b
--- /dev/null
+++ b/util/color.cc
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2021 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// This utility computes values needed to generate yuvconstants based on
+// white point values.
+// The yuv formulas are tuned for 8 bit YUV channels.
+
+// For those MCs that can be represented as kr and kb:
+// Full range
+// float M[3][3] {{1,0,2*(1-kr)},{1,-((2*kb)/((2-kb)*(1-kb-kr))),-((2*kr)/((2-kr)*(1-kb-kr)))},{1,2*(1-kb),0}};
+// float B[3] {1+(256*(1-kr))/255,1-(256*kb)/(255*(2-kb)*(1-kb-kr))-(256*kr)/(255*(2-kr)*(1-kb-kr)),1+(256*(1-kb))/255};
+// Limited range
+// float M[3][3] {{85/73,0,255/112-(255*kr)/112},{85/73,-((255*kb)/(112*(2-kb)*(1-kb-kr))),-((255*kr)/(112*(2-kr)*(1-kb-kr)))},{85/73,255/112-(255*kb)/112,0}};
+// float B[3] {77662/43435-(1537*kr)/1785,203/219-(1537*kb)/(1785*(2-kb)*(1-kb-kr))-(1537*kr)/(1785*(2-kr)*(1-kb-kr)),77662/43435-(1537*kb)/1785};
+
+// mc bt
+// 1  bt.709     KR = 0.2126; KB = 0.0722
+// 4  fcc        KR = 0.30;   KB = 0.11
+// 6  bt.601     KR = 0.299;  KB = 0.114
+// 7  SMPTE 240M KR = 0.212;  KB = 0.087
+// 10 bt2020     KR = 0.2627; KB = 0.0593
+
+// BT.709 full range YUV to RGB reference
+// R = Y               + V * 1.5748
+// G = Y - U * 0.18732 - V * 0.46812
+// B = Y + U * 1.8556
+// KR = 0.2126
+// KB = 0.0722
+
+// https://mymusing.co/bt601-yuv-to-rgb-conversion-color/
+
+// // Y contribution to R,G,B. Scale and bias.
+// #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
+// #define YB 32    /* 64 / 2 */
+//
+// // U and V contributions to R,G,B.
+// #define UB 113 /* round(1.77200 * 64) */
+// #define UG 22  /* round(0.34414 * 64) */
+// #define VG 46  /* round(0.71414 * 64) */
+// #define VR 90  /* round(1.40200 * 64) */
+//
+// // Bias values to round, and subtract 128 from U and V.
+// #define BB (-UB * 128 + YB)
+// #define BG (UG * 128 + VG * 128 + YB)
+// #define BR (-VR * 128 + YB)
+
+int round(float v) {
+  return (int)(v + 0.5);  // truncate after +0.5; fine for the non-negative values used here
+}
+
+int main(int argc, const char* argv[]) {
+
+  if (argc < 3) {
+    printf("usage: color <kr> <kb>\n");
+    return -1;
+  }
+  float kr = atof(argv[1]);
+  float kb = atof(argv[2]);
+  float kg = 1 - kr - kb;
+
+  float vr = 2 * (1 - kr);
+  float ug = 2 * ((1 - kb) * kb / kg);
+  float vg = 2 * ((1 - kr) * kr / kg);
+  float ub = 2 * (1 - kb);
+
+  printf("Full range\n");
+  printf("R = Y + V * %5f\n", vr);
+  printf("G = Y - U * %6f - V * %6f\n", ug, vg);
+  printf("B = Y + U * %5f\n", ub);
+
+  printf("KR = %4f; ", kr);
+  printf("KB = %4f\n", kb);
+// printf("KG = %4f\n", kg);
+// #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
+// #define YB 32    /* 64 / 2 */
+//
+// // U and V contributions to R,G,B.
+
+  printf("UB %-3d /* round(%f * 64) */\n", round(ub * 64), ub);
+  printf("UG %-3d /* round(%f * 64) */\n", round(ug * 64), ug);
+  printf("VG %-3d /* round(%f * 64) */\n", round(vg * 64), vg);
+  printf("VR %-3d /* round(%f * 64) */\n", round(vr * 64), vr);
+
+  vr = 255.f / 224.f * 2 * (1 - kr);
+  ug = 255.f / 224.f * 2 * ((1 - kb) * kb / kg);
+  vg = 255.f / 224.f * 2 * ((1 - kr) * kr / kg);
+  ub = 255.f / 224.f * 2 * (1 - kb);
+
+  printf("Limited range\n");
+  printf("R = (Y - 16) * 1.164 + V * %5f\n", vr);
+  printf("G = (Y - 16) * 1.164 - U * %6f - V * %6f\n", ug, vg);
+  printf("B = (Y - 16) * 1.164 + U * %5f\n", ub);
+
+// printf("KG = %4f\n", kg);
+// #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
+// #define YB 32    /* 64 / 2 */
+//
+// // U and V contributions to R,G,B.
+
+  printf("UB %-3d /* round(%f * 64) */\n", round(ub * 64), ub);
+  printf("UG %-3d /* round(%f * 64) */\n", round(ug * 64), ug);
+  printf("VG %-3d /* round(%f * 64) */\n", round(vg * 64), vg);
+  printf("VR %-3d /* round(%f * 64) */\n", round(vr * 64), vr);
+
+  return 0;
+}
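For BT.2020 (kr = 0.2627, kb = 0.0593, so kg = 0.678) the full range output of this utility reproduces the constants used in kYuvV2020Constants and in the YUVVToRGBReference test: vr = 2*(1 - 0.2627) = 1.4746, ub = 2*(1 - 0.0593) = 1.8814, ug = 2*(1 - 0.0593)*0.0593/0.678 ≈ 0.164553 and vg = 2*(1 - 0.2627)*0.2627/0.678 ≈ 0.571353; the limited range block rescales these by 255/224, matching YUVUToRGBReference. Expected session, assuming the tool is built as `color` (last digits may vary with float rounding):

$ color 0.2627 0.0593
Full range
R = Y + V * 1.474600
G = Y - U * 0.164553 - V * 0.571353
B = Y + U * 1.881400
KR = 0.262700; KB = 0.059300
UB 120 /* round(1.881400 * 64) */
UG 11  /* round(0.164553 * 64) */
VG 37  /* round(0.571353 * 64) */
VR 94  /* round(1.474600 * 64) */
Limited range
R = (Y - 16) * 1.164 + V * 1.678674
G = (Y - 16) * 1.164 - U * 0.187326 - V * 0.650424
B = (Y - 16) * 1.164 + U * 2.141772
UB 137 /* round(2.141772 * 64) */
UG 12  /* round(0.187326 * 64) */
VG 42  /* round(0.650424 * 64) */
VR 107 /* round(1.678674 * 64) */

The full range R/G/B lines match the commit's new YUVVToRGBReference exactly, and the limited range lines match the existing YUVUToRGBReference, which is how the constants in this change were derived.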