diff options
author | Frank Barchard <fbarchard@google.com> | 2022-09-30 15:12:37 -0700 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-09-30 22:41:21 +0000 |
commit | 00950840d1c9bcbb3eb6ebc5aac5793e71166c8b (patch) | |
tree | a261be18062391f039e4e230ce5714f0059ae845 /source/row_common.cc | |
parent | 9ba40a8f03673b79d3236e79707723fdf99f76b6 (diff) | |
download | libyuv-00950840d1c9bcbb3eb6ebc5aac5793e71166c8b.tar.gz |
YUY2ToNV12 using YUY2ToY and YUY2ToNVUV
- Optimize YUY2ToNV12, reducing it from 3 steps to 2 steps
- Was: SplitUV, memcpy Y, InterpolateUV
- Now: YUY2ToY, YUY2ToNVUV
- Roll back to the opt-in LIBYUV_UNLIMITED_DATA macro (replacing the opt-out LIBYUV_LIMITED_DATA)
Benchmark results at 3840x2160, 1000 iterations:
Pixel 2 Cortex A73
Was YUY2ToNV12_Opt (6515 ms)
Now YUY2ToNV12_Opt (3350 ms)
AB7 Mediatek P35 Cortex A53
Was YUY2ToNV12_Opt (6435 ms)
Now YUY2ToNV12_Opt (3301 ms)
Skylake AVX2 x64
Was YUY2ToNV12_Opt (1872 ms)
Now YUY2ToNV12_Opt (1657 ms)
SSE2 x64
Was YUY2ToNV12_Opt (2008 ms)
Now YUY2ToNV12_Opt (1691 ms)
Windows Skylake AVX2 32 bit x86
Was YUY2ToNV12_Opt (2161 ms)
Now YUY2ToNV12_Opt (1628 ms)
Bug: libyuv:943
Change-Id: I6c2ba2ae765413426baf770b837de114f808f6d0
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3929843
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/row_common.cc')
-rw-r--r-- | source/row_common.cc | 31 |
1 files changed, 25 insertions, 6 deletions
```diff
diff --git a/source/row_common.cc b/source/row_common.cc
index a177d8bb..8bfa4b8c 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -22,9 +22,13 @@ extern "C" {
 #endif
 
 // This macro controls YUV to RGB using unsigned math to extend range of
-// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B.
-// Enable this macro for backwards compatiability with limited range 0 to 2.
-// LIBYUV_LIMITED_DATA
+// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
+// LIBYUV_UNLIMITED_DATA
+
+// Macros to enable unlimited data for each colorspace
+// LIBYUV_UNLIMITED_BT601
+// LIBYUV_UNLIMITED_BT709
+// LIBYUV_UNLIMITED_BT2020
 
 // The following macro from row_win makes the C code match the row_win code,
 // which is 7 bit fixed point for ARGBToI420:
@@ -1480,7 +1484,7 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
 // KR = 0.299; KB = 0.114
 
 // U and V contributions to R,G,B.
-#if !defined(LIBYUV_LIMITED_DATA)
+#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT601)
 #define UB 129 /* round(2.018 * 64) */
 #else
 #define UB 128 /* max(128, round(2.018 * 64)) */
@@ -1534,7 +1538,7 @@ MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)
 // KR = 0.2126, KB = 0.0722
 
 // U and V contributions to R,G,B.
-#if !defined(LIBYUV_LIMITED_DATA)
+#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT709)
 #define UB 135 /* round(2.112 * 64) */
 #else
 #define UB 128 /* max(128, round(2.112 * 64)) */
@@ -1588,7 +1592,7 @@ MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)
 // KR = 0.2627; KB = 0.0593
 
 // U and V contributions to R,G,B.
-#if !defined(LIBYUV_LIMITED_DATA)
+#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT2020)
 #define UB 137 /* round(2.142 * 64) */
 #else
 #define UB 128 /* max(128, round(2.142 * 64)) */
@@ -3094,6 +3098,21 @@ void YUY2ToUVRow_C(const uint8_t* src_yuy2,
   }
 }
 
+// Filter 2 rows of YUY2 UV's (422) into UV (NV12).
+void YUY2ToNVUVRow_C(const uint8_t* src_yuy2,
+                     int src_stride_yuy2,
+                     uint8_t* dst_uv,
+                     int width) {
+  // Output a row of UV values, filtering 2 rows of YUY2.
+  int x;
+  for (x = 0; x < width; x += 2) {
+    dst_uv[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
+    dst_uv[1] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
+    src_yuy2 += 4;
+    dst_uv += 2;
+  }
+}
+
 // Copy row of YUY2 UV's (422) into U and V (422).
 void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
                       uint8_t* dst_u,
```