diff options
author | Yuan Tong <tongyuan200097@gmail.com> | 2021-02-03 14:21:07 +0800 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2021-02-03 10:53:02 +0000 |
commit | fc61dde1eb4b7807201fa20cd0a7d023363558b2 (patch) | |
tree | 9cb82fea30f1da03c77e51d4f8f8c6ed6d9d6024 /source/scale.cc | |
parent | c28d4049364d75710b1c49697a5814ab572af641 (diff) | |
download | libyuv-fc61dde1eb4b7807201fa20cd0a7d023363558b2.tar.gz |
Add special optimization for I420ToI444 and I422ToI444
These functions use (bi)linear filter, to scale U and V planes to the size of Y plane.
This will help enhance the quality of YUV to RGB conversion.
Also added 10bit and 12bit version:
I010ToI410
I210ToI410
I012ToI412
I212ToI412
libyuv_unittest --gtest_filter=LibYUVConvertTest.I42*ToI444*:LibYUVConvertTest.I*1*ToI41*
R=fbarchard@chromium.org
Change-Id: Ie4a711a5ba28f2ff1f44c021f7a5c149022264c5
Bug: libyuv:872
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2658097
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/scale.cc')
-rw-r--r-- | source/scale.cc | 280 |
1 files changed, 280 insertions, 0 deletions
diff --git a/source/scale.cc b/source/scale.cc index cf3c0332..34c05699 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -1336,6 +1336,238 @@ void ScalePlaneBilinearUp(int src_width, } } +// Scale plane, horizontally 2 times, vertically any time. +// Uses linear filter horizontally, nearest vertically. +// This is an optimized version for scaling up a plane to 2 times of +// its original width, using linear interpolation. +// This is used to scale U and V planes of I422 to I444. +void ScalePlaneUp2_Linear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { + void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = + ScaleRowUp2_Linear_Any_C; + int i; + int y; + int dy; + + // This function can only scale up by 2 times horizontally. + assert(src_width * 2 == dst_width || src_width * 2 == dst_width + 1); + +#ifdef HAS_SCALEROWUP2LINEAR_SSE2 + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; + } +#endif + +#ifdef HAS_SCALEROWUP2LINEAR_SSSE3 + if (TestCpuFlag(kCpuHasSSSE3)) { + ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; + } +#endif + +#ifdef HAS_SCALEROWUP2LINEAR_AVX2 + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; + } +#endif + +#ifdef HAS_SCALEROWUP2LINEAR_NEON + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; + } +#endif + + if (dst_height == 1) { + ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr, + dst_width); + } else { + dy = FixedDiv(src_height - 1, dst_height - 1); + y = (1 << 15) - 1; + for (i = 0; i < dst_height; ++i) { + ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width); + dst_ptr += dst_stride; + y += dy; + } + } +} + +// Scale plane, 2 times. +// This is an optimized version for scaling up a plane to 2 times of +// its original size, using bilinear interpolation. +// This is used to scale U and V planes of I420 to I444. +void ScalePlaneUp2_Bilinear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { + void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = + ScaleRowUp2_Bilinear_Any_C; + int x; + + // This function can only scale up by 2 times. + assert(src_width * 2 == dst_width || src_width * 2 == dst_width + 1); + assert(src_height * 2 == dst_height || src_height * 2 == dst_height + 1); + +#ifdef HAS_SCALEROWUP2LINEAR_SSE2 + if (TestCpuFlag(kCpuHasSSE2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + } +#endif + +#ifdef HAS_SCALEROWUP2LINEAR_SSSE3 + if (TestCpuFlag(kCpuHasSSSE3)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + } +#endif + +#ifdef HAS_SCALEROWUP2LINEAR_AVX2 + if (TestCpuFlag(kCpuHasAVX2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + } +#endif + +#ifdef HAS_SCALEROWUP2LINEAR_NEON + if (TestCpuFlag(kCpuHasNEON)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + } +#endif + + if (src_height == 1) { + Scale2RowUp(src_ptr, 0, dst_ptr, dst_stride, dst_width); + } else { + Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); + dst_ptr += dst_stride; + for (x = 0; x < src_height - 1; ++x) { + Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width); + src_ptr += src_stride; + // TODO test performance of writing one row of destination at a time + dst_ptr += 2 * dst_stride; + } + if (!(dst_height & 1)) { + Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); + } + } +} + +// Scale at most 14bit plane, horizontally 2 times. +// This is an optimized version for scaling up a plane to 2 times of +// its original width, using linear interpolation. +// stride is in count of uint16_t. +// This is used to scale U and V planes of I210 to I410 and I212 to I412. +void ScalePlaneUp2_16_Linear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { + void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + int i; + int y; + int dy; + + // This function can only scale up by 2 times horizontally. + assert(src_width * 2 == dst_width || src_width * 2 == dst_width + 1); + +#ifdef HAS_SCALEROWUP2LINEAR_SSE2 + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2; + } +#endif + +#ifdef HAS_SCALEROWUP2LINEAR_AVX2 + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2; + } +#endif + +#ifdef HAS_SCALEROWUP2LINEAR_NEON + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON; + } +#endif + + if (dst_height == 1) { + ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr, + dst_width); + } else { + dy = FixedDiv(src_height - 1, dst_height - 1); + y = (1 << 15) - 1; + for (i = 0; i < dst_height; ++i) { + ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width); + dst_ptr += dst_stride; + y += dy; + } + } +} + +// Scale at most 12bit plane, up 2 times. +// This is an optimized version for scaling up a plane to 2 times of +// its original size, using bilinear interpolation. +// stride is in count of uint16_t. +// This is used to scale U and V planes of I010 to I410 and I012 to I412. +void ScalePlaneUp2_16_Bilinear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { + void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = + ScaleRowUp2_Bilinear_16_Any_C; + int x; + + // This function can only scale up by 2 times. + assert(src_width * 2 == dst_width || src_width * 2 == dst_width + 1); + assert(src_height * 2 == dst_height || src_height * 2 == dst_height + 1); + +#ifdef HAS_SCALEROWUP2LINEAR_SSE2 + if (TestCpuFlag(kCpuHasSSE2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSE2; + } +#endif + +#ifdef HAS_SCALEROWUP2LINEAR_AVX2 + if (TestCpuFlag(kCpuHasAVX2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2; + } +#endif + +#ifdef HAS_SCALEROWUP2LINEAR_NEON + if (TestCpuFlag(kCpuHasNEON)) { + Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON; + } +#endif + + if (src_height == 1) { + Scale2RowUp(src_ptr, 0, dst_ptr, dst_stride, dst_width); + } else { + Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); + dst_ptr += dst_stride; + for (x = 0; x < src_height - 1; ++x) { + Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width); + src_ptr += src_stride; + dst_ptr += 2 * dst_stride; + } + if (!(dst_height & 1)) { + Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); + } + } +} + void ScalePlaneBilinearUp_16(int src_width, int src_height, int dst_width, @@ -1627,6 +1859,17 @@ void ScalePlane(const uint8_t* src, dst_stride, src, dst); return; } + if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { + ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst); + return; + } + if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && + (filtering == kFilterBilinear || filtering == kFilterBox)) { + ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst); + return; + } if (filtering && dst_height > src_height) { ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); @@ -1724,6 +1967,43 @@ void ScalePlane_16(const uint16_t* src, dst_stride, src, dst); } +LIBYUV_API +void ScalePlane_12(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering) { + // Simplify filtering when possible. + filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, + filtering); + + // Negative height means invert the image. + if (src_height < 0) { + src_height = -src_height; + src = src + (src_height - 1) * src_stride; + src_stride = -src_stride; + } + + if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { + ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst); + return; + } + if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && + (filtering == kFilterBilinear || filtering == kFilterBox)) { + ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst); + return; + } + + ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride, + dst_width, dst_height, filtering); +} + // Scale an I420 image. // This function in turn calls a scaling function for each plane. |