diff options
author | Frank Barchard <fbarchard@google.com> | 2022-09-19 18:44:41 -0700 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-09-20 02:00:52 +0000 |
commit | 248172e2ba2d3f09e4d3258e396e8f53ff594157 (patch) | |
tree | 63996424d51074f4781bed42fcc00be9bb93893f /source/convert_argb.cc | |
parent | be50557db77cac055507470f46482e74156deb78 (diff) | |
download | libyuv-248172e2ba2d3f09e4d3258e396e8f53ff594157.tar.gz |
I422ToRGB24, I422ToRAW, I422ToRGB24MatrixFilter conversion functions added.
- YUV to RGB use linear for first and last row.
- add assert(yuvconstants)
- rename pointers to match row functions.
- use macros that match row functions.
- use 12 bit upsampler for conversions of 10 and 12 bits
Cortex A53 AArch32
I420ToRGB24_Opt (3627 ms)
I422ToRGB24_Opt (4099 ms)
I444ToRGB24_Opt (4186 ms)
I420ToRGB24Filter_Opt (5451 ms)
I422ToRGB24Filter_Opt (5430 ms)
AVX2
Was I420ToRGB24Filter_Opt (583 ms)
Now I420ToRGB24Filter_Opt (560 ms)
Neon Cortex A7
Was I420ToRGB24Filter_Opt (5447 ms)
Now I420ToRGB24Filter_Opt (5439 ms)
Bug: libyuv:938
Change-Id: I1731f2dd591073ae11a756f06574103ba0f803c7
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3906082
Reviewed-by: Justin Green <greenjustin@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/convert_argb.cc')
-rw-r--r-- | source/convert_argb.cc | 906 |
1 files changed, 646 insertions, 260 deletions
diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 5500fcb2..64425c59 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -10,6 +10,8 @@ #include "libyuv/convert_argb.h" +#include <assert.h> + #include "libyuv/convert_from_argb.h" #include "libyuv/cpu_id.h" #ifdef HAVE_JPEG @@ -67,6 +69,7 @@ int I420ToARGBMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -300,6 +303,7 @@ int I422ToARGBMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -538,6 +542,7 @@ int I444ToARGBMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -749,6 +754,128 @@ int U444ToABGR(const uint8_t* src_y, width, height); } +// Convert I444 to RGB24 with matrix. +LIBYUV_API +int I444ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToRGB24Row_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } + // Coalesce rows. + if (src_stride_y == width && src_stride_u == width && src_stride_v == width && + dst_stride_rgb24 == width * 3) { + width *= height; + height = 1; + src_stride_y = src_stride_u = src_stride_v = dst_stride_rgb24 = 0; + } +#if defined(HAS_I444TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToRGB24Row = I444ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I444ToRGB24Row = I444ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToRGB24Row = I444ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444ToRGB24Row = I444ToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_MSA; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I444ToRGB24Row = I444ToRGB24Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_LSX; + } + } +#endif + + for (y = 0; y < height; ++y) { + I444ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + return 0; +} + +// Convert I444 to RGB24. +LIBYUV_API +int I444ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + return I444ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + &kYuvI601Constants, width, height); +} + +// Convert I444 to RAW. +LIBYUV_API +int I444ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height) { + return I444ToRGB24Matrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_raw, dst_stride_raw, + &kYvuI601Constants, // Use Yvu matrix + width, height); +} + // Convert 10 bit YUV to ARGB with matrix. // TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to // multiply 10 bit yuv into high bits to allow any number of bits. @@ -769,6 +896,7 @@ int I010ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -928,6 +1056,7 @@ int I012ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I212ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -985,6 +1114,7 @@ int I210ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1139,6 +1269,7 @@ int I410ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1192,6 +1323,7 @@ int I010ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1355,6 +1487,7 @@ int I012ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I212ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1410,6 +1543,7 @@ int I210ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1570,6 +1704,7 @@ int I410ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1619,6 +1754,7 @@ int P010ToARGBMatrix(const uint16_t* src_y, void (*P210ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1669,6 +1805,7 @@ int P210ToARGBMatrix(const uint16_t* src_y, void (*P210ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1717,6 +1854,7 @@ int P010ToAR30Matrix(const uint16_t* src_y, void (*P210ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1767,6 +1905,7 @@ int P210ToAR30Matrix(const uint16_t* src_y, void (*P210ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1825,6 +1964,7 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y, int width) = I422AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1949,6 +2089,7 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y, int width) = I422AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2071,6 +2212,7 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y, int width) = I444AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2314,6 +2456,7 @@ int I010AlphaToARGBMatrix(const uint16_t* src_y, int width) = I210AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2414,6 +2557,7 @@ int I210AlphaToARGBMatrix(const uint16_t* src_y, int width) = I210AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2512,6 +2656,7 @@ int I410AlphaToARGBMatrix(const uint16_t* src_y, int width) = I410AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2599,6 +2744,7 @@ int I400ToARGBMatrix(const uint8_t* src_y, void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I400ToARGBRow_C; + assert(yuvconstants); if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3516,6 +3662,7 @@ int NV12ToARGBMatrix(const uint8_t* src_y, void (*NV12ToARGBRow)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3600,6 +3747,7 @@ int NV21ToARGBMatrix(const uint8_t* src_y, void (*NV21ToARGBRow)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3743,6 +3891,7 @@ int NV12ToRGB24Matrix(const uint8_t* src_y, void (*NV12ToRGB24Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB24Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -3803,6 +3952,7 @@ int NV21ToRGB24Matrix(const uint8_t* src_y, void (*NV21ToRGB24Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV21ToRGB24Row_C; + assert(yuvconstants); if (!src_y || !src_vu || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -4145,6 +4295,7 @@ int Android420ToARGBMatrix(const uint8_t* src_y, const ptrdiff_t vu_off = src_v - src_u; int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -4245,6 +4396,7 @@ int I422ToRGBAMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } @@ -4356,6 +4508,7 @@ int NV12ToRGB565Matrix(const uint8_t* src_y, void (*NV12ToRGB565Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -4458,6 +4611,7 @@ int I420ToRGBAMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } @@ -4574,6 +4728,7 @@ int I420ToRGB24Matrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB24Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -4744,6 +4899,121 @@ int H420ToRAW(const uint8_t* src_y, width, height); } +// Convert I422 to RGB24 with matrix. +LIBYUV_API +int I422ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I422ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToRGB24Row_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_I422TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I422ToRGB24Row = I422ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToRGB24Row = I422ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToRGB24Row = I422ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I422ToRGB24Row = I422ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I422ToRGB24Row = I422ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToRGB24Row = I422ToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 16)) { + I422ToRGB24Row = I422ToRGB24Row_MSA; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToRGB24Row = I422ToRGB24Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToRGB24Row = I422ToRGB24Row_LASX; + } + } +#endif + + for (y = 0; y < height; ++y) { + I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + return 0; +} + +// Convert I422 to RGB24. +LIBYUV_API +int I422ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + return I422ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + &kYuvI601Constants, width, height); +} + +// Convert I422 to RAW. +LIBYUV_API +int I422ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height) { + return I422ToRGB24Matrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_raw, dst_stride_raw, + &kYvuI601Constants, // Use Yvu matrix + width, height); +} + // Convert I420 to ARGB1555. LIBYUV_API int I420ToARGB1555(const uint8_t* src_y, @@ -4924,6 +5194,7 @@ int I420ToRGB565Matrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -5055,6 +5326,7 @@ int I422ToRGB565Matrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -5298,6 +5570,7 @@ int I420ToAR30Matrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -5421,9 +5694,12 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; - void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_Any_C; + void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, + int dst_width) = ScaleRowUp2_Bilinear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -5474,27 +5750,31 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) +#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif @@ -5506,15 +5786,15 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5526,8 +5806,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); } @@ -5551,8 +5831,9 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; - void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = - ScaleRowUp2_Linear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -5604,22 +5885,22 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif @@ -5630,8 +5911,8 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, uint8_t* temp_v = row + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); I444ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5655,15 +5936,16 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, int width, int height) { int y; - void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, - const uint8_t* v_buf, uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, int width) = - I444ToARGBRow_C; - void (*ARGBToRGB24Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = - ARGBToRGB24Row_C; - void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_Any_C; + void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToRGB24Row_C; + void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, + int dst_width) = ScaleRowUp2_Bilinear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -5673,141 +5955,96 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } -#if defined(HAS_I444TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I444ToARGBRow = I444ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_I444TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - I444ToARGBRow = I444ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I444ToARGBRow = I444ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I444TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I444ToARGBRow = I444ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_I444TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I444ToARGBRow = I444ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_MSA; - } - } -#endif -#if defined(HAS_I444TOARGBROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - I444ToARGBRow = I444ToARGBRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - I444ToARGBRow = I444ToARGBRow_LASX; - } - } -#endif -#if defined(HAS_ARGBTORGB24ROW_SSSE3) +#if defined(HAS_I444TORGB24ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3; + I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToRGB24Row = ARGBToRGB24Row_SSSE3; + I444ToRGB24Row = I444ToRGB24Row_SSSE3; } } #endif -#if defined(HAS_ARGBTORGB24ROW_AVX2) +#if defined(HAS_I444TORGB24ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToRGB24Row = ARGBToRGB24Row_AVX2; - } - } -#endif -#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) - if (TestCpuFlag(kCpuHasAVX512VBMI)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX512VBMI; + I444ToRGB24Row = I444ToRGB24Row_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToRGB24Row = ARGBToRGB24Row_AVX512VBMI; + I444ToRGB24Row = I444ToRGB24Row_AVX2; } } #endif -#if defined(HAS_ARGBTORGB24ROW_NEON) +#if defined(HAS_I444TORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToRGB24Row = ARGBToRGB24Row_NEON; + I444ToRGB24Row = I444ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_NEON; } } #endif -#if defined(HAS_ARGBTORGB24ROW_MSA) +#if defined(HAS_I444TORGB24ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToRGB24Row = ARGBToRGB24Row_MSA; + I444ToRGB24Row = I444ToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_MSA; } } #endif -#if defined(HAS_ARGBTORGB24ROW_LASX) +#if defined(HAS_I444TORGB24ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_LASX; + I444ToRGB24Row = I444ToRGB24Row_Any_LASX; if (IS_ALIGNED(width, 32)) { - ARGBToRGB24Row = ARGBToRGB24Row_LASX; + I444ToRGB24Row = I444ToRGB24Row_LASX; } } #endif -// TODO: Fix HAS macros to match function names -#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) + +#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) + +#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) + +#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_NEON) + +#if defined(HAS_SCALEROWUP2_BILINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif // alloc 4 lines temp const int row_size = (width + 31) & ~31; - align_buffer_64(row, row_size * 8); + align_buffer_64(row, row_size * 4); uint8_t* temp_u_1 = row; uint8_t* temp_u_2 = row + row_size; uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; - uint8_t* temp_argb = row + row_size * 4; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); - I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); - ARGBToRGB24Row(temp_argb, dst_rgb24, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); + I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); - I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); - ARGBToRGB24Row(temp_argb, dst_rgb24, width); + Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width); + I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; - I444ToARGBRow(src_y, temp_u_2, temp_v_2, temp_argb, yuvconstants, width); - ARGBToRGB24Row(temp_argb, dst_rgb24, width); + I444ToRGB24Row(src_y, temp_u_2, temp_v_2, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; src_u += src_stride_u; @@ -5815,10 +6052,9 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); - I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); - ARGBToRGB24Row(temp_argb, dst_rgb24, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); + I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width); } free_aligned_buffer_64(row); @@ -5841,9 +6077,12 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_12)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -5870,21 +6109,24 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -5896,15 +6138,15 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -5916,8 +6158,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); } @@ -5942,8 +6184,9 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; - void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, - int dst_width) = ScaleRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -5972,17 +6215,17 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -5993,8 +6236,8 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear_12(src_u, temp_u, width); + ScaleRowUp2_Linear_12(src_v, temp_v, width); I410ToAR30Row(src_y, temp_u, temp_v, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -6021,9 +6264,12 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_12)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6050,21 +6296,24 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -6076,15 +6325,15 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6096,8 +6345,8 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); } @@ -6121,8 +6370,9 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; - void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, - int dst_width) = ScaleRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6151,17 +6401,17 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -6172,8 +6422,8 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear_12(src_u, temp_u, width); + ScaleRowUp2_Linear_12(src_v, temp_v, width); I410ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6208,9 +6458,12 @@ static int I420AlphaToARGBMatrixBilinear( int width) = I444AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_Any_C; + void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, + int dst_width) = ScaleRowUp2_Bilinear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6293,27 +6546,32 @@ static int I420AlphaToARGBMatrixBilinear( } } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) + +#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif @@ -6325,8 +6583,8 @@ static int I420AlphaToARGBMatrixBilinear( uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6337,8 +6595,8 @@ static int I420AlphaToARGBMatrixBilinear( src_a += src_stride_a; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6360,8 +6618,8 @@ static int I420AlphaToARGBMatrixBilinear( } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6395,8 +6653,9 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, int width) = I444AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = - ScaleRowUp2_Linear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6481,22 +6740,22 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif @@ -6507,8 +6766,8 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, uint8_t* temp_v = row + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); I444AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6548,9 +6807,12 @@ static int I010AlphaToARGBMatrixBilinear( int width) = I410AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_12)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6610,21 +6872,24 @@ static int I010AlphaToARGBMatrixBilinear( } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -6636,8 +6901,8 @@ static int I010AlphaToARGBMatrixBilinear( uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6648,8 +6913,8 @@ static int I010AlphaToARGBMatrixBilinear( src_a += src_stride_a; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6671,8 +6936,8 @@ static int I010AlphaToARGBMatrixBilinear( } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6706,8 +6971,9 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, int width) = I410AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, - int dst_width) = ScaleRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6769,17 +7035,17 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -6790,8 +7056,8 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); I410AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6820,9 +7086,10 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, void (*P410ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleUVRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_16)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6851,19 +7118,19 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_NEON; } #endif @@ -6873,13 +7140,13 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, uint16_t* temp_uv_1 = (uint16_t*)(row); uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; - Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, src_stride_uv, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6890,7 +7157,7 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); } @@ -6911,8 +7178,9 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, void (*P410ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C; - void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = - ScaleUVRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv, + int dst_width) = ScaleUVRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6941,19 +7209,19 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_NEON; } #endif @@ -6962,7 +7230,7 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, uint16_t* temp_uv = (uint16_t*)(row); for (y = 0; y < height; ++y) { - ScaleRowUp(src_uv, temp_uv, width); + ScaleRowUp2_Linear(src_uv, temp_uv, width); P410ToARGBRow(src_y, temp_uv, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6986,9 +7254,10 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, void (*P410ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleUVRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_16)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -7017,19 +7286,19 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_NEON; } #endif @@ -7039,13 +7308,13 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, uint16_t* temp_uv_1 = (uint16_t*)(row); uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; - Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, src_stride_uv, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -7056,7 +7325,7 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); } @@ -7077,8 +7346,9 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, void (*P410ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C; - void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = - ScaleUVRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv, + int dst_width) = ScaleUVRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -7107,19 +7377,19 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_NEON; } #endif @@ -7128,7 +7398,7 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, uint16_t* temp_uv = (uint16_t*)(row); for (y = 0; y < height; ++y) { - ScaleRowUp(src_uv, temp_uv, width); + ScaleRowUp2_Linear(src_uv, temp_uv, width); P410ToAR30Row(src_y, temp_uv, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -7139,6 +7409,128 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, return 0; } +static int I422ToRGB24MatrixLinear(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToRGB24Row_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_I444TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToRGB24Row = I444ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I444ToRGB24Row = I444ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToRGB24Row = I444ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; + } +#endif + + // alloc 2 lines temp + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2); + uint8_t* temp_u = row; + uint8_t* temp_v = row + row_size; + + for (y = 0; y < height; ++y) { + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); + I444ToRGB24Row(src_y, temp_u, temp_v, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + free_aligned_buffer_64(row); + return 0; +} + +LIBYUV_API +int I422ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter) { + switch (filter) { + case kFilterNone: + return I422ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + yuvconstants, width, height); + case kFilterBilinear: + case kFilterBox: + case kFilterLinear: + return I422ToRGB24MatrixLinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); + } + + return -1; +} + LIBYUV_API int I420ToARGBMatrixFilter(const uint8_t* src_y, int src_stride_y, @@ -7217,14 +7609,12 @@ int I420ToRGB24MatrixFilter(const uint8_t* src_y, return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I420ToRGB24MatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); - case kFilterLinear: - // TODO: Implement Linear using Bilinear with Scale2RowUp stride 0 - return -1; } return -1; @@ -7248,13 +7638,12 @@ int I010ToAR30MatrixFilter(const uint16_t* src_y, return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I010ToAR30MatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; @@ -7307,13 +7696,12 @@ int I010ToARGBMatrixFilter(const uint16_t* src_y, return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I010ToARGBMatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; @@ -7370,14 +7758,13 @@ int I420AlphaToARGBMatrixFilter(const uint8_t* src_y, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I420AlphaToARGBMatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); - case kFilterLinear: - return -1; } return -1; @@ -7439,14 +7826,13 @@ int I010AlphaToARGBMatrixFilter(const uint16_t* src_y, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I010AlphaToARGBMatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); - case kFilterLinear: - return -1; } return -1; @@ -7486,6 +7872,8 @@ int I210AlphaToARGBMatrixFilter(const uint16_t* src_y, return -1; } +// TODO(fb): Verify this function works correctly. P010 is like NV12 but 10 bit +// UV is biplanar. LIBYUV_API int P010ToARGBMatrixFilter(const uint16_t* src_y, int src_stride_y, @@ -7502,13 +7890,12 @@ int P010ToARGBMatrixFilter(const uint16_t* src_y, return P010ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, dst_stride_argb, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return P010ToARGBMatrixBilinear(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, dst_stride_argb, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; @@ -7557,13 +7944,12 @@ int P010ToAR30MatrixFilter(const uint16_t* src_y, return P010ToAR30Matrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_ar30, dst_stride_ar30, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return P010ToAR30MatrixBilinear(src_y, src_stride_y, src_uv, src_stride_uv, dst_ar30, dst_stride_ar30, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; |