diff options
author | Frank Barchard <fbarchard@google.com> | 2022-08-16 10:22:05 -0700 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2022-08-16 22:07:38 +0000 |
commit | 65e7c9d5706a77d1949da59bfcb0817c252ef8d6 (patch) | |
tree | 3cb55897ef7833792f07952fbf76b43606197c00 /source/convert_from_argb.cc | |
parent | 1c5a8bb17ac4092da557e55cf519bf4df105d8f1 (diff) | |
download | libyuv-65e7c9d5706a77d1949da59bfcb0817c252ef8d6.tar.gz |
MM21ToYUY2 and ABGRToJ420 conversion
MM21 to YUY2 use zip1 for performance
Cortex A510
Was MM21ToYUY2 (612 ms)
Now MM21ToYUY2 (573 ms)
Prefetches help Cortex A53
Was MM21ToYUY2 (4998 ms)
Now MM21ToYUY2 (1900 ms)
Pixel 4 Cortex A76
Was MM21ToYUY2 (215 ms)
Now MM21ToYUY2 (173 ms)
ABGRToJ420
- NEON, SSSE3 and AVX2 row functions
- J400, J420 and J422 formats.
- Added AVX2 for UV on ARGBToJ420. Was SSSE3
Same code/performance as ARGBToJ420 but with constants re-ordered.
Pixel 4
ABGRToJ420_Opt (623 ms)
ABGRToJ422_Opt (702 ms)
ABGRToJ400_Opt (238 ms)
Skylake Xeon
With LIBYUV_BIT_EXACT which uses C for UV
ABGRToJ420_Opt (988 ms)
ABGRToJ422_Opt (1872 ms)
ABGRToJ400_Opt (186 ms)
Skylake Xeon using AVX2
ABGRToJ420_Opt (251 ms)
ABGRToJ422_Opt (245 ms)
ABGRToJ400_Opt (184 ms)
Skylake Xeon using SSSE3
ABGRToJ420_Opt (328 ms)
ABGRToJ422_Opt (362 ms)
ABGRToJ400_Opt (185 ms)
Bug: b/238137982
Change-Id: I559c3fe3fb80fa2ce5be3d8218736f9cbc627666
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3832111
Reviewed-by: Justin Green <greenjustin@google.com>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/convert_from_argb.cc')
-rw-r--r-- | source/convert_from_argb.cc | 626 |
1 files changed, 468 insertions, 158 deletions
diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index e50c2af3..2f38a488 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -1858,19 +1858,19 @@ int ARGBToJ420(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height) { int y; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; - if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { + if (!src_argb || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1879,6 +1879,22 @@ int ARGBToJ420(const uint8_t* src_argb, src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } +#if defined(HAS_ARGBTOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYJRow = ARGBToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVJRow = ARGBToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_NEON; + } + } +#endif #if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYJRow = ARGBToYJRow_Any_SSSE3; @@ -1903,19 +1919,11 @@ int ARGBToJ420(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYJRow = ARGBToYJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVJRow = ARGBToUVJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_NEON; +#if defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; } } #endif @@ -1931,16 +1939,6 @@ int ARGBToJ420(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYJROW_LSX) && defined(HAS_ARGBTOUVJROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYJRow = ARGBToYJRow_Any_LSX; - ARGBToUVJRow = ARGBToUVJRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_LSX; - ARGBToUVJRow = ARGBToUVJRow_LSX; - } - } -#endif #if defined(HAS_ARGBTOYJROW_LASX) && defined(HAS_ARGBTOUVJROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYJRow = ARGBToYJRow_Any_LASX; @@ -1953,16 +1951,16 @@ int ARGBToJ420(const uint8_t* src_argb, #endif for (y = 0; y < height - 1; y += 2) { - ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width); + ARGBToUVJRow(src_argb, src_stride_argb, dst_uj, dst_vj, width); ARGBToYJRow(src_argb, dst_yj, width); ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width); src_argb += src_stride_argb * 2; dst_yj += dst_stride_yj * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; } if (height & 1) { - ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); + ARGBToUVJRow(src_argb, 0, dst_uj, dst_vj, width); ARGBToYJRow(src_argb, dst_yj, width); } return 0; @@ -1974,19 +1972,19 @@ int ARGBToJ422(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height) { int y; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; - if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { + if (!src_argb || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1997,10 +1995,10 @@ int ARGBToJ422(const uint8_t* src_argb, } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_yj == width && - dst_stride_u * 2 == width && dst_stride_v * 2 == width) { + dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) { width *= height; height = 1; - src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0; + src_stride_argb = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0; } #if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { @@ -2026,6 +2024,14 @@ int ARGBToJ422(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; + } + } +#endif #if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYJRow = ARGBToYJRow_Any_NEON; @@ -2076,266 +2082,570 @@ int ARGBToJ422(const uint8_t* src_argb, #endif for (y = 0; y < height; ++y) { - ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); + ARGBToUVJRow(src_argb, 0, dst_uj, dst_vj, width); ARGBToYJRow(src_argb, dst_yj, width); src_argb += src_stride_argb; dst_yj += dst_stride_yj; - dst_u += dst_stride_u; - dst_v += dst_stride_v; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; } return 0; } -// Convert ARGB to AR64. +// Convert ARGB to J400. LIBYUV_API -int ARGBToAR64(const uint8_t* src_argb, +int ARGBToJ400(const uint8_t* src_argb, int src_stride_argb, - uint16_t* dst_ar64, - int dst_stride_ar64, + uint8_t* dst_yj, + int dst_stride_yj, int width, int height) { int y; - void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, - int width) = ARGBToAR64Row_C; - if (!src_argb || !dst_ar64 || width <= 0 || height == 0) { + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = + ARGBToYJRow_C; + if (!src_argb || !dst_yj || width <= 0 || height == 0) { return -1; } - // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) { + if (src_stride_argb == width * 4 && dst_stride_yj == width) { width *= height; height = 1; - src_stride_argb = dst_stride_ar64 = 0; + src_stride_argb = dst_stride_yj = 0; } -#if defined(HAS_ARGBTOAR64ROW_SSSE3) +#if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3; - if (IS_ALIGNED(width, 4)) { - ARGBToAR64Row = ARGBToAR64Row_SSSE3; + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOAR64ROW_AVX2) +#if defined(HAS_ARGBTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToAR64Row = ARGBToAR64Row_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBToAR64Row = ARGBToAR64Row_AVX2; + ARGBToYJRow = ARGBToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_AVX2; } } #endif -#if defined(HAS_ARGBTOAR64ROW_NEON) +#if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToAR64Row = ARGBToAR64Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToAR64Row = ARGBToAR64Row_NEON; + ARGBToYJRow = ARGBToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYJRow = ARGBToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_MSA; } } #endif for (y = 0; y < height; ++y) { - ARGBToAR64Row(src_argb, dst_ar64, width); + ARGBToYJRow(src_argb, dst_yj, width); src_argb += src_stride_argb; - dst_ar64 += dst_stride_ar64; + dst_yj += dst_stride_yj; } return 0; } -// Convert ARGB to AB64. +// Convert RGBA to J400. LIBYUV_API -int ARGBToAB64(const uint8_t* src_argb, - int src_stride_argb, - uint16_t* dst_ab64, - int dst_stride_ab64, +int RGBAToJ400(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_yj, + int dst_stride_yj, int width, int height) { int y; - void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, - int width) = ARGBToAB64Row_C; - if (!src_argb || !dst_ab64 || width <= 0 || height == 0) { + void (*RGBAToYJRow)(const uint8_t* src_rgba, uint8_t* dst_yj, int width) = + RGBAToYJRow_C; + if (!src_rgba || !dst_yj || width <= 0 || height == 0) { return -1; } - // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; + src_rgba = src_rgba + (height - 1) * src_stride_rgba; + src_stride_rgba = -src_stride_rgba; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) { + if (src_stride_rgba == width * 4 && dst_stride_yj == width) { width *= height; height = 1; - src_stride_argb = dst_stride_ab64 = 0; + src_stride_rgba = dst_stride_yj = 0; } -#if defined(HAS_ARGBTOAB64ROW_SSSE3) +#if defined(HAS_RGBATOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3; - if (IS_ALIGNED(width, 4)) { - ARGBToAB64Row = ARGBToAB64Row_SSSE3; + RGBAToYJRow = RGBAToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGBAToYJRow = RGBAToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOAB64ROW_AVX2) +#if defined(HAS_RGBATOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToAB64Row = ARGBToAB64Row_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBToAB64Row = ARGBToAB64Row_AVX2; + RGBAToYJRow = RGBAToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGBAToYJRow = RGBAToYJRow_AVX2; } } #endif -#if defined(HAS_ARGBTOAB64ROW_NEON) +#if defined(HAS_RGBATOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToAB64Row = ARGBToAB64Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToAB64Row = ARGBToAB64Row_NEON; + RGBAToYJRow = RGBAToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RGBAToYJRow = RGBAToYJRow_NEON; + } + } +#endif +#if defined(HAS_RGBATOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RGBAToYJRow = RGBAToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RGBAToYJRow = RGBAToYJRow_MSA; } } #endif for (y = 0; y < height; ++y) { - ARGBToAB64Row(src_argb, dst_ab64, width); - src_argb += src_stride_argb; - dst_ab64 += dst_stride_ab64; + RGBAToYJRow(src_rgba, dst_yj, width); + src_rgba += src_stride_rgba; + dst_yj += dst_stride_yj; } return 0; } -// Convert ARGB to J400. +// Convert ABGR to J420. (JPeg full range I420). LIBYUV_API -int ARGBToJ400(const uint8_t* src_argb, - int src_stride_argb, +int ABGRToJ420(const uint8_t* src_abgr, + int src_stride_abgr, uint8_t* dst_yj, int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height) { int y; - void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = - ARGBToYJRow_C; - if (!src_argb || !dst_yj || width <= 0 || height == 0) { + void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_uj, uint8_t* dst_vj, int width) = + ABGRToUVJRow_C; + void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = + ABGRToYJRow_C; + if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; + } +#if defined(HAS_ABGRTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToYJRow = ABGRToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToYJRow = ABGRToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVJRow = ABGRToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVJRow = ABGRToUVJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToYJRow = ABGRToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToUVJRow = ABGRToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ABGRToYJRow = ABGRToYJRow_Any_MSA; + ABGRToUVJRow = ABGRToUVJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_MSA; + ABGRToUVJRow = ABGRToUVJRow_MSA; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LSX) && defined(HAS_ABGRTOUVJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYJRow = ABGRToYJRow_Any_LSX; + ABGRToUVJRow = ABGRToUVJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_LSX; + ABGRToUVJRow = ABGRToUVJRow_LSX; + } + } +#endif + + for (y = 0; y < height - 1; y += 2) { + ABGRToUVJRow(src_abgr, src_stride_abgr, dst_uj, dst_vj, width); + ABGRToYJRow(src_abgr, dst_yj, width); + ABGRToYJRow(src_abgr + src_stride_abgr, dst_yj + dst_stride_yj, width); + src_abgr += src_stride_abgr * 2; + dst_yj += dst_stride_yj * 2; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; + } + if (height & 1) { + ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width); + ABGRToYJRow(src_abgr, dst_yj, width); + } + return 0; +} + +// Convert ABGR to J422. (JPeg full range I422). +LIBYUV_API +int ABGRToJ422(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, + int width, + int height) { + int y; + void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_uj, uint8_t* dst_vj, int width) = + ABGRToUVJRow_C; + void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = + ABGRToYJRow_C; + if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_yj == width) { + if (src_stride_abgr == width * 4 && dst_stride_yj == width && + dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) { width *= height; height = 1; - src_stride_argb = dst_stride_yj = 0; + src_stride_abgr = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0; } -#if defined(HAS_ARGBTOYJROW_SSSE3) +#if defined(HAS_ABGRTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + ABGRToYJRow = ABGRToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; + ABGRToYJRow = ABGRToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYJROW_AVX2) +#if defined(HAS_ABGRTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; + ABGRToYJRow = ABGRToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; + ABGRToYJRow = ABGRToYJRow_AVX2; } } #endif -#if defined(HAS_ARGBTOYJROW_NEON) +#if defined(HAS_ABGRTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVJRow = ABGRToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVJRow = ABGRToUVJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYJRow = ARGBToYJRow_Any_NEON; + ABGRToYJRow = ABGRToYJRow_Any_NEON; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_NEON; + ABGRToYJRow = ABGRToYJRow_NEON; } } #endif -#if defined(HAS_ARGBTOYJROW_MSA) +#if defined(HAS_ABGRTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToUVJRow = ABGRToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYJRow = ARGBToYJRow_Any_MSA; + ABGRToYJRow = ABGRToYJRow_Any_MSA; + ABGRToUVJRow = ABGRToUVJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_MSA; + ABGRToYJRow = ABGRToYJRow_MSA; + } + if (IS_ALIGNED(width, 32)) { + ABGRToUVJRow = ABGRToUVJRow_MSA; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LSX) && defined(HAS_ABGRTOUVJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYJRow = ABGRToYJRow_Any_LSX; + ABGRToUVJRow = ABGRToUVJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_LSX; + ABGRToUVJRow = ABGRToUVJRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LASX) && defined(HAS_ABGRTOUVJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYJRow = ABGRToYJRow_Any_LASX; + ABGRToUVJRow = ABGRToUVJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_LASX; + ABGRToUVJRow = ABGRToUVJRow_LASX; } } #endif for (y = 0; y < height; ++y) { - ARGBToYJRow(src_argb, dst_yj, width); - src_argb += src_stride_argb; + ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width); + ABGRToYJRow(src_abgr, dst_yj, width); + src_abgr += src_stride_abgr; dst_yj += dst_stride_yj; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; } return 0; } -// Convert RGBA to J400. +// Convert ABGR to J400. LIBYUV_API -int RGBAToJ400(const uint8_t* src_rgba, - int src_stride_rgba, +int ABGRToJ400(const uint8_t* src_abgr, + int src_stride_abgr, uint8_t* dst_yj, int dst_stride_yj, int width, int height) { int y; - void (*RGBAToYJRow)(const uint8_t* src_rgba, uint8_t* dst_yj, int width) = - RGBAToYJRow_C; - if (!src_rgba || !dst_yj || width <= 0 || height == 0) { + void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = + ABGRToYJRow_C; + if (!src_abgr || !dst_yj || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_rgba = src_rgba + (height - 1) * src_stride_rgba; - src_stride_rgba = -src_stride_rgba; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; } // Coalesce rows. - if (src_stride_rgba == width * 4 && dst_stride_yj == width) { + if (src_stride_abgr == width * 4 && dst_stride_yj == width) { width *= height; height = 1; - src_stride_rgba = dst_stride_yj = 0; + src_stride_abgr = dst_stride_yj = 0; } -#if defined(HAS_RGBATOYJROW_SSSE3) +#if defined(HAS_ABGRTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - RGBAToYJRow = RGBAToYJRow_Any_SSSE3; + ABGRToYJRow = ABGRToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - RGBAToYJRow = RGBAToYJRow_SSSE3; + ABGRToYJRow = ABGRToYJRow_SSSE3; } } #endif -#if defined(HAS_RGBATOYJROW_AVX2) +#if defined(HAS_ABGRTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - RGBAToYJRow = RGBAToYJRow_Any_AVX2; + ABGRToYJRow = ABGRToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - RGBAToYJRow = RGBAToYJRow_AVX2; + ABGRToYJRow = ABGRToYJRow_AVX2; } } #endif -#if defined(HAS_RGBATOYJROW_NEON) +#if defined(HAS_ABGRTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - RGBAToYJRow = RGBAToYJRow_Any_NEON; + ABGRToYJRow = ABGRToYJRow_Any_NEON; if (IS_ALIGNED(width, 16)) { - RGBAToYJRow = RGBAToYJRow_NEON; + ABGRToYJRow = ABGRToYJRow_NEON; } } #endif -#if defined(HAS_RGBATOYJROW_MSA) +#if defined(HAS_ABGRTOYJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { - RGBAToYJRow = RGBAToYJRow_Any_MSA; + ABGRToYJRow = ABGRToYJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { - RGBAToYJRow = RGBAToYJRow_MSA; + ABGRToYJRow = ABGRToYJRow_MSA; } } #endif for (y = 0; y < height; ++y) { - RGBAToYJRow(src_rgba, dst_yj, width); - src_rgba += src_stride_rgba; + ABGRToYJRow(src_abgr, dst_yj, width); + src_abgr += src_stride_abgr; dst_yj += dst_stride_yj; } return 0; } +// Convert ARGB to AR64. +LIBYUV_API +int ARGBToAR64(const uint8_t* src_argb, + int src_stride_argb, + uint16_t* dst_ar64, + int dst_stride_ar64, + int width, + int height) { + int y; + void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, + int width) = ARGBToAR64Row_C; + if (!src_argb || !dst_ar64 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_ar64 = 0; + } +#if defined(HAS_ARGBTOAR64ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBToAR64Row = ARGBToAR64Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOAR64ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToAR64Row = ARGBToAR64Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBToAR64Row = ARGBToAR64Row_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOAR64ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToAR64Row = ARGBToAR64Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToAR64Row = ARGBToAR64Row_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToAR64Row(src_argb, dst_ar64, width); + src_argb += src_stride_argb; + dst_ar64 += dst_stride_ar64; + } + return 0; +} + +// Convert ARGB to AB64. +LIBYUV_API +int ARGBToAB64(const uint8_t* src_argb, + int src_stride_argb, + uint16_t* dst_ab64, + int dst_stride_ab64, + int width, + int height) { + int y; + void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, + int width) = ARGBToAB64Row_C; + if (!src_argb || !dst_ab64 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_ab64 = 0; + } +#if defined(HAS_ARGBTOAB64ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBToAB64Row = ARGBToAB64Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOAB64ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToAB64Row = ARGBToAB64Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBToAB64Row = ARGBToAB64Row_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOAB64ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToAB64Row = ARGBToAB64Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToAB64Row = ARGBToAB64Row_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToAB64Row(src_argb, dst_ab64, width); + src_argb += src_stride_argb; + dst_ab64 += dst_stride_ab64; + } + return 0; +} + // Enabled if 1 pass is available #if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) #define HAS_RAWTOYJROW @@ -2355,7 +2665,7 @@ int RAWToJNV21(const uint8_t* src_raw, int halfwidth = (width + 1) >> 1; #if defined(HAS_RAWTOYJROW) void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = RAWToUVJRow_C; void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = RAWToYJRow_C; @@ -2363,12 +2673,12 @@ int RAWToJNV21(const uint8_t* src_raw, void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYJRow_C; #endif - void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, + void (*MergeUVRow_)(const uint8_t* src_uj, const uint8_t* src_vj, uint8_t* dst_vu, int width) = MergeUVRow_C; if (!src_raw || !dst_y || !dst_vu || width <= 0 || height == 0) { return -1; @@ -2490,8 +2800,8 @@ int RAWToJNV21(const uint8_t* src_raw, #endif { // Allocate a row of uv. - align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + align_buffer_64(row_uj, ((halfwidth + 31) & ~31) * 2); + uint8_t* row_vj = row_uj + ((halfwidth + 31) & ~31); #if !defined(HAS_RAWTOYJROW) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; @@ -2500,15 +2810,15 @@ int RAWToJNV21(const uint8_t* src_raw, for (y = 0; y < height - 1; y += 2) { #if defined(HAS_RAWTOYJROW) - RAWToUVJRow(src_raw, src_stride_raw, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + RAWToUVJRow(src_raw, src_stride_raw, row_uj, row_vj, width); + MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); RAWToYJRow(src_raw, dst_y, width); RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); #else RAWToARGBRow(src_raw, row, width); RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVJRow(row, kRowSize, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + ARGBToUVJRow(row, kRowSize, row_uj, row_vj, width); + MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); ARGBToYJRow(row, dst_y, width); ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); #endif @@ -2518,20 +2828,20 @@ int RAWToJNV21(const uint8_t* src_raw, } if (height & 1) { #if defined(HAS_RAWTOYJROW) - RAWToUVJRow(src_raw, 0, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + RAWToUVJRow(src_raw, 0, row_uj, row_vj, width); + MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); RAWToYJRow(src_raw, dst_y, width); #else RAWToARGBRow(src_raw, row, width); - ARGBToUVJRow(row, 0, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + ARGBToUVJRow(row, 0, row_uj, row_vj, width); + MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); ARGBToYJRow(row, dst_y, width); #endif } #if !defined(HAS_RAWTOYJROW) free_aligned_buffer_64(row); #endif - free_aligned_buffer_64(row_u); + free_aligned_buffer_64(row_uj); } return 0; } |