diff options
author | Frank Barchard <fbarchard@google.com> | 2021-10-19 00:02:50 -0700 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2021-10-19 07:54:50 +0000 |
commit | b179f1847a7cc17957eab399610cb9ef163bb715 (patch) | |
tree | 7c34ecfb0598a3b09d69f8e7b9525480a9b4749d /source/convert.cc | |
parent | f0cfc1f1c8a4bf0e9b1e73b6ef87bdfc6e2566ae (diff) | |
download | libyuv-b179f1847a7cc17957eab399610cb9ef163bb715.tar.gz |
Enable SIMD for exact RGB to Y conversions
Bug: libyuv:908, b/202888439
Change-Id: Icc5470b85d91b441ded9958ee04b4f32246646f0
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3230489
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Diffstat (limited to 'source/convert.cc')
-rw-r--r-- | source/convert.cc | 491 |
1 files changed, 275 insertions, 216 deletions
diff --git a/source/convert.cc b/source/convert.cc index 69f7fb6e..b5e241e1 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -1368,38 +1368,54 @@ int ARGBToI420(const uint8_t* src_argb, src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } -#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON) +#if defined(HAS_ARGBTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYRow = ARGBToYRow_Any_NEON; - ARGBToUVRow = ARGBToUVRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } } } #endif -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVRow = ARGBToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif #if defined(HAS_ARGBTOYROW_MMI) && defined(HAS_ARGBTOUVROW_MMI) if (TestCpuFlag(kCpuHasMMI)) { ARGBToYRow = ARGBToYRow_Any_MMI; @@ -1560,26 +1576,38 @@ int ABGRToI420(const uint8_t* src_abgr, src_abgr = src_abgr + (height - 1) * src_stride_abgr; src_stride_abgr = -src_stride_abgr; } -#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3) +#if defined(HAS_ABGRTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToUVRow = ABGRToUVRow_Any_SSSE3; ABGRToYRow = ABGRToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_SSSE3; ABGRToYRow = ABGRToYRow_SSSE3; } } #endif -#if defined(HAS_ABGRTOYROW_AVX2) && defined(HAS_ABGRTOUVROW_AVX2) +#if defined(HAS_ABGRTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVRow = ABGRToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVRow = ABGRToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ABGRToUVRow = ABGRToUVRow_Any_AVX2; ABGRToYRow = ABGRToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ABGRToUVRow = ABGRToUVRow_AVX2; ABGRToYRow = ABGRToYRow_AVX2; } } #endif +#if defined(HAS_ABGRTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVRow = ABGRToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVRow = ABGRToUVRow_AVX2; + } + } +#endif #if defined(HAS_ABGRTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ABGRToYRow = ABGRToYRow_Any_NEON; @@ -1662,16 +1690,22 @@ int RGBAToI420(const uint8_t* src_rgba, src_rgba = src_rgba + (height - 1) * src_stride_rgba; src_stride_rgba = -src_stride_rgba; } -#if defined(HAS_RGBATOYROW_SSSE3) && defined(HAS_RGBATOUVROW_SSSE3) +#if defined(HAS_RGBATOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - RGBAToUVRow = RGBAToUVRow_Any_SSSE3; RGBAToYRow = RGBAToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - RGBAToUVRow = RGBAToUVRow_SSSE3; RGBAToYRow = RGBAToYRow_SSSE3; } } #endif +#if defined(HAS_RGBATOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGBAToUVRow = RGBAToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGBAToUVRow = RGBAToUVRow_SSSE3; + } + } +#endif #if defined(HAS_RGBATOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGBAToYRow = RGBAToYRow_Any_NEON; @@ -1727,6 +1761,12 @@ int RGBAToI420(const uint8_t* src_rgba, return 0; } +// Enabled if 1 pass is available +#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ + defined(HAS_RGB24TOYROW_MMI)) +#define HAS_RGB24TOYROW +#endif + // Convert RGB24 to I420. LIBYUV_API int RGB24ToI420(const uint8_t* src_rgb24, @@ -1740,8 +1780,7 @@ int RGB24ToI420(const uint8_t* src_rgb24, int width, int height) { int y; -#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_MMI)) +#if defined(HAS_RGB24TOYROW) void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_u, uint8_t* dst_v, int width) = RGB24ToUVRow_C; @@ -1766,6 +1805,8 @@ int RGB24ToI420(const uint8_t* src_rgb24, src_stride_rgb24 = -src_stride_rgb24; } +#if defined(HAS_RGB24TOYROW) + // Neon version does direct RGB24 to YUV. #if defined(HAS_RGB24TOYROW_NEON) && defined(HAS_RGB24TOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { @@ -1778,8 +1819,7 @@ int RGB24ToI420(const uint8_t* src_rgb24, } } } -// MMI and MSA version does direct RGB24 to YUV. -#elif (defined(HAS_RGB24TOYROW_MMI) || defined(HAS_RGB24TOYROW_MSA)) +#endif #if defined(HAS_RGB24TOYROW_MMI) && defined(HAS_RGB24TOUVROW_MMI) if (TestCpuFlag(kCpuHasMMI)) { RGB24ToUVRow = RGB24ToUVRow_Any_MMI; @@ -1802,16 +1842,10 @@ int RGB24ToI420(const uint8_t* src_rgb24, } } #endif + // Other platforms do intermediate conversion from RGB24 to ARGB. -#else -#if defined(HAS_RGB24TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGB24ToARGBRow = RGB24ToARGBRow_NEON; - } - } -#endif +#else // HAS_RGB24TOYROW + #if defined(HAS_RGB24TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; @@ -1820,51 +1854,49 @@ int RGB24ToI420(const uint8_t* src_rgb24, } } #endif -#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYRow = ARGBToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; - ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) +#if defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVRow = ARGBToUVRow_Any_AVX2; - ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_AVX2; - ARGBToYRow = ARGBToYRow_AVX2; } } #endif -#endif +#endif // HAS_RGB24TOYROW { -#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_MMI)) +#if !defined(HAS_RGB24TOYROW) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { -#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_MMI)) +#if defined(HAS_RGB24TOYROW) RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); RGB24ToYRow(src_rgb24, dst_y, width); RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); @@ -1881,8 +1913,7 @@ int RGB24ToI420(const uint8_t* src_rgb24, dst_v += dst_stride_v; } if (height & 1) { -#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_MMI)) +#if defined(HAS_RGB24TOYROW) RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width); RGB24ToYRow(src_rgb24, dst_y, width); #else @@ -1891,15 +1922,20 @@ int RGB24ToI420(const uint8_t* src_rgb24, ARGBToYRow(row, dst_y, width); #endif } -#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_MMI)) +#if !defined(HAS_RGB24TOYROW) free_aligned_buffer_64(row); #endif } return 0; } +#undef HAS_RGB24TOYROW + +// Enabled if 1 pass is available +#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ + defined(HAS_RGB24TOYJROW_MMI)) +#define HAS_RGB24TOYJROW +#endif -// TODO(fbarchard): Use Matrix version to implement I420 and J420. // Convert RGB24 to J420. LIBYUV_API int RGB24ToJ420(const uint8_t* src_rgb24, @@ -1913,10 +1949,9 @@ int RGB24ToJ420(const uint8_t* src_rgb24, int width, int height) { int y; -#if (defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \ - defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI) +#if defined(HAS_RGB24TOYJROW) void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_u, uint8_t* dst_v, int width) = RGB24ToUVJRow_C; void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) = RGB24ToYJRow_C; @@ -1924,7 +1959,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24, void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RGB24ToARGBRow_C; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYJRow_C; @@ -1939,6 +1974,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24, src_stride_rgb24 = -src_stride_rgb24; } +#if defined(HAS_RGB24TOYJROW) + // Neon version does direct RGB24 to YUV. #if defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { @@ -1951,8 +1988,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24, } } } -// MMI and MSA version does direct RGB24 to YUV. -#elif (defined(HAS_RGB24TOYJROW_MMI) || defined(HAS_RGB24TOYJROW_MSA)) +#endif #if defined(HAS_RGB24TOYJROW_MMI) && defined(HAS_RGB24TOUVJROW_MMI) if (TestCpuFlag(kCpuHasMMI)) { RGB24ToUVJRow = RGB24ToUVJRow_Any_MMI; @@ -1975,15 +2011,10 @@ int RGB24ToJ420(const uint8_t* src_rgb24, } } #endif -#else -#if defined(HAS_RGB24TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGB24ToARGBRow = RGB24ToARGBRow_NEON; - } - } -#endif + +// Other platforms do intermediate conversion from RGB24 to ARGB. +#else // HAS_RGB24TOYJROW + #if defined(HAS_RGB24TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; @@ -1992,51 +2023,49 @@ int RGB24ToJ420(const uint8_t* src_rgb24, } } #endif -#if defined(HAS_ARGBTOYJROW_NEON) && defined(HAS_ARGBTOUVJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVJRow = ARGBToUVJRow_Any_NEON; - ARGBToYJRow = ARGBToYJRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYJRow = ARGBToYJRow_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_NEON; - } +#if defined(HAS_ARGBTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3) +#if defined(HAS_ARGBTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYJRow = ARGBToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVJRow = ARGBToUVJRow_SSSE3; - ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2) +#if defined(HAS_ARGBTOUVJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; - ARGBToYJRow = ARGBToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVJRow = ARGBToUVJRow_AVX2; - ARGBToYJRow = ARGBToYJRow_AVX2; } } #endif -#endif +#endif // HAS_RGB24TOYJROW { -#if !((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \ - defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI)) +#if !defined(HAS_RGB24TOYJROW) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { -#if ((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \ - defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI)) +#if defined(HAS_RGB24TOYJROW) RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); RGB24ToYJRow(src_rgb24, dst_y, width); RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); @@ -2053,8 +2082,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24, dst_v += dst_stride_v; } if (height & 1) { -#if ((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \ - defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI)) +#if defined(HAS_RGB24TOYJROW) RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width); RGB24ToYJRow(src_rgb24, dst_y, width); #else @@ -2063,31 +2091,37 @@ int RGB24ToJ420(const uint8_t* src_rgb24, ARGBToYJRow(row, dst_y, width); #endif } -#if !((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \ - defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI)) +#if !defined(HAS_RGB24TOYJROW) free_aligned_buffer_64(row); #endif } return 0; } +#undef HAS_RGB24TOYJROW + +// Enabled if 1 pass is available +#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ + defined(HAS_RAWTOYROW_MMI)) +#define HAS_RAWTOYROW +#endif // Convert RAW to I420. LIBYUV_API int RAWToI420(const uint8_t* src_raw, - int src_stride_raw, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; -#if (defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)) || \ - defined(HAS_RAWTOYROW_MSA) || defined(HAS_RAWTOYROW_MMI) - void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, - uint8_t* dst_v, int width) = RAWToUVRow_C; +#if defined(HAS_RAWTOYROW) + void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RAWToUVRow_C; void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = RAWToYRow_C; #else @@ -2109,6 +2143,8 @@ int RAWToI420(const uint8_t* src_raw, src_stride_raw = -src_stride_raw; } +#if defined(HAS_RAWTOYROW) + // Neon version does direct RAW to YUV. #if defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { @@ -2121,8 +2157,7 @@ int RAWToI420(const uint8_t* src_raw, } } } -// MMI and MSA version does direct RAW to YUV. -#elif (defined(HAS_RAWTOYROW_MMI) || defined(HAS_RAWTOYROW_MSA)) +#endif #if defined(HAS_RAWTOYROW_MMI) && defined(HAS_RAWTOUVROW_MMI) if (TestCpuFlag(kCpuHasMMI)) { RAWToUVRow = RAWToUVRow_Any_MMI; @@ -2145,28 +2180,10 @@ int RAWToI420(const uint8_t* src_raw, } } #endif + // Other platforms do intermediate conversion from RAW to ARGB. -#else -#if defined(HAS_RAWTOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RAWToARGBRow = RAWToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RAWToARGBRow = RAWToARGBRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } - } - } -#endif +#else // HAS_RAWTOYROW + #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; @@ -2175,39 +2192,49 @@ int RAWToI420(const uint8_t* src_raw, } } #endif -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } #endif +#endif // HAS_RAWTOYROW { -#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ - defined(HAS_RAWTOYROW_MMI)) +#if !defined(HAS_RAWTOYROW) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { -#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ - defined(HAS_RAWTOYROW_MMI)) +#if defined(HAS_RAWTOYROW) RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width); RAWToYRow(src_raw, dst_y, width); RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); @@ -2224,8 +2251,7 @@ int RAWToI420(const uint8_t* src_raw, dst_v += dst_stride_v; } if (height & 1) { -#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ - defined(HAS_RAWTOYROW_MMI)) +#if defined(HAS_RAWTOYROW) RAWToUVRow(src_raw, 0, dst_u, dst_v, width); RAWToYRow(src_raw, dst_y, width); #else @@ -2234,32 +2260,36 @@ int RAWToI420(const uint8_t* src_raw, ARGBToYRow(row, dst_y, width); #endif } -#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ - defined(HAS_RAWTOYROW_MMI)) +#if !defined(HAS_RAWTOYROW) free_aligned_buffer_64(row); #endif } return 0; } +#undef HAS_RAWTOYROW + +// Enabled if 1 pass is available +#if (defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \ + defined(HAS_RAWTOYJROW_MMI)) +#define HAS_RAWTOYJROW +#endif -// TODO(fbarchard): Use Matrix version to implement I420 and J420. // Convert RAW to J420. LIBYUV_API int RAWToJ420(const uint8_t* src_raw, - int src_stride_raw, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; -#if (defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \ - defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI) +#if defined(HAS_RAWTOYJROW) void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_u, uint8_t* dst_v, int width) = RAWToUVJRow_C; void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = RAWToYJRow_C; @@ -2267,7 +2297,7 @@ int RAWToJ420(const uint8_t* src_raw, void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYJRow_C; @@ -2282,6 +2312,8 @@ int RAWToJ420(const uint8_t* src_raw, src_stride_raw = -src_stride_raw; } +#if defined(HAS_RAWTOYJROW) + // Neon version does direct RAW to YUV. #if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { @@ -2294,8 +2326,7 @@ int RAWToJ420(const uint8_t* src_raw, } } } -// MMI and MSA version does direct RAW to YUV. -#elif (defined(HAS_RAWTOYJROW_MMI) || defined(HAS_RAWTOYJROW_MSA)) +#endif #if defined(HAS_RAWTOYJROW_MMI) && defined(HAS_RAWTOUVJROW_MMI) if (TestCpuFlag(kCpuHasMMI)) { RAWToUVJRow = RAWToUVJRow_Any_MMI; @@ -2318,27 +2349,10 @@ int RAWToJ420(const uint8_t* src_raw, } } #endif -#else -#if defined(HAS_RAWTOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RAWToARGBRow = RAWToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RAWToARGBRow = RAWToARGBRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_NEON) && defined(HAS_ARGBTOUVJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVJRow = ARGBToUVJRow_Any_NEON; - ARGBToYJRow = ARGBToYJRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYJRow = ARGBToYJRow_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_NEON; - } - } - } -#endif + +// Other platforms do intermediate conversion from RAW to ARGB. +#else // HAS_RAWTOYJROW + #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; @@ -2347,39 +2361,49 @@ int RAWToJ420(const uint8_t* src_raw, } } #endif -#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3) +#if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; ARGBToYJRow = ARGBToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_SSSE3; ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2) +#if defined(HAS_ARGBTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; ARGBToYJRow = ARGBToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVJRow = ARGBToUVJRow_AVX2; ARGBToYJRow = ARGBToYJRow_AVX2; } } #endif +#if defined(HAS_ARGBTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_SSSE3; + } + } #endif +#if defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; + } + } +#endif +#endif // HAS_RAWTOYJROW { -#if !((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \ - defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI)) +#if !defined(HAS_RAWTOYJROW) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { -#if ((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \ - defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI)) +#if defined(HAS_RAWTOYJROW) RAWToUVJRow(src_raw, src_stride_raw, dst_u, dst_v, width); RAWToYJRow(src_raw, dst_y, width); RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); @@ -2396,8 +2420,7 @@ int RAWToJ420(const uint8_t* src_raw, dst_v += dst_stride_v; } if (height & 1) { -#if ((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \ - defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI)) +#if defined(HAS_RAWTOYJROW) RAWToUVJRow(src_raw, 0, dst_u, dst_v, width); RAWToYJRow(src_raw, dst_y, width); #else @@ -2406,13 +2429,13 @@ int RAWToJ420(const uint8_t* src_raw, ARGBToYJRow(row, dst_y, width); #endif } -#if !((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \ - defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI)) +#if !defined(HAS_RAWTOYJROW) free_aligned_buffer_64(row); #endif } return 0; } +#undef HAS_RAWTOYJROW // Convert RGB565 to I420. LIBYUV_API @@ -2507,26 +2530,38 @@ int RGB565ToI420(const uint8_t* src_rgb565, } } #endif -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif #endif { #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ @@ -2666,26 +2701,38 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } } #endif -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif #endif { #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ @@ -2822,26 +2869,38 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, } } #endif -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif #if defined(HAS_ARGBTOYROW_MMI) && defined(HAS_ARGBTOUVROW_MMI) if (TestCpuFlag(kCpuHasMMI)) { ARGBToUVRow = ARGBToUVRow_Any_MMI; |