aboutsummaryrefslogtreecommitdiff
path: root/source/convert.cc
diff options
context:
space:
mode:
authorFrank Barchard <fbarchard@google.com>2021-10-19 00:02:50 -0700
committerlibyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com>2021-10-19 07:54:50 +0000
commitb179f1847a7cc17957eab399610cb9ef163bb715 (patch)
tree7c34ecfb0598a3b09d69f8e7b9525480a9b4749d /source/convert.cc
parentf0cfc1f1c8a4bf0e9b1e73b6ef87bdfc6e2566ae (diff)
downloadlibyuv-b179f1847a7cc17957eab399610cb9ef163bb715.tar.gz
Enable SIMD for exact RGB to Y conversions
Bug: libyuv:908, b/202888439 Change-Id: Icc5470b85d91b441ded9958ee04b4f32246646f0 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3230489 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Diffstat (limited to 'source/convert.cc')
-rw-r--r--source/convert.cc491
1 files changed, 275 insertions, 216 deletions
diff --git a/source/convert.cc b/source/convert.cc
index 69f7fb6e..b5e241e1 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -1368,38 +1368,54 @@ int ARGBToI420(const uint8_t* src_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
-#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
+#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
}
}
#endif
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
+#if defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_MMI) && defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
@@ -1560,26 +1576,38 @@ int ABGRToI420(const uint8_t* src_abgr,
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
src_stride_abgr = -src_stride_abgr;
}
-#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3)
+#if defined(HAS_ABGRTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
ABGRToYRow = ABGRToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ABGRToUVRow = ABGRToUVRow_SSSE3;
ABGRToYRow = ABGRToYRow_SSSE3;
}
}
#endif
-#if defined(HAS_ABGRTOYROW_AVX2) && defined(HAS_ABGRTOUVROW_AVX2)
+#if defined(HAS_ABGRTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToUVRow = ABGRToUVRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ABGRToUVRow = ABGRToUVRow_Any_AVX2;
ABGRToYRow = ABGRToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- ABGRToUVRow = ABGRToUVRow_AVX2;
ABGRToYRow = ABGRToYRow_AVX2;
}
}
#endif
+#if defined(HAS_ABGRTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ABGRToUVRow = ABGRToUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToUVRow = ABGRToUVRow_AVX2;
+ }
+ }
+#endif
#if defined(HAS_ABGRTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToYRow = ABGRToYRow_Any_NEON;
@@ -1662,16 +1690,22 @@ int RGBAToI420(const uint8_t* src_rgba,
src_rgba = src_rgba + (height - 1) * src_stride_rgba;
src_stride_rgba = -src_stride_rgba;
}
-#if defined(HAS_RGBATOYROW_SSSE3) && defined(HAS_RGBATOUVROW_SSSE3)
+#if defined(HAS_RGBATOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
RGBAToYRow = RGBAToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- RGBAToUVRow = RGBAToUVRow_SSSE3;
RGBAToYRow = RGBAToYRow_SSSE3;
}
}
#endif
+#if defined(HAS_RGBATOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToUVRow = RGBAToUVRow_SSSE3;
+ }
+ }
+#endif
#if defined(HAS_RGBATOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGBAToYRow = RGBAToYRow_Any_NEON;
@@ -1727,6 +1761,12 @@ int RGBAToI420(const uint8_t* src_rgba,
return 0;
}
+// Enabled if 1 pass is available
+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
+ defined(HAS_RGB24TOYROW_MMI))
+#define HAS_RGB24TOYROW
+#endif
+
// Convert RGB24 to I420.
LIBYUV_API
int RGB24ToI420(const uint8_t* src_rgb24,
@@ -1740,8 +1780,7 @@ int RGB24ToI420(const uint8_t* src_rgb24,
int width,
int height) {
int y;
-#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
- defined(HAS_RGB24TOYROW_MMI))
+#if defined(HAS_RGB24TOYROW)
void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB24ToUVRow_C;
@@ -1766,6 +1805,8 @@ int RGB24ToI420(const uint8_t* src_rgb24,
src_stride_rgb24 = -src_stride_rgb24;
}
+#if defined(HAS_RGB24TOYROW)
+
// Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYROW_NEON) && defined(HAS_RGB24TOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
@@ -1778,8 +1819,7 @@ int RGB24ToI420(const uint8_t* src_rgb24,
}
}
}
-// MMI and MSA version does direct RGB24 to YUV.
-#elif (defined(HAS_RGB24TOYROW_MMI) || defined(HAS_RGB24TOYROW_MSA))
+#endif
#if defined(HAS_RGB24TOYROW_MMI) && defined(HAS_RGB24TOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGB24ToUVRow = RGB24ToUVRow_Any_MMI;
@@ -1802,16 +1842,10 @@ int RGB24ToI420(const uint8_t* src_rgb24,
}
}
#endif
+
// Other platforms do intermediate conversion from RGB24 to ARGB.
-#else
-#if defined(HAS_RGB24TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RGB24ToARGBRow = RGB24ToARGBRow_NEON;
- }
- }
-#endif
+#else // HAS_RGB24TOYROW
+
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
@@ -1820,51 +1854,49 @@ int RGB24ToI420(const uint8_t* src_rgb24,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
+#if defined(HAS_ARGBTOYROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
- ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+#if defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
- ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
- ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
-#endif
+#endif // HAS_RGB24TOYROW
{
-#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
- defined(HAS_RGB24TOYROW_MMI))
+#if !defined(HAS_RGB24TOYROW)
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
-#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
- defined(HAS_RGB24TOYROW_MMI))
+#if defined(HAS_RGB24TOYROW)
RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
RGB24ToYRow(src_rgb24, dst_y, width);
RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
@@ -1881,8 +1913,7 @@ int RGB24ToI420(const uint8_t* src_rgb24,
dst_v += dst_stride_v;
}
if (height & 1) {
-#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
- defined(HAS_RGB24TOYROW_MMI))
+#if defined(HAS_RGB24TOYROW)
RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
RGB24ToYRow(src_rgb24, dst_y, width);
#else
@@ -1891,15 +1922,20 @@ int RGB24ToI420(const uint8_t* src_rgb24,
ARGBToYRow(row, dst_y, width);
#endif
}
-#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
- defined(HAS_RGB24TOYROW_MMI))
+#if !defined(HAS_RGB24TOYROW)
free_aligned_buffer_64(row);
#endif
}
return 0;
}
+#undef HAS_RGB24TOYROW
+
+// Enabled if 1 pass is available
+#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_MMI))
+#define HAS_RGB24TOYJROW
+#endif
-// TODO(fbarchard): Use Matrix version to implement I420 and J420.
// Convert RGB24 to J420.
LIBYUV_API
int RGB24ToJ420(const uint8_t* src_rgb24,
@@ -1913,10 +1949,9 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
int width,
int height) {
int y;
-#if (defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
- defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI)
+#if defined(HAS_RGB24TOYJROW)
void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
- uint8_t* dst_u, uint8_t* dst_v, int width) =
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB24ToUVJRow_C;
void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
RGB24ToYJRow_C;
@@ -1924,7 +1959,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RGB24ToARGBRow_C;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
- uint8_t* dst_u, uint8_t* dst_v, int width) =
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYJRow_C;
@@ -1939,6 +1974,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
src_stride_rgb24 = -src_stride_rgb24;
}
+#if defined(HAS_RGB24TOYJROW)
+
// Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
@@ -1951,8 +1988,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
}
}
-// MMI and MSA version does direct RGB24 to YUV.
-#elif (defined(HAS_RGB24TOYJROW_MMI) || defined(HAS_RGB24TOYJROW_MSA))
+#endif
#if defined(HAS_RGB24TOYJROW_MMI) && defined(HAS_RGB24TOUVJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGB24ToUVJRow = RGB24ToUVJRow_Any_MMI;
@@ -1975,15 +2011,10 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
}
#endif
-#else
-#if defined(HAS_RGB24TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RGB24ToARGBRow = RGB24ToARGBRow_NEON;
- }
- }
-#endif
+
+// Other platforms do intermediate conversion from RGB24 to ARGB.
+#else // HAS_RGB24TOYJROW
+
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
@@ -1992,51 +2023,49 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_NEON) && defined(HAS_ARGBTOUVJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
- ARGBToYJRow = ARGBToYJRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYJRow = ARGBToYJRow_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_NEON;
- }
+#if defined(HAS_ARGBTOYJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
+#if defined(HAS_ARGBTOYJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYJRow = ARGBToYJRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
- ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
- ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
+#if defined(HAS_ARGBTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
- ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVJRow = ARGBToUVJRow_AVX2;
- ARGBToYJRow = ARGBToYJRow_AVX2;
}
}
#endif
-#endif
+#endif // HAS_RGB24TOYJROW
{
-#if !((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
- defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI))
+#if !defined(HAS_RGB24TOYJROW)
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
-#if ((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
- defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI))
+#if defined(HAS_RGB24TOYJROW)
RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
RGB24ToYJRow(src_rgb24, dst_y, width);
RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
@@ -2053,8 +2082,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
dst_v += dst_stride_v;
}
if (height & 1) {
-#if ((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
- defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI))
+#if defined(HAS_RGB24TOYJROW)
RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width);
RGB24ToYJRow(src_rgb24, dst_y, width);
#else
@@ -2063,31 +2091,37 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
ARGBToYJRow(row, dst_y, width);
#endif
}
-#if !((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
- defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI))
+#if !defined(HAS_RGB24TOYJROW)
free_aligned_buffer_64(row);
#endif
}
return 0;
}
+#undef HAS_RGB24TOYJROW
+
+// Enabled if 1 pass is available
+#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
+ defined(HAS_RAWTOYROW_MMI))
+#define HAS_RAWTOYROW
+#endif
// Convert RAW to I420.
LIBYUV_API
int RAWToI420(const uint8_t* src_raw,
- int src_stride_raw,
- uint8_t* dst_y,
- int dst_stride_y,
- uint8_t* dst_u,
- int dst_stride_u,
- uint8_t* dst_v,
- int dst_stride_v,
- int width,
- int height) {
+ int src_stride_raw,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
int y;
-#if (defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)) || \
- defined(HAS_RAWTOYROW_MSA) || defined(HAS_RAWTOYROW_MMI)
- void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u,
- uint8_t* dst_v, int width) = RAWToUVRow_C;
+#if defined(HAS_RAWTOYROW)
+ void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ RAWToUVRow_C;
void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
RAWToYRow_C;
#else
@@ -2109,6 +2143,8 @@ int RAWToI420(const uint8_t* src_raw,
src_stride_raw = -src_stride_raw;
}
+#if defined(HAS_RAWTOYROW)
+
// Neon version does direct RAW to YUV.
#if defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
@@ -2121,8 +2157,7 @@ int RAWToI420(const uint8_t* src_raw,
}
}
}
-// MMI and MSA version does direct RAW to YUV.
-#elif (defined(HAS_RAWTOYROW_MMI) || defined(HAS_RAWTOYROW_MSA))
+#endif
#if defined(HAS_RAWTOYROW_MMI) && defined(HAS_RAWTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RAWToUVRow = RAWToUVRow_Any_MMI;
@@ -2145,28 +2180,10 @@ int RAWToI420(const uint8_t* src_raw,
}
}
#endif
+
// Other platforms do intermediate conversion from RAW to ARGB.
-#else
-#if defined(HAS_RAWTOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- RAWToARGBRow = RAWToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RAWToARGBRow = RAWToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
- }
- }
-#endif
+#else // HAS_RAWTOYROW
+
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
@@ -2175,39 +2192,49 @@ int RAWToI420(const uint8_t* src_raw,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ }
+ }
#endif
+#endif // HAS_RAWTOYROW
{
-#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
- defined(HAS_RAWTOYROW_MMI))
+#if !defined(HAS_RAWTOYROW)
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
-#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
- defined(HAS_RAWTOYROW_MMI))
+#if defined(HAS_RAWTOYROW)
RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
RAWToYRow(src_raw, dst_y, width);
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
@@ -2224,8 +2251,7 @@ int RAWToI420(const uint8_t* src_raw,
dst_v += dst_stride_v;
}
if (height & 1) {
-#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
- defined(HAS_RAWTOYROW_MMI))
+#if defined(HAS_RAWTOYROW)
RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
RAWToYRow(src_raw, dst_y, width);
#else
@@ -2234,32 +2260,36 @@ int RAWToI420(const uint8_t* src_raw,
ARGBToYRow(row, dst_y, width);
#endif
}
-#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
- defined(HAS_RAWTOYROW_MMI))
+#if !defined(HAS_RAWTOYROW)
free_aligned_buffer_64(row);
#endif
}
return 0;
}
+#undef HAS_RAWTOYROW
+
+// Enabled if 1 pass is available
+#if (defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
+ defined(HAS_RAWTOYJROW_MMI))
+#define HAS_RAWTOYJROW
+#endif
-// TODO(fbarchard): Use Matrix version to implement I420 and J420.
// Convert RAW to J420.
LIBYUV_API
int RAWToJ420(const uint8_t* src_raw,
- int src_stride_raw,
- uint8_t* dst_y,
- int dst_stride_y,
- uint8_t* dst_u,
- int dst_stride_u,
- uint8_t* dst_v,
- int dst_stride_v,
- int width,
- int height) {
+ int src_stride_raw,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
int y;
-#if (defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
- defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI)
+#if defined(HAS_RAWTOYJROW)
void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw,
- uint8_t* dst_u, uint8_t* dst_v, int width) =
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
RAWToUVJRow_C;
void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
RAWToYJRow_C;
@@ -2267,7 +2297,7 @@ int RAWToJ420(const uint8_t* src_raw,
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RAWToARGBRow_C;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
- uint8_t* dst_u, uint8_t* dst_v, int width) =
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYJRow_C;
@@ -2282,6 +2312,8 @@ int RAWToJ420(const uint8_t* src_raw,
src_stride_raw = -src_stride_raw;
}
+#if defined(HAS_RAWTOYJROW)
+
// Neon version does direct RAW to YUV.
#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
@@ -2294,8 +2326,7 @@ int RAWToJ420(const uint8_t* src_raw,
}
}
}
-// MMI and MSA version does direct RAW to YUV.
-#elif (defined(HAS_RAWTOYJROW_MMI) || defined(HAS_RAWTOYJROW_MSA))
+#endif
#if defined(HAS_RAWTOYJROW_MMI) && defined(HAS_RAWTOUVJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RAWToUVJRow = RAWToUVJRow_Any_MMI;
@@ -2318,27 +2349,10 @@ int RAWToJ420(const uint8_t* src_raw,
}
}
#endif
-#else
-#if defined(HAS_RAWTOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- RAWToARGBRow = RAWToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RAWToARGBRow = RAWToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_NEON) && defined(HAS_ARGBTOUVJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
- ARGBToYJRow = ARGBToYJRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYJRow = ARGBToYJRow_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_NEON;
- }
- }
- }
-#endif
+
+// Other platforms do intermediate conversion from RAW to ARGB.
+#else // HAS_RAWTOYJROW
+
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
@@ -2347,39 +2361,49 @@ int RAWToJ420(const uint8_t* src_raw,
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
+#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_SSSE3;
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
+#if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- ARGBToUVJRow = ARGBToUVJRow_AVX2;
ARGBToYJRow = ARGBToYJRow_AVX2;
}
}
#endif
+#if defined(HAS_ARGBTOUVJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_SSSE3;
+ }
+ }
#endif
+#if defined(HAS_ARGBTOUVJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVJRow = ARGBToUVJRow_AVX2;
+ }
+ }
+#endif
+#endif // HAS_RAWTOYJROW
{
-#if !((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
- defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
+#if !defined(HAS_RAWTOYJROW)
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
-#if ((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
- defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
+#if defined(HAS_RAWTOYJROW)
RAWToUVJRow(src_raw, src_stride_raw, dst_u, dst_v, width);
RAWToYJRow(src_raw, dst_y, width);
RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
@@ -2396,8 +2420,7 @@ int RAWToJ420(const uint8_t* src_raw,
dst_v += dst_stride_v;
}
if (height & 1) {
-#if ((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
- defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
+#if defined(HAS_RAWTOYJROW)
RAWToUVJRow(src_raw, 0, dst_u, dst_v, width);
RAWToYJRow(src_raw, dst_y, width);
#else
@@ -2406,13 +2429,13 @@ int RAWToJ420(const uint8_t* src_raw,
ARGBToYJRow(row, dst_y, width);
#endif
}
-#if !((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
- defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
+#if !defined(HAS_RAWTOYJROW)
free_aligned_buffer_64(row);
#endif
}
return 0;
}
+#undef HAS_RAWTOYJROW
// Convert RGB565 to I420.
LIBYUV_API
@@ -2507,26 +2530,38 @@ int RGB565ToI420(const uint8_t* src_rgb565,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
+#if defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ }
+ }
+#endif
#endif
{
#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
@@ -2666,26 +2701,38 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
+#if defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ }
+ }
+#endif
#endif
{
#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
@@ -2822,26 +2869,38 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
+#if defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_MMI) && defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;