aboutsummaryrefslogtreecommitdiff
path: root/source/convert.cc
diff options
context:
space:
mode:
author: Frank Barchard <fbarchard@google.com> 2021-03-23 15:54:02 -0700
committer: Frank Barchard <fbarchard@chromium.org> 2021-03-23 23:45:54 +0000
commit: d8f1bfc9816a9fc76f3a25cc0ee272fb9c07622a (patch)
tree: 6201f9cab35550653480bc372580d2c5014d074d /source/convert.cc
parent: b046131c0bd44ca3a11276194d07b85373cfd608 (diff)
download: libyuv-d8f1bfc9816a9fc76f3a25cc0ee272fb9c07622a.tar.gz
Add RAWToJ420
Add J420 output from RAW.
Optimize RGB24 and RAW To J420 on ARM by using NEON for the 2 step conversion.
Also fix sign-compare warning that was breaking Windows build.

Bug: libyuv:887, b/183534734
Change-Id: I8c39334552dc0b28414e638708db413d6adf8d6e
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2783382
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Diffstat (limited to 'source/convert.cc')
-rw-r--r-- source/convert.cc 262
1 files changed, 245 insertions, 17 deletions
diff --git a/source/convert.cc b/source/convert.cc
index 1bd59659..768e0f37 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -1368,6 +1368,18 @@ int ARGBToI420(const uint8_t* src_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
+#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
+ }
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
@@ -1388,22 +1400,6 @@ int ARGBToI420(const uint8_t* src_argb,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
- }
-#endif
#if defined(HAS_ARGBTOYROW_MMI) && defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
@@ -1771,7 +1767,7 @@ int RGB24ToI420(const uint8_t* src_rgb24,
}
// Neon version does direct RGB24 to YUV.
-#if defined(HAS_RGB24TOYROW_NEON)
+#if defined(HAS_RGB24TOYROW_NEON) && defined(HAS_RGB24TOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
RGB24ToYRow = RGB24ToYRow_Any_NEON;
@@ -1808,6 +1804,14 @@ int RGB24ToI420(const uint8_t* src_rgb24,
#endif
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else
+#if defined(HAS_RGB24TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_NEON;
+ }
+ }
+#endif
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
@@ -1816,6 +1820,18 @@ int RGB24ToI420(const uint8_t* src_rgb24,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
+ }
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
@@ -1960,6 +1976,14 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
#endif
#else
+#if defined(HAS_RGB24TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_NEON;
+ }
+ }
+#endif
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
@@ -1968,6 +1992,18 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
}
#endif
+#if defined(HAS_ARGBTOYJROW_NEON) && defined(HAS_ARGBTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
+ ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYJRow = ARGBToYJRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_NEON;
+ }
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
@@ -2111,6 +2147,26 @@ int RAWToI420(const uint8_t* src_raw,
#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else
+#if defined(HAS_RAWTOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RAWToARGBRow = RAWToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RAWToARGBRow = RAWToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
+ }
+ }
+ }
+#endif
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
@@ -2186,6 +2242,178 @@ int RAWToI420(const uint8_t* src_raw,
return 0;
}
+// TODO(fbarchard): Use Matrix version to implement I420 and J420.
+// Convert RAW to J420. Returns 0 on success, -1 if an argument is invalid.
+LIBYUV_API
+int RAWToJ420(const uint8_t* src_raw,  // source RAW image; must be non-NULL
+ int src_stride_raw,  // byte offset from one source row to the next
+ uint8_t* dst_y,  // destination Y plane; must be non-NULL
+ int dst_stride_y,  // byte offset between destination Y rows
+ uint8_t* dst_u,  // destination U plane; advances one row per two source rows
+ int dst_stride_u,  // byte offset between destination U rows
+ uint8_t* dst_v,  // destination V plane; advances one row per two source rows
+ int dst_stride_v,  // byte offset between destination V rows
+ int width,  // must be > 0
+ int height) {  // must be non-zero; a negative value inverts the image
+ int y;  // index of the first source row of the pair being converted
+#if (defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
+ defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI)
+ void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ RAWToUVJRow_C;  // C default; may be replaced by a CPU-specific variant below
+ void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
+ RAWToYJRow_C;  // C default; may be replaced by a CPU-specific variant below
+#else  // two-step path: RAW -> ARGB scratch rows -> Y/U/V
+ void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
+ RAWToARGBRow_C;
+ void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVJRow_C;
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYJRow_C;
+#endif
+ if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;  // NULL plane, non-positive width or zero height
+ }
+ // Negative height means invert the image: start at the last source row and
+ if (height < 0) {  // step backwards through the source.
+ height = -height;
+ src_raw = src_raw + (height - 1) * src_stride_raw;
+ src_stride_raw = -src_stride_raw;
+ }
+
+// Neon version does direct RAW to YUV.
+#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RAWToUVJRow = RAWToUVJRow_Any_NEON;  // NOTE(review): _Any_ variants presumably accept any width - confirm
+ RAWToYJRow = RAWToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RAWToYJRow = RAWToYJRow_NEON;
+ if (IS_ALIGNED(width, 16)) {  // only reached when width is also 8-aligned
+ RAWToUVJRow = RAWToUVJRow_NEON;
+ }
+ }
+ }
+// MMI and MSA versions do direct RAW to YUV.
+#elif (defined(HAS_RAWTOYJROW_MMI) || defined(HAS_RAWTOYJROW_MSA))
+#if defined(HAS_RAWTOYJROW_MMI) && defined(HAS_RAWTOUVJROW_MMI)
+ if (TestCpuFlag(kCpuHasMMI)) {
+ RAWToUVJRow = RAWToUVJRow_Any_MMI;
+ RAWToYJRow = RAWToYJRow_Any_MMI;
+ if (IS_ALIGNED(width, 8)) {
+ RAWToYJRow = RAWToYJRow_MMI;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToUVJRow = RAWToUVJRow_MMI;
+ }
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_MSA) && defined(HAS_RAWTOUVJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RAWToUVJRow = RAWToUVJRow_Any_MSA;
+ RAWToYJRow = RAWToYJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {  // Y and UV switch together at 16-pixel alignment
+ RAWToYJRow = RAWToYJRow_MSA;
+ RAWToUVJRow = RAWToUVJRow_MSA;
+ }
+ }
+#endif
+#else  // no direct RAW row functions: select RAW->ARGB and ARGB->YJ/UVJ row functions
+#if defined(HAS_RAWTOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RAWToARGBRow = RAWToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RAWToARGBRow = RAWToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_NEON) && defined(HAS_ARGBTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
+ ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYJRow = ARGBToYJRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_NEON;
+ }
+ }
+ }
+#endif
+#if defined(HAS_RAWTOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToARGBRow = RAWToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
+ ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_SSSE3;
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {  // assigned last, so it overrides the SSSE3 choice when both flags are set
+ ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
+ ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVJRow = ARGBToUVJRow_AVX2;
+ ARGBToYJRow = ARGBToYJRow_AVX2;
+ }
+ }
+#endif
+#endif
+
+ {
+#if !((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
+ defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
+ // Allocate 2 rows of ARGB; each row is 4 bytes per pixel, rounded up to a multiple of 32 bytes.
+ const int kRowSize = (width * 4 + 31) & ~31;  // NOTE(review): int arithmetic; very large widths could overflow
+ align_buffer_64(row, kRowSize * 2);  // NOTE(review): presumably declares `row`; the result is not checked here - confirm macro behavior
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {  // convert two source rows per iteration
+#if ((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
+ defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
+ RAWToUVJRow(src_raw, src_stride_raw, dst_u, dst_v, width);  // one U/V row from the row pair (stride presumably locates the second row)
+ RAWToYJRow(src_raw, dst_y, width);
+ RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
+#else
+ RAWToARGBRow(src_raw, row, width);
+ RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);  // second ARGB row sits kRowSize bytes after the first
+ ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width);  // kRowSize is the stride between the two scratch rows
+ ARGBToYJRow(row, dst_y, width);
+ ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_raw += src_stride_raw * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;  // U and V advance a single row per pair of source rows
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {  // odd height: one unpaired source row remains
+#if ((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
+ defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
+ RAWToUVJRow(src_raw, 0, dst_u, dst_v, width);  // stride 0: there is no second row (presumably the same row is sampled twice)
+ RAWToYJRow(src_raw, dst_y, width);
+#else
+ RAWToARGBRow(src_raw, row, width);
+ ARGBToUVJRow(row, 0, dst_u, dst_v, width);  // stride 0: only the first scratch row holds data for this pass
+ ARGBToYJRow(row, dst_y, width);
+#endif
+ }
+#if !((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
+ defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
+ free_aligned_buffer_64(row);  // releases the scratch rows obtained via align_buffer_64 above
+#endif
+ }
+ return 0;
+}
+
// Convert RGB565 to I420.
LIBYUV_API
int RGB565ToI420(const uint8_t* src_rgb565,