aboutsummaryrefslogtreecommitdiff
path: root/source/planar_functions.cc
diff options
context:
space:
mode:
author: Frank Barchard <fbarchard@google.com> 2022-09-30 15:12:37 -0700
committer: libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2022-09-30 22:41:21 +0000
commit: 00950840d1c9bcbb3eb6ebc5aac5793e71166c8b (patch)
tree: a261be18062391f039e4e230ce5714f0059ae845 /source/planar_functions.cc
parent: 9ba40a8f03673b79d3236e79707723fdf99f76b6 (diff)
download: libyuv-00950840d1c9bcbb3eb6ebc5aac5793e71166c8b.tar.gz
YUY2ToNV12 using YUY2ToY and YUY2ToNVUV
- Optimized YUY2ToNV12 that reduces it from 3 steps to 2 steps
  - Was SplitUV, memcpy Y, InterpolateUV
  - Now YUY2ToY, YUY2ToNVUV
- rollback LIBYUV_UNLIMITED_DATA

3840x2160 1000 iterations:

Pixel 2 Cortex A73
  Was YUY2ToNV12_Opt (6515 ms)
  Now YUY2ToNV12_Opt (3350 ms)

AB7 Mediatek P35 Cortex A53
  Was YUY2ToNV12_Opt (6435 ms)
  Now YUY2ToNV12_Opt (3301 ms)

Skylake AVX2 x64
  Was YUY2ToNV12_Opt (1872 ms)
  Now YUY2ToNV12_Opt (1657 ms)

SSE2 x64
  Was YUY2ToNV12_Opt (2008 ms)
  Now YUY2ToNV12_Opt (1691 ms)

Windows Skylake AVX2 32 bit x86
  Was YUY2ToNV12_Opt (2161 ms)
  Now YUY2ToNV12_Opt (1628 ms)

Bug: libyuv:943
Change-Id: I6c2ba2ae765413426baf770b837de114f808f6d0
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3929843
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/planar_functions.cc')
-rw-r--r-- source/planar_functions.cc 116
1 files changed, 42 insertions, 74 deletions
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 45c34d30..67229ee7 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -5095,9 +5095,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
return 0;
}
-// TODO(fbarchard): Consider if width is even Y channel can be split
-// directly. A SplitUVRow_Odd function could copy the remaining chroma.
-
LIBYUV_API
int YUY2ToNV12(const uint8_t* src_yuy2,
int src_stride_yuy2,
@@ -5108,13 +5105,10 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
int width,
int height) {
int y;
- int halfwidth = (width + 1) >> 1;
- void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
- int width) = SplitUVRow_C;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) = InterpolateRow_C;
-
+ void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
+ YUY2ToYRow_C;
+ void (*YUY2ToNVUVRow)(const uint8_t* src_yuy2, int stride_yuy2,
+ uint8_t* dst_uv, int width) = YUY2ToNVUVRow_C;
if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
return -1;
}
@@ -5125,109 +5119,83 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
src_stride_yuy2 = -src_stride_yuy2;
}
-#if defined(HAS_SPLITUVROW_SSE2)
+#if defined(HAS_YUY2TOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- SplitUVRow = SplitUVRow_Any_SSE2;
+ YUY2ToYRow = YUY2ToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_SSE2;
+ YUY2ToYRow = YUY2ToYRow_SSE2;
}
}
#endif
-#if defined(HAS_SPLITUVROW_AVX2)
+#if defined(HAS_YUY2TOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- SplitUVRow = SplitUVRow_Any_AVX2;
+ YUY2ToYRow = YUY2ToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_AVX2;
+ YUY2ToYRow = YUY2ToYRow_AVX2;
}
}
#endif
-#if defined(HAS_SPLITUVROW_NEON)
+#if defined(HAS_YUY2TOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- SplitUVRow = SplitUVRow_Any_NEON;
+ YUY2ToYRow = YUY2ToYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_NEON;
+ YUY2ToYRow = YUY2ToYRow_NEON;
}
}
#endif
-#if defined(HAS_SPLITUVROW_MSA)
+#if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
- SplitUVRow = SplitUVRow_Any_MSA;
+ YUY2ToYRow = YUY2ToYRow_Any_MSA;
if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_MSA;
+ YUY2ToYRow = YUY2ToYRow_MSA;
}
}
#endif
-#if defined(HAS_SPLITUVROW_LSX)
- if (TestCpuFlag(kCpuHasLSX)) {
- SplitUVRow = SplitUVRow_Any_LSX;
+#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ YUY2ToYRow = YUY2ToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_LSX;
+ YUY2ToYRow = YUY2ToYRow_LASX;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
+
+#if defined(HAS_YUY2TONVUVROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
+ YUY2ToNVUVRow = YUY2ToNVUVRow_SSE2;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
+#if defined(HAS_YUY2TONVUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
+ YUY2ToNVUVRow = YUY2ToNVUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
+ YUY2ToNVUVRow = YUY2ToNVUVRow_AVX2;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
+#if defined(HAS_YUY2TONVUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
+ YUY2ToNVUVRow = YUY2ToNVUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_MSA)
- if (TestCpuFlag(kCpuHasMSA)) {
- InterpolateRow = InterpolateRow_Any_MSA;
- if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_MSA;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_LSX)
- if (TestCpuFlag(kCpuHasLSX)) {
- InterpolateRow = InterpolateRow_Any_LSX;
- if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_LSX;
+ YUY2ToNVUVRow = YUY2ToNVUVRow_NEON;
}
}
#endif
- {
- int awidth = halfwidth * 2;
- // row of y and 2 rows of uv
- align_buffer_64(rows, awidth * 3);
-
- for (y = 0; y < height - 1; y += 2) {
- // Split Y from UV.
- SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
- memcpy(dst_y, rows, width);
- SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
- memcpy(dst_y + dst_stride_y, rows, width);
- InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
- src_yuy2 += src_stride_yuy2 * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
- }
- if (height & 1) {
- // Split Y from UV.
- SplitUVRow(src_yuy2, rows, dst_uv, awidth);
- memcpy(dst_y, rows, width);
- }
- free_aligned_buffer_64(rows);
+ for (y = 0; y < height - 1; y += 2) {
+ YUY2ToYRow(src_yuy2, dst_y, width);
+ YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width);
+ YUY2ToNVUVRow(src_yuy2, src_stride_yuy2, dst_uv, width);
+ src_yuy2 += src_stride_yuy2 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ YUY2ToYRow(src_yuy2, dst_y, width);
+ YUY2ToNVUVRow(src_yuy2, 0, dst_uv, width);
}
return 0;
}