aboutsummaryrefslogtreecommitdiff
path: root/source/row_neon64.cc
diff options
context:
space:
mode:
authorJustin Green <greenjustin@google.com>2022-02-03 11:46:44 -0500
committerlibyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com>2022-02-03 17:01:49 +0000
commitb4ddbaf549a1bf5572bf703fd2862d1eb7380c6a (patch)
tree9e0a90646de7b1c50f40e3aeb452f749d297561b /source/row_neon64.cc
parent804980bbab748fd0e180cd6e7d9292ff49baf704 (diff)
downloadlibyuv-b4ddbaf549a1bf5572bf703fd2862d1eb7380c6a.tar.gz
Add support for MM21.
Add support for MM21 to NV12 and I420 conversion, and add SIMD optimizations for arm, aarch64, SSE2, and SSSE3 machines.

Bug: libyuv:915, b/215425056
Change-Id: Iecb0c33287f35766a6169d4adf3b7397f1ba8b5d
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3433269
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Justin Green <greenjustin@google.com>
Diffstat (limited to 'source/row_neon64.cc')
-rw-r--r-- source/row_neon64.cc | 23
1 files changed, 23 insertions, 0 deletions
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 1d1f9bb1..442e60cd 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -627,6 +627,29 @@ void DetileRow_NEON(const uint8_t* src,
);
}
+// Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V per loop pass; each pass steps src_uv by src_tile_stride and lowers width by 16.
+void DetileSplitUVRow_NEON(const uint8_t* src_uv,  // interleaved UV input; post-incremented by src_tile_stride on every load
+ ptrdiff_t src_tile_stride,  // value added to src_uv after each 16-byte load
+ uint8_t* dst_u,  // receives the first ld2 register (v0), 8 bytes per pass
+ uint8_t* dst_v,  // receives the second ld2 register (v1), 8 bytes per pass
+ int width) {  // loop counter, reduced by 16 per pass; NOTE(review): presumably a positive multiple of 16 -- confirm with callers
+ asm volatile(
+ "1: \n"  // loop head; the body runs once before the counter is tested
+ "ld2 {v0.8b,v1.8b}, [%0], %4 \n"  // load 16 bytes de-interleaved (even bytes -> v0, odd bytes -> v1), then src_uv += src_tile_stride
+ "subs %w3, %w3, #16 \n"  // width -= 16; sets the flags consumed by b.gt below
+ "prfm pldl1keep, [%0, 1792] \n"  // prefetch hint for the address 1792 bytes past the updated src_uv
+ "st1 {v0.8b}, [%1], #8 \n"  // store 8 bytes of v0 to dst_u, then dst_u += 8
+ "st1 {v1.8b}, [%2], #8 \n"  // store 8 bytes of v1 to dst_v, then dst_v += 8
+ "b.gt 1b \n"  // repeat while the decremented width is still > 0
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"(src_tile_stride) // %4
+ : "cc", "memory", "v0", "v1" // Clobber List
+ );
+}
+
#if LIBYUV_USE_ST2
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,