aboutsummaryrefslogtreecommitdiff
path: root/source/row_neon.cc
diff options
context:
space:
mode:
authorFrank Barchard <fbarchard@google.com>2022-11-16 18:02:34 -0800
committerlibyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com>2022-11-17 02:47:57 +0000
commit2d2cee418a18b9f1bfa6b8037d4f8da095720695 (patch)
tree049f9ca258653794581a45af7a11a764e7e1ad44 /source/row_neon.cc
parent6f21862f1b741088b0c2c3ff894af6b82634015c (diff)
downloadlibyuv-2d2cee418a18b9f1bfa6b8037d4f8da095720695.tar.gz
Add Detile_16 planar function for 10 bit MT2T format
- Neon and SSE2
- Any for odd widths

Pixel 2 little core AArch32 build
C: TestDetilePlane_16 (1275 ms), TestDetilePlane (1203 ms)
Neon: TestDetilePlane_16 (693 ms), TestDetilePlane (660 ms)

Bug: b/258474032
Change-Id: Idbd09c5e9324e4deef5f1d54090d4b63cc7db812
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4031848
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/row_neon.cc')
-rw-r--r--source/row_neon.cc20
1 files changed, 20 insertions, 0 deletions
diff --git a/source/row_neon.cc b/source/row_neon.cc
index b777a0e1..d2815d17 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -622,6 +622,26 @@ void DetileRow_NEON(const uint8_t* src,
);
}
+// Copies 16-bit Y's from a tile to dst, 16 per loop pass; after each load src steps by src_tile_stride Y's (stride * 2 bytes).
+void DetileRow_16_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.16 {q0, q1}, [%0], %3 \n" // load 16 pixels (32 bytes), then src += %3 bytes
+ "subs %2, %2, #16 \n" // 16 processed per loop
+ "pld [%0, #3584] \n" // preload hint 3584 bytes past the updated src
+ "vst1.16 {q0, q1}, [%1]! \n" // store 16 pixels, dst += 32 bytes
+ "bgt 1b \n" // loop while width > 0; NOTE(review): always moves a full 16, so width is presumably a positive multiple of 16 - confirm callers
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"(src_tile_stride * 2) // %3 stride scaled from uint16_t units to bytes
+ : "cc", "memory", "q0", "q1" // Clobber List
+ );
+}
+
// Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
void DetileSplitUVRow_NEON(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,