diff options
author | Frank Barchard <fbarchard@google.com> | 2022-11-16 18:02:34 -0800 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-11-17 02:47:57 +0000 |
commit | 2d2cee418a18b9f1bfa6b8037d4f8da095720695 (patch) | |
tree | 049f9ca258653794581a45af7a11a764e7e1ad44 /source/row_neon.cc | |
parent | 6f21862f1b741088b0c2c3ff894af6b82634015c (diff) | |
download | libyuv-2d2cee418a18b9f1bfa6b8037d4f8da095720695.tar.gz |
Add Detile_16 planar function for 10 bit MT2T format
- Neon and SSE2
- Any for odd widths
Pixel 2 little core AArch32 build
C
TestDetilePlane_16 (1275 ms)
TestDetilePlane (1203 ms)
Neon
TestDetilePlane_16 (693 ms)
TestDetilePlane (660 ms)
Bug: b/258474032
Change-Id: Idbd09c5e9324e4deef5f1d54090d4b63cc7db812
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4031848
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/row_neon.cc')
-rw-r--r-- | source/row_neon.cc | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/source/row_neon.cc b/source/row_neon.cc
index b777a0e1..d2815d17 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -622,6 +622,26 @@ void DetileRow_NEON(const uint8_t* src,
   );
 }
 
+// Copies 16-bit Y's out of a tile, 16 per pass; the pass runs before width is tested, so width <= 0 still copies 16.
+void DetileRow_16_NEON(const uint16_t* src,
+                       ptrdiff_t src_tile_stride,
+                       uint16_t* dst,
+                       int width) {
+  asm volatile(
+      "1: \n"
+      "vld1.16 {q0, q1}, [%0], %3 \n"  // load 16 pixels (32 bytes), then src += %3 bytes
+      "subs %2, %2, #16 \n"  // 16 processed per loop; sets the flags tested by bgt
+      "pld [%0, #3584] \n"  // preload hint 3584 bytes past the already-advanced src
+      "vst1.16 {q0, q1}, [%1]! \n"  // store 16 pixels, dst += 32 bytes
+      "bgt 1b \n"  // repeat while the remaining width is > 0
+      : "+r"(src),  // %0
+        "+r"(dst),  // %1
+        "+r"(width)  // %2
+      : "r"(src_tile_stride * 2)  // %3: stride doubled, presumably uint16_t elements -> bytes (confirm with caller)
+      : "cc", "memory", "q0", "q1"  // Clobber List
+  );
+}
+
 // Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
 void DetileSplitUVRow_NEON(const uint8_t* src_uv,
                            ptrdiff_t src_tile_stride, |