diff options
author | Frank Barchard <fbarchard@google.com> | 2022-12-05 16:10:38 -0800 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-12-06 19:54:40 +0000 |
commit | 610e0cdead3fcd3288693d18eab8c7323805ad9e (patch) | |
tree | 6b1b50bd731fdfe47a81a13acf4fbf16f75360b0 /source/row_neon.cc | |
parent | c19943b4d00b1f44cc2158d3332e4450d69353ae (diff) | |
download | libyuv-610e0cdead3fcd3288693d18eab8c7323805ad9e.tar.gz |
MT2T Warning fixes for fuchsia
Bug: b/258474032, b/257266635
Change-Id: Ic5cbbc60e2e1463361e359a2fe3e97976c1ea929
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4081348
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Justin Green <greenjustin@google.com>
Diffstat (limited to 'source/row_neon.cc')
-rw-r--r-- | source/row_neon.cc | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/source/row_neon.cc b/source/row_neon.cc
index d2815d17..0c6065f8 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -720,6 +720,60 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
 }
 #endif
 
+void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) {  // Merges separately stored lower and upper bits from src into 16-bit values in dst. Each pass reads 80 bytes and writes 64 uint16_t values; the loop is bottom-tested, so one pass always runs.
+  const uint16_t* src_lower_bits = src;
+  const uint16_t* src_upper_bits = src + 8;  // 8 uint16_t elements = 16 bytes past src.
+  asm volatile(
+      "1: \n"  // Loop head; target of "bgt 1b" below.
+      "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // Load 32 bytes of upper
+      // bits.
+      "vld1.8 {d6}, [%0]! \n" // Load 8 bytes of lower
+      // bits.
+      "vshl.u8 d4, d6, #2 \n" // Align lower bits.
+      "vshl.u8 d2, d6, #4 \n"  // Bits 3:2 of each byte move to bits 7:6.
+      "vshl.u8 d0, d6, #6 \n"  // Bits 1:0 of each byte move to bits 7:6.
+      "vzip.u8 d0, d1 \n" // Zip lower and upper
+      // bits together.
+      "vzip.u8 d2, d3 \n"
+      "vzip.u8 d4, d5 \n"
+      "vzip.u8 d6, d7 \n"  // d6 is zipped as loaded (no shift applied).
+      "vsri.u16 q0, q0, #10 \n" // Copy upper 6 bits into
+      // lower 6 bits for better
+      // accuracy in
+      // conversions.
+      "vsri.u16 q1, q1, #10 \n"  // The insert keeps the top 10 bits of each 16-bit lane and replaces the low 6.
+      "vsri.u16 q2, q2, #10 \n"
+      "vsri.u16 q3, q3, #10 \n"
+      "vst4.16 {d0, d2, d4, d6}, [%2]! \n" // Store 32 pixels
+      "vst4.16 {d1, d3, d5, d7}, [%2]! \n"  // Odd D registers are the upper halves of q0-q3.
+      "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // Process last 32 pixels
+      // in the block
+      "vld1.8 {d6}, [%0]! \n"  // Next 8 bytes of lower bits.
+      "vshl.u8 d4, d6, #2 \n"
+      "vshl.u8 d2, d6, #4 \n"
+      "vshl.u8 d0, d6, #6 \n"
+      "vzip.u8 d0, d1 \n"
+      "vzip.u8 d2, d3 \n"
+      "vzip.u8 d4, d5 \n"
+      "vzip.u8 d6, d7 \n"
+      "vsri.u16 q0, q0, #10 \n"
+      "vsri.u16 q1, q1, #10 \n"
+      "vsri.u16 q2, q2, #10 \n"
+      "vsri.u16 q3, q3, #10 \n"
+      "vst4.16 {d0, d2, d4, d6}, [%2]! \n"
+      "vst4.16 {d1, d3, d5, d7}, [%2]! \n"
+      "mov %0, %1 \n"  // The two vld4.8 loads advanced %1 by 64 bytes; lower-bit reads continue from there.
+      "add %1, %0, #16 \n"  // Upper bits are again read starting 16 bytes after the lower bits.
+      "subs %3, %3, #80 \n"  // Count down by 80 per pass and set flags; size presumably counts source bytes (pointers advance 80 bytes per pass) - confirm with caller.
+      "bgt 1b \n"  // Repeat while the remaining count is > 0 (signed compare).
+      : "+r"(src_lower_bits), // %0
+      "+r"(src_upper_bits), // %1
+      "+r"(dst), // %2
+      "+r"(size) // %3
+      :  // No input-only operands.
+      : "cc", "memory", "q0", "q1", "q2", "q3");  // q0-q3 alias d0-d7, the registers used above.
+}
+
 // Reads 16 U's and V's and writes out 16 pairs of UV.
 void MergeUVRow_NEON(const uint8_t* src_u,
                      const uint8_t* src_v, |