aboutsummaryrefslogtreecommitdiff
path: root/source/row_neon.cc
diff options
context:
space:
mode:
authorFrank Barchard <fbarchard@google.com>2022-12-05 16:10:38 -0800
committerlibyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com>2022-12-06 19:54:40 +0000
commit610e0cdead3fcd3288693d18eab8c7323805ad9e (patch)
tree6b1b50bd731fdfe47a81a13acf4fbf16f75360b0 /source/row_neon.cc
parentc19943b4d00b1f44cc2158d3332e4450d69353ae (diff)
downloadlibyuv-610e0cdead3fcd3288693d18eab8c7323805ad9e.tar.gz
MT2T Warning fixes for fuchsia
Bug: b/258474032, b/257266635 Change-Id: Ic5cbbc60e2e1463361e359a2fe3e97976c1ea929 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4081348 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Justin Green <greenjustin@google.com>
Diffstat (limited to 'source/row_neon.cc')
-rw-r--r--source/row_neon.cc54
1 files changed, 54 insertions, 0 deletions
diff --git a/source/row_neon.cc b/source/row_neon.cc
index d2815d17..0c6065f8 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -720,6 +720,60 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
}
#endif
+void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) {  // Expands 80-byte source blocks into 64 uint16_t values each; size counts source bytes.
+ const uint16_t* src_lower_bits = src;  // First 16 bytes of a block: packed 2-bit low parts, four per byte.
+ const uint16_t* src_upper_bits = src + 8;  // 8 uint16_t = 16 bytes in: one upper-bits byte per pixel.
+ asm volatile(
+ "1: \n"  // One 80-byte block (64 pixels) per pass.
+ "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // Load 32 bytes of upper
+ // bits, de-interleaved by 4.
+ "vld1.8 {d6}, [%0]! \n" // Load 8 bytes of lower
+ // bits (4 pixels per byte).
+ "vshl.u8 d4, d6, #2 \n" // Align lower bits: 5:4 to top.
+ "vshl.u8 d2, d6, #4 \n"  // Bits 3:2 to top.
+ "vshl.u8 d0, d6, #6 \n"  // Bits 1:0 to top.
+ "vzip.u8 d0, d1 \n" // Zip lower and upper
+ // bits together.
+ "vzip.u8 d2, d3 \n"
+ "vzip.u8 d4, d5 \n"
+ "vzip.u8 d6, d7 \n"  // d6 is unshifted: bits 7:6 already on top.
+ "vsri.u16 q0, q0, #10 \n" // Copy upper 6 bits into
+ // lower 6 bits for better
+ // accuracy in
+ // conversions.
+ "vsri.u16 q1, q1, #10 \n"
+ "vsri.u16 q2, q2, #10 \n"
+ "vsri.u16 q3, q3, #10 \n"
+ "vst4.16 {d0, d2, d4, d6}, [%2]! \n" // Store 32 pixels
+ "vst4.16 {d1, d3, d5, d7}, [%2]! \n"  // (16 per vst4.16, re-interleaved).
+ "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // Process last 32 pixels
+ // in the block
+ "vld1.8 {d6}, [%0]! \n"
+ "vshl.u8 d4, d6, #2 \n"
+ "vshl.u8 d2, d6, #4 \n"
+ "vshl.u8 d0, d6, #6 \n"
+ "vzip.u8 d0, d1 \n"
+ "vzip.u8 d2, d3 \n"
+ "vzip.u8 d4, d5 \n"
+ "vzip.u8 d6, d7 \n"
+ "vsri.u16 q0, q0, #10 \n"
+ "vsri.u16 q1, q1, #10 \n"
+ "vsri.u16 q2, q2, #10 \n"
+ "vsri.u16 q3, q3, #10 \n"
+ "vst4.16 {d0, d2, d4, d6}, [%2]! \n"
+ "vst4.16 {d1, d3, d5, d7}, [%2]! \n"
+ "mov %0, %1 \n"  // %1 has advanced 64 bytes, so it now sits at the next block.
+ "add %1, %0, #16 \n"  // Next block's upper bits begin 16 bytes in.
+ "subs %3, %3, #80 \n"  // 80 source bytes consumed per pass.
+ "bgt 1b \n"  // Tested after the pass: runs at least once; any remainder > 0 triggers another full block.
+ : "+r"(src_lower_bits), // %0
+ "+r"(src_upper_bits), // %1
+ "+r"(dst), // %2
+ "+r"(size) // %3
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3");
+}
+
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v,