aboutsummaryrefslogtreecommitdiff
path: root/source/row_neon.cc
diff options
context:
space:
mode:
authorYuan Tong <tongyuan200097@gmail.com>2021-02-25 15:21:28 +0800
committerFrank Barchard <fbarchard@chromium.org>2021-02-25 23:16:54 +0000
commita8c181050c202854ae32433164e6bd5d1e7c4368 (patch)
treec300dbf9bfa59d0dc2772c311b0dfd154e08d54a /source/row_neon.cc
parent08815a29766a78398a8e2b9ed095280e9d0a73c2 (diff)
downloadlibyuv-a8c181050c202854ae32433164e6bd5d1e7c4368.tar.gz
Add 10/12 bit YUV To YUV functions
The following functions (and their 12-bit variants) are added:
planar, 10->10: I410ToI010, I210ToI010
planar, 10->8: I410ToI444, I210ToI422
planar<->biplanar, 10->10: I010ToP010, I210ToP210, I410ToP410, P010ToI010, P210ToI210, P410ToI410
R=fbarchard@chromium.org
Change-Id: I9aa2bafa0d6a6e1e38ce4e20cbb437e10f9b0158
Bug: libyuv:834, libyuv:873
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2709822
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Diffstat (limited to 'source/row_neon.cc')
-rw-r--r--source/row_neon.cc115
1 files changed, 115 insertions, 0 deletions
diff --git a/source/row_neon.cc b/source/row_neon.cc
index e54cb12b..43a2cac7 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -3166,6 +3166,121 @@ void HalfMergeUVRow_NEON(const uint8_t* src_u,
: "cc", "memory", "q0", "q1", "q2", "q3");
}
+// Deinterleaves a row of 16-bit UV pairs into separate U and V rows and
+// shifts every sample right by (16 - depth) bits:
+//   dst_u[i] = src_uv[2 * i + 0] >> (16 - depth)
+//   dst_v[i] = src_uv[2 * i + 1] >> (16 - depth)
+// This is the inverse of the left shift applied by MergeUVRow_16_NEON.
+//
+// src_uv: interleaved source samples (U0 V0 U1 V1 ...).
+// dst_u:  destination for the U samples.
+// dst_v:  destination for the V samples.
+// depth:  bit depth used to derive the shift; 16 leaves samples unchanged.
+//         A depth above 16 makes the count positive, i.e. a left shift.
+// width:  number of UV pairs. The loop handles 8 pairs per iteration and
+//         always runs at least once, so the buffers must cover width rounded
+//         up to a multiple of 8 (and at least 8) pairs.
+void SplitUVRow_16_NEON(const uint16_t* src_uv,
+                        uint16_t* dst_u,
+                        uint16_t* dst_v,
+                        int depth,
+                        int width) {
+  // VSHL (register) treats a negative per-lane count as a right shift, so a
+  // single broadcast of (depth - 16) performs the logical right shift without
+  // widening the samples to 32 bits.
+  int shift = depth - 16;
+  asm volatile(
+      "vdup.16     q2, %4                        \n"  // shift in every lane
+      "1:                                        \n"
+      "vld2.16     {q0, q1}, [%0]!               \n"  // load 8 UV
+      "vshl.u16    q0, q0, q2                    \n"  // U >> (16 - depth)
+      "vshl.u16    q1, q1, q2                    \n"  // V >> (16 - depth)
+      "subs        %3, %3, #8                    \n"  // 8 src pixels per loop
+      "vst1.16     {q0}, [%1]!                   \n"  // store 8 U pixels
+      "vst1.16     {q1}, [%2]!                   \n"  // store 8 V pixels
+      "bgt         1b                            \n"
+      : "+r"(src_uv),  // %0
+        "+r"(dst_u),   // %1
+        "+r"(dst_v),   // %2
+        "+r"(width)    // %3
+      : "r"(shift)     // %4
+      : "cc", "memory", "q0", "q1", "q2");
+}
+
+// Interleaves a row of 16-bit U samples and a row of 16-bit V samples into a
+// single UV row, shifting every sample by (16 - depth) bits on the way:
+//   dst_uv[2 * i + 0] = src_u[i] << (16 - depth)
+//   dst_uv[2 * i + 1] = src_v[i] << (16 - depth)
+// The shift is done in 16-bit lanes, so bits shifted past bit 15 are lost.
+//
+// src_u, src_v: source sample rows.
+// dst_uv:       destination for the interleaved samples (U0 V0 U1 V1 ...).
+// depth:        bit depth used to derive the shift count; 16 means no shift.
+// width:        number of samples per source row. The loop consumes 8 samples
+//               per iteration and always executes at least once.
+void MergeUVRow_16_NEON(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
+ int depth,
+ int width) {
+ int shift = 16 - depth; // per-lane shift count handed to vshl below
+ asm volatile(
+ "vdup.16 q2, %3 \n" // broadcast shift into all eight 16-bit lanes
+ "1: \n"
+ "vld1.16 {q0}, [%0]! \n" // load 8 U
+ "vld1.16 {q1}, [%1]! \n" // load 8 V
+ "vshl.u16 q0, q0, q2 \n" // U <<= shift
+ "vshl.u16 q1, q1, q2 \n" // V <<= shift
+ "subs %4, %4, #8 \n" // 8 src pixels per loop
+ "vst2.16 {q0, q1}, [%2]! \n" // store 8 UV pixels
+ "bgt 1b \n" // loop while the remaining width is > 0
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(shift), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2");
+}
+
+// Multiplies every 16-bit sample of a row by scale, keeping the low 16 bits
+// of each product:
+//   dst_y[i] = (uint16_t)(src_y[i] * scale)
+//
+// src_y: source samples.
+// dst_y: destination samples.
+// scale: multiplier; vdup.16 broadcasts only its low 16 bits.
+// width: number of samples. The loop consumes 16 samples per iteration and
+//        always executes at least once.
+void MultiplyRow_16_NEON(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width) {
+ asm volatile(
+ "vdup.16 q2, %2 \n" // broadcast scale into all eight 16-bit lanes
+ "1: \n"
+ "vld1.16 {q0}, [%0]! \n" // load pixels 0..7
+ "vld1.16 {q1}, [%0]! \n" // load pixels 8..15
+ "vmul.u16 q0, q0, q2 \n" // low 16 bits of pixel * scale
+ "vmul.u16 q1, q1, q2 \n"
+ "vst1.16 {q0}, [%1]! \n" // store pixels 0..7
+ "vst1.16 {q1}, [%1]! \n" // store pixels 8..15
+ "subs %3, %3, #16 \n" // 16 src pixels per loop
+ "bgt 1b \n" // loop while the remaining width is > 0
+ : "+r"(src_y), // %0
+ "+r"(dst_y), // %1
+ "+r"(scale), // %2 (declared read-write, but the asm only reads it)
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1", "q2");
+}
+
+// Scales a row of 16-bit samples down with a 16.16 fixed-point multiplier:
+//   dst_y[i] = (src_y[i] * scale) >> 16
+// For example, scale = 1024 is equivalent to a right shift by 6 bits.
+//
+// src_y: source samples.
+// dst_y: destination samples.
+// scale: fixed-point multiplier. The product is formed in unsigned 32-bit
+//        lanes, so results are exact for 0 <= scale <= 65536; larger values
+//        wrap modulo 2^32 before the shift.
+// width: number of samples. The loop consumes 16 samples per iteration and
+//        always executes at least once, so the buffers must cover width
+//        rounded up to a multiple of 16 (and at least 16) samples.
+void DivideRow_16_NEON(const uint16_t* src_y,
+                       uint16_t* dst_y,
+                       int scale,
+                       int width) {
+  asm volatile(
+      "vdup.32     q0, %3                        \n"  // scale in each u32 lane
+      "1:                                        \n"
+      "vld1.16     {q1}, [%0]!                   \n"  // load pixels 0..7
+      "vld1.16     {q2}, [%0]!                   \n"  // load pixels 8..15
+      // Widen into registers that do not alias q1/q2, so the narrowing
+      // stores below cannot overwrite data that is still to be read.
+      "vmovl.u16   q3, d2                        \n"  // pixels 0..3
+      "vmovl.u16   q8, d3                        \n"  // pixels 4..7
+      "vmovl.u16   q9, d4                        \n"  // pixels 8..11
+      "vmovl.u16   q10, d5                       \n"  // pixels 12..15
+      "vmul.i32    q3, q3, q0                    \n"  // pixel * scale
+      "vmul.i32    q8, q8, q0                    \n"
+      "vmul.i32    q9, q9, q0                    \n"
+      "vmul.i32    q10, q10, q0                  \n"
+      "vshrn.i32   d2, q3, #16                   \n"  // keep bits 31..16
+      "vshrn.i32   d3, q8, #16                   \n"
+      "vshrn.i32   d4, q9, #16                   \n"
+      "vshrn.i32   d5, q10, #16                  \n"
+      "vst1.16     {q1}, [%1]!                   \n"  // store pixels 0..7
+      "vst1.16     {q2}, [%1]!                   \n"  // store pixels 8..15
+      "subs        %2, %2, #16                   \n"  // 16 src pixels per loop
+      "bgt         1b                            \n"
+      : "+r"(src_y),  // %0
+        "+r"(dst_y),  // %1
+        "+r"(width)   // %2
+      : "r"(scale)    // %3
+      : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10");
+}
+
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)..
#ifdef __cplusplus