aboutsummaryrefslogtreecommitdiff
path: root/source/row_neon.cc
diff options
context:
space:
mode:
authorYuan Tong <tongyuan200097@gmail.com>2021-03-13 17:17:02 +0800
committerFrank Barchard <fbarchard@chromium.org>2021-03-13 20:55:21 +0000
commitf37014fcfffd62f00a80a900f016964763c56864 (patch)
tree45ee1ace11dca893dd46b99d1c7bfed857e9f3af /source/row_neon.cc
parent19bbedfd3e37329900d34e104021f04d7205ad78 (diff)
downloadlibyuv-f37014fcfffd62f00a80a900f016964763c56864.tar.gz
Add support for AR64 format
Add following conversions: ARGB,ABGR <-> AR64,AB64 AR64 <-> AB64 R=fbarchard@chromium.org Change-Id: I5ca5b40a98bffea11981e136afae4a511ba6c564 Bug: libyuv:886 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2746780 Reviewed-by: Frank Barchard <fbarchard@chromium.org> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/row_neon.cc')
-rw-r--r--source/row_neon.cc99
1 files changed, 99 insertions, 0 deletions
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 43a2cac7..5414d1ef 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -2119,6 +2119,105 @@ void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
}
+static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7,
+ 10, 9, 8, 11, 14, 13, 12, 15};
+
+void ARGBToAR64Row_NEON(const uint8_t* src_argb,
+ uint16_t* dst_ar64,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n"
+ "vld1.8 {q2}, [%0]! \n"
+ "vmov.u8 q1, q0 \n"
+ "vmov.u8 q3, q2 \n"
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vst2.8 {q0, q1}, [%1]! \n" // store 4 pixels
+ "vst2.8 {q2, q3}, [%1]! \n" // store 4 pixels
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_ar64), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3");
+}
+
+void ARGBToAB64Row_NEON(const uint8_t* src_argb,
+ uint16_t* dst_ab64,
+ int width) {
+ asm volatile(
+ "vld1.8 q4, %3 \n" // shuffler
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n"
+ "vld1.8 {q2}, [%0]! \n"
+ "vtbl.8 d2, {d0, d1}, d8 \n"
+ "vtbl.8 d3, {d0, d1}, d9 \n"
+ "vtbl.8 d6, {d4, d5}, d8 \n"
+ "vtbl.8 d7, {d4, d5}, d9 \n"
+ "vmov.u8 q0, q1 \n"
+ "vmov.u8 q2, q3 \n"
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vst2.8 {q0, q1}, [%1]! \n" // store 4 pixels
+ "vst2.8 {q2, q3}, [%1]! \n" // store 4 pixels
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_ab64), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleARGBToABGR) // %3
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4");
+}
+
+void AR64ToARGBRow_NEON(const uint16_t* src_ar64,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.16 {q0}, [%0]! \n"
+ "vld1.16 {q1}, [%0]! \n"
+ "vld1.16 {q2}, [%0]! \n"
+ "vld1.16 {q3}, [%0]! \n"
+ "vshrn.u16 d0, q0, #8 \n"
+ "vshrn.u16 d1, q1, #8 \n"
+ "vshrn.u16 d4, q2, #8 \n"
+ "vshrn.u16 d5, q3, #8 \n"
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vst1.8 {q0}, [%1]! \n" // store 4 pixels
+ "vst1.8 {q2}, [%1]! \n" // store 4 pixels
+ "bgt 1b \n"
+ : "+r"(src_ar64), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3");
+}
+
+static const uvec8 kShuffleAB64ToARGB = {5, 3, 1, 7, 13, 11, 9, 15};
+
+void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "vld1.8 d8, %3 \n" // shuffler
+ "1: \n"
+ "vld1.16 {q0}, [%0]! \n"
+ "vld1.16 {q1}, [%0]! \n"
+ "vld1.16 {q2}, [%0]! \n"
+ "vld1.16 {q3}, [%0]! \n"
+ "vtbl.8 d0, {d0, d1}, d8 \n"
+ "vtbl.8 d1, {d2, d3}, d8 \n"
+ "vtbl.8 d4, {d4, d5}, d8 \n"
+ "vtbl.8 d5, {d6, d7}, d8 \n"
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vst1.8 {q0}, [%1]! \n" // store 4 pixels
+ "vst1.8 {q2}, [%1]! \n" // store 4 pixels
+ "bgt 1b \n"
+ : "+r"(src_ab64), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleAB64ToARGB) // %3
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4");
+}
+
void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d6, #25 \n" // B * 0.1016 coefficient