diff options
author | Yuan Tong <tongyuan200097@gmail.com> | 2021-03-13 17:17:02 +0800 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2021-03-13 20:55:21 +0000 |
commit | f37014fcfffd62f00a80a900f016964763c56864 (patch) | |
tree | 45ee1ace11dca893dd46b99d1c7bfed857e9f3af /source/row_neon.cc | |
parent | 19bbedfd3e37329900d34e104021f04d7205ad78 (diff) | |
download | libyuv-f37014fcfffd62f00a80a900f016964763c56864.tar.gz |
Add support for AR64 format
Add following conversions:
ARGB,ABGR <-> AR64,AB64
AR64 <-> AB64
R=fbarchard@chromium.org
Change-Id: I5ca5b40a98bffea11981e136afae4a511ba6c564
Bug: libyuv:886
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2746780
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/row_neon.cc')
-rw-r--r-- | source/row_neon.cc | 99 |
1 files changed, 99 insertions, 0 deletions
diff --git a/source/row_neon.cc b/source/row_neon.cc index 43a2cac7..5414d1ef 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -2119,6 +2119,105 @@ void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"); } +static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7, + 10, 9, 8, 11, 14, 13, 12, 15}; + +void ARGBToAR64Row_NEON(const uint8_t* src_argb, + uint16_t* dst_ar64, + int width) { + asm volatile( + "1: \n" + "vld1.8 {q0}, [%0]! \n" + "vld1.8 {q2}, [%0]! \n" + "vmov.u8 q1, q0 \n" + "vmov.u8 q3, q2 \n" + "subs %2, %2, #8 \n" // 8 processed per loop. + "vst2.8 {q0, q1}, [%1]! \n" // store 4 pixels + "vst2.8 {q2, q3}, [%1]! \n" // store 4 pixels + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_ar64), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2", "q3"); +} + +void ARGBToAB64Row_NEON(const uint8_t* src_argb, + uint16_t* dst_ab64, + int width) { + asm volatile( + "vld1.8 q4, %3 \n" // shuffler + "1: \n" + "vld1.8 {q0}, [%0]! \n" + "vld1.8 {q2}, [%0]! \n" + "vtbl.8 d2, {d0, d1}, d8 \n" + "vtbl.8 d3, {d0, d1}, d9 \n" + "vtbl.8 d6, {d4, d5}, d8 \n" + "vtbl.8 d7, {d4, d5}, d9 \n" + "vmov.u8 q0, q1 \n" + "vmov.u8 q2, q3 \n" + "subs %2, %2, #8 \n" // 8 processed per loop. + "vst2.8 {q0, q1}, [%1]! \n" // store 4 pixels + "vst2.8 {q2, q3}, [%1]! \n" // store 4 pixels + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_ab64), // %1 + "+r"(width) // %2 + : "m"(kShuffleARGBToABGR) // %3 + : "cc", "memory", "q0", "q1", "q2", "q3", "q4"); +} + +void AR64ToARGBRow_NEON(const uint16_t* src_ar64, + uint8_t* dst_argb, + int width) { + asm volatile( + "1: \n" + "vld1.16 {q0}, [%0]! \n" + "vld1.16 {q1}, [%0]! \n" + "vld1.16 {q2}, [%0]! \n" + "vld1.16 {q3}, [%0]! \n" + "vshrn.u16 d0, q0, #8 \n" + "vshrn.u16 d1, q1, #8 \n" + "vshrn.u16 d4, q2, #8 \n" + "vshrn.u16 d5, q3, #8 \n" + "subs %2, %2, #8 \n" // 8 processed per loop. + "vst1.8 {q0}, [%1]! \n" // store 4 pixels + "vst1.8 {q2}, [%1]! \n" // store 4 pixels + "bgt 1b \n" + : "+r"(src_ar64), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2", "q3"); +} + +static const uvec8 kShuffleAB64ToARGB = {5, 3, 1, 7, 13, 11, 9, 15}; + +void AB64ToARGBRow_NEON(const uint16_t* src_ab64, + uint8_t* dst_argb, + int width) { + asm volatile( + "vld1.8 d8, %3 \n" // shuffler + "1: \n" + "vld1.16 {q0}, [%0]! \n" + "vld1.16 {q1}, [%0]! \n" + "vld1.16 {q2}, [%0]! \n" + "vld1.16 {q3}, [%0]! \n" + "vtbl.8 d0, {d0, d1}, d8 \n" + "vtbl.8 d1, {d2, d3}, d8 \n" + "vtbl.8 d4, {d4, d5}, d8 \n" + "vtbl.8 d5, {d6, d7}, d8 \n" + "subs %2, %2, #8 \n" // 8 processed per loop. + "vst1.8 {q0}, [%1]! \n" // store 4 pixels + "vst1.8 {q2}, [%1]! \n" // store 4 pixels + "bgt 1b \n" + : "+r"(src_ab64), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kShuffleAB64ToARGB) // %3 + : "cc", "memory", "q0", "q1", "q2", "q3", "q4"); +} + void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d6, #25 \n" // B * 0.1016 coefficient |