diff options
author | Frank Barchard <fbarchard@google.com> | 2022-12-05 16:10:38 -0800 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-12-06 19:54:40 +0000 |
commit | 610e0cdead3fcd3288693d18eab8c7323805ad9e (patch) | |
tree | 6b1b50bd731fdfe47a81a13acf4fbf16f75360b0 /source/row_neon64.cc | |
parent | c19943b4d00b1f44cc2158d3332e4450d69353ae (diff) | |
download | libyuv-610e0cdead3fcd3288693d18eab8c7323805ad9e.tar.gz |
MT2T Warning fixes for fuchsia
Bug: b/258474032, b/257266635
Change-Id: Ic5cbbc60e2e1463361e359a2fe3e97976c1ea929
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4081348
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Justin Green <greenjustin@google.com>
Diffstat (limited to 'source/row_neon64.cc')
-rw-r--r-- | source/row_neon64.cc | 48 |
1 files changed, 48 insertions, 0 deletions
```diff
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 85d1c1b9..e0a4ea19 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -749,6 +749,54 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
 }
 #endif
 
+// Unpack MT2T into tiled P010 64 pixels at a time. See
+// tinyurl.com/mtk-10bit-video-format for format documentation.
+void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) {
+  const uint16_t* src_lower_bits = src;
+  const uint16_t* src_upper_bits = src + 8;
+  asm volatile(
+      "1: \n"
+      "ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
+      "ld1 {v7.8b}, [%0], #8 \n"
+      "shl v6.8b, v7.8b, #2 \n"
+      "shl v5.8b, v7.8b, #4 \n"
+      "shl v4.8b, v7.8b, #6 \n"
+      "zip1 v0.16b, v4.16b, v0.16b \n"
+      "zip1 v1.16b, v5.16b, v1.16b \n"
+      "zip1 v2.16b, v6.16b, v2.16b \n"
+      "zip1 v3.16b, v7.16b, v3.16b \n"
+      "sri v0.8h, v0.8h, #10 \n"
+      "sri v1.8h, v1.8h, #10 \n"
+      "sri v2.8h, v2.8h, #10 \n"
+      "sri v3.8h, v3.8h, #10 \n"
+      "st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%2], #64 \n"
+      "ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
+      "ld1 {v7.8b}, [%0], #8 \n"
+      "shl v6.8b, v7.8b, #2 \n"
+      "shl v5.8b, v7.8b, #4 \n"
+      "shl v4.8b, v7.8b, #6 \n"
+      "zip1 v0.16b, v4.16b, v0.16b \n"
+      "zip1 v1.16b, v5.16b, v1.16b \n"
+      "zip1 v2.16b, v6.16b, v2.16b \n"
+      "zip1 v3.16b, v7.16b, v3.16b \n"
+      "sri v0.8h, v0.8h, #10 \n"
+      "sri v1.8h, v1.8h, #10 \n"
+      "sri v2.8h, v2.8h, #10 \n"
+      "sri v3.8h, v3.8h, #10 \n"
+      "st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%2], #64 \n"
+      "mov %0, %1 \n"
+      "add %1, %0, #16 \n"
+      "subs %3, %3, #80 \n"
+      "b.gt 1b \n"
+      : "+r"(src_lower_bits), // %0
+        "+r"(src_upper_bits), // %1
+        "+r"(dst), // %2
+        "+r"(size) // %3
+      :
+      : "cc", "memory", "w0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+        "v8", "v9", "v10", "v11", "v12");
+}
+
 #if LIBYUV_USE_ST2
 // Reads 16 U's and V's and writes out 16 pairs of UV.
 void MergeUVRow_NEON(const uint8_t* src_u,
```