diff options
author | Frank Barchard <fbarchard@google.com> | 2021-03-23 15:54:02 -0700 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2021-03-23 23:45:54 +0000 |
commit | d8f1bfc9816a9fc76f3a25cc0ee272fb9c07622a (patch) | |
tree | 6201f9cab35550653480bc372580d2c5014d074d /source/row_neon64.cc | |
parent | b046131c0bd44ca3a11276194d07b85373cfd608 (diff) | |
download | libyuv-d8f1bfc9816a9fc76f3a25cc0ee272fb9c07622a.tar.gz |
Add RAWToJ420
Add J420 output from RAW.
Optimize RGB24 and RAW to J420 on ARM by using NEON for the two-step conversion.
Also fix a sign-compare warning that was breaking the Windows build.
Bug: libyuv:887, b/183534734
Change-Id: I8c39334552dc0b28414e638708db413d6adf8d6e
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2783382
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Diffstat (limited to 'source/row_neon64.cc')
-rw-r--r-- | source/row_neon64.cc | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 9662cd3c..3281e90f 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -1628,10 +1628,10 @@ void AR64ToARGBRow_NEON(const uint16_t* src_ar64, "subs %w2, %w2, #8 \n" // 8 processed per loop. "stp q0, q2, [%1], #32 \n" // store 8 pixels "b.gt 1b \n" - : "+r"(src_ar64), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleAR64ToARGB) // %3 + : "+r"(src_ar64), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kShuffleAR64ToARGB) // %3 : "cc", "memory", "v0", "v1", "v2", "v3", "v4"); } @@ -2506,9 +2506,9 @@ void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) { "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels. "prfm pldl1keep, [%0, 448] \n" "subs %w2, %w2, #8 \n" // 8 processed per loop. - "umull v0.8h, v0.8b, v4.8b \n" // B + "umull v0.8h, v0.8b, v4.8b \n" // R "umlal v0.8h, v1.8b, v5.8b \n" // G - "umlal v0.8h, v2.8b, v6.8b \n" // R + "umlal v0.8h, v2.8b, v6.8b \n" // B "uqrshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit Y "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "b.gt 1b \n" |