aboutsummaryrefslogtreecommitdiff
path: root/source/row_neon64.cc
diff options
context:
space:
mode:
author: Frank Barchard <fbarchard@google.com> 2021-03-23 15:54:02 -0700
committer: Frank Barchard <fbarchard@chromium.org> 2021-03-23 23:45:54 +0000
commit: d8f1bfc9816a9fc76f3a25cc0ee272fb9c07622a (patch)
tree: 6201f9cab35550653480bc372580d2c5014d074d /source/row_neon64.cc
parent: b046131c0bd44ca3a11276194d07b85373cfd608 (diff)
download: libyuv-d8f1bfc9816a9fc76f3a25cc0ee272fb9c07622a.tar.gz
Add RAWToJ420
Add J420 output from RAW. Optimize RGB24 and RAW to J420 on ARM by using NEON for the 2-step conversion. Also fix a sign-compare warning that was breaking the Windows build.

Bug: libyuv:887, b/183534734
Change-Id: I8c39334552dc0b28414e638708db413d6adf8d6e
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2783382
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Diffstat (limited to 'source/row_neon64.cc')
-rw-r--r-- source/row_neon64.cc 12
1 files changed, 6 insertions, 6 deletions
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 9662cd3c..3281e90f 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -1628,10 +1628,10 @@ void AR64ToARGBRow_NEON(const uint16_t* src_ar64,
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"stp q0, q2, [%1], #32 \n" // store 8 pixels
"b.gt 1b \n"
- : "+r"(src_ar64), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kShuffleAR64ToARGB) // %3
+ : "+r"(src_ar64), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleAR64ToARGB) // %3
: "cc", "memory", "v0", "v1", "v2", "v3", "v4");
}
@@ -2506,9 +2506,9 @@ void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
"prfm pldl1keep, [%0, 448] \n"
"subs %w2, %w2, #8 \n" // 8 processed per loop.
- "umull v0.8h, v0.8b, v4.8b \n" // B
+ "umull v0.8h, v0.8b, v4.8b \n" // R
"umlal v0.8h, v1.8b, v5.8b \n" // G
- "umlal v0.8h, v2.8b, v6.8b \n" // R
+ "umlal v0.8h, v2.8b, v6.8b \n" // B
"uqrshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit Y
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
"b.gt 1b \n"