aboutsummaryrefslogtreecommitdiff
path: root/source/row_neon.cc
diff options
context:
space:
mode:
author: Frank Barchard <fbarchard@google.com> 2021-10-11 11:47:12 -0700
committer: libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2021-10-12 18:17:50 +0000
commit: daf9778a24a138cf7578b1ddf70ca867c2882c2c (patch)
tree: 406392bd8720f6b2780c232232b8fb79f14257cb /source/row_neon.cc
parent: d13d9d5972ec99e9f923ec5ca2afb8c1d21b8e5a (diff)
download: libyuv-daf9778a24a138cf7578b1ddf70ca867c2882c2c.tar.gz
Fix for failed compile with armv7-a neon gcc
Bug: libyuv:907
Change-Id: I955e83c72b57ce5ba45730030b32f337be610a21
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3216739
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/row_neon.cc')
-rw-r--r-- source/row_neon.cc 39
1 files changed, 21 insertions, 18 deletions
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 6ef6f1c4..03ad8302 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -1304,16 +1304,17 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
int width) {
asm volatile(
"1: \n"
- "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of
- // RGB24.
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 16 pixels of ARGB.
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n"
+ "subs %2, %2, #16 \n" // 16 processed per loop.
+ "vst3.8 {d0, d2, d4}, [%1]! \n" // store 16 RGB24 pixels.
+ "vst3.8 {d1, d3, d5}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_rgb24), // %1
"+r"(width) // %2
:
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
);
}
@@ -2319,9 +2320,6 @@ void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
}
-static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7,
- 10, 9, 8, 11, 14, 13, 12, 15};
-
void ARGBToAR64Row_NEON(const uint8_t* src_argb,
uint16_t* dst_ar64,
int width) {
@@ -2342,11 +2340,15 @@ void ARGBToAR64Row_NEON(const uint8_t* src_argb,
: "cc", "memory", "q0", "q1", "q2", "q3");
}
+static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7,
+ 10, 9, 8, 11, 14, 13, 12, 15};
+
void ARGBToAB64Row_NEON(const uint8_t* src_argb,
uint16_t* dst_ab64,
int width) {
asm volatile(
- "vld1.8 q4, %3 \n" // shuffler
+ "vld1.8 {q4}, [%3] \n" // shuffler
+
"1: \n"
"vld1.8 {q0}, [%0]! \n"
"vld1.8 {q2}, [%0]! \n"
@@ -2360,10 +2362,10 @@ void ARGBToAB64Row_NEON(const uint8_t* src_argb,
"vst2.8 {q0, q1}, [%1]! \n" // store 4 pixels
"vst2.8 {q2, q3}, [%1]! \n" // store 4 pixels
"bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_ab64), // %1
- "+r"(width) // %2
- : "m"(kShuffleARGBToABGR) // %3
+ : "+r"(src_argb), // %0
+ "+r"(dst_ab64), // %1
+ "+r"(width) // %2
+ : "r"(&kShuffleARGBToABGR) // %3
: "cc", "memory", "q0", "q1", "q2", "q3", "q4");
}
@@ -2397,7 +2399,8 @@ void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
uint8_t* dst_argb,
int width) {
asm volatile(
- "vld1.8 d8, %3 \n" // shuffler
+ "vld1.8 {d8}, [%3] \n" // shuffler
+
"1: \n"
"vld1.16 {q0}, [%0]! \n"
"vld1.16 {q1}, [%0]! \n"
@@ -2411,10 +2414,10 @@ void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
"vst1.8 {q0}, [%1]! \n" // store 4 pixels
"vst1.8 {q2}, [%1]! \n" // store 4 pixels
"bgt 1b \n"
- : "+r"(src_ab64), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kShuffleAB64ToARGB) // %3
+ : "+r"(src_ab64), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(&kShuffleAB64ToARGB) // %3
: "cc", "memory", "q0", "q1", "q2", "q3", "q4");
}