From daf9778a24a138cf7578b1ddf70ca867c2882c2c Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Mon, 11 Oct 2021 11:47:12 -0700 Subject: Fix for failed compile with armv-7a neon gcc Bug: libyuv:907 Change-Id: I955e83c72b57ce5ba45730030b32f337be610a21 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3216739 Reviewed-by: Mirko Bonadei Commit-Queue: Frank Barchard --- source/row_neon.cc | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'source/row_neon.cc') diff --git a/source/row_neon.cc b/source/row_neon.cc index 6ef6f1c4..03ad8302 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -1304,16 +1304,17 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb, int width) { asm volatile( "1: \n" - "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of - // RGB24. + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 16 pixels of ARGB. + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" + "subs %2, %2, #16 \n" // 16 processed per loop. + "vst3.8 {d0, d2, d4}, [%1]! \n" // store 16 RGB24 pixels. + "vst3.8 {d1, d3, d5}, [%1]! \n" "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_rgb24), // %1 "+r"(width) // %2 : - : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List + : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List ); } @@ -2319,9 +2320,6 @@ void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"); } -static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7, - 10, 9, 8, 11, 14, 13, 12, 15}; - void ARGBToAR64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ar64, int width) { @@ -2342,11 +2340,15 @@ void ARGBToAR64Row_NEON(const uint8_t* src_argb, : "cc", "memory", "q0", "q1", "q2", "q3"); } +static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7, + 10, 9, 8, 11, 14, 13, 12, 15}; + void ARGBToAB64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ab64, int width) { asm volatile( - "vld1.8 q4, %3 \n" // shuffler + "vld1.8 {q4}, [%3] \n" // shuffler + "1: \n" "vld1.8 {q0}, [%0]! \n" "vld1.8 {q2}, [%0]! \n" @@ -2360,10 +2362,10 @@ void ARGBToAB64Row_NEON(const uint8_t* src_argb, "vst2.8 {q0, q1}, [%1]! \n" // store 4 pixels "vst2.8 {q2, q3}, [%1]! \n" // store 4 pixels "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_ab64), // %1 - "+r"(width) // %2 - : "m"(kShuffleARGBToABGR) // %3 + : "+r"(src_argb), // %0 + "+r"(dst_ab64), // %1 + "+r"(width) // %2 + : "r"(&kShuffleARGBToABGR) // %3 : "cc", "memory", "q0", "q1", "q2", "q3", "q4"); } @@ -2397,7 +2399,8 @@ void AB64ToARGBRow_NEON(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { asm volatile( - "vld1.8 d8, %3 \n" // shuffler + "vld1.8 {d8}, [%3] \n" // shuffler + "1: \n" "vld1.16 {q0}, [%0]! \n" "vld1.16 {q1}, [%0]! \n" @@ -2411,10 +2414,10 @@ void AB64ToARGBRow_NEON(const uint16_t* src_ab64, "vst1.8 {q0}, [%1]! \n" // store 4 pixels "vst1.8 {q2}, [%1]! \n" // store 4 pixels "bgt 1b \n" - : "+r"(src_ab64), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleAB64ToARGB) // %3 + : "+r"(src_ab64), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(&kShuffleAB64ToARGB) // %3 : "cc", "memory", "q0", "q1", "q2", "q3", "q4"); } -- cgit v1.2.3