From 1a971f8cc3513766f3497ed73e613217b860935d Mon Sep 17 00:00:00 2001
From: Frank Barchard
Date: Fri, 24 Mar 2023 02:18:55 -0700
Subject: clang 17 -flto-thin bug fix for Neon YUVtoRGB and ARGBToRGB565Dither

- YUV to RGB AArch32 kRGBCoeffBias rewind pointer
- ARGBToRGB565Dither declare width and source pointers as modified

Bug: chromium:1424089
Change-Id: I987180652331bab16ce27d8d166399a687ee890e
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4370099
Reviewed-by: Mirko Bonadei
Commit-Queue: Frank Barchard
---
 source/row_neon.cc   | 25 +++++++++++++------------
 source/row_neon64.cc | 20 ++++++++++----------
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/source/row_neon.cc b/source/row_neon.cc
index b3e800d9..bd45082f 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -91,10 +91,11 @@ extern "C" {
 
 #define YUVTORGB_SETUP \
   "vld4.8 {d26[], d27[], d28[], d29[]}, [%[kUVCoeff]] \n" \
-  "vld1.16 {d31[]}, [%[kRGBCoeffBias]]! \n" \
-  "vld1.16 {d20[], d21[]}, [%[kRGBCoeffBias]]! \n" \
-  "vld1.16 {d22[], d23[]}, [%[kRGBCoeffBias]]! \n" \
-  "vld1.16 {d24[], d25[]}, [%[kRGBCoeffBias]] \n"
+  "vld1.16 {d31[]}, [%[kRGBCoeffBias]]! \n" \
+  "vld1.16 {d20[], d21[]}, [%[kRGBCoeffBias]]! \n" \
+  "vld1.16 {d22[], d23[]}, [%[kRGBCoeffBias]]! \n" \
+  "vld1.16 {d24[], d25[]}, [%[kRGBCoeffBias]] \n" \
+  "sub %[kRGBCoeffBias], %[kRGBCoeffBias], #10 \n"
 
 // q0: B uint16x8_t
 // q1: G uint16x8_t
@@ -1754,20 +1755,20 @@ void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
                                 const uint32_t dither4,
                                 int width) {
   asm volatile(
-      "vdup.32 d7, %2 \n" // dither4
+      "vdup.32 d7, %3 \n" // dither4
       "1: \n"
-      "vld4.8 {d0, d2, d4, d6}, [%1]! \n" // load 8 pixels of ARGB.
-      "subs %3, %3, #8 \n" // 8 processed per loop.
+      "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 pixels of ARGB.
+      "subs %2, %2, #8 \n" // 8 processed per loop.
       "vqadd.u8 d0, d0, d7 \n"
       "vqadd.u8 d2, d2, d7 \n"
       "vqadd.u8 d4, d4, d7 \n" // add for dither
       ARGBTORGB565
-      "vst1.8 {q2}, [%0]! \n" // store 8 RGB565.
+      "vst1.8 {q2}, [%1]! \n" // store 8 RGB565.
       "bgt 1b \n"
-      : "+r"(dst_rgb) // %0
-      : "r"(src_argb), // %1
-        "r"(dither4), // %2
-        "r"(width) // %3
+      : "+r"(src_argb), // %0
+        "+r"(dst_rgb), // %1
+        "+r"(width) // %2
+      : "r"(dither4) // %3
       : "cc", "memory", "q0", "q1", "q2", "q3");
 }
 
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 2b5522f0..4355547c 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -1982,21 +1982,21 @@ void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
                                 const uint32_t dither4,
                                 int width) {
   asm volatile(
-      "dup v1.4s, %w2 \n" // dither4
+      "dup v1.4s, %w3 \n" // dither4
       "1: \n"
-      "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%1], #32 \n" // load 8
-                                                        // pixels
-      "subs %w3, %w3, #8 \n" // 8 processed per loop.
+      "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%0], #32 \n" // load 8 ARGB
+      "subs %w2, %w2, #8 \n" // 8 processed per loop.
       "uqadd v16.8b, v16.8b, v1.8b \n"
       "prfm pldl1keep, [%0, 448] \n"
       "uqadd v17.8b, v17.8b, v1.8b \n"
-      "uqadd v18.8b, v18.8b, v1.8b \n" ARGBTORGB565
-      "st1 {v18.16b}, [%0], #16 \n" // store 8 pixels RGB565.
+      "uqadd v18.8b, v18.8b, v1.8b \n"
+      ARGBTORGB565
+      "st1 {v18.16b}, [%1], #16 \n" // store 8 pixels RGB565.
       "b.gt 1b \n"
-      : "+r"(dst_rgb) // %0
-      : "r"(src_argb), // %1
-        "r"(dither4), // %2
-        "r"(width) // %3
+      : "+r"(src_argb), // %0
+        "+r"(dst_rgb), // %1
+        "+r"(width) // %2
+      : "r"(dither4) // %3
       : "cc", "memory", "v1", "v16", "v17", "v18", "v19");
 }
 
-- 
cgit v1.2.3