diff options
Diffstat (limited to 'files/source/row_any.cc')
-rw-r--r-- | files/source/row_any.cc | 40 |
1 files changed, 24 insertions, 16 deletions
diff --git a/files/source/row_any.cc b/files/source/row_any.cc index 74a6621f..af5d1fbc 100644 --- a/files/source/row_any.cc +++ b/files/source/row_any.cc @@ -19,6 +19,14 @@ namespace libyuv { extern "C" { #endif +// memset for temp is meant to clear the source buffer (not dest) so that +// SIMD that reads full multiple of 16 bytes will not trigger msan errors. +// memset is not needed for production, as the garbage values are processed but +// not used, although there may be edge cases for subsampling. +// The size of the buffer is based on the largest read, which can be inferred +// by the source type (e.g. ARGB) and the mask (last parameter), or by examining +// the source code for how much the source pointers are advanced. + // Subsampled source needs to be increase by 1 of not even. #define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift)) @@ -618,17 +626,17 @@ ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15) // Any 1 to 1 blended. Destination is read, modify, write. #define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[128 * 2]); \ - memset(temp, 0, 128 * 2); /* for YUY2 and msan */ \ + SIMD_ALIGNED(uint8 temp[64 * 2]); \ + memset(temp, 0, 64 * 2); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, n); \ } \ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ - memcpy(temp + 128, dst_ptr + n * BPP, r * BPP); \ - ANY_SIMD(temp, temp + 128, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ + memcpy(temp + 64, dst_ptr + n * BPP, r * BPP); \ + ANY_SIMD(temp, temp + 64, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ } #ifdef HAS_ARGBCOPYALPHAROW_AVX2 @@ -713,31 +721,31 @@ ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8*, 4, 4, 7) #define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ void NAMEANY(const uint16* src_ptr, uint16* dst_ptr, T shuffler, \ int width) { \ - SIMD_ALIGNED(uint16 temp[32 * 2]); \ - memset(temp, 0, 64); /* for msan */ \ + SIMD_ALIGNED(uint16 temp[16 * 2]); \ + memset(temp, 0, 32); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, shuffler, n); \ } \ - memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ - ANY_SIMD(temp, temp + 64, shuffler, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ + memcpy(temp, src_ptr + n, r * SBPP); \ + ANY_SIMD(temp, temp + 16, shuffler, MASK + 1); \ + memcpy(dst_ptr + n, temp + 16, r * BPP); \ } #ifdef HAS_HALFFLOATROW_SSE2 -ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, float, 1, 1, 7) +ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, float, 2, 2, 7) #endif #ifdef HAS_HALFFLOATROW_AVX2 -ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, float, 1, 1, 15) +ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, float, 2, 2, 15) #endif #ifdef HAS_HALFFLOATROW_F16C -ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, float, 1, 1, 15) -ANY11P16(HalfFloat1Row_Any_F16C, HalfFloat1Row_F16C, float, 1, 1, 15) +ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, float, 2, 2, 15) +ANY11P16(HalfFloat1Row_Any_F16C, HalfFloat1Row_F16C, float, 2, 2, 15) #endif #ifdef HAS_HALFFLOATROW_NEON -ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, float, 1, 1, 7) -ANY11P16(HalfFloat1Row_Any_NEON, HalfFloat1Row_NEON, float, 1, 1, 7) +ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, float, 2, 2, 7) +ANY11P16(HalfFloat1Row_Any_NEON, HalfFloat1Row_NEON, float, 2, 2, 7) #endif #undef ANY11P16 |