diff options
author | Frank Barchard <fbarchard@google.com> | 2022-06-06 18:30:15 -0700 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2022-06-07 01:41:56 +0000 |
commit | 60254a1d846a93a4d7559009004cdd91bcc04d82 (patch) | |
tree | a56a3667ace9c3e5394bfd58787ea7d5883c05b6 /source/row_neon64.cc | |
parent | c0c8c40b31636e575eaf07921d58d8f4ff3aa983 (diff) | |
download | libyuv-60254a1d846a93a4d7559009004cdd91bcc04d82.tar.gz |
I210ToI420, InterpolatePlane_16, and ScalePlane Vertical-only asan fix
- Add I210ToI420 to convert 10-bit 4:2:2 YUV to 8-bit 4:2:0 YUV
- Add NEON InterpolateRow_16 for fast 10 bit scaling
- When scaling up, set step to interpolate toward height - 1 to avoid buffer overread
- When scaling down, center the 2 rows used for source to achieve filtering.
- CopyPlane: check for a size of 0 and return early
Bug: libyuv:931, b/228605787, b/233233302, b/233634772, b/234558395, b/234340482
Change-Id: I63e8580710a57812b683c2fe40583ac5a179c4f1
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3687552
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Diffstat (limited to 'source/row_neon64.cc')
-rw-r--r-- | source/row_neon64.cc | 65 |
1 file changed, 65 insertions, 0 deletions
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 8d43d594..6135014b 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -2966,6 +2966,71 @@ void InterpolateRow_NEON(uint8_t* dst_ptr,
       : "cc", "memory", "v0", "v1", "v3", "v4", "v5");
 }
 
+// Bilinear filter 8x2 -> 8x1: blends two rows of uint16_t pixels into dst_ptr, 8 pixels per loop pass, weighting the rows by (256 - f) and f out of 256.
+void InterpolateRow_16_NEON(uint16_t* dst_ptr,
+                            const uint16_t* src_ptr,
+                            ptrdiff_t src_stride,  // added to a uint16_t pointer, so counted in elements, not bytes
+                            int dst_width,  // NOTE(review): every loop body runs at least once and steps by 8; presumably callers pass a positive multiple of 8 - confirm
+                            int source_y_fraction) {  // 0 selects the copy path, 128 the averaging path; NOTE(review): presumably in [0, 255] - confirm
+  int y1_fraction = source_y_fraction;  // weight of the second row (src_ptr1)
+  int y0_fraction = 256 - y1_fraction;  // weight of the first row; the two weights sum to 256
+  const uint16_t* src_ptr1 = src_ptr + src_stride;  // second source row
+
+  asm volatile(
+      "cmp %w4, #0 \n"  // y1_fraction == 0: second row contributes nothing
+      "b.eq 100f \n"  // -> plain copy of the first row
+      "cmp %w4, #128 \n"  // y1_fraction == 128: equal weights
+      "b.eq 50f \n"  // -> rounding average of the two rows
+
+      "dup v5.8h, %w4 \n"  // v5 = y1_fraction in all 8 lanes
+      "dup v4.8h, %w5 \n"  // v4 = y0_fraction in all 8 lanes
+      // General purpose row blend: dst = (row0 * y0_fraction + row1 * y1_fraction + 128) >> 8.
+      "1: \n"
+      "ld1 {v0.8h}, [%1], #16 \n"  // 8 pixels from the first row, advance src_ptr
+      "ld1 {v1.8h}, [%2], #16 \n"  // 8 pixels from the second row, advance src_ptr1
+      "subs %w3, %w3, #8 \n"  // 8 pixels consumed; sets flags for b.gt below
+      "umull v2.4s, v0.4h, v4.4h \n"  // low 4 lanes: row0 * y0_fraction, widened to 32 bits
+      "prfm pldl1keep, [%1, 448] \n"  // prefetch hint 448 bytes ahead in the first row
+      "umull2 v3.4s, v0.8h, v4.8h \n"  // high 4 lanes: row0 * y0_fraction
+      "prfm pldl1keep, [%2, 448] \n"  // prefetch hint 448 bytes ahead in the second row
+      "umlal v2.4s, v1.4h, v5.4h \n"  // low 4 lanes: += row1 * y1_fraction
+      "umlal2 v3.4s, v1.8h, v5.8h \n"  // high 4 lanes: += row1 * y1_fraction
+      "rshrn v0.4h, v2.4s, #8 \n"  // rounding shift right by 8, narrow to 16 bits (low half)
+      "rshrn2 v0.8h, v3.4s, #8 \n"  // same for the high half
+      "st1 {v0.8h}, [%0], #16 \n"  // store 8 blended pixels, advance dst_ptr
+      "b.gt 1b \n"  // loop while the remaining width is > 0
+      "b 99f \n"
+
+      // Blend 50 / 50: rounding average of the two rows, no multiplies needed.
+      "50: \n"
+      "ld1 {v0.8h}, [%1], #16 \n"
+      "ld1 {v1.8h}, [%2], #16 \n"
+      "subs %w3, %w3, #8 \n"
+      "prfm pldl1keep, [%1, 448] \n"
+      "urhadd v0.8h, v0.8h, v1.8h \n"  // unsigned rounding halving add: (row0 + row1 + 1) >> 1
+      "prfm pldl1keep, [%2, 448] \n"
+      "st1 {v0.8h}, [%0], #16 \n"
+      "b.gt 50b \n"
+      "b 99f \n"
+
+      // Blend 100 / 0 - Copy the first row unchanged; the second row is never read.
+      "100: \n"
+      "ld1 {v0.8h}, [%1], #16 \n"
+      "subs %w3, %w3, #8 \n"
+      "prfm pldl1keep, [%1, 448] \n"
+      "st1 {v0.8h}, [%0], #16 \n"
+      "b.gt 100b \n"
+
+      "99: \n"  // common exit
+      : "+r"(dst_ptr),    // %0  read-write: post-incremented by the stores
+        "+r"(src_ptr),    // %1  read-write: post-incremented by the loads
+        "+r"(src_ptr1),   // %2  read-write: post-incremented by the loads
+        "+r"(dst_width)   // %3  read-write: counted down by 8 per pass
+      : "r"(y1_fraction),  // %4  input only
+        "r"(y0_fraction)   // %5  input only
+      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5");  // flags, memory and every vector register used above
+}
+
 // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
 void ARGBBlendRow_NEON(const uint8_t* src_argb,
                        const uint8_t* src_argb1, |