diff options
author | Frank Barchard <fbarchard@google.com> | 2022-09-16 11:12:39 -0700 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2022-09-16 19:46:47 +0000 |
commit | f71c83552d373f0ff41833b17e2880632d8561d7 (patch) | |
tree | 09088188086a6b03d07a5ebaa8edf01658466ad8 /source/row_neon.cc | |
parent | 3e38ce50589d9319badc0501f96d6c5b2b177472 (diff) | |
download | libyuv-f71c83552d373f0ff41833b17e2880632d8561d7.tar.gz |
I420ToRGB24MatrixFilter function added
- Implemented as 3 steps: Upsample UV to 4:4:4, I444ToARGB, ARGBToRGB24
- Fix some build warnings for missing prototypes.
Pixel 4:
I420ToRGB24_Opt (743 ms)
I420ToRGB24Filter_Opt (1331 ms)
Windows with Skylake Xeon:
x86 32-bit
I420ToRGB24_Opt (387 ms)
I420ToRGB24Filter_Opt (571 ms)
x64 64-bit
I420ToRGB24_Opt (384 ms)
I420ToRGB24Filter_Opt (582 ms)
Bug: libyuv:938, libyuv:830
Change-Id: Ie27f70816ec084437014f8a1c630ae011ee2348c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3900298
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Diffstat (limited to 'source/row_neon.cc')
-rw-r--r-- | source/row_neon.cc | 54 |
1 files changed, 27 insertions, 27 deletions
diff --git a/source/row_neon.cc b/source/row_neon.cc index 82039e9f..3f5c5de1 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -625,20 +625,20 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv, #if LIBYUV_USE_ST2 // Read 16 Y, 8 UV, and write 8 YUYV. void DetileToYUY2_NEON(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width) { + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { asm volatile( - "1: \n" - "vld1.8 q0, [%0], %4 \n" // Load 16 Y - "pld [%0, 1792] \n" - "vld1.8 q1, [%1], %5 \n" // Load 8 UV - "pld [%1, 1792] \n" - "subs %3, %3, #16 \n" - "vst2.8 {q0, q1}, [%2]! \n" - "bgt 1b \n" + "1: \n" + "vld1.8 q0, [%0], %4 \n" // Load 16 Y + "pld [%0, 1792] \n" + "vld1.8 q1, [%1], %5 \n" // Load 8 UV + "pld [%1, 1792] \n" + "subs %3, %3, #16 \n" + "vst2.8 {q0, q1}, [%2]! \n" + "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_yuy2), // %2 @@ -651,21 +651,21 @@ void DetileToYUY2_NEON(const uint8_t* src_y, #else // Read 16 Y, 8 UV, and write 8 YUYV. void DetileToYUY2_NEON(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width) { + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { asm volatile( - "1: \n" - "vld1.8 q0, [%0], %4 \n" // Load 16 Y - "vld1.8 q1, [%1], %5 \n" // Load 8 UV - "subs %3, %3, #16 \n" - "pld [%0, 1792] \n" - "vzip.8 q0, q1 \n" - "pld [%1, 1792] \n" - "vst1.8 {q0, q1}, [%2]! \n" - "bgt 1b \n" + "1: \n" + "vld1.8 q0, [%0], %4 \n" // Load 16 Y + "vld1.8 q1, [%1], %5 \n" // Load 8 UV + "subs %3, %3, #16 \n" + "pld [%0, 1792] \n" + "vzip.8 q0, q1 \n" + "pld [%1, 1792] \n" + "vst1.8 {q0, q1}, [%2]! \n" + "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_yuy2), // %2 |