aboutsummaryrefslogtreecommitdiff
path: root/source/row_neon.cc
diff options
context:
space:
mode:
author: Frank Barchard <fbarchard@google.com> 2022-09-16 11:12:39 -0700
committer: Frank Barchard <fbarchard@chromium.org> 2022-09-16 19:46:47 +0000
commit: f71c83552d373f0ff41833b17e2880632d8561d7 (patch)
tree: 09088188086a6b03d07a5ebaa8edf01658466ad8 /source/row_neon.cc
parent: 3e38ce50589d9319badc0501f96d6c5b2b177472 (diff)
download: libyuv-f71c83552d373f0ff41833b17e2880632d8561d7.tar.gz
I420ToRGB24MatrixFilter function added
- Implemented as 3 steps: Upsample UV to 4:4:4, I444ToARGB, ARGBToRGB24
- Fix some build warnings for missing prototypes.

Pixel 4:
I420ToRGB24_Opt (743 ms)
I420ToRGB24Filter_Opt (1331 ms)

Windows with Skylake Xeon:
x86 32 bit
I420ToRGB24_Opt (387 ms)
I420ToRGB24Filter_Opt (571 ms)
x64 64 bit
I420ToRGB24_Opt (384 ms)
I420ToRGB24Filter_Opt (582 ms)

Bug: libyuv:938, libyuv:830
Change-Id: Ie27f70816ec084437014f8a1c630ae011ee2348c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3900298
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Diffstat (limited to 'source/row_neon.cc')
-rw-r--r-- source/row_neon.cc 54
1 files changed, 27 insertions, 27 deletions
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 82039e9f..3f5c5de1 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -625,20 +625,20 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
#if LIBYUV_USE_ST2
// Read 16 Y, 8 UV, and write 8 YUYV.
void DetileToYUY2_NEON(const uint8_t* src_y,
- ptrdiff_t src_y_tile_stride,
- const uint8_t* src_uv,
- ptrdiff_t src_uv_tile_stride,
- uint8_t* dst_yuy2,
- int width) {
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
asm volatile(
- "1: \n"
- "vld1.8 q0, [%0], %4 \n" // Load 16 Y
- "pld [%0, 1792] \n"
- "vld1.8 q1, [%1], %5 \n" // Load 8 UV
- "pld [%1, 1792] \n"
- "subs %3, %3, #16 \n"
- "vst2.8 {q0, q1}, [%2]! \n"
- "bgt 1b \n"
+ "1: \n"
+ "vld1.8 q0, [%0], %4 \n" // Load 16 Y
+ "pld [%0, 1792] \n"
+ "vld1.8 q1, [%1], %5 \n" // Load 8 UV
+ "pld [%1, 1792] \n"
+ "subs %3, %3, #16 \n"
+ "vst2.8 {q0, q1}, [%2]! \n"
+ "bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_yuy2), // %2
@@ -651,21 +651,21 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
#else
// Read 16 Y, 8 UV, and write 8 YUYV.
void DetileToYUY2_NEON(const uint8_t* src_y,
- ptrdiff_t src_y_tile_stride,
- const uint8_t* src_uv,
- ptrdiff_t src_uv_tile_stride,
- uint8_t* dst_yuy2,
- int width) {
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
asm volatile(
- "1: \n"
- "vld1.8 q0, [%0], %4 \n" // Load 16 Y
- "vld1.8 q1, [%1], %5 \n" // Load 8 UV
- "subs %3, %3, #16 \n"
- "pld [%0, 1792] \n"
- "vzip.8 q0, q1 \n"
- "pld [%1, 1792] \n"
- "vst1.8 {q0, q1}, [%2]! \n"
- "bgt 1b \n"
+ "1: \n"
+ "vld1.8 q0, [%0], %4 \n" // Load 16 Y
+ "vld1.8 q1, [%1], %5 \n" // Load 8 UV
+ "subs %3, %3, #16 \n"
+ "pld [%0, 1792] \n"
+ "vzip.8 q0, q1 \n"
+ "pld [%1, 1792] \n"
+ "vst1.8 {q0, q1}, [%2]! \n"
+ "bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_yuy2), // %2