aboutsummaryrefslogtreecommitdiff
path: root/source/row_neon64.cc
diff options
context:
space:
mode:
author Frank Barchard <fbarchard@google.com> 2022-09-16 11:12:39 -0700
committer Frank Barchard <fbarchard@chromium.org> 2022-09-16 19:46:47 +0000
commit f71c83552d373f0ff41833b17e2880632d8561d7 (patch)
tree 09088188086a6b03d07a5ebaa8edf01658466ad8 /source/row_neon64.cc
parent 3e38ce50589d9319badc0501f96d6c5b2b177472 (diff)
download libyuv-f71c83552d373f0ff41833b17e2880632d8561d7.tar.gz
I420ToRGB24MatrixFilter function added
- Implemented as 3 steps: Upsample UV to 4:4:4, I444ToARGB, ARGBToRGB24
- Fix some build warnings for missing prototypes.

Pixel 4
I420ToRGB24_Opt (743 ms)
I420ToRGB24Filter_Opt (1331 ms)

Windows with skylake xeon:
x86 32 bit
I420ToRGB24_Opt (387 ms)
I420ToRGB24Filter_Opt (571 ms)
x64 64 bit
I420ToRGB24_Opt (384 ms)
I420ToRGB24Filter_Opt (582 ms)

Bug: libyuv:938, libyuv:830
Change-Id: Ie27f70816ec084437014f8a1c630ae011ee2348c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3900298
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Diffstat (limited to 'source/row_neon64.cc')
-rw-r--r-- source/row_neon64.cc 46
1 files changed, 23 insertions, 23 deletions
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index e166ce04..37962378 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -653,11 +653,11 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
#if LIBYUV_USE_ST2
// Read 16 Y, 8 UV, and write 8 YUY2
void DetileToYUY2_NEON(const uint8_t* src_y,
- ptrdiff_t src_y_tile_stride,
- const uint8_t* src_uv,
- ptrdiff_t src_uv_tile_stride,
- uint8_t* dst_yuy2,
- int width) {
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
asm volatile(
"1: \n"
"ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys
@@ -667,23 +667,23 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
"subs %w3, %w3, #16 \n" // store 8 YUY2
"st2 {v0.16b,v1.16b}, [%2], #32 \n"
"b.gt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_yuy2), // %2
- "+r"(width) // %3
- : "r"(src_y_tile_stride), // %4
- "r"(src_uv_tile_stride) // %5
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_yuy2), // %2
+ "+r"(width) // %3
+ : "r"(src_y_tile_stride), // %4
+ "r"(src_uv_tile_stride) // %5
: "cc", "memory", "v0", "v1" // Clobber list
);
}
#else
// Read 16 Y, 8 UV, and write 8 YUY2
void DetileToYUY2_NEON(const uint8_t* src_y,
- ptrdiff_t src_y_tile_stride,
- const uint8_t* src_uv,
- ptrdiff_t src_uv_tile_stride,
- uint8_t* dst_yuy2,
- int width) {
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
asm volatile(
"1: \n"
"ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys
@@ -694,13 +694,13 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
"prfm pldl1keep, [%1, 1792] \n"
"zip2 v3.16b, v0.16b, v1.16b \n"
"st1 {v2.16b,v3.16b}, [%2], #32 \n" // store 8 YUY2
- "b.gt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_yuy2), // %2
- "+r"(width) // %3
- : "r"(src_y_tile_stride), // %4
- "r"(src_uv_tile_stride) // %5
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_yuy2), // %2
+ "+r"(width) // %3
+ : "r"(src_y_tile_stride), // %4
+ "r"(src_uv_tile_stride) // %5
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber list
);
}