diff options
author | Darren Hsieh <darren.hsieh@sifive.com> | 2023-05-02 00:33:27 -0700 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2023-05-10 00:29:20 +0000 |
commit | 964d963afb164e768919f5bd2284202d87a3d37c (patch) | |
tree | ce76e4d8dc4464791f42d1de762a97229da47e99 /source/convert_argb.cc | |
parent | 1d940cc570212c8979d81e78738296fe39f9df43 (diff) | |
download | libyuv-964d963afb164e768919f5bd2284202d87a3d37c.tar.gz |
Enable I422To{ARGB,RGBA,RGB24}Row_RVV
Run on SiFive internal FPGA:
I422ToARGB_Opt (~10x vs scalar)
I422ToRGBA_Opt (~10x vs scalar)
I420ToRGB24_Opt (~8x vs scalar)
LIBYUV_WIDTH=1280 LIBYUV_HEIGHT=720 LIBYUV_REPEAT=10
This CL manually sets rounding mode,
since we use fixed-point vector narrowing clip.
The RISC-V vector spec does not define a default value for the fixed-point rounding mode:
https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#38-vector-fixed-point-rounding-mode-register-vxrm
The behavior could be different on different platforms. To avoid unexpected behavior, we set the rounding mode manually.
Change-Id: I90f0dcb90c37f7da7caab8eb1df6c9c7a3c874a8
Signed-off-by: Darren Hsieh <darren.hsieh@sifive.com>
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4512373
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/convert_argb.cc')
-rw-r--r-- | source/convert_argb.cc | 35 |
1 files changed, 35 insertions, 0 deletions
diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 691208fd..b06ece53 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -136,6 +136,11 @@ int I420ToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToARGBRow = I422ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); @@ -385,6 +390,11 @@ int I422ToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToARGBRow = I422ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); @@ -4511,6 +4521,11 @@ int I422ToRGBAMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGBAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToRGBARow = I422ToRGBARow_RVV; + } +#endif for (y = 0; y < height; ++y) { I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width); @@ -4734,6 +4749,11 @@ int I420ToRGBAMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGBAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToRGBARow = I422ToRGBARow_RVV; + } +#endif for (y = 0; y < height; ++y) { I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width); @@ -4859,6 +4879,11 @@ int I420ToRGB24Matrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToRGB24Row = I422ToRGB24Row_RVV; + } +#endif for (y = 0; y < height; ++y) { I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); @@ -5056,6 +5081,11 @@ int I422ToRGB24Matrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToRGB24Row = I422ToRGB24Row_RVV; + } +#endif for (y = 0; y < height; ++y) { I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); @@ -5620,6 +5650,11 @@ int I420ToRGB565Dither(const uint8_t* src_y, } } #endif 
+#if defined(HAS_I422TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToARGBRow = I422ToARGBRow_RVV; + } +#endif #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2; |