diff options
author | Darren Hsieh <darren.hsieh@sifive.com> | 2023-05-16 23:47:58 -0700 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2023-06-13 00:40:39 +0000 |
commit | 873eaa3bbf5296f57193686573395e6b5cc99d74 (patch) | |
tree | 87c558ae432b2e43c903458a2fb46861ef791a19 /include/libyuv/scale_row.h | |
parent | 29bcf021c68e5478e1cd0c1099122dbb10eb474e (diff) | |
download | libyuv-873eaa3bbf5296f57193686573395e6b5cc99d74.tar.gz |
[RVV] Enable Scale{ARGB,UV}RowDown{2,4,EVEN}_RVV
Run on SiFive internal FPGA:
Test case RVV function Speedup
I444ScaleDownBy3_Box ScaleAddRow_RVV+ScaleAddCols(scalar) 2.8
ARGBScaleDownBy2_None ScaleARGBRowDown2_RVV 2.2
ARGBScaleDownBy2_Linear ScaleARGBRowDown2Linear_RVV 5.0
ARGBScaleDownBy2_Box ScaleARGBRowDown2Box_RVV 4.3
ARGBScaleDownBy4_None ScaleARGBRowDownEven_RVV 1.2
ARGBScaleDownBy8_Box ScaleARGBRowDownEvenBox_RVV 3.2
ARGBScaleDownBy4_Box ScaleARGBRowDown2Box_RVV 4.5
I444ScaleDownBy2_None ScaleRowDown2_RVV 5.8
I444ScaleDownBy2_Linear ScaleRowDown2Linear_RVV 6.1
I444ScaleDownBy2_Box ScaleRowDown2Box_RVV 5.0
I444ScaleDownBy4_None ScaleRowDown4_RVV 3.6
I444ScaleDownBy4_Box ScaleRowDown4Box_RVV 3.5
UVScaleDownBy2_None ScaleUVRowDown2_RVV 5.8
UVScaleDownBy2_Linear ScaleUVRowDown2Linear_RVV 5.6
UVScaleDownBy2_Box ScaleUVRowDown2Box_RVV 4.1
UVScaleDownBy4_None ScaleUVRowDown4_RVV 1.7
UVScaleDownBy4_Box ScaleUVRowDown2Box_RVV 4.5
avg-speedup: 4
Note: Specialize ScaleUVRowDown with step_size=4 by ScaleUVRowDown4_RVV.
Bug: libyuv:956
Change-Id: If9604a6aadf681193f282507602c57c726332202
Signed-off-by: Darren Hsieh <darren.hsieh@sifive.com>
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4601684
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'include/libyuv/scale_row.h')
-rw-r--r-- | include/libyuv/scale_row.h | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index a7957c3f..631b15dd 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -175,6 +175,18 @@ extern "C" { #define HAS_SCALEROWDOWN34_LSX #endif +#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) +#define HAS_SCALEADDROW_RVV +#define HAS_SCALEARGBROWDOWN2_RVV +#define HAS_SCALEARGBROWDOWNEVEN_RVV +#define HAS_SCALEROWDOWN2_RVV +#define HAS_SCALEROWDOWN4_RVV +#define HAS_SCALEUVROWDOWN2_RVV +#define HAS_SCALEUVROWDOWN2LINEAR_RVV +#define HAS_SCALEUVROWDOWN2BOX_RVV +#define HAS_SCALEUVROWDOWNEVEN_RVV +#endif + // Scale ARGB vertically with bilinear interpolation. void ScalePlaneVertical(int src_height, int dst_width, @@ -949,6 +961,18 @@ void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); +void ScaleARGBRowDown2_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); void ScaleARGBRowDown2_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, @@ -1061,6 +1085,16 @@ void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb, int src_stepx, uint8_t* dst_argb, int dst_width); +void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width); void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, @@ -1143,6 +1177,18 @@ void ScaleUVRowDown2Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width); +void ScaleUVRowDown2_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Linear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); void ScaleUVRowDown2_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, @@ -1203,6 +1249,16 @@ void ScaleUVRowDownEvenBox_NEON(const uint8_t* src_ptr, int src_stepx, uint8_t* dst_uv, int dst_width); +void ScaleUVRowDown4_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDownEven_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_uv, + int dst_width); void ScaleUVRowDownEven_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, int32_t src_stepx, @@ -1744,6 +1800,29 @@ void ScaleRowDown34_1_Box_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width); +void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleRowDown2_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); + +void ScaleRowDown4_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); + #ifdef __cplusplus } // extern "C" } // namespace libyuv |