about | summary | refs | log | tree | commit | diff
path: root/source/row_rvv.cc
diff options
context:
space:
mode:
author: Darren Hsieh <darren.hsieh@sifive.com>, 2023-04-09 21:34:25 -0700
committer: libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com>, 2023-04-13 19:33:16 +0000
commit: 44396e6e9aad554283c8f1fbe981ac122c40dfc7 (patch)
tree: 06debefea963536e10fbf1b0fe470cf889996083 /source/row_rvv.cc
parent: 68659d0d681b4c2318407f7dbc6eaa40055adba1 (diff)
download: libyuv-44396e6e9aad554283c8f1fbe981ac122c40dfc7.tar.gz
Add ARGBToRAWRow_RVV, ARGBToRGB24Row_RVV, RGB24ToARGBRow_RVV
* Run on SiFive internal FPGA:
  ARGBToRAW_Opt (~1.55x vs scalar)
  ARGBToRGB24_Opt (~1.44x vs scalar)
  RGB24ToARGB_Opt (~1.77x vs scalar)
  LIBYUV_WIDTH=1280 LIBYUV_HEIGHT=720 LIBYUV_REPEAT=10

Bug: libyuv:956
Change-Id: I26722f6848cd68684d95d9a7ee06ce0416e7985d
Signed-off-by: Darren Hsieh <darren.hsieh@sifive.com>
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4413083
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/row_rvv.cc')
-rw-r--r-- source/row_rvv.cc | 58
1 file changed, 50 insertions, 8 deletions
diff --git a/source/row_rvv.cc b/source/row_rvv.cc
index 0f264d34..629eca46 100644
--- a/source/row_rvv.cc
+++ b/source/row_rvv.cc
@@ -30,33 +30,33 @@ extern "C" {
void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
size_t vl = __riscv_vsetvl_e8m2(width);
vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
- while (width > 0) {
+ do {
vuint8m2_t v_b, v_g, v_r;
- vl = __riscv_vsetvl_e8m2(width);
__riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl);
__riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
width -= vl;
src_raw += (3 * vl);
dst_argb += (4 * vl);
- }
+ vl = __riscv_vsetvl_e8m2(width);
+ } while (width > 0);
}
void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
size_t vl = __riscv_vsetvl_e8m2(width);
vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
- while (width > 0) {
+ do {
vuint8m2_t v_b, v_g, v_r;
- vl = __riscv_vsetvl_e8m2(width);
__riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl);
__riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl);
width -= vl;
src_raw += (3 * vl);
dst_rgba += (4 * vl);
- }
+ vl = __riscv_vsetvl_e8m2(width);
+ } while (width > 0);
}
void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
- while (width > 0) {
+ do {
vuint8m2_t v_b, v_g, v_r;
size_t vl = __riscv_vsetvl_e8m2(width);
__riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_raw, vl);
@@ -64,7 +64,49 @@ void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
width -= vl;
src_raw += (3 * vl);
dst_rgb24 += (3 * vl);
- }
+ } while (width > 0);
+}
+
+void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e8m2(width);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vsseg3e8_v_u8m2(dst_raw, v_r, v_g, v_b, vl);
+ width -= vl;
+ src_argb += (4 * vl);
+ dst_raw += (3 * vl);
+ } while (width > 0);
+}
+
+void ARGBToRGB24Row_RVV(const uint8_t* src_argb,
+ uint8_t* dst_rgb24,
+ int width) {
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e8m2(width);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
+ width -= vl;
+ src_argb += (4 * vl);
+ dst_rgb24 += (3 * vl);
+ } while (width > 0);
+}
+
+void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width) {
+ size_t vl = __riscv_vsetvl_e8m2(width);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r;
+ __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_rgb24, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ width -= vl;
+ src_rgb24 += (3 * vl);
+ dst_argb += (4 * vl);
+ vl = __riscv_vsetvl_e8m2(width);
+ } while (width > 0);
}
#ifdef __cplusplus