aboutsummaryrefslogtreecommitdiff
path: root/source/row_rvv.cc
diff options
context:
space:
mode:
authorBruce Lai <bruce.lai@sifive.com>2023-05-10 18:51:49 -0700
committerlibyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com>2023-05-16 19:20:49 +0000
commit11d4536002b4748ff3ed795ce893335bbb5f79fe (patch)
treef55eae84acd0493e361cc897c56c499bcfe7aef2 /source/row_rvv.cc
parent6a68b18a9680ddb16b1397118675c146c6afbd65 (diff)
downloadlibyuv-11d4536002b4748ff3ed795ce893335bbb5f79fe.tar.gz
Enable I{422,444}AlphaToARGBRow_RVV & ARGBAttenuateRow_RVV
Run on SiFive internal FPGA: I444AlphaToARGB_Opt (~16x vs scalar) I422AlphaToARGB_Opt (~10x vs scalar) ARGBAttenuate_Opt (~3x vs scalar) LIBYUV_WIDTH=1280 LIBYUV_HEIGHT=720 LIBYUV_REPEAT=10 Change-Id: I0046eb7af8104bc8e13cee1cb91a19f90940d5b0 Signed-off-by: Bruce Lai <bruce.lai@sifive.com> Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4535657 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/row_rvv.cc')
-rw-r--r--source/row_rvv.cc90
1 files changed, 89 insertions, 1 deletions
diff --git a/source/row_rvv.cc b/source/row_rvv.cc
index 99f23165..ad131924 100644
--- a/source/row_rvv.cc
+++ b/source/row_rvv.cc
@@ -16,7 +16,6 @@
*/
#include <assert.h>
-
#include "libyuv/row.h"
#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
@@ -293,6 +292,38 @@ void I444ToARGBRow_RVV(const uint8_t* src_y,
} while (w > 0);
}
+void I444AlphaToARGBRow_RVV(const uint8_t* src_y,  // RVV row kernel: Y, U, V and alpha rows in, interleaved 4-byte pixels out.
+ const uint8_t* src_u,  // advanced by vl per iteration, the same rate as src_y
+ const uint8_t* src_v,  // advanced by vl per iteration, the same rate as src_y
+ const uint8_t* src_a,  // alpha bytes; loaded and stored to the output unmodified
+ uint8_t* dst_argb,  // receives 4 bytes per pixel, stored in B, G, R, A order
+ const struct YuvConstants* yuvconstants,  // conversion constants handed to YUVTORGB_SETUP
+ int width) {  // pixel count; cast to size_t below, so a negative value would wrap to a huge count
+ size_t vl;  // NOTE(review): never assigned directly here; presumably set by the macros (defined outside this view) - confirm
+ size_t w = (size_t)width;  // pixels still to process
+ vuint8m1_t v_u, v_v;
+ vuint8m1_t v_ub, v_vr, v_ug, v_vg;
+ vuint8m1_t v_b, v_g, v_r, v_a;
+ vuint16m2_t v_yg, v_bb, v_bg, v_br;
+ vuint16m2_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, vl, v_ub, v_vr, v_ug, v_vg, v_yg, v_bb, v_bg,
+ v_br);  // fills the loop-invariant coefficient vectors once, before the loop
+ do {  // do/while: the body runs once even when width is 0
+ READYUV444(vl, v_u, v_v, v_y_16);  // NOTE(review): src_y/src_u/src_v are not read visibly; presumably this macro loads them - confirm
+ v_a = __riscv_vle8_v_u8m1(src_a, vl);  // load vl alpha bytes
+ YUVTORGB(vl, v_u, v_v, v_ub, v_vr, v_ug, v_vg, v_yg, v_bb, v_bg, v_br,
+ v_y_16, v_g_16, v_b_16, v_r_16);  // produces 16-bit G, B, R intermediates
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);  // 16-bit intermediates -> 8-bit channels
+ __riscv_vsseg4e8_v_u8m1(dst_argb, v_b, v_g, v_r, v_a, vl);  // segment store interleaves the four channel vectors per pixel
+ w -= vl;
+ src_y += vl;
+ src_a += vl;
+ src_u += vl;  // 4:4:4 - chroma pointers move in step with luma
+ src_v += vl;
+ dst_argb += vl * 4;  // 4 output bytes per pixel
+ } while (w > 0);
+}
+
void I444ToRGB24Row_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -352,6 +383,38 @@ void I422ToARGBRow_RVV(const uint8_t* src_y,
} while (w > 0);
}
+void I422AlphaToARGBRow_RVV(const uint8_t* src_y,  // RVV row kernel: Y, half-rate U/V and alpha rows in, interleaved 4-byte pixels out.
+ const uint8_t* src_u,  // advanced by vl / 2 per iteration (half the rate of src_y)
+ const uint8_t* src_v,  // advanced by vl / 2 per iteration (half the rate of src_y)
+ const uint8_t* src_a,  // alpha bytes; loaded and stored to the output unmodified
+ uint8_t* dst_argb,  // receives 4 bytes per pixel, stored in B, G, R, A order
+ const struct YuvConstants* yuvconstants,  // conversion constants handed to YUVTORGB_SETUP
+ int width) {  // pixel count; cast to size_t below, so a negative value would wrap to a huge count
+ size_t vl;  // NOTE(review): never assigned directly here; presumably set by the macros (defined outside this view) - confirm
+ size_t w = (size_t)width;  // pixels still to process
+ vuint8m1_t v_u, v_v;
+ vuint8m1_t v_ub, v_vr, v_ug, v_vg;
+ vuint8m1_t v_b, v_g, v_r, v_a;
+ vuint16m2_t v_yg, v_bb, v_bg, v_br;
+ vuint16m2_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, vl, v_ub, v_vr, v_ug, v_vg, v_yg, v_bb, v_bg,
+ v_br);  // fills the loop-invariant coefficient vectors once, before the loop
+ do {  // do/while: the body runs once even when width is 0
+ READYUV422(vl, v_u, v_v, v_y_16);  // NOTE(review): src_y/src_u/src_v are not read visibly; presumably this macro loads them - confirm
+ v_a = __riscv_vle8_v_u8m1(src_a, vl);  // load vl alpha bytes
+ YUVTORGB(vl, v_u, v_v, v_ub, v_vr, v_ug, v_vg, v_yg, v_bb, v_bg, v_br,
+ v_y_16, v_g_16, v_b_16, v_r_16);  // produces 16-bit G, B, R intermediates
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);  // 16-bit intermediates -> 8-bit channels
+ __riscv_vsseg4e8_v_u8m1(dst_argb, v_b, v_g, v_r, v_a, vl);  // segment store interleaves the four channel vectors per pixel
+ w -= vl;
+ src_y += vl;
+ src_a += vl;
+ src_u += vl / 2;  // integer division truncates; NOTE(review): assumes vl is even on every iteration except possibly the last - confirm
+ src_v += vl / 2;
+ dst_argb += vl * 4;  // 4 output bytes per pixel
+ } while (w > 0);
+}
+
void I422ToRGBARow_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -710,6 +773,31 @@ void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width) {
RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kRawI601Constants);
}
+// Scales the B, G and R bytes of every pixel by that pixel's alpha byte:
+// each output channel is the product channel * alpha shifted right by 8 with
+// rounding. The alpha byte itself is copied through unchanged.
+void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ size_t remaining = (size_t)width;
+ // The narrowing clips below round according to the vxrm CSR. Select mode 0
+ // (round-to-nearest-up) so the results match the other platforms.
+ asm volatile("csrwi vxrm, 0");
+ do {
+ vuint8m2_t blue, green, red, alpha;
+ vuint16m4_t product;
+ size_t vl = __riscv_vsetvl_e8m2(remaining);
+ // Split vl interleaved pixels into one vector per channel.
+ __riscv_vlseg4e8_v_u8m2(&blue, &green, &red, &alpha, src_argb, vl);
+ // Per channel: widening 8x8 -> 16-bit multiply by alpha, then a rounding,
+ // saturating narrow back to 8 bits with a right shift of 8.
+ product = __riscv_vwmulu_vv_u16m4(blue, alpha, vl);
+ blue = __riscv_vnclipu_wx_u8m2(product, 8, vl);
+ product = __riscv_vwmulu_vv_u16m4(green, alpha, vl);
+ green = __riscv_vnclipu_wx_u8m2(product, 8, vl);
+ product = __riscv_vwmulu_vv_u16m4(red, alpha, vl);
+ red = __riscv_vnclipu_wx_u8m2(product, 8, vl);
+ // Re-interleave the scaled channels with the original alpha.
+ __riscv_vsseg4e8_v_u8m2(dst_argb, blue, green, red, alpha, vl);
+ src_argb += vl * 4;
+ dst_argb += vl * 4;
+ remaining -= vl;
+ } while (remaining > 0);
+}
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv