diff options
author | Darren Hsieh <darren.hsieh@sifive.com> | 2023-05-09 01:39:06 -0700 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2023-05-10 19:50:56 +0000 |
commit | 497ea35688f997edb4c42ef1cdd9f2ab4efb9e29 (patch) | |
tree | f1b3303372165697ab22583783afb8c2b1525e2e /source/row_rvv.cc | |
parent | 964d963afb164e768919f5bd2284202d87a3d37c (diff) | |
download | libyuv-497ea35688f997edb4c42ef1cdd9f2ab4efb9e29.tar.gz |
Enable I444To{ARGB,RGB24}Row_RVV
Run on SiFive internal FPGA:
I444ToARGB_Opt (~16x vs scalar)
I444ToRGB24_Opt (~10x vs scalar)
LIBYUV_WIDTH=1280 LIBYUV_HEIGHT=720 LIBYUV_REPEAT=10
Change-Id: Idae7dc46ef648beaa14b58ba3eb56b67b17c9b3b
Signed-off-by: Darren Hsieh <darren.hsieh@sifive.com>
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4520761
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/row_rvv.cc')
-rw-r--r-- | source/row_rvv.cc | 116 |
1 files changed, 93 insertions, 23 deletions
diff --git a/source/row_rvv.cc b/source/row_rvv.cc index 475d3e66..39ce71e4 100644 --- a/source/row_rvv.cc +++ b/source/row_rvv.cc @@ -65,24 +65,35 @@ extern "C" { v_y_16 = __riscv_vwaddu_vx_u16m2(v_y, 0, vl); \ } +// Read [VLEN/8] Y, [VLEN/8] U, and [VLEN/8] V from 444 +#define READYUV444(vl, v_u, v_v, v_y_16) \ + { \ + vuint8m1_t v_y; \ + vl = __riscv_vsetvl_e8m1(w); \ + v_y = __riscv_vle8_v_u8m1(src_y, vl); \ + v_u = __riscv_vle8_v_u8m1(src_u, vl); \ + v_v = __riscv_vle8_v_u8m1(src_v, vl); \ + v_y_16 = __riscv_vwaddu_vx_u16m2(v_y, 0, vl); \ + } + // Convert from YUV to fixed point RGB -#define YUVTORGB(vl, v_u, v_v, v_ug, v_vg, v_yg, v_bb, v_bg, v_br, v_y_16, \ - v_g_16, v_b_16, v_r_16) \ - { \ - vuint16m2_t v_tmp0, v_tmp1, v_tmp2, v_tmp3, v_tmp4; \ - vuint32m4_t v_tmp5; \ - v_tmp0 = __riscv_vwmulu_vv_u16m2(v_u, v_ug, vl); \ - v_y_16 = __riscv_vmul_vx_u16m2(v_y_16, 0x0101, vl); \ - v_tmp0 = __riscv_vwmaccu_vv_u16m2(v_tmp0, v_vg, v_v, vl); \ - v_tmp1 = __riscv_vwmulu_vv_u16m2(v_u, v_ub, vl); \ - v_tmp5 = __riscv_vwmulu_vv_u32m4(v_y_16, v_yg, vl); \ - v_tmp2 = __riscv_vnsrl_wx_u16m2(v_tmp5, 16, vl); \ - v_tmp3 = __riscv_vadd_vv_u16m2(v_tmp2, v_bg, vl); \ - v_tmp4 = __riscv_vadd_vv_u16m2(v_tmp2, v_tmp1, vl); \ - v_tmp2 = __riscv_vwmaccu_vv_u16m2(v_tmp2, v_vr, v_v, vl); \ - v_g_16 = __riscv_vssubu_vv_u16m2(v_tmp3, v_tmp0, vl); \ - v_b_16 = __riscv_vssubu_vv_u16m2(v_tmp4, v_bb, vl); \ - v_r_16 = __riscv_vssubu_vv_u16m2(v_tmp2, v_br, vl); \ +#define YUVTORGB(vl, v_u, v_v, v_ub, v_vr, v_ug, v_vg, v_yg, v_bb, v_bg, v_br, \ + v_y_16, v_g_16, v_b_16, v_r_16) \ + { \ + vuint16m2_t v_tmp0, v_tmp1, v_tmp2, v_tmp3, v_tmp4; \ + vuint32m4_t v_tmp5; \ + v_tmp0 = __riscv_vwmulu_vv_u16m2(v_u, v_ug, vl); \ + v_y_16 = __riscv_vmul_vx_u16m2(v_y_16, 0x0101, vl); \ + v_tmp0 = __riscv_vwmaccu_vv_u16m2(v_tmp0, v_vg, v_v, vl); \ + v_tmp1 = __riscv_vwmulu_vv_u16m2(v_u, v_ub, vl); \ + v_tmp5 = __riscv_vwmulu_vv_u32m4(v_y_16, v_yg, vl); \ + v_tmp2 = __riscv_vnsrl_wx_u16m2(v_tmp5, 16, vl); \ + v_tmp3 = __riscv_vadd_vv_u16m2(v_tmp2, v_bg, vl); \ + v_tmp4 = __riscv_vadd_vv_u16m2(v_tmp2, v_tmp1, vl); \ + v_tmp2 = __riscv_vwmaccu_vv_u16m2(v_tmp2, v_vr, v_v, vl); \ + v_g_16 = __riscv_vssubu_vv_u16m2(v_tmp3, v_tmp0, vl); \ + v_b_16 = __riscv_vssubu_vv_u16m2(v_tmp4, v_bb, vl); \ + v_r_16 = __riscv_vssubu_vv_u16m2(v_tmp2, v_br, vl); \ } // Convert from fixed point RGB To 8 bit RGB @@ -252,6 +263,65 @@ void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24, } while (w > 0); } +void I444ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + size_t vl; + size_t w = (size_t)width; + vuint8m1_t v_u, v_v; + vuint8m1_t v_ub, v_vr, v_ug, v_vg; + vuint8m1_t v_b, v_g, v_r, v_a; + vuint16m2_t v_yg, v_bb, v_bg, v_br; + vuint16m2_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, vl, v_ub, v_vr, v_ug, v_vg, v_yg, v_bb, v_bg, + v_br); + v_a = __riscv_vmv_v_x_u8m1(255u, vl); + do { + READYUV444(vl, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, v_ub, v_vr, v_ug, v_vg, v_yg, v_bb, v_bg, v_br, + v_y_16, v_g_16, v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg4e8_v_u8m1(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_y += vl; + src_u += vl; + src_v += vl; + dst_argb += vl * 4; + } while (w > 0); +} + +void I444ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + size_t vl; + size_t w = (size_t)width; + vuint8m1_t v_u, v_v; + vuint8m1_t v_ub, v_vr, v_ug, v_vg; + vuint8m1_t v_b, v_g, v_r; + vuint16m2_t v_yg, v_bb, v_bg, v_br; + vuint16m2_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, vl, v_ub, v_vr, v_ug, v_vg, v_yg, v_bb, v_bg, + v_br); + do { + READYUV444(vl, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, v_ub, v_vr, v_ug, v_vg, v_yg, v_bb, v_bg, v_br, + v_y_16, v_g_16, v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg3e8_v_u8m1(dst_rgb24, v_b, v_g, v_r, vl); + w -= vl; + src_y += vl; + src_u += vl; + src_v += vl; + dst_rgb24 += vl * 3; + } while (w > 0); +} + void I422ToARGBRow_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -270,8 +340,8 @@ void I422ToARGBRow_RVV(const uint8_t* src_y, v_a = __riscv_vmv_v_x_u8m1(255u, vl); do { READYUV422(vl, v_u, v_v, v_y_16); - YUVTORGB(vl, v_u, v_v, v_ug, v_vg, v_yg, v_bb, v_bg, v_br, v_y_16, v_g_16, - v_b_16, v_r_16); + YUVTORGB(vl, v_u, v_v, v_ub, v_vr, v_ug, v_vg, v_yg, v_bb, v_bg, v_br, + v_y_16, v_g_16, v_b_16, v_r_16); RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); __riscv_vsseg4e8_v_u8m1(dst_argb, v_b, v_g, v_r, v_a, vl); w -= vl; @@ -300,8 +370,8 @@ void I422ToRGBARow_RVV(const uint8_t* src_y, v_a = __riscv_vmv_v_x_u8m1(255u, vl); do { READYUV422(vl, v_u, v_v, v_y_16); - YUVTORGB(vl, v_u, v_v, v_ug, v_vg, v_yg, v_bb, v_bg, v_br, v_y_16, v_g_16, - v_b_16, v_r_16); + YUVTORGB(vl, v_u, v_v, v_ub, v_vr, v_ug, v_vg, v_yg, v_bb, v_bg, v_br, + v_y_16, v_g_16, v_b_16, v_r_16); RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); __riscv_vsseg4e8_v_u8m1(dst_rgba, v_a, v_b, v_g, v_r, vl); w -= vl; @@ -329,8 +399,8 @@ void I422ToRGB24Row_RVV(const uint8_t* src_y, v_br); do { READYUV422(vl, v_u, v_v, v_y_16); - YUVTORGB(vl, v_u, v_v, v_ug, v_vg, v_yg, v_bb, v_bg, v_br, v_y_16, v_g_16, - v_b_16, v_r_16); + YUVTORGB(vl, v_u, v_v, v_ub, v_vr, v_ug, v_vg, v_yg, v_bb, v_bg, v_br, + v_y_16, v_g_16, v_b_16, v_r_16); RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); __riscv_vsseg3e8_v_u8m1(dst_rgb24, v_b, v_g, v_r, vl); w -= vl; |