aboutsummaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
author Bruce Lai <bruce.lai@sifive.com> 2023-06-15 04:56:58 -0700
committer libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2023-06-15 23:45:24 +0000
commit 04821d1e7d60845525e8db55c7bcd41ef5be9406 (patch)
tree 46b29e13d76928a8423b346d085eda3eb215e4bb /source
parent 552571e8b24b2619c39ec176e6cb8e75d3e7fdd3 (diff)
download libyuv-04821d1e7d60845525e8db55c7bcd41ef5be9406.tar.gz
[RVV] Enable ARGBExtractAlphaRow/ARGBCopyYToAlphaRow
* Run on SiFive internal FPGA:
  TestARGBExtractAlpha (~3.2x vs scalar)
  TestARGBCopyYToAlpha (~1.6x vs scalar)

Change-Id: I36525c67e8ac3f71ea9d1a58c7dc15a4009d9da1
Signed-off-by: Bruce Lai <bruce.lai@sifive.com>
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4617955
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source')
-rw-r--r-- source/convert.cc | 5
-rw-r--r-- source/planar_functions.cc | 10
-rw-r--r-- source/row_rvv.cc | 29
3 files changed, 44 insertions, 0 deletions
diff --git a/source/convert.cc b/source/convert.cc
index b11ab1bf..b68fb1d3 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -2128,6 +2128,11 @@ int ARGBToI420Alpha(const uint8_t* src_argb,
: ARGBExtractAlphaRow_Any_LSX;
}
#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV;
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index d115a2a1..dcc37836 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -5340,6 +5340,11 @@ int ARGBExtractAlpha(const uint8_t* src_argb,
: ARGBExtractAlphaRow_Any_LSX;
}
#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV;
+ }
+#endif
for (int y = 0; y < height; ++y) {
ARGBExtractAlphaRow(src_argb, dst_a, width);
@@ -5391,6 +5396,11 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBCOPYYTOALPHAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBCopyYToAlphaRow(src_y, dst_argb, width);
diff --git a/source/row_rvv.cc b/source/row_rvv.cc
index 27e91a3b..a79560c7 100644
--- a/source/row_rvv.cc
+++ b/source/row_rvv.cc
@@ -948,6 +948,35 @@ void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
} while (w > 0);
}
+void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb,
+                             uint8_t* dst_a,
+                             int width) {
+  size_t w = (size_t)width;  // pixels still to be processed
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);  // pixels handled in this pass
+    vuint8m2_t v_b, v_g, v_r, v_a;  // libyuv ARGB is B, G, R, A in memory
+    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+    __riscv_vse8_v_u8m2(dst_a, v_a, vl);  // only the 4th field (A) is kept
+    w -= vl;
+    src_argb += vl * 4;  // 4 source bytes consumed per pixel
+    dst_a += vl;         // 1 alpha byte produced per pixel
+  } while (w > 0);
+}
+
+void ARGBCopyYToAlphaRow_RVV(const uint8_t* src, uint8_t* dst, int width) {
+  const ptrdiff_t kPixelBytes = 4;   // byte step between written samples
+  uint8_t* dst_alpha = dst + 3;      // writes land on byte 3 of each group
+  size_t remaining = (size_t)width;  // samples still to copy
+  do {
+    const size_t n = __riscv_vsetvl_e8m8(remaining);
+    vuint8m8_t v_y = __riscv_vle8_v_u8m8(src, n);  // contiguous load
+    __riscv_vsse8_v_u8m8(dst_alpha, kPixelBytes, v_y, n);  // strided store
+    src += n;
+    dst_alpha += n * kPixelBytes;
+    remaining -= n;
+  } while (remaining > 0);
+}
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv