aboutsummaryrefslogtreecommitdiff
path: root/source/row_lsx.cc
diff options
context:
space:
mode:
author Hao Chen <chenhao@loongson.cn> 2022-02-24 13:39:55 +0800
committer libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2022-03-09 08:52:54 +0000
commit 91bae707e100c2e834ccd14e41704202877d8680 (patch)
tree e07c71fcfd5e3eb9389a96345e635738c1faca03 /source/row_lsx.cc
parent 42d76a342f9f0775d5f5fd47f7ef1a9ba6444074 (diff)
download libyuv-91bae707e100c2e834ccd14e41704202877d8680.tar.gz
Optimize functions for LASX in row_lasx.cc.
1. Optimize 18 functions in source/row_lasx.cc file.
2. Make small modifications to LSX.
3. Remove some unnecessary content.

Bug: libyuv:912
Change-Id: Ifd1d85366efb9cdb3b99491e30fa450ff1848661
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3507640
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/row_lsx.cc')
-rw-r--r-- source/row_lsx.cc 16
1 files changed, 6 insertions, 10 deletions
diff --git a/source/row_lsx.cc b/source/row_lsx.cc
index a445e636..3e8b901a 100644
--- a/source/row_lsx.cc
+++ b/source/row_lsx.cc
@@ -152,7 +152,7 @@ extern "C" {
_reg1 = __lsx_vmsub_h(_reg1, const_94, _tmpg); \
_reg0 = __lsx_vmsub_h(_reg0, const_38, _tmpr); \
_reg1 = __lsx_vmsub_h(_reg1, const_18, _tmpb); \
- _dst0 = __lsx_vsrlni_b_h(_reg1, _reg0, 8); \
+ _dst0 = __lsx_vpickod_b(_reg1, _reg0); \
}
void ARGB4444ToARGBRow_LSX(const uint8_t* src_argb4444,
@@ -355,7 +355,6 @@ void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555,
__m128i const_129 = __lsx_vldi(129);
__m128i const_25 = __lsx_vldi(25);
__m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
- __m128i shuff = {0x0B030A0209010800, 0x0F070E060D050C04};
for (x = 0; x < len; x++) {
src0 = __lsx_vld(src_argb1555, 0);
@@ -384,8 +383,7 @@ void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555,
reg1 = __lsx_vmaddwod_h_bu(reg1, tmpg, const_129);
reg0 = __lsx_vmaddwev_h_bu(reg0, tmpr, const_66);
reg1 = __lsx_vmaddwod_h_bu(reg1, tmpr, const_66);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
- dst0 = __lsx_vshuf_b(dst0, dst0, shuff);
+ dst0 = __lsx_vpackod_b(reg1, reg0);
__lsx_vst(dst0, dst_y, 0);
dst_y += 16;
src_argb1555 += 32;
@@ -468,7 +466,6 @@ void RGB565ToYRow_LSX(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
__m128i const_129 = __lsx_vldi(129);
__m128i const_25 = __lsx_vldi(25);
__m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
- __m128i shuff = {0x0B030A0209010800, 0x0F070E060D050C04};
for (x = 0; x < len; x++) {
src0 = __lsx_vld(src_rgb565, 0);
@@ -495,8 +492,7 @@ void RGB565ToYRow_LSX(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
reg1 = __lsx_vmaddwod_h_bu(reg1, tmpg, const_129);
reg0 = __lsx_vmaddwev_h_bu(reg0, tmpr, const_66);
reg1 = __lsx_vmaddwod_h_bu(reg1, tmpr, const_66);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
- dst0 = __lsx_vshuf_b(dst0, dst0, shuff);
+ dst0 = __lsx_vpackod_b(reg1, reg0);
__lsx_vst(dst0, dst_y, 0);
dst_y += 16;
src_rgb565 += 32;
@@ -591,7 +587,7 @@ void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
+ dst0 = __lsx_vpickod_b(reg1, reg0);
__lsx_vst(dst0, dst_y, 0);
dst_y += 16;
src_rgb24 += 48;
@@ -939,7 +935,7 @@ void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
reg1 = __lsx_vmaddwev_h_bu(const_128, tmp3, const_150);
reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
+ dst0 = __lsx_vpickod_b(reg1, reg0);
__lsx_vst(dst0, dst_y, 0);
dst_y += 16;
src_argb += 64;
@@ -1228,7 +1224,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb,
reg1 = __lsx_vmsub_h(reg1, const_53, tmpg);
reg0 = __lsx_vmsub_h(reg0, const_21, tmpr);
reg1 = __lsx_vmsub_h(reg1, const_10, tmpb);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
+ dst0 = __lsx_vpickod_b(reg1, reg0);
__lsx_vstelm_d(dst0, dst_u, 0, 0);
__lsx_vstelm_d(dst0, dst_v, 0, 1);
dst_u += 8;