diff options
author | Hao Chen <chenhao@loongson.cn> | 2022-02-24 13:39:55 +0800 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-03-09 08:52:54 +0000 |
commit | 91bae707e100c2e834ccd14e41704202877d8680 (patch) | |
tree | e07c71fcfd5e3eb9389a96345e635738c1faca03 /source/row_lsx.cc | |
parent | 42d76a342f9f0775d5f5fd47f7ef1a9ba6444074 (diff) | |
download | libyuv-91bae707e100c2e834ccd14e41704202877d8680.tar.gz |
Optimize functions for LASX in row_lasx.cc.
1. Optimize 18 functions in source/row_lasx.cc file.
2. Make small modifications to LSX.
3. Remove some unnecessary content.
Bug: libyuv:912
Change-Id: Ifd1d85366efb9cdb3b99491e30fa450ff1848661
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3507640
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/row_lsx.cc')
-rw-r--r-- | source/row_lsx.cc | 16 |
1 files changed, 6 insertions, 10 deletions
```diff
diff --git a/source/row_lsx.cc b/source/row_lsx.cc
index a445e636..3e8b901a 100644
--- a/source/row_lsx.cc
+++ b/source/row_lsx.cc
@@ -152,7 +152,7 @@ extern "C" {
     _reg1 = __lsx_vmsub_h(_reg1, const_94, _tmpg); \
     _reg0 = __lsx_vmsub_h(_reg0, const_38, _tmpr); \
     _reg1 = __lsx_vmsub_h(_reg1, const_18, _tmpb); \
-    _dst0 = __lsx_vsrlni_b_h(_reg1, _reg0, 8); \
+    _dst0 = __lsx_vpickod_b(_reg1, _reg0); \
   }
 
 void ARGB4444ToARGBRow_LSX(const uint8_t* src_argb4444,
@@ -355,7 +355,6 @@ void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555,
   __m128i const_129 = __lsx_vldi(129);
   __m128i const_25 = __lsx_vldi(25);
   __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
-  __m128i shuff = {0x0B030A0209010800, 0x0F070E060D050C04};
 
   for (x = 0; x < len; x++) {
     src0 = __lsx_vld(src_argb1555, 0);
@@ -384,8 +383,7 @@ void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555,
     reg1 = __lsx_vmaddwod_h_bu(reg1, tmpg, const_129);
     reg0 = __lsx_vmaddwev_h_bu(reg0, tmpr, const_66);
     reg1 = __lsx_vmaddwod_h_bu(reg1, tmpr, const_66);
-    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
-    dst0 = __lsx_vshuf_b(dst0, dst0, shuff);
+    dst0 = __lsx_vpackod_b(reg1, reg0);
     __lsx_vst(dst0, dst_y, 0);
     dst_y += 16;
     src_argb1555 += 32;
@@ -468,7 +466,6 @@ void RGB565ToYRow_LSX(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
   __m128i const_129 = __lsx_vldi(129);
   __m128i const_25 = __lsx_vldi(25);
   __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
-  __m128i shuff = {0x0B030A0209010800, 0x0F070E060D050C04};
 
   for (x = 0; x < len; x++) {
     src0 = __lsx_vld(src_rgb565, 0);
@@ -495,8 +492,7 @@ void RGB565ToYRow_LSX(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
     reg1 = __lsx_vmaddwod_h_bu(reg1, tmpg, const_129);
     reg0 = __lsx_vmaddwev_h_bu(reg0, tmpr, const_66);
     reg1 = __lsx_vmaddwod_h_bu(reg1, tmpr, const_66);
-    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
-    dst0 = __lsx_vshuf_b(dst0, dst0, shuff);
+    dst0 = __lsx_vpackod_b(reg1, reg0);
     __lsx_vst(dst0, dst_y, 0);
     dst_y += 16;
     src_rgb565 += 32;
@@ -591,7 +587,7 @@ void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
     reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
     reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
     reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
-    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
+    dst0 = __lsx_vpickod_b(reg1, reg0);
     __lsx_vst(dst0, dst_y, 0);
     dst_y += 16;
     src_rgb24 += 48;
@@ -939,7 +935,7 @@ void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
     reg1 = __lsx_vmaddwev_h_bu(const_128, tmp3, const_150);
     reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
     reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
-    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
+    dst0 = __lsx_vpickod_b(reg1, reg0);
     __lsx_vst(dst0, dst_y, 0);
     dst_y += 16;
     src_argb += 64;
@@ -1228,7 +1224,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb,
     reg1 = __lsx_vmsub_h(reg1, const_53, tmpg);
     reg0 = __lsx_vmsub_h(reg0, const_21, tmpr);
     reg1 = __lsx_vmsub_h(reg1, const_10, tmpb);
-    dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
+    dst0 = __lsx_vpickod_b(reg1, reg0);
     __lsx_vstelm_d(dst0, dst_u, 0, 0);
     __lsx_vstelm_d(dst0, dst_v, 0, 1);
     dst_u += 8;
```