diff options
author | Hao Chen <chenhao@loongson.cn> | 2022-02-24 13:39:55 +0800 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-03-09 08:52:54 +0000 |
commit | 91bae707e100c2e834ccd14e41704202877d8680 (patch) | |
tree | e07c71fcfd5e3eb9389a96345e635738c1faca03 /source/convert.cc | |
parent | 42d76a342f9f0775d5f5fd47f7ef1a9ba6444074 (diff) | |
download | libyuv-91bae707e100c2e834ccd14e41704202877d8680.tar.gz |
Optimize functions for LASX in row_lasx.cc.
1. Optimize 18 functions in the source/row_lasx.cc file.
2. Make small modifications to LSX.
3. Remove some unnecessary content.
Bug: libyuv:912
Change-Id: Ifd1d85366efb9cdb3b99491e30fa450ff1848661
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3507640
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/convert.cc')
-rw-r--r-- | source/convert.cc | 77 |
1 file changed, 64 insertions, 13 deletions
diff --git a/source/convert.cc b/source/convert.cc index 875afb30..8f02636d 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -1883,6 +1883,16 @@ int RGB24ToI420(const uint8_t* src_rgb24, } } #endif +#if defined(HAS_RGB24TOYROW_LASX) && defined(HAS_RGB24TOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB24ToUVRow = RGB24ToUVRow_Any_LASX; + RGB24ToYRow = RGB24ToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB24ToYRow = RGB24ToYRow_LASX; + RGB24ToUVRow = RGB24ToUVRow_LASX; + } + } +#endif // Other platforms do intermediate conversion from RGB24 to ARGB. #else // HAS_RGB24TOYROW @@ -2205,6 +2215,16 @@ int RAWToI420(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTOYROW_LASX) && defined(HAS_RAWTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RAWToUVRow = RAWToUVRow_Any_LASX; + RAWToYRow = RAWToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RAWToYRow = RAWToYRow_LASX; + RAWToUVRow = RAWToUVRow_LASX; + } + } +#endif // Other platforms do intermediate conversion from RAW to ARGB. #else // HAS_RAWTOYROW @@ -2463,7 +2483,7 @@ int RGB565ToI420(const uint8_t* src_rgb565, int height) { int y; #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_LSX)) + defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width) = RGB565ToUVRow_C; @@ -2501,7 +2521,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, } } // MSA version does direct RGB565 to YUV. 
-#elif (defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_LSX)) +#elif (defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_LSX) \ + || defined(HAS_RGB565TOYROW_LASX)) #if defined(HAS_RGB565TOYROW_MSA) && defined(HAS_RGB565TOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB565ToUVRow = RGB565ToUVRow_Any_MSA; @@ -2522,6 +2543,16 @@ int RGB565ToI420(const uint8_t* src_rgb565, } } #endif +#if defined(HAS_RGB565TOYROW_LASX) && defined(HAS_RGB565TOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB565ToUVRow = RGB565ToUVRow_Any_LASX; + RGB565ToYRow = RGB565ToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB565ToYRow = RGB565ToYRow_LASX; + RGB565ToUVRow = RGB565ToUVRow_LASX; + } + } +#endif // Other platforms do intermediate conversion from RGB565 to ARGB. #else #if defined(HAS_RGB565TOARGBROW_SSE2) @@ -2575,14 +2606,14 @@ int RGB565ToI420(const uint8_t* src_rgb565, #endif { #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_LSX)) + defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) // Allocate 2 rows of ARGB. 
const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_LSX)) + defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width); RGB565ToYRow(src_rgb565, dst_y, width); RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); @@ -2600,7 +2631,7 @@ int RGB565ToI420(const uint8_t* src_rgb565, } if (height & 1) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_LSX)) + defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width); RGB565ToYRow(src_rgb565, dst_y, width); #else @@ -2610,7 +2641,7 @@ int RGB565ToI420(const uint8_t* src_rgb565, #endif } #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_LSX)) + defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) free_aligned_buffer_64(row); #endif } @@ -2631,7 +2662,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, int height) { int y; #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_LSX)) + defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGB1555ToUVRow_C; @@ -2670,7 +2701,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } } // MSA version does direct ARGB1555 to YUV. 
-#elif (defined(HAS_ARGB1555TOYROW_MSA)) +#elif (defined(HAS_ARGB1555TOYROW_MSA) || defined(HAS_ARGB1555TOYROW_LSX) \ + || defined(HAS_ARGB1555TOYROW_LASX)) #if defined(HAS_ARGB1555TOYROW_MSA) && defined(HAS_ARGB1555TOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGB1555ToUVRow = ARGB1555ToUVRow_Any_MSA; @@ -2681,7 +2713,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } } #endif -#elif (defined(HAS_ARGB1555TOYROW_LSX) && defined(HAS_ARGB1555TOUVROW_LSX)) +#if defined(HAS_ARGB1555TOYROW_LSX) && defined(HAS_ARGB1555TOUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGB1555ToUVRow = ARGB1555ToUVRow_Any_LSX; ARGB1555ToYRow = ARGB1555ToYRow_Any_LSX; @@ -2690,6 +2722,17 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, ARGB1555ToUVRow = ARGB1555ToUVRow_LSX; } } +#endif +#if defined(HAS_ARGB1555TOYROW_LASX) && defined(HAS_ARGB1555TOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGB1555ToUVRow = ARGB1555ToUVRow_Any_LASX; + ARGB1555ToYRow = ARGB1555ToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGB1555ToYRow = ARGB1555ToYRow_LASX; + ARGB1555ToUVRow = ARGB1555ToUVRow_LASX; + } + } +#endif // Other platforms do intermediate conversion from ARGB1555 to ARGB. #else #if defined(HAS_ARGB1555TOARGBROW_SSE2) @@ -2743,7 +2786,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, #endif { #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_LSX)) + defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) // Allocate 2 rows of ARGB. 
const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); @@ -2751,7 +2794,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_LSX)) + defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width); ARGB1555ToYRow(src_argb1555, dst_y, width); ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, @@ -2771,7 +2814,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } if (height & 1) { #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_LSX)) + defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width); ARGB1555ToYRow(src_argb1555, dst_y, width); #else @@ -2781,7 +2824,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, #endif } #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_LSX)) + defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) free_aligned_buffer_64(row); #endif } @@ -2873,6 +2916,14 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, } } #endif +#if defined(HAS_ARGB4444TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_LASX; + } + } +#endif #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYRow = ARGBToYRow_Any_SSSE3; |