diff options
author | Hao Chen <chenhao@loongson.cn> | 2021-12-20 20:14:11 +0800 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2022-01-21 01:34:38 +0000 |
commit | dfe046d27255cff06fc4cfe42c6d373fd83bc2aa (patch) | |
tree | ce440885c31987ee6177ead9edc5aa2be7439695 /source/convert.cc | |
parent | de8ae8c679f5a42fb9f9f65318d6cb95112180d6 (diff) | |
download | libyuv-dfe046d27255cff06fc4cfe42c6d373fd83bc2aa.tar.gz |
Add optimized functions to the row_lsx.cc file.
Optimize 44 functions in the source/row_lsx.cc file.
All test cases pass on the LoongArch platform.
Bug: libyuv:913
Change-Id: Ic80a5751314adc2e9bd435f2bbd928ab017a90f9
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3351467
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/convert.cc')
-rw-r--r-- | source/convert.cc | 120 |
1 file changed, 107 insertions, 13 deletions
diff --git a/source/convert.cc b/source/convert.cc index 1e524de3..67bcca80 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -644,6 +644,14 @@ int I422ToNV21(const uint8_t* src_y, } } #endif +#if defined(HAS_MERGEUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MergeUVRow = MergeUVRow_Any_LSX; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_LSX; + } + } +#endif #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; @@ -684,6 +692,14 @@ int I422ToNV21(const uint8_t* src_y, } } #endif +#if defined(HAS_INTERPOLATEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + InterpolateRow = InterpolateRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + InterpolateRow = InterpolateRow_LSX; + } + } +#endif if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, halfwidth, height); @@ -1562,6 +1578,16 @@ int BGRAToI420(const uint8_t* src_bgra, } } #endif +#if defined(HAS_BGRATOYROW_LSX) && defined(HAS_BGRATOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + BGRAToYRow = BGRAToYRow_Any_LSX; + BGRAToUVRow = BGRAToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + BGRAToYRow = BGRAToYRow_LSX; + BGRAToUVRow = BGRAToUVRow_LSX; + } + } +#endif for (y = 0; y < height - 1; y += 2) { BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width); @@ -1676,6 +1702,16 @@ int ABGRToI420(const uint8_t* src_abgr, } } #endif +#if defined(HAS_ABGRTOYROW_LSX) && defined(HAS_ABGRTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYRow = ABGRToYRow_Any_LSX; + ABGRToUVRow = ABGRToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_LSX; + ABGRToUVRow = ABGRToUVRow_LSX; + } + } +#endif for (y = 0; y < height - 1; y += 2) { ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width); @@ -1774,6 +1810,16 @@ int RGBAToI420(const uint8_t* src_rgba, } } #endif +#if defined(HAS_RGBATOYROW_LSX) && defined(HAS_RGBATOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGBAToYRow = RGBAToYRow_Any_LSX; + RGBAToUVRow = 
RGBAToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGBAToYRow = RGBAToYRow_LSX; + RGBAToUVRow = RGBAToUVRow_LSX; + } + } +#endif for (y = 0; y < height - 1; y += 2) { RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width); @@ -1793,7 +1839,7 @@ int RGBAToI420(const uint8_t* src_rgba, // Enabled if 1 pass is available #if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_MMI)) + defined(HAS_RGB24TOYROW_MMI) || defined(HAS_RGB24TOYROW_LSX)) #define HAS_RGB24TOYROW #endif @@ -1872,6 +1918,16 @@ int RGB24ToI420(const uint8_t* src_rgb24, } } #endif +#if defined(HAS_RGB24TOYROW_LSX) && defined(HAS_RGB24TOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB24ToUVRow = RGB24ToUVRow_Any_LSX; + RGB24ToYRow = RGB24ToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGB24ToYRow = RGB24ToYRow_LSX; + RGB24ToUVRow = RGB24ToUVRow_LSX; + } + } +#endif // Other platforms do intermediate conversion from RGB24 to ARGB. #else // HAS_RGB24TOYROW @@ -2131,7 +2187,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24, // Enabled if 1 pass is available #if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ - defined(HAS_RAWTOYROW_MMI)) + defined(HAS_RAWTOYROW_MMI) || defined(HAS_RAWTOYROW_LSX)) #define HAS_RAWTOYROW #endif @@ -2209,6 +2265,16 @@ int RAWToI420(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTOYROW_LSX) && defined(HAS_RAWTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RAWToUVRow = RAWToUVRow_Any_LSX; + RAWToYRow = RAWToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RAWToYRow = RAWToYRow_LSX; + RAWToUVRow = RAWToUVRow_LSX; + } + } +#endif // Other platforms do intermediate conversion from RAW to ARGB. 
#else // HAS_RAWTOYROW @@ -2480,7 +2546,7 @@ int RGB565ToI420(const uint8_t* src_rgb565, int height) { int y; #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_MMI)) + defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_LSX)) void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width) = RGB565ToUVRow_C; @@ -2518,7 +2584,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, } } // MMI and MSA version does direct RGB565 to YUV. -#elif (defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_MSA)) +#elif (defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_MSA) \ + || defined(HAS_RGB565TOYROW_LSX)) #if defined(HAS_RGB565TOYROW_MMI) && defined(HAS_RGB565TOUVROW_MMI) if (TestCpuFlag(kCpuHasMMI)) { RGB565ToUVRow = RGB565ToUVRow_Any_MMI; @@ -2541,6 +2608,16 @@ int RGB565ToI420(const uint8_t* src_rgb565, } } #endif +#if defined(HAS_RGB565TOYROW_LSX) && defined(HAS_RGB565TOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB565ToUVRow = RGB565ToUVRow_Any_LSX; + RGB565ToYRow = RGB565ToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGB565ToYRow = RGB565ToYRow_LSX; + RGB565ToUVRow = RGB565ToUVRow_LSX; + } + } +#endif // Other platforms do intermediate conversion from RGB565 to ARGB. #else #if defined(HAS_RGB565TOARGBROW_SSE2) @@ -2594,14 +2671,14 @@ int RGB565ToI420(const uint8_t* src_rgb565, #endif { #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_MMI)) + defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_LSX)) // Allocate 2 rows of ARGB. 
const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_MMI)) + defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_LSX)) RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width); RGB565ToYRow(src_rgb565, dst_y, width); RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); @@ -2619,7 +2696,7 @@ int RGB565ToI420(const uint8_t* src_rgb565, } if (height & 1) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_MMI)) + defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_LSX)) RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width); RGB565ToYRow(src_rgb565, dst_y, width); #else @@ -2629,7 +2706,7 @@ int RGB565ToI420(const uint8_t* src_rgb565, #endif } #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_MMI)) + defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_LSX)) free_aligned_buffer_64(row); #endif } @@ -2650,7 +2727,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, int height) { int y; #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_MMI)) + defined(HAS_ARGB1555TOYROW_MMI) || defined(HAS_ARGB1555TOYROW_LSX)) void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGB1555ToUVRow_C; @@ -2712,6 +2789,15 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } } #endif +#elif (defined(HAS_ARGB1555TOYROW_LSX) && defined(HAS_ARGB1555TOUVROW_LSX)) + if (TestCpuFlag(kCpuHasLSX)) { + ARGB1555ToUVRow = ARGB1555ToUVRow_Any_LSX; + ARGB1555ToYRow = ARGB1555ToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGB1555ToYRow = ARGB1555ToYRow_LSX; + ARGB1555ToUVRow = ARGB1555ToUVRow_LSX; + } + } // Other platforms do intermediate conversion from ARGB1555 
to ARGB. #else #if defined(HAS_ARGB1555TOARGBROW_SSE2) @@ -2765,7 +2851,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, #endif { #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_MMI)) + defined(HAS_ARGB1555TOYROW_MMI) || defined(HAS_ARGB1555TOYROW_LSX)) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); @@ -2773,7 +2859,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_MMI)) + defined(HAS_ARGB1555TOYROW_MMI) || defined(HAS_ARGB1555TOYROW_LSX)) ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width); ARGB1555ToYRow(src_argb1555, dst_y, width); ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, @@ -2793,7 +2879,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } if (height & 1) { #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_MMI)) + defined(HAS_ARGB1555TOYROW_MMI) || defined(HAS_ARGB1555TOYROW_LSX)) ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width); ARGB1555ToYRow(src_argb1555, dst_y, width); #else @@ -2803,7 +2889,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, #endif } #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_MMI)) + defined(HAS_ARGB1555TOYROW_MMI) || defined(HAS_ARGB1555TOYROW_LSX)) free_aligned_buffer_64(row); #endif } @@ -2898,6 +2984,14 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, } } #endif +#if defined(HAS_ARGB4444TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYRow = ARGBToYRow_Any_SSSE3; |