diff options
author | Hao Chen <chenhao@loongson.cn> | 2021-12-20 20:14:11 +0800 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2022-01-21 01:34:38 +0000 |
commit | dfe046d27255cff06fc4cfe42c6d373fd83bc2aa (patch) | |
tree | ce440885c31987ee6177ead9edc5aa2be7439695 /source/planar_functions.cc | |
parent | de8ae8c679f5a42fb9f9f65318d6cb95112180d6 (diff) | |
download | libyuv-dfe046d27255cff06fc4cfe42c6d373fd83bc2aa.tar.gz |
Add optimization functions in row_lsx.cc file.
Optimize 44 functions in source/row_lsx.cc file.
All test cases passed on loongarch platform.
Bug: libyuv:913
Change-Id: Ic80a5751314adc2e9bd435f2bbd928ab017a90f9
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3351467
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/planar_functions.cc')
-rw-r--r-- | source/planar_functions.cc | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/source/planar_functions.cc b/source/planar_functions.cc index af555338..03a16c69 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -466,6 +466,14 @@ void SplitUVPlane(const uint8_t* src_uv, } } #endif +#if defined(HAS_SPLITUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SplitUVRow = SplitUVRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + SplitUVRow = SplitUVRow_LSX; + } + } +#endif for (y = 0; y < height; ++y) { // Copy a row of UV. @@ -541,6 +549,14 @@ void MergeUVPlane(const uint8_t* src_u, } } #endif +#if defined(HAS_MERGEUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MergeUVRow = MergeUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + MergeUVRow = MergeUVRow_LSX; + } + } +#endif for (y = 0; y < height; ++y) { // Merge a row of U and V into a row of UV. @@ -2322,6 +2338,11 @@ ARGBBlendRow GetARGBBlend() { ARGBBlendRow = ARGBBlendRow_MSA; } #endif +#if defined(HAS_ARGBBLENDROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBBlendRow = ARGBBlendRow_LSX; + } +#endif return ARGBBlendRow; } @@ -2904,6 +2925,14 @@ int RAWToRGB24(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTORGB24ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RAWToRGB24Row = RAWToRGB24Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RAWToRGB24Row = RAWToRGB24Row_LSX; + } + } +#endif for (y = 0; y < height; ++y) { RAWToRGB24Row(src_raw, dst_rgb24, width); @@ -2958,6 +2987,14 @@ void SetPlane(uint8_t* dst_y, SetRow = SetRow_MSA; } #endif +#if defined(HAS_SETROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SetRow = SetRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + SetRow = SetRow_LSX; + } + } +#endif // Set plane for (y = 0; y < height; ++y) { @@ -3055,6 +3092,14 @@ int ARGBRect(uint8_t* dst_argb, } } #endif +#if defined(HAS_ARGBSETROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBSetRow = ARGBSetRow_Any_LSX; + if (IS_ALIGNED(width, 4)) { + ARGBSetRow = ARGBSetRow_LSX; + } + } +#endif // Set plane for (y = 0; y < height; ++y) { @@ -3423,6 +3468,11 @@ int ARGBColorMatrix(const uint8_t* src_argb, ARGBColorMatrixRow = ARGBColorMatrixRow_MSA; } #endif +#if defined(HAS_ARGBCOLORMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { + ARGBColorMatrixRow = ARGBColorMatrixRow_LSX; + } +#endif for (y = 0; y < height; ++y) { ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width); src_argb += src_stride_argb; @@ -3588,6 +3638,11 @@ int ARGBQuantize(uint8_t* dst_argb, ARGBQuantizeRow = ARGBQuantizeRow_MSA; } #endif +#if defined(HAS_ARGBQUANTIZEROW_LSX) + if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { + ARGBQuantizeRow = ARGBQuantizeRow_LSX; + } +#endif for (y = 0; y < height; ++y) { ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width); dst += dst_stride_argb; @@ -3881,6 +3936,14 @@ int InterpolatePlane(const uint8_t* src0, } } #endif +#if defined(HAS_INTERPOLATEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + InterpolateRow = InterpolateRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + InterpolateRow = InterpolateRow_LSX; + } + } +#endif for (y = 0; y < height; ++y) { InterpolateRow(dst, src0, src1 - src0, width, interpolation); @@ -4243,6 +4306,14 @@ static int ARGBSobelize(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYJRow = ARGBToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_LSX; + } + } +#endif #if defined(HAS_SOBELYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { @@ -4374,6 +4445,14 @@ int ARGBSobel(const uint8_t* src_argb, } } #endif +#if defined(HAS_SOBELROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SobelRow = SobelRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + SobelRow = SobelRow_LSX; + } + } +#endif return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height, SobelRow); } @@ -4420,6 +4499,14 @@ int ARGBSobelToPlane(const uint8_t* src_argb, } } #endif +#if defined(HAS_SOBELTOPLANEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SobelToPlaneRow = SobelToPlaneRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + SobelToPlaneRow = SobelToPlaneRow_LSX; + } + } +#endif return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width, height, SobelToPlaneRow); } @@ -4467,6 +4554,14 @@ int ARGBSobelXY(const uint8_t* src_argb, } } #endif +#if defined(HAS_SOBELXYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SobelXYRow = SobelXYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + SobelXYRow = SobelXYRow_LSX; + } + } +#endif return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height, SobelXYRow); } @@ -4590,6 +4685,14 @@ int HalfFloatPlane(const uint16_t* src_y, } } #endif +#if defined(HAS_HALFFLOATROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + HalfFloatRow = HalfFloatRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + HalfFloatRow = HalfFloatRow_LSX; + } + } +#endif for (y = 0; y < height; ++y) { HalfFloatRow(src_y, dst_y, scale, width); @@ -4776,6 +4879,12 @@ int ARGBExtractAlpha(const uint8_t* src_argb, : ARGBExtractAlphaRow_Any_MSA; } #endif +#if defined(HAS_ARGBEXTRACTALPHAROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_LSX + : ARGBExtractAlphaRow_Any_LSX; + } +#endif for (int y = 0; y < height; ++y) { ARGBExtractAlphaRow(src_argb, dst_a, width); @@ -4912,6 +5021,14 @@ int YUY2ToNV12(const uint8_t* src_yuy2, } } #endif +#if defined(HAS_SPLITUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SplitUVRow = SplitUVRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + SplitUVRow = SplitUVRow_LSX; + } + } +#endif #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; @@ -4952,6 +5069,14 @@ int YUY2ToNV12(const uint8_t* src_yuy2, } } #endif +#if defined(HAS_INTERPOLATEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + InterpolateRow = InterpolateRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + InterpolateRow = InterpolateRow_LSX; + } + } +#endif { int awidth = halfwidth * 2; @@ -5044,6 +5169,14 @@ int UYVYToNV12(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_SPLITUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SplitUVRow = SplitUVRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + SplitUVRow = SplitUVRow_LSX; + } + } +#endif #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; @@ -5084,6 +5217,14 @@ int UYVYToNV12(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_INTERPOLATEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + InterpolateRow = InterpolateRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + InterpolateRow = InterpolateRow_LSX; + } + } +#endif { int awidth = halfwidth * 2; |