aboutsummaryrefslogtreecommitdiff
path: root/source/planar_functions.cc
diff options
context:
space:
mode:
author Hao Chen <chenhao@loongson.cn> 2021-12-20 20:14:11 +0800
committer Frank Barchard <fbarchard@chromium.org> 2022-01-21 01:34:38 +0000
commit dfe046d27255cff06fc4cfe42c6d373fd83bc2aa (patch)
tree ce440885c31987ee6177ead9edc5aa2be7439695 /source/planar_functions.cc
parent de8ae8c679f5a42fb9f9f65318d6cb95112180d6 (diff)
download libyuv-dfe046d27255cff06fc4cfe42c6d373fd83bc2aa.tar.gz
Add optimization functions in row_lsx.cc file.
Optimize 44 functions in source/row_lsx.cc file.
All test cases passed on loongarch platform.

Bug: libyuv:913
Change-Id: Ic80a5751314adc2e9bd435f2bbd928ab017a90f9
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3351467
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/planar_functions.cc')
-rw-r--r-- source/planar_functions.cc 141
1 files changed, 141 insertions, 0 deletions
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index af555338..03a16c69 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -466,6 +466,14 @@ void SplitUVPlane(const uint8_t* src_uv,
}
}
#endif
+#if defined(HAS_SPLITUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ SplitUVRow = SplitUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ SplitUVRow = SplitUVRow_LSX;
+ }
+ }
+#endif
for (y = 0; y < height; ++y) {
// Copy a row of UV.
@@ -541,6 +549,14 @@ void MergeUVPlane(const uint8_t* src_u,
}
}
#endif
+#if defined(HAS_MERGEUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MergeUVRow = MergeUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ MergeUVRow = MergeUVRow_LSX;
+ }
+ }
+#endif
for (y = 0; y < height; ++y) {
// Merge a row of U and V into a row of UV.
@@ -2322,6 +2338,11 @@ ARGBBlendRow GetARGBBlend() {
ARGBBlendRow = ARGBBlendRow_MSA;
}
#endif
+#if defined(HAS_ARGBBLENDROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBBlendRow = ARGBBlendRow_LSX;
+ }
+#endif
return ARGBBlendRow;
}
@@ -2904,6 +2925,14 @@ int RAWToRGB24(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTORGB24ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RAWToRGB24Row = RAWToRGB24Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToRGB24Row = RAWToRGB24Row_LSX;
+ }
+ }
+#endif
for (y = 0; y < height; ++y) {
RAWToRGB24Row(src_raw, dst_rgb24, width);
@@ -2958,6 +2987,14 @@ void SetPlane(uint8_t* dst_y,
SetRow = SetRow_MSA;
}
#endif
+#if defined(HAS_SETROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ SetRow = SetRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ SetRow = SetRow_LSX;
+ }
+ }
+#endif
// Set plane
for (y = 0; y < height; ++y) {
@@ -3055,6 +3092,14 @@ int ARGBRect(uint8_t* dst_argb,
}
}
#endif
+#if defined(HAS_ARGBSETROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBSetRow = ARGBSetRow_Any_LSX;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBSetRow = ARGBSetRow_LSX;
+ }
+ }
+#endif
// Set plane
for (y = 0; y < height; ++y) {
@@ -3423,6 +3468,11 @@ int ARGBColorMatrix(const uint8_t* src_argb,
ARGBColorMatrixRow = ARGBColorMatrixRow_MSA;
}
#endif
+#if defined(HAS_ARGBCOLORMATRIXROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
+ ARGBColorMatrixRow = ARGBColorMatrixRow_LSX;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
src_argb += src_stride_argb;
@@ -3588,6 +3638,11 @@ int ARGBQuantize(uint8_t* dst_argb,
ARGBQuantizeRow = ARGBQuantizeRow_MSA;
}
#endif
+#if defined(HAS_ARGBQUANTIZEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
+ ARGBQuantizeRow = ARGBQuantizeRow_LSX;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
dst += dst_stride_argb;
@@ -3881,6 +3936,14 @@ int InterpolatePlane(const uint8_t* src0,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ InterpolateRow = InterpolateRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_LSX;
+ }
+ }
+#endif
for (y = 0; y < height; ++y) {
InterpolateRow(dst, src0, src1 - src0, width, interpolation);
@@ -4243,6 +4306,14 @@ static int ARGBSobelize(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYJRow = ARGBToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_SOBELYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
@@ -4374,6 +4445,14 @@ int ARGBSobel(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_SOBELROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ SobelRow = SobelRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ SobelRow = SobelRow_LSX;
+ }
+ }
+#endif
return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
width, height, SobelRow);
}
@@ -4420,6 +4499,14 @@ int ARGBSobelToPlane(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_SOBELTOPLANEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ SobelToPlaneRow = SobelToPlaneRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ SobelToPlaneRow = SobelToPlaneRow_LSX;
+ }
+ }
+#endif
return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width,
height, SobelToPlaneRow);
}
@@ -4467,6 +4554,14 @@ int ARGBSobelXY(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_SOBELXYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ SobelXYRow = SobelXYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ SobelXYRow = SobelXYRow_LSX;
+ }
+ }
+#endif
return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
width, height, SobelXYRow);
}
@@ -4590,6 +4685,14 @@ int HalfFloatPlane(const uint16_t* src_y,
}
}
#endif
+#if defined(HAS_HALFFLOATROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ HalfFloatRow = HalfFloatRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ HalfFloatRow = HalfFloatRow_LSX;
+ }
+ }
+#endif
for (y = 0; y < height; ++y) {
HalfFloatRow(src_y, dst_y, scale, width);
@@ -4776,6 +4879,12 @@ int ARGBExtractAlpha(const uint8_t* src_argb,
: ARGBExtractAlphaRow_Any_MSA;
}
#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_LSX
+ : ARGBExtractAlphaRow_Any_LSX;
+ }
+#endif
for (int y = 0; y < height; ++y) {
ARGBExtractAlphaRow(src_argb, dst_a, width);
@@ -4912,6 +5021,14 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
}
}
#endif
+#if defined(HAS_SPLITUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ SplitUVRow = SplitUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ SplitUVRow = SplitUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@@ -4952,6 +5069,14 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ InterpolateRow = InterpolateRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_LSX;
+ }
+ }
+#endif
{
int awidth = halfwidth * 2;
@@ -5044,6 +5169,14 @@ int UYVYToNV12(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_SPLITUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ SplitUVRow = SplitUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ SplitUVRow = SplitUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@@ -5084,6 +5217,14 @@ int UYVYToNV12(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ InterpolateRow = InterpolateRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_LSX;
+ }
+ }
+#endif
{
int awidth = halfwidth * 2;