aboutsummaryrefslogtreecommitdiff
path: root/source/convert.cc
diff options
context:
space:
mode:
author: Hao Chen <chenhao@loongson.cn> 2021-12-20 20:14:11 +0800
committer: Frank Barchard <fbarchard@chromium.org> 2022-01-21 01:34:38 +0000
commit: dfe046d27255cff06fc4cfe42c6d373fd83bc2aa (patch)
tree: ce440885c31987ee6177ead9edc5aa2be7439695 /source/convert.cc
parent: de8ae8c679f5a42fb9f9f65318d6cb95112180d6 (diff)
download: libyuv-dfe046d27255cff06fc4cfe42c6d373fd83bc2aa.tar.gz
Add optimization functions in row_lsx.cc file.
Optimize 44 functions in source/row_lsx.cc file.
All test cases passed on loongarch platform.

Bug: libyuv:913
Change-Id: Ic80a5751314adc2e9bd435f2bbd928ab017a90f9
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3351467
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/convert.cc')
-rw-r--r-- source/convert.cc 120
1 files changed, 107 insertions, 13 deletions
diff --git a/source/convert.cc b/source/convert.cc
index 1e524de3..67bcca80 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -644,6 +644,14 @@ int I422ToNV21(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_MERGEUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MergeUVRow = MergeUVRow_Any_LSX;
+ if (IS_ALIGNED(halfwidth, 16)) {
+ MergeUVRow = MergeUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@@ -684,6 +692,14 @@ int I422ToNV21(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ InterpolateRow = InterpolateRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_LSX;
+ }
+ }
+#endif
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, halfwidth, height);
@@ -1562,6 +1578,16 @@ int BGRAToI420(const uint8_t* src_bgra,
}
}
#endif
+#if defined(HAS_BGRATOYROW_LSX) && defined(HAS_BGRATOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ BGRAToYRow = BGRAToYRow_Any_LSX;
+ BGRAToUVRow = BGRAToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ BGRAToYRow = BGRAToYRow_LSX;
+ BGRAToUVRow = BGRAToUVRow_LSX;
+ }
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
@@ -1676,6 +1702,16 @@ int ABGRToI420(const uint8_t* src_abgr,
}
}
#endif
+#if defined(HAS_ABGRTOYROW_LSX) && defined(HAS_ABGRTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ABGRToYRow = ABGRToYRow_Any_LSX;
+ ABGRToUVRow = ABGRToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYRow = ABGRToYRow_LSX;
+ ABGRToUVRow = ABGRToUVRow_LSX;
+ }
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
@@ -1774,6 +1810,16 @@ int RGBAToI420(const uint8_t* src_rgba,
}
}
#endif
+#if defined(HAS_RGBATOYROW_LSX) && defined(HAS_RGBATOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RGBAToYRow = RGBAToYRow_Any_LSX;
+ RGBAToUVRow = RGBAToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToYRow = RGBAToYRow_LSX;
+ RGBAToUVRow = RGBAToUVRow_LSX;
+ }
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
@@ -1793,7 +1839,7 @@ int RGBAToI420(const uint8_t* src_rgba,
// Enabled if 1 pass is available
#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
- defined(HAS_RGB24TOYROW_MMI))
+ defined(HAS_RGB24TOYROW_MMI) || defined(HAS_RGB24TOYROW_LSX))
#define HAS_RGB24TOYROW
#endif
@@ -1872,6 +1918,16 @@ int RGB24ToI420(const uint8_t* src_rgb24,
}
}
#endif
+#if defined(HAS_RGB24TOYROW_LSX) && defined(HAS_RGB24TOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RGB24ToUVRow = RGB24ToUVRow_Any_LSX;
+ RGB24ToYRow = RGB24ToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToYRow = RGB24ToYRow_LSX;
+ RGB24ToUVRow = RGB24ToUVRow_LSX;
+ }
+ }
+#endif
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else // HAS_RGB24TOYROW
@@ -2131,7 +2187,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
// Enabled if 1 pass is available
#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
- defined(HAS_RAWTOYROW_MMI))
+ defined(HAS_RAWTOYROW_MMI) || defined(HAS_RAWTOYROW_LSX))
#define HAS_RAWTOYROW
#endif
@@ -2209,6 +2265,16 @@ int RAWToI420(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTOYROW_LSX) && defined(HAS_RAWTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RAWToUVRow = RAWToUVRow_Any_LSX;
+ RAWToYRow = RAWToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToYRow = RAWToYRow_LSX;
+ RAWToUVRow = RAWToUVRow_LSX;
+ }
+ }
+#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYROW
@@ -2480,7 +2546,7 @@ int RGB565ToI420(const uint8_t* src_rgb565,
int height) {
int y;
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
- defined(HAS_RGB565TOYROW_MMI))
+ defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_LSX))
void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565,
uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB565ToUVRow_C;
@@ -2518,7 +2584,8 @@ int RGB565ToI420(const uint8_t* src_rgb565,
}
}
// MMI and MSA version does direct RGB565 to YUV.
-#elif (defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_MSA))
+#elif (defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_MSA) \
+ || defined(HAS_RGB565TOYROW_LSX))
#if defined(HAS_RGB565TOYROW_MMI) && defined(HAS_RGB565TOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGB565ToUVRow = RGB565ToUVRow_Any_MMI;
@@ -2541,6 +2608,16 @@ int RGB565ToI420(const uint8_t* src_rgb565,
}
}
#endif
+#if defined(HAS_RGB565TOYROW_LSX) && defined(HAS_RGB565TOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RGB565ToUVRow = RGB565ToUVRow_Any_LSX;
+ RGB565ToYRow = RGB565ToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RGB565ToYRow = RGB565ToYRow_LSX;
+ RGB565ToUVRow = RGB565ToUVRow_LSX;
+ }
+ }
+#endif
// Other platforms do intermediate conversion from RGB565 to ARGB.
#else
#if defined(HAS_RGB565TOARGBROW_SSE2)
@@ -2594,14 +2671,14 @@ int RGB565ToI420(const uint8_t* src_rgb565,
#endif
{
#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
- defined(HAS_RGB565TOYROW_MMI))
+ defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_LSX))
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
- defined(HAS_RGB565TOYROW_MMI))
+ defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_LSX))
RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
RGB565ToYRow(src_rgb565, dst_y, width);
RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
@@ -2619,7 +2696,7 @@ int RGB565ToI420(const uint8_t* src_rgb565,
}
if (height & 1) {
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
- defined(HAS_RGB565TOYROW_MMI))
+ defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_LSX))
RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
RGB565ToYRow(src_rgb565, dst_y, width);
#else
@@ -2629,7 +2706,7 @@ int RGB565ToI420(const uint8_t* src_rgb565,
#endif
}
#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
- defined(HAS_RGB565TOYROW_MMI))
+ defined(HAS_RGB565TOYROW_MMI) || defined(HAS_RGB565TOYROW_LSX))
free_aligned_buffer_64(row);
#endif
}
@@ -2650,7 +2727,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
int height) {
int y;
#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
- defined(HAS_ARGB1555TOYROW_MMI))
+ defined(HAS_ARGB1555TOYROW_MMI) || defined(HAS_ARGB1555TOYROW_LSX))
void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGB1555ToUVRow_C;
@@ -2712,6 +2789,15 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
}
}
#endif
+#elif (defined(HAS_ARGB1555TOYROW_LSX) && defined(HAS_ARGB1555TOUVROW_LSX))
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGB1555ToUVRow = ARGB1555ToUVRow_Any_LSX;
+ ARGB1555ToYRow = ARGB1555ToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB1555ToYRow = ARGB1555ToYRow_LSX;
+ ARGB1555ToUVRow = ARGB1555ToUVRow_LSX;
+ }
+ }
// Other platforms do intermediate conversion from ARGB1555 to ARGB.
#else
#if defined(HAS_ARGB1555TOARGBROW_SSE2)
@@ -2765,7 +2851,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
#endif
{
#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
- defined(HAS_ARGB1555TOYROW_MMI))
+ defined(HAS_ARGB1555TOYROW_MMI) || defined(HAS_ARGB1555TOYROW_LSX))
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
@@ -2773,7 +2859,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
- defined(HAS_ARGB1555TOYROW_MMI))
+ defined(HAS_ARGB1555TOYROW_MMI) || defined(HAS_ARGB1555TOYROW_LSX))
ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
ARGB1555ToYRow(src_argb1555, dst_y, width);
ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
@@ -2793,7 +2879,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
}
if (height & 1) {
#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
- defined(HAS_ARGB1555TOYROW_MMI))
+ defined(HAS_ARGB1555TOYROW_MMI) || defined(HAS_ARGB1555TOYROW_LSX))
ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
ARGB1555ToYRow(src_argb1555, dst_y, width);
#else
@@ -2803,7 +2889,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
#endif
}
#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
- defined(HAS_ARGB1555TOYROW_MMI))
+ defined(HAS_ARGB1555TOYROW_MMI) || defined(HAS_ARGB1555TOYROW_LSX))
free_aligned_buffer_64(row);
#endif
}
@@ -2898,6 +2984,14 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
}
}
#endif
+#if defined(HAS_ARGB4444TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;