diff options
author | Hao Chen <chenhao@loongson.cn> | 2021-12-20 19:57:26 +0800 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2022-01-21 01:34:38 +0000 |
commit | de8ae8c679f5a42fb9f9f65318d6cb95112180d6 (patch) | |
tree | 4f504ae4587084990aa39a10f820591f40ff30ed | |
parent | 51de1e16f20bb93468d7c538629b40ece8420b71 (diff) | |
download | libyuv-de8ae8c679f5a42fb9f9f65318d6cb95112180d6.tar.gz |
Add optimization functions in row_lasx.cc file.
Optimize 32 functions in source/row_lasx.cc file.
All test cases passed on loongarch platform.
Bug: libyuv:912
Signed-off-by: Hao Chen <chenhao@loongson.cn>
Change-Id: I7d3f649f753f72ca9bd052d5e0562dbc6f6ccfed
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3351466
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
-rw-r--r-- | include/libyuv/row.h | 243 | ||||
-rw-r--r-- | source/convert.cc | 40 | ||||
-rw-r--r-- | source/convert_argb.cc | 48 | ||||
-rw-r--r-- | source/convert_from.cc | 24 | ||||
-rw-r--r-- | source/convert_from_argb.cc | 139 | ||||
-rw-r--r-- | source/planar_functions.cc | 104 | ||||
-rw-r--r-- | source/rotate_argb.cc | 8 | ||||
-rw-r--r-- | source/row_any.cc | 72 | ||||
-rw-r--r-- | source/row_lasx.cc | 1042 |
9 files changed, 1720 insertions, 0 deletions
diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 37825a39..957eb587 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -689,6 +689,38 @@ extern "C" { #define HAS_I422TOARGBROW_LASX #define HAS_I422TORGBAROW_LASX #define HAS_I422ALPHATOARGBROW_LASX +#define HAS_I422TOYUY2ROW_LASX +#define HAS_I422TOUYVYROW_LASX +#define HAS_MIRRORROW_LASX +#define HAS_MIRRORUVROW_LASX +#define HAS_ARGBMIRRORROW_LASX +#define HAS_I422TORGB24ROW_LASX +#define HAS_I422TORGB565ROW_LASX +#define HAS_I422TOARGB4444ROW_LASX +#define HAS_I422TOARGB1555ROW_LASX +#define HAS_YUY2TOUVROW_LASX +#define HAS_YUY2TOYROW_LASX +#define HAS_YUY2TOUV422ROW_LASX +#define HAS_UYVYTOYROW_LASX +#define HAS_UYVYTOUVROW_LASX +#define HAS_UYVYTOUV422ROW_LASX +#define HAS_ARGBTOYROW_LASX +#define HAS_ARGBTOUVROW_LASX +#define HAS_ARGBTORGB24ROW_LASX +#define HAS_ARGBTORAWROW_LASX +#define HAS_ARGBTORGB565ROW_LASX +#define HAS_ARGBTOARGB1555ROW_LASX +#define HAS_ARGBTOARGB4444ROW_LASX +#define HAS_ARGBTOUV444ROW_LASX +#define HAS_ARGBMULTIPLYROW_LASX +#define HAS_ARGBADDROW_LASX +#define HAS_ARGBSUBTRACTROW_LASX +#define HAS_ARGBATTENUATEROW_LASX +#define HAS_ARGBTORGB565DITHERROW_LASX +#define HAS_ARGBSHUFFLEROW_LASX +#define HAS_ARGBSHADEROW_LASX +#define HAS_ARGBGRAYROW_LASX +#define HAS_ARGBSEPIAROW_LASX #endif #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) @@ -1005,24 +1037,48 @@ void I422ToRGB24Row_MSA(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I422ToRGB24Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGB565Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); +void I422ToRGB565Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, + const 
struct YuvConstants* yuvconstants, + int width); void I422ToARGB4444Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); +void I422ToARGB4444Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGB1555Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); +void I422ToARGB1555Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, + const struct YuvConstants* yuvconstants, + int width); void NV12ToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_argb, @@ -1074,6 +1130,7 @@ void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYJRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToUV444Row_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, @@ -1092,6 +1149,15 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width); +void ARGBToUVRow_LASX(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUV444Row_LASX(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void ARGBToUV444Row_MMI(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, @@ -1341,6 +1407,8 @@ void ARGB4444ToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); + void ARGBToUVRow_AVX2(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, 
@@ -1439,6 +1507,15 @@ void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void ARGBToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUV444Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void ARGBToUV444Row_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -1678,6 +1755,7 @@ void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -1685,15 +1763,18 @@ void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int 
width); void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorSplitUVRow_SSSE3(const uint8_t* src, uint8_t* dst_u, @@ -1721,6 +1802,7 @@ void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, @@ -1733,6 +1815,7 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr, int width); void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBMirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_rgb24, @@ -2537,6 +2620,10 @@ void ARGBShuffleRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width); +void ARGBShuffleRow_LASX(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width); void ARGBShuffleRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, @@ -2557,6 +2644,10 @@ void ARGBShuffleRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, int width); +void ARGBShuffleRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint8_t* param, + int width); void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_argb, @@ -2777,6 +2868,20 @@ void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, 
int width); +void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, + int width); + +void ARGBToRGB24Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRAWRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRGB565Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB1555Row_LASX(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); +void ARGBToARGB4444Row_LASX(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width); @@ -3896,6 +4001,14 @@ void ARGBMultiplyRow_Any_MMI(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); +void ARGBMultiplyRow_LASX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); // ARGB add images. void ARGBAddRow_C(const uint8_t* src_argb, @@ -3942,6 +4055,14 @@ void ARGBAddRow_Any_MMI(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); +void ARGBAddRow_LASX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); // ARGB subtract images. Same API as Blend, but these require // pointer and width alignment for SSE2. 
@@ -3989,6 +4110,14 @@ void ARGBSubtractRow_Any_MMI(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); +void ARGBSubtractRow_LASX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, @@ -4077,6 +4206,24 @@ void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint32_t param, int width); +void ARGBToRGB565DitherRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); + +void ARGBToRGB24Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB565Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB4444Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void ARGBToRGB24Row_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, @@ -4291,24 +4438,48 @@ void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToRGB24Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToRGB565Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct 
YuvConstants* yuvconstants, int width); +void I422ToARGB4444Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToARGB1555Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void NV12ToARGBRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -4365,11 +4536,17 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, int width); void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToYRow_LASX(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToUVRow_LASX(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void YUY2ToUVRow_MMI(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_u, @@ -4379,6 +4556,10 @@ void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToUV422Row_LASX(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, @@ -4425,11 +4606,17 @@ void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr, int width); void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* 
dst_v, int width); +void YUY2ToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void YUY2ToUVRow_Any_MMI(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, @@ -4439,6 +4626,10 @@ void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToUV422Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void YUY2ToUV422Row_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -4485,11 +4676,17 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, int width); void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToYRow_LASX(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToUVRow_MSA(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); +void UYVYToUVRow_LASX(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void UYVYToUVRow_MMI(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u, @@ -4499,6 +4696,10 @@ void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); +void UYVYToUV422Row_LASX(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void UYVYToUV422Row_MMI(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, @@ -4546,11 +4747,17 @@ void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr, int width); void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void UYVYToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* 
dst_v, + int width); void UYVYToUVRow_Any_MMI(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, @@ -4560,6 +4767,10 @@ void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void UYVYToUV422Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -4679,6 +4890,11 @@ void I422ToYUY2Row_MMI(const uint8_t* src_y, const uint8_t* src_v, uint8_t* dst_yuy2, int width); +void I422ToYUY2Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); void I422ToUYVYRow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -4689,6 +4905,11 @@ void I422ToUYVYRow_MMI(const uint8_t* src_y, const uint8_t* src_v, uint8_t* dst_uyvy, int width); +void I422ToUYVYRow_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width); void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4699,6 +4920,11 @@ void I422ToYUY2Row_Any_MMI(const uint8_t* y_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); +void I422ToYUY2Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4709,6 +4935,11 @@ void I422ToUYVYRow_Any_MMI(const uint8_t* y_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); +void I422ToUYVYRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); // Effects related row functions. 
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); @@ -4727,6 +4958,9 @@ void ARGBAttenuateRow_MSA(const uint8_t* src_argb, void ARGBAttenuateRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBAttenuateRow_LASX(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -4742,6 +4976,9 @@ void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr, void ARGBAttenuateRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBAttenuateRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); // Inverse table for unattenuate, shared by C and SSE2. extern const uint32_t fixed_invtbl8[256]; @@ -4766,12 +5003,14 @@ void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBSepiaRow_C(uint8_t* dst_argb, int width); void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width); void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width); void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width); void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width); +void ARGBSepiaRow_LASX(uint8_t* dst_argb, int width); void ARGBColorMatrixRow_C(const uint8_t* src_argb, uint8_t* dst_argb, @@ -4849,6 +5088,10 @@ void ARGBShadeRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value); +void ARGBShadeRow_LASX(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value); // Used for blur. 
void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, diff --git a/source/convert.cc b/source/convert.cc index c070bf81..1e524de3 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -1095,6 +1095,16 @@ int YUY2ToI420(const uint8_t* src_yuy2, } } #endif +#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + YUY2ToYRow = YUY2ToYRow_Any_LASX; + YUY2ToUVRow = YUY2ToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + YUY2ToYRow = YUY2ToYRow_LASX; + YUY2ToUVRow = YUY2ToUVRow_LASX; + } + } +#endif for (y = 0; y < height - 1; y += 2) { YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width); @@ -1186,6 +1196,16 @@ int UYVYToI420(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_UYVYTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + UYVYToYRow = UYVYToYRow_Any_LASX; + UYVYToUVRow = UYVYToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + UYVYToYRow = UYVYToYRow_LASX; + UYVYToUVRow = UYVYToUVRow_LASX; + } + } +#endif for (y = 0; y < height - 1; y += 2) { UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width); @@ -1440,6 +1460,16 @@ int ARGBToI420(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + ARGBToUVRow = ARGBToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + ARGBToUVRow = ARGBToUVRow_LASX; + } + } +#endif for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width); @@ -2924,6 +2954,16 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, } } #endif +#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + ARGBToUVRow = ARGBToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + ARGBToUVRow = ARGBToUVRow_LASX; + } + } +#endif #endif { diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 
21367c9b..7128e9f9 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -4668,6 +4668,14 @@ int I420ToRGB24Matrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGB24ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToRGB24Row = I422ToRGB24Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToRGB24Row = I422ToRGB24Row_LASX; + } + } +#endif for (y = 0; y < height; ++y) { I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); @@ -4856,6 +4864,14 @@ int I420ToARGB1555(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOARGB1555ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToARGB1555Row = I422ToARGB1555Row_Any_LASX; + if (IS_ALIGNED(width, 8)) { + I422ToARGB1555Row = I422ToARGB1555Row_LASX; + } + } +#endif for (y = 0; y < height; ++y) { I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants, @@ -4937,6 +4953,14 @@ int I420ToARGB4444(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOARGB4444ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToARGB4444Row = I422ToARGB4444Row_Any_LASX; + if (IS_ALIGNED(width, 8)) { + I422ToARGB4444Row = I422ToARGB4444Row_LASX; + } + } +#endif for (y = 0; y < height; ++y) { I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants, @@ -5018,6 +5042,14 @@ int I420ToRGB565Matrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGB565ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToRGB565Row = I422ToRGB565Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToRGB565Row = I422ToRGB565Row_LASX; + } + } +#endif for (y = 0; y < height; ++y) { I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, yuvconstants, width); @@ -5140,6 +5172,14 @@ int I422ToRGB565(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGB565ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToRGB565Row = I422ToRGB565Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToRGB565Row = I422ToRGB565Row_LASX; + } + } +#endif for (y = 0; y < height; ++y) { I422ToRGB565Row(src_y, src_u, src_v, 
dst_rgb565, &kYuvI601Constants, width); @@ -5285,6 +5325,14 @@ int I420ToRGB565Dither(const uint8_t* src_y, } } #endif +#if defined(HAS_ARGBTORGB565DITHERROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_LASX; + } + } +#endif { // Allocate a row of argb. align_buffer_64(row_argb, width * 4); diff --git a/source/convert_from.cc b/source/convert_from.cc index 62a13d04..41a3c17a 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -452,6 +452,14 @@ int I420ToYUY2(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOYUY2ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToYUY2Row = I422ToYUY2Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToYUY2Row = I422ToYUY2Row_LASX; + } + } +#endif for (y = 0; y < height - 1; y += 2) { I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); @@ -539,6 +547,14 @@ int I422ToUYVY(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOUYVYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToUYVYRow = I422ToUYVYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_LASX; + } + } +#endif for (y = 0; y < height; ++y) { I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); @@ -614,6 +630,14 @@ int I420ToUYVY(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOUYVYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToUYVYRow = I422ToUYVYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_LASX; + } + } +#endif for (y = 0; y < height - 1; y += 2) { I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 55c9ee61..6d147975 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -84,6 +84,14 @@ int ARGBToI444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUV444ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToUV444Row = ARGBToUV444Row_Any_LASX; 
+ if (IS_ALIGNED(width, 32)) { + ARGBToUV444Row = ARGBToUV444Row_LASX; + } + } +#endif #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYRow = ARGBToYRow_Any_SSSE3; @@ -124,6 +132,14 @@ int ARGBToI444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToUV444Row(src_argb, dst_u, dst_v, width); @@ -245,6 +261,17 @@ int ARGBToI422(const uint8_t* src_argb, } #endif +#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + ARGBToUVRow = ARGBToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + ARGBToUVRow = ARGBToUVRow_LASX; + } + } +#endif + for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); ARGBToYRow(src_argb, dst_y, width); @@ -355,6 +382,16 @@ int ARGBToNV12(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + ARGBToUVRow = ARGBToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + ARGBToUVRow = ARGBToUVRow_LASX; + } + } +#endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; @@ -519,6 +556,16 @@ int ARGBToNV21(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + ARGBToUVRow = ARGBToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + ARGBToUVRow = ARGBToUVRow_LASX; + } + } +#endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; @@ -1015,6 +1062,16 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif +#if 
defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + ARGBToUVRow = ARGBToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + ARGBToUVRow = ARGBToUVRow_LASX; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; @@ -1055,6 +1112,14 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif +#if defined(HAS_I422TOYUY2ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToYUY2Row = I422ToYUY2Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToYUY2Row = I422ToYUY2Row_LASX; + } + } +#endif { // Allocate a rows of yuv. @@ -1180,6 +1245,16 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + ARGBToUVRow = ARGBToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + ARGBToUVRow = ARGBToUVRow_LASX; + } + } +#endif #if defined(HAS_I422TOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; @@ -1220,6 +1295,14 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif +#if defined(HAS_I422TOUYVYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToUYVYRow = I422ToUYVYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_LASX; + } + } +#endif { // Allocate a rows of yuv. 
@@ -1305,6 +1388,14 @@ int ARGBToI400(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToYRow(src_argb, dst_y, width); @@ -1403,6 +1494,14 @@ int ARGBToRGB24(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORGB24ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToRGB24Row = ARGBToRGB24Row_LASX; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToRGB24Row(src_argb, dst_rgb24, width); @@ -1477,6 +1576,14 @@ int ARGBToRAW(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORAWROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToRAWRow = ARGBToRAWRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToRAWRow = ARGBToRAWRow_LASX; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToRAWRow(src_argb, dst_raw, width); @@ -1555,6 +1662,14 @@ int ARGBToRGB565Dither(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORGB565DITHERROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_LASX; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToRGB565DitherRow(src_argb, dst_rgb565, @@ -1632,6 +1747,14 @@ int ARGBToRGB565(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORGB565ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToRGB565Row = ARGBToRGB565Row_Any_LASX; + if (IS_ALIGNED(width, 16)) { + ARGBToRGB565Row = ARGBToRGB565Row_LASX; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToRGB565Row(src_argb, dst_rgb565, width); @@ -1706,6 +1829,14 @@ int ARGBToARGB1555(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOARGB1555ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_Any_LASX; + if (IS_ALIGNED(width, 
16)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_LASX; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToARGB1555Row(src_argb, dst_argb1555, width); @@ -1780,6 +1911,14 @@ int ARGBToARGB4444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOARGB4444ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_Any_LASX; + if (IS_ALIGNED(width, 16)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_LASX; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToARGB4444Row(src_argb, dst_argb4444, width); diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 7cea06c8..af555338 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1728,6 +1728,16 @@ int YUY2ToI422(const uint8_t* src_yuy2, } } #endif +#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + YUY2ToYRow = YUY2ToYRow_Any_LASX; + YUY2ToUV422Row = YUY2ToUV422Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + YUY2ToYRow = YUY2ToYRow_LASX; + YUY2ToUV422Row = YUY2ToUV422Row_LASX; + } + } +#endif for (y = 0; y < height; ++y) { YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width); @@ -1824,6 +1834,16 @@ int UYVYToI422(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_UYVYTOYROW_LASX) && defined(HAS_UYVYTOUV422ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + UYVYToYRow = UYVYToYRow_Any_LASX; + UYVYToUV422Row = UYVYToUV422Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + UYVYToYRow = UYVYToYRow_LASX; + UYVYToUV422Row = UYVYToUV422Row_LASX; + } + } +#endif for (y = 0; y < height; ++y) { UYVYToUV422Row(src_uyvy, dst_u, dst_v, width); @@ -1968,6 +1988,14 @@ void MirrorPlane(const uint8_t* src_y, } } #endif +#if defined(HAS_MIRRORROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + MirrorRow = MirrorRow_Any_LASX; + if (IS_ALIGNED(width, 64)) { + MirrorRow = MirrorRow_LASX; + } + } +#endif // Mirror plane for (y = 0; y < height; ++y) { @@ -2026,6 +2054,14 @@ void MirrorUVPlane(const uint8_t* src_uv, } } #endif +#if 
defined(HAS_MIRRORUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + MirrorUVRow = MirrorUVRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + MirrorUVRow = MirrorUVRow_LASX; + } + } +#endif // MirrorUV plane for (y = 0; y < height; ++y) { @@ -2194,6 +2230,14 @@ int ARGBMirror(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBMIRRORROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBMirrorRow = ARGBMirrorRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + ARGBMirrorRow = ARGBMirrorRow_LASX; + } + } +#endif // Mirror plane for (y = 0; y < height; ++y) { @@ -2602,6 +2646,14 @@ int ARGBMultiply(const uint8_t* src_argb0, } } #endif +#if defined(HAS_ARGBMULTIPLYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBMultiplyRow = ARGBMultiplyRow_Any_LASX; + if (IS_ALIGNED(width, 8)) { + ARGBMultiplyRow = ARGBMultiplyRow_LASX; + } + } +#endif // Multiply plane for (y = 0; y < height; ++y) { @@ -2687,6 +2739,14 @@ int ARGBAdd(const uint8_t* src_argb0, } } #endif +#if defined(HAS_ARGBADDROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBAddRow = ARGBAddRow_Any_LASX; + if (IS_ALIGNED(width, 8)) { + ARGBAddRow = ARGBAddRow_LASX; + } + } +#endif // Add plane for (y = 0; y < height; ++y) { @@ -2767,6 +2827,14 @@ int ARGBSubtract(const uint8_t* src_argb0, } } #endif +#if defined(HAS_ARGBSUBTRACTROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBSubtractRow = ARGBSubtractRow_Any_LASX; + if (IS_ALIGNED(width, 8)) { + ARGBSubtractRow = ARGBSubtractRow_LASX; + } + } +#endif // Subtract plane for (y = 0; y < height; ++y) { @@ -3073,6 +3141,14 @@ int ARGBAttenuate(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBATTENUATEROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + ARGBAttenuateRow = ARGBAttenuateRow_LASX; + } + } +#endif for (y = 0; y < height; ++y) { ARGBAttenuateRow(src_argb, dst_argb, width); @@ -3178,6 +3254,11 @@ int ARGBGrayTo(const uint8_t* src_argb, ARGBGrayRow = ARGBGrayRow_MSA; } #endif +#if 
defined(HAS_ARGBGRAYROW_LASX) + if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) { + ARGBGrayRow = ARGBGrayRow_LASX; + } +#endif for (y = 0; y < height; ++y) { ARGBGrayRow(src_argb, dst_argb, width); @@ -3228,6 +3309,11 @@ int ARGBGray(uint8_t* dst_argb, ARGBGrayRow = ARGBGrayRow_MSA; } #endif +#if defined(HAS_ARGBGRAYROW_LASX) + if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) { + ARGBGrayRow = ARGBGrayRow_LASX; + } +#endif for (y = 0; y < height; ++y) { ARGBGrayRow(dst, dst, width); @@ -3276,6 +3362,11 @@ int ARGBSepia(uint8_t* dst_argb, ARGBSepiaRow = ARGBSepiaRow_MSA; } #endif +#if defined(HAS_ARGBSEPIAROW_LASX) + if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) { + ARGBSepiaRow = ARGBSepiaRow_LASX; + } +#endif for (y = 0; y < height; ++y) { ARGBSepiaRow(dst, width); @@ -3706,6 +3797,11 @@ int ARGBShade(const uint8_t* src_argb, ARGBShadeRow = ARGBShadeRow_MSA; } #endif +#if defined(HAS_ARGBSHADEROW_LASX) + if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 8)) { + ARGBShadeRow = ARGBShadeRow_LASX; + } +#endif for (y = 0; y < height; ++y) { ARGBShadeRow(src_argb, dst_argb, width, value); @@ -3916,6 +4012,14 @@ int ARGBShuffle(const uint8_t* src_bgra, } } #endif +#if defined(HAS_ARGBSHUFFLEROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBShuffleRow = ARGBShuffleRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + ARGBShuffleRow = ARGBShuffleRow_LASX; + } + } +#endif for (y = 0; y < height; ++y) { ARGBShuffleRow(src_bgra, dst_argb, shuffler, width); diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc index ae653886..4d36a910 100644 --- a/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -163,6 +163,14 @@ static int ARGBRotate180(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBMIRRORROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBMirrorRow = ARGBMirrorRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + ARGBMirrorRow = ARGBMirrorRow_LASX; + } + } +#endif #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { CopyRow = 
IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; diff --git a/source/row_any.cc b/source/row_any.cc index 7d24b15c..b1b5f8a9 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -297,6 +297,9 @@ ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31) #ifdef HAS_I422TOYUY2ROW_MMI ANY31(I422ToYUY2Row_Any_MMI, I422ToYUY2Row_MMI, 1, 1, 4, 7) #endif +#ifdef HAS_I422TOYUY2ROW_LASX +ANY31(I422ToYUY2Row_Any_LASX, I422ToYUY2Row_LASX, 1, 1, 4, 31) +#endif #ifdef HAS_I422TOUYVYROW_NEON ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) #endif @@ -306,6 +309,9 @@ ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31) #ifdef HAS_I422TOUYVYROW_MMI ANY31(I422ToUYVYRow_Any_MMI, I422ToUYVYRow_MMI, 1, 1, 4, 7) #endif +#ifdef HAS_I422TOUYVYROW_LASX +ANY31(I422ToUYVYRow_Any_LASX, I422ToUYVYRow_LASX, 1, 1, 4, 31) +#endif #ifdef HAS_BLENDPLANEROW_AVX2 ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31) #endif @@ -425,6 +431,10 @@ ANY31C(I422ToRGBARow_Any_MMI, I422ToRGBARow_MMI, 1, 0, 4, 7) #ifdef HAS_I422TOARGBROW_LASX ANY31C(I422ToARGBRow_Any_LASX, I422ToARGBRow_LASX, 1, 0, 4, 31) ANY31C(I422ToRGBARow_Any_LASX, I422ToRGBARow_LASX, 1, 0, 4, 31) +ANY31C(I422ToRGB24Row_Any_LASX, I422ToRGB24Row_LASX, 1, 0, 3, 31) +ANY31C(I422ToRGB565Row_Any_LASX, I422ToRGB565Row_LASX, 1, 0, 2, 31) +ANY31C(I422ToARGB4444Row_Any_LASX, I422ToARGB4444Row_LASX, 1, 0, 2, 31) +ANY31C(I422ToARGB1555Row_Any_LASX, I422ToARGB1555Row_LASX, 1, 0, 2, 31) #endif #undef ANY31C @@ -631,18 +641,27 @@ ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3) #ifdef HAS_ARGBMULTIPLYROW_MMI ANY21(ARGBMultiplyRow_Any_MMI, ARGBMultiplyRow_MMI, 0, 4, 4, 4, 1) #endif +#ifdef HAS_ARGBMULTIPLYROW_LASX +ANY21(ARGBMultiplyRow_Any_LASX, ARGBMultiplyRow_LASX, 0, 4, 4, 4, 7) +#endif #ifdef HAS_ARGBADDROW_MSA ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7) #endif #ifdef HAS_ARGBADDROW_MMI ANY21(ARGBAddRow_Any_MMI, ARGBAddRow_MMI, 0, 4, 4, 4, 1) #endif +#ifdef 
HAS_ARGBADDROW_LASX +ANY21(ARGBAddRow_Any_LASX, ARGBAddRow_LASX, 0, 4, 4, 4, 7) +#endif #ifdef HAS_ARGBSUBTRACTROW_MSA ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7) #endif #ifdef HAS_ARGBSUBTRACTROW_MMI ANY21(ARGBSubtractRow_Any_MMI, ARGBSubtractRow_MMI, 0, 4, 4, 4, 1) #endif +#ifdef HAS_ARGBSUBTRACTROW_LASX +ANY21(ARGBSubtractRow_Any_LASX, ARGBSubtractRow_LASX, 0, 4, 4, 4, 7) +#endif #ifdef HAS_SOBELROW_SSE2 ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15) #endif @@ -953,6 +972,13 @@ ANY11(ARGBToARGB1555Row_Any_MMI, ARGBToARGB1555Row_MMI, 0, 4, 2, 3) ANY11(ARGBToARGB4444Row_Any_MMI, ARGBToARGB4444Row_MMI, 0, 4, 2, 3) ANY11(J400ToARGBRow_Any_MMI, J400ToARGBRow_MMI, 0, 1, 4, 3) #endif +#if defined(HAS_ARGBTORGB24ROW_LASX) +ANY11(ARGBToRGB24Row_Any_LASX, ARGBToRGB24Row_LASX, 0, 4, 3, 31) +ANY11(ARGBToRAWRow_Any_LASX, ARGBToRAWRow_LASX, 0, 4, 3, 31) +ANY11(ARGBToRGB565Row_Any_LASX, ARGBToRGB565Row_LASX, 0, 4, 2, 15) +ANY11(ARGBToARGB1555Row_Any_LASX, ARGBToARGB1555Row_LASX, 0, 4, 2, 15) +ANY11(ARGBToARGB4444Row_Any_LASX, ARGBToARGB4444Row_LASX, 0, 4, 2, 15) +#endif #if defined(HAS_RAWTORGB24ROW_NEON) ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7) #endif @@ -1007,6 +1033,9 @@ ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15) #ifdef HAS_ARGBTOYROW_MMI ANY11(ARGBToYRow_Any_MMI, ARGBToYRow_MMI, 0, 4, 1, 7) #endif +#ifdef HAS_ARGBTOYROW_LASX +ANY11(ARGBToYRow_Any_LASX, ARGBToYRow_LASX, 0, 4, 1, 31) +#endif #ifdef HAS_ARGBTOYJROW_NEON ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7) #endif @@ -1115,12 +1144,18 @@ ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15) #ifdef HAS_YUY2TOYROW_MSA ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31) #endif +#ifdef HAS_YUY2TOYROW_LASX +ANY11(YUY2ToYRow_Any_LASX, YUY2ToYRow_LASX, 1, 4, 1, 31) +#endif #ifdef HAS_YUY2TOYROW_MMI ANY11(YUY2ToYRow_Any_MMI, YUY2ToYRow_MMI, 1, 4, 1, 7) #endif #ifdef HAS_UYVYTOYROW_MSA ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31) 
#endif +#ifdef HAS_UYVYTOYROW_LASX +ANY11(UYVYToYRow_Any_LASX, UYVYToYRow_LASX, 1, 4, 1, 31) +#endif #ifdef HAS_UYVYTOYROW_MMI ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15) #endif @@ -1205,6 +1240,9 @@ ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7) #ifdef HAS_ARGBATTENUATEROW_MMI ANY11(ARGBAttenuateRow_Any_MMI, ARGBAttenuateRow_MMI, 0, 4, 4, 1) #endif +#ifdef HAS_ARGBATTENUATEROW_LASX +ANY11(ARGBAttenuateRow_Any_LASX, ARGBAttenuateRow_LASX, 0, 4, 4, 15) +#endif #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7) #endif @@ -1354,6 +1392,14 @@ ANY11P(ARGBToRGB565DitherRow_Any_MMI, 2, 3) #endif +#if defined(HAS_ARGBTORGB565DITHERROW_LASX) +ANY11P(ARGBToRGB565DitherRow_Any_LASX, + ARGBToRGB565DitherRow_LASX, + const uint32_t, + 4, + 2, + 15) +#endif #ifdef HAS_ARGBSHUFFLEROW_SSSE3 ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7) #endif @@ -1369,6 +1415,9 @@ ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7) #ifdef HAS_ARGBSHUFFLEROW_MMI ANY11P(ARGBShuffleRow_Any_MMI, ARGBShuffleRow_MMI, const uint8_t*, 4, 4, 1) #endif +#ifdef HAS_ARGBSHUFFLEROW_LASX +ANY11P(ARGBShuffleRow_Any_LASX, ARGBShuffleRow_LASX, const uint8_t*, 4, 4, 15) +#endif #undef ANY11P #undef ANY11P @@ -1667,6 +1716,9 @@ ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63) #ifdef HAS_MIRRORROW_MMI ANY11M(MirrorRow_Any_MMI, MirrorRow_MMI, 1, 7) #endif +#ifdef HAS_MIRRORROW_LASX +ANY11M(MirrorRow_Any_LASX, MirrorRow_LASX, 1, 63) +#endif #ifdef HAS_MIRRORUVROW_AVX2 ANY11M(MirrorUVRow_Any_AVX2, MirrorUVRow_AVX2, 2, 15) #endif @@ -1679,6 +1731,9 @@ ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31) #ifdef HAS_MIRRORUVROW_MSA ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7) #endif +#ifdef HAS_MIRRORUVROW_LASX +ANY11M(MirrorUVRow_Any_LASX, MirrorUVRow_LASX, 2, 15) +#endif #ifdef HAS_ARGBMIRRORROW_AVX2 ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7) #endif @@ 
-1691,6 +1746,9 @@ ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 7) #ifdef HAS_ARGBMIRRORROW_MSA ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15) #endif +#ifdef HAS_ARGBMIRRORROW_LASX +ANY11M(ARGBMirrorRow_Any_LASX, ARGBMirrorRow_LASX, 4, 15) +#endif #ifdef HAS_ARGBMIRRORROW_MMI ANY11M(ARGBMirrorRow_Any_MMI, ARGBMirrorRow_MMI, 4, 1) #endif @@ -1791,6 +1849,11 @@ ANY12(ARGBToUV444Row_Any_MMI, ARGBToUV444Row_MMI, 0, 4, 0, 7) ANY12(UYVYToUV422Row_Any_MMI, UYVYToUV422Row_MMI, 1, 4, 1, 15) ANY12(YUY2ToUV422Row_Any_MMI, YUY2ToUV422Row_MMI, 1, 4, 1, 15) #endif +#ifdef HAS_YUY2TOUV422ROW_LASX +ANY12(ARGBToUV444Row_Any_LASX, ARGBToUV444Row_LASX, 0, 4, 0, 31) +ANY12(YUY2ToUV422Row_Any_LASX, YUY2ToUV422Row_LASX, 1, 4, 1, 31) +ANY12(UYVYToUV422Row_Any_LASX, UYVYToUV422Row_LASX, 1, 4, 1, 31) +#endif #undef ANY12 // Any 2 16 bit planes with parameter to 1 @@ -1951,6 +2014,9 @@ ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31) #ifdef HAS_ARGBTOUVROW_MMI ANY12S(ARGBToUVRow_Any_MMI, ARGBToUVRow_MMI, 0, 4, 15) #endif +#ifdef HAS_ARGBTOUVROW_LASX +ANY12S(ARGBToUVRow_Any_LASX, ARGBToUVRow_LASX, 0, 4, 31) +#endif #ifdef HAS_ARGBTOUVJROW_NEON ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15) #endif @@ -2047,9 +2113,15 @@ ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31) #ifdef HAS_YUY2TOUVROW_MMI ANY12S(YUY2ToUVRow_Any_MMI, YUY2ToUVRow_MMI, 1, 4, 15) #endif +#ifdef HAS_YUY2TOUVROW_LASX +ANY12S(YUY2ToUVRow_Any_LASX, YUY2ToUVRow_LASX, 1, 4, 31) +#endif #ifdef HAS_UYVYTOUVROW_MSA ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31) #endif +#ifdef HAS_UYVYTOUVROW_LASX +ANY12S(UYVYToUVRow_Any_LASX, UYVYToUVRow_LASX, 1, 4, 31) +#endif #ifdef HAS_UYVYTOUVROW_MMI ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15) #endif diff --git a/source/row_lasx.cc b/source/row_lasx.cc index 0e7b38a1..b9c7cc16 100644 --- a/source/row_lasx.cc +++ b/source/row_lasx.cc @@ -197,6 +197,125 @@ extern "C" { pdst_argb += 64; \ } +void MirrorRow_LASX(const uint8_t* src, 
uint8_t* dst, int width) { + int x; + int len = width / 64; + __m256i src0, src1; + __m256i shuffler = {0x08090A0B0C0D0E0F, 0x0001020304050607, + 0x08090A0B0C0D0E0F, 0x0001020304050607}; + src += width - 64; + for (x = 0; x < len; x++) { + DUP2_ARG2(__lasx_xvld, src, 0, src, 32, src0, src1); + DUP2_ARG3(__lasx_xvshuf_b, src0, src0, shuffler, + src1, src1, shuffler, src0, src1); + src0 = __lasx_xvpermi_q(src0, src0, 0x01); + src1 = __lasx_xvpermi_q(src1, src1, 0x01); + __lasx_xvst(src1, dst, 0); + __lasx_xvst(src0, dst, 32); + dst += 64; + src -= 64; + } +} + +void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width) { + int x; + int len = width / 16; + __m256i src, dst; + __m256i shuffler = {0x0004000500060007, 0x0000000100020003, + 0x0004000500060007, 0x0000000100020003}; + + src_uv += (width - 16) << 1; + for (x = 0; x < len; x++) { + src = __lasx_xvld(src_uv, 0); + dst = __lasx_xvshuf_h(shuffler, src, src); + dst = __lasx_xvpermi_q(dst, dst, 0x01); + __lasx_xvst(dst, dst_uv, 0); + src_uv -= 32; + dst_uv += 32; + } +} + +void ARGBMirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width) { + int x; + int len = width / 16; + __m256i src0, src1; + __m256i dst0, dst1; + __m256i shuffler = {0x0B0A09080F0E0D0C, 0x0302010007060504, + 0x0B0A09080F0E0D0C, 0x0302010007060504}; + src += (width * 4) - 64; + for (x = 0; x < len; x++) { + DUP2_ARG2(__lasx_xvld, src, 0, src, 32, src0, src1); + DUP2_ARG3(__lasx_xvshuf_b, src0, src0, shuffler, + src1, src1, shuffler, src0, src1); + dst1 = __lasx_xvpermi_q(src0, src0, 0x01); + dst0 = __lasx_xvpermi_q(src1, src1, 0x01); + __lasx_xvst(dst0, dst, 0); + __lasx_xvst(dst1, dst, 32); + dst += 64; + src -= 64; + } +} + +void I422ToYUY2Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width) { + int x; + int len = width / 32; + __m256i src_u0, src_v0, src_y0, vec_uv0; + __m256i vec_yuy2_0, vec_yuy2_1; + __m256i dst_yuy2_0, dst_yuy2_1; + + for (x = 0; x < len; x++) { + 
DUP2_ARG2(__lasx_xvld, src_u, 0, src_v, 0, src_u0, src_v0); + src_y0 = __lasx_xvld(src_y, 0); + src_u0 = __lasx_xvpermi_d(src_u0, 0xD8); + src_v0 = __lasx_xvpermi_d(src_v0, 0xD8); + vec_uv0 = __lasx_xvilvl_b(src_v0, src_u0); + vec_yuy2_0 = __lasx_xvilvl_b(vec_uv0, src_y0); + vec_yuy2_1 = __lasx_xvilvh_b(vec_uv0, src_y0); + dst_yuy2_0 = __lasx_xvpermi_q(vec_yuy2_1, vec_yuy2_0, 0x20); + dst_yuy2_1 = __lasx_xvpermi_q(vec_yuy2_1, vec_yuy2_0, 0x31); + __lasx_xvst(dst_yuy2_0, dst_yuy2, 0); + __lasx_xvst(dst_yuy2_1, dst_yuy2, 32); + src_u += 16; + src_v += 16; + src_y += 32; + dst_yuy2 += 64; + } +} + +void I422ToUYVYRow_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width) { + int x; + int len = width / 32; + __m256i src_u0, src_v0, src_y0, vec_uv0; + __m256i vec_uyvy0, vec_uyvy1; + __m256i dst_uyvy0, dst_uyvy1; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lasx_xvld, src_u, 0, src_v, 0, src_u0, src_v0); + src_y0 = __lasx_xvld(src_y, 0); + src_u0 = __lasx_xvpermi_d(src_u0, 0xD8); + src_v0 = __lasx_xvpermi_d(src_v0, 0xD8); + vec_uv0 = __lasx_xvilvl_b(src_v0, src_u0); + vec_uyvy0 = __lasx_xvilvl_b(src_y0, vec_uv0); + vec_uyvy1 = __lasx_xvilvh_b(src_y0, vec_uv0); + dst_uyvy0 = __lasx_xvpermi_q(vec_uyvy1, vec_uyvy0, 0x20); + dst_uyvy1 = __lasx_xvpermi_q(vec_uyvy1, vec_uyvy0, 0x31); + __lasx_xvst(dst_uyvy0, dst_uyvy, 0); + __lasx_xvst(dst_uyvy1, dst_uyvy, 32); + src_u += 16; + src_v += 16; + src_y += 32; + dst_uyvy +=64; + } +} + void I422ToARGBRow_LASX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -295,6 +414,929 @@ void I422AlphaToARGBRow_LASX(const uint8_t* src_y, } } +void I422ToRGB24Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int32_t width) { + int x; + int len = width / 32; + __m256i vec_yb, vec_yg; + __m256i vec_ubvr, vec_ugvg; + __m256i const_0x80 = __lasx_xvldi(0x80); + __m256i shuffler0 = 
{0x0504120302100100, 0x0A18090816070614, + 0x0504120302100100, 0x0A18090816070614}; + __m256i shuffler1 = {0x1E0F0E1C0D0C1A0B, 0x1E0F0E1C0D0C1A0B, + 0x1E0F0E1C0D0C1A0B, 0x1E0F0E1C0D0C1A0B}; + + YUVTORGB_SETUP(yuvconstants, vec_ubvr, vec_ugvg, vec_yg, vec_yb); + + for (x = 0; x < len; x++) { + __m256i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h; + __m256i temp0, temp1, temp2, temp3; + + READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h); + YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, + vec_yb, b_l, b_h, g_l, g_h, r_l, r_h); + temp0 = __lasx_xvpackev_b(g_l, b_l); + temp1 = __lasx_xvpackev_b(g_h, b_h); + DUP4_ARG3(__lasx_xvshuf_b, r_l, temp0, shuffler1, r_h, temp1, shuffler1, + r_l, temp0, shuffler0, r_h, temp1, shuffler0, temp2, temp3, temp0, temp1); + + b_l = __lasx_xvilvl_d(temp1, temp2); + b_h = __lasx_xvilvh_d(temp3, temp1); + temp1 = __lasx_xvpermi_q(b_l, temp0, 0x20); + temp2 = __lasx_xvpermi_q(temp0, b_h, 0x30); + temp3 = __lasx_xvpermi_q(b_h, b_l, 0x31); + __lasx_xvst(temp1, dst_argb, 0); + __lasx_xvst(temp2, dst_argb, 32); + __lasx_xvst(temp3, dst_argb, 64); + dst_argb += 96; + src_y += 32; + src_u += 16; + src_v += 16; + } +} + +// TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R. 
+void I422ToRGB565Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int len = width / 32; + __m256i vec_yb, vec_yg; + __m256i vec_ubvr, vec_ugvg; + __m256i const_0x80 = __lasx_xvldi(0x80); + + YUVTORGB_SETUP(yuvconstants, vec_ubvr, vec_ugvg, vec_yg, vec_yb); + + for (x = 0; x < len; x++) { + __m256i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h; + __m256i dst_l, dst_h; + + READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h); + YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, + vec_yb, b_l, b_h, g_l, g_h, r_l, r_h); + b_l = __lasx_xvsrli_h(b_l, 3); + b_h = __lasx_xvsrli_h(b_h, 3); + g_l = __lasx_xvsrli_h(g_l, 2); + g_h = __lasx_xvsrli_h(g_h, 2); + r_l = __lasx_xvsrli_h(r_l, 3); + r_h = __lasx_xvsrli_h(r_h, 3); + r_l = __lasx_xvslli_h(r_l, 11); + r_h = __lasx_xvslli_h(r_h, 11); + g_l = __lasx_xvslli_h(g_l, 5); + g_h = __lasx_xvslli_h(g_h, 5); + r_l = __lasx_xvor_v(r_l, g_l); + r_l = __lasx_xvor_v(r_l, b_l); + r_h = __lasx_xvor_v(r_h, g_h); + r_h = __lasx_xvor_v(r_h, b_h); + dst_l = __lasx_xvpermi_q(r_h, r_l, 0x20); + dst_h = __lasx_xvpermi_q(r_h, r_l, 0x31); + __lasx_xvst(dst_l, dst_rgb565, 0); + __lasx_xvst(dst_h, dst_rgb565, 32); + dst_rgb565 += 64; + src_y += 32; + src_u += 16; + src_v += 16; + } +} + +// TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G. 
+void I422ToARGB4444Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int len = width / 32; + __m256i vec_yb, vec_yg; + __m256i vec_ubvr, vec_ugvg; + __m256i const_0x80 = __lasx_xvldi(0x80); + __m256i alpha = {0xF000F000F000F000, 0xF000F000F000F000, + 0xF000F000F000F000, 0xF000F000F000F000}; + __m256i mask = {0x00F000F000F000F0, 0x00F000F000F000F0, + 0x00F000F000F000F0, 0x00F000F000F000F0}; + + YUVTORGB_SETUP(yuvconstants, vec_ubvr, vec_ugvg, vec_yg, vec_yb); + + for (x = 0; x < len; x++) { + __m256i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h; + __m256i dst_l, dst_h; + + READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h); + YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, + vec_yb, b_l, b_h, g_l, g_h, r_l, r_h); + b_l = __lasx_xvsrli_h(b_l, 4); + b_h = __lasx_xvsrli_h(b_h, 4); + r_l = __lasx_xvsrli_h(r_l, 4); + r_h = __lasx_xvsrli_h(r_h, 4); + g_l = __lasx_xvand_v(g_l, mask); + g_h = __lasx_xvand_v(g_h, mask); + r_l = __lasx_xvslli_h(r_l, 8); + r_h = __lasx_xvslli_h(r_h, 8); + r_l = __lasx_xvor_v(r_l, alpha); + r_h = __lasx_xvor_v(r_h, alpha); + r_l = __lasx_xvor_v(r_l, g_l); + r_h = __lasx_xvor_v(r_h, g_h); + r_l = __lasx_xvor_v(r_l, b_l); + r_h = __lasx_xvor_v(r_h, b_h); + dst_l = __lasx_xvpermi_q(r_h, r_l, 0x20); + dst_h = __lasx_xvpermi_q(r_h, r_l, 0x31); + __lasx_xvst(dst_l, dst_argb4444, 0); + __lasx_xvst(dst_h, dst_argb4444, 32); + dst_argb4444 += 64; + src_y += 32; + src_u += 16; + src_v += 16; + } +} + +void I422ToARGB1555Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int len = width / 32; + __m256i vec_yb, vec_yg; + __m256i vec_ubvr, vec_ugvg; + __m256i const_0x80 = __lasx_xvldi(0x80); + __m256i alpha = {0x8000800080008000, 0x8000800080008000, + 0x8000800080008000, 0x8000800080008000}; + + 
YUVTORGB_SETUP(yuvconstants, vec_ubvr, vec_ugvg, vec_yg, vec_yb); + + for (x = 0; x < len; x++) { + __m256i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h; + __m256i dst_l, dst_h; + + READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h); + YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, + vec_yb, b_l, b_h, g_l, g_h, r_l, r_h); + b_l = __lasx_xvsrli_h(b_l, 3); + b_h = __lasx_xvsrli_h(b_h, 3); + g_l = __lasx_xvsrli_h(g_l, 3); + g_h = __lasx_xvsrli_h(g_h, 3); + g_l = __lasx_xvslli_h(g_l, 5); + g_h = __lasx_xvslli_h(g_h, 5); + r_l = __lasx_xvsrli_h(r_l, 3); + r_h = __lasx_xvsrli_h(r_h, 3); + r_l = __lasx_xvslli_h(r_l, 10); + r_h = __lasx_xvslli_h(r_h, 10); + r_l = __lasx_xvor_v(r_l, alpha); + r_h = __lasx_xvor_v(r_h, alpha); + r_l = __lasx_xvor_v(r_l, g_l); + r_h = __lasx_xvor_v(r_h, g_h); + r_l = __lasx_xvor_v(r_l, b_l); + r_h = __lasx_xvor_v(r_h, b_h); + dst_l = __lasx_xvpermi_q(r_h, r_l, 0x20); + dst_h = __lasx_xvpermi_q(r_h, r_l, 0x31); + __lasx_xvst(dst_l, dst_argb1555, 0); + __lasx_xvst(dst_h, dst_argb1555, 32); + dst_argb1555 += 64; + src_y += 32; + src_u += 16; + src_v += 16; + } +} + +void YUY2ToYRow_LASX(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { + int x; + int len = width / 32; + __m256i src0, src1, dst0; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lasx_xvld, src_yuy2, 0, src_yuy2, 32, src0, src1); + dst0 = __lasx_xvpickev_b(src1, src0); + dst0 = __lasx_xvpermi_d(dst0, 0xD8); + __lasx_xvst(dst0, dst_y, 0); + src_yuy2 += 64; + dst_y += 32; + } +} + +void YUY2ToUVRow_LASX(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_yuy2_next = src_yuy2 + src_stride_yuy2; + int x; + int len = width / 32; + __m256i src0, src1, src2, src3; + __m256i tmp0, dst0, dst1; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_yuy2, 0, src_yuy2, 32, src_yuy2_next, 0, + src_yuy2_next, 32, src0, src1, src2, src3); + src0 = __lasx_xvpickod_b(src1, src0); + src1 = __lasx_xvpickod_b(src3, src2); + 
tmp0 = __lasx_xvavgr_bu(src1, src0); + tmp0 = __lasx_xvpermi_d(tmp0, 0xD8); + dst0 = __lasx_xvpickev_b(tmp0, tmp0); + dst1 = __lasx_xvpickod_b(tmp0, tmp0); + __lasx_xvstelm_d(dst0, dst_u, 0, 0); + __lasx_xvstelm_d(dst0, dst_u, 8, 2); + __lasx_xvstelm_d(dst1, dst_v, 0, 0); + __lasx_xvstelm_d(dst1, dst_v, 8, 2); + src_yuy2 += 64; + src_yuy2_next += 64; + dst_u += 16; + dst_v += 16; + } +} + +void YUY2ToUV422Row_LASX(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + int len = width / 32; + __m256i src0, src1, tmp0, dst0, dst1; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lasx_xvld, src_yuy2, 0, src_yuy2, 32, src0, src1); + tmp0 = __lasx_xvpickod_b(src1, src0); + tmp0 = __lasx_xvpermi_d(tmp0, 0xD8); + dst0 = __lasx_xvpickev_b(tmp0, tmp0); + dst1 = __lasx_xvpickod_b(tmp0, tmp0); + __lasx_xvstelm_d(dst0, dst_u, 0, 0); + __lasx_xvstelm_d(dst0, dst_u, 8, 2); + __lasx_xvstelm_d(dst1, dst_v, 0, 0); + __lasx_xvstelm_d(dst1, dst_v, 8, 2); + src_yuy2 += 64; + dst_u += 16; + dst_v += 16; + } +} + +void UYVYToYRow_LASX(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { + int x; + int len = width / 32; + __m256i src0, src1, dst0; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lasx_xvld, src_uyvy, 0, src_uyvy, 32, src0, src1); + dst0 = __lasx_xvpickod_b(src1, src0); + dst0 = __lasx_xvpermi_d(dst0, 0xD8); + __lasx_xvst(dst0, dst_y, 0); + src_uyvy += 64; + dst_y += 32; + } +} + +void UYVYToUVRow_LASX(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_uyvy_next = src_uyvy + src_stride_uyvy; + int x; + int len = width / 32; + __m256i src0, src1, src2, src3, tmp0, dst0, dst1; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_uyvy, 0, src_uyvy, 32, src_uyvy_next, 0, + src_uyvy_next, 32, src0, src1, src2, src3); + src0 = __lasx_xvpickev_b(src1, src0); + src1 = __lasx_xvpickev_b(src3, src2); + tmp0 = __lasx_xvavgr_bu(src1, src0); + tmp0 = __lasx_xvpermi_d(tmp0, 0xD8); + 
// ---------------------------------------------------------------------------
// LoongArch LASX (256-bit SIMD) row kernels for libyuv (source/row_lasx.cc).
// Each function processes pixels in fixed-size vector batches (16 or 32
// pixels per loop iteration, visible from `width / N` and the pointer
// increments); remainders are handled by the *_Any wrappers declared via the
// HAS_*_LASX macros in row.h.  libyuv "ARGB" is byte order B,G,R,A in memory.
// NOTE(review): lane semantics of the xvldi / xvpermi / xvshuf immediates are
// inferred from the visible code — confirm against the LASX intrinsics manual.
// ---------------------------------------------------------------------------

    // --- tail of UYVYToUVRow_LASX (function opens before this chunk) ---
    // Split the packed chroma bytes into U (even) and V (odd) and store
    // 16 bytes of each: the two 8-byte element stores pick the low 64-bit
    // element of each 128-bit half of the 256-bit register.
    dst0 = __lasx_xvpickev_b(tmp0, tmp0);
    dst1 = __lasx_xvpickod_b(tmp0, tmp0);
    __lasx_xvstelm_d(dst0, dst_u, 0, 0);
    __lasx_xvstelm_d(dst0, dst_u, 8, 2);
    __lasx_xvstelm_d(dst1, dst_v, 0, 0);
    __lasx_xvstelm_d(dst1, dst_v, 8, 2);
    src_uyvy += 64;  // 32 UYVY pixels = 64 bytes per row consumed
    src_uyvy_next += 64;
    dst_u += 16;
    dst_v += 16;
  }
}

// Extracts the U and V planes from one row of packed UYVY (no vertical
// subsampling — single-row variant).  32 pixels (64 bytes) per iteration.
void UYVYToUV422Row_LASX(const uint8_t* src_uyvy,
                         uint8_t* dst_u,
                         uint8_t* dst_v,
                         int width) {
  int x;
  int len = width / 32;
  __m256i src0, src1, tmp0, dst0, dst1;

  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lasx_xvld, src_uyvy, 0, src_uyvy, 32, src0, src1);
    // Even bytes of UYVY are the chroma samples (U,V interleaved); pick
    // them, fix cross-lane order (0xD8 = identity within 128-bit halves,
    // swapping the middle two 64-bit words), then split U from V.
    tmp0 = __lasx_xvpickev_b(src1, src0);
    tmp0 = __lasx_xvpermi_d(tmp0, 0xD8);
    dst0 = __lasx_xvpickev_b(tmp0, tmp0);
    dst1 = __lasx_xvpickod_b(tmp0, tmp0);
    __lasx_xvstelm_d(dst0, dst_u, 0, 0);
    __lasx_xvstelm_d(dst0, dst_u, 8, 2);
    __lasx_xvstelm_d(dst1, dst_v, 0, 0);
    __lasx_xvstelm_d(dst1, dst_v, 8, 2);
    src_uyvy += 64;
    dst_u += 16;
    dst_v += 16;
  }
}

// Converts one row of ARGB to luma Y.  From the constants and even/odd lane
// multiplies below: Y = (25*B + 129*G + 66*R + 0x1080) >> 8, i.e. BT.601
// studio-range weights with rounding and a +16 offset folded into 0x1080.
// 32 pixels (128 bytes) per iteration.
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
  int x;
  int len = width / 32;
  __m256i src0, src1, src2, src3, vec0, vec1, vec2, vec3;
  __m256i tmp0, tmp1, dst0;
  __m256i const_19 = __lasx_xvldi(0x19);  // 25  (B weight)
  __m256i const_42 = __lasx_xvldi(0x42);  // 66  (R weight)
  __m256i const_81 = __lasx_xvldi(0x81);  // 129 (G weight)
  __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
                        0x1080108010801080, 0x1080108010801080};
  // Word permutation that undoes the 128-bit-lane interleave of the
  // pick/narrow operations so bytes land in memory order.
  __m256i control = {0x0000000400000000, 0x0000000500000001,
                     0x0000000600000002, 0x0000000700000003};

  for (x = 0; x < len; x++) {
    DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64,
              src_argb0, 96, src0, src1, src2, src3);
    // vec0/vec1: even bytes = B,R interleaved; vec2/vec3: odd bytes = G,A.
    vec0 = __lasx_xvpickev_b(src1, src0);
    vec1 = __lasx_xvpickev_b(src3, src2);
    vec2 = __lasx_xvpickod_b(src1, src0);
    vec3 = __lasx_xvpickod_b(src3, src2);
    // Accumulate 25*B (even lanes of vec0/1), 129*G (even lanes of vec2/3),
    // 66*R (odd lanes of vec0/1) on top of the 0x1080 rounding bias.
    tmp0 = __lasx_xvmaddwev_h_bu(const_1080, vec0, const_19);
    tmp1 = __lasx_xvmaddwev_h_bu(const_1080, vec1, const_19);
    tmp0 = __lasx_xvmaddwev_h_bu(tmp0, vec2, const_81);
    tmp1 = __lasx_xvmaddwev_h_bu(tmp1, vec3, const_81);
    tmp0 = __lasx_xvmaddwod_h_bu(tmp0, vec0, const_42);
    tmp1 = __lasx_xvmaddwod_h_bu(tmp1, vec1, const_42);
    dst0 = __lasx_xvssrani_b_h(tmp1, tmp0, 8);  // >>8, saturate, narrow
    dst0 = __lasx_xvperm_w(dst0, control);
    __lasx_xvst(dst0, dst_y, 0);
    src_argb0 += 128;
    dst_y += 32;
  }
}

// Converts two rows of ARGB to 2x2-subsampled U and V.  Channel sums of the
// two rows are built with addwev/addwod, then horizontally averaged; the
// coefficients are therefore half the usual ones (56,37,19 / 56,47,9):
//   U = (0x8080 + 56*B - 37*G - 19*R) >> 8
//   V = (0x8080 + 56*R - 47*G -  9*B) >> 8
// 32 output chroma samples (64 source pixels per row) per iteration.
void ARGBToUVRow_LASX(const uint8_t* src_argb0,
                      int src_stride_argb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int x;
  int len = width / 32;
  const uint8_t* src_argb1 = src_argb0 + src_stride_argb;  // row below

  __m256i src0, src1, src2, src3, src4, src5, src6, src7;
  __m256i vec0, vec1, vec2, vec3;
  __m256i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, dst0, dst1;
  __m256i const_0x70 = {0x0038003800380038, 0x0038003800380038,
                        0x0038003800380038, 0x0038003800380038};  // 56
  __m256i const_0x4A = {0x0025002500250025, 0x0025002500250025,
                        0x0025002500250025, 0x0025002500250025};  // 37
  __m256i const_0x26 = {0x0013001300130013, 0x0013001300130013,
                        0x0013001300130013, 0x0013001300130013};  // 19
  __m256i const_0x5E = {0x002f002f002f002f, 0x002f002f002f002f,
                        0x002f002f002f002f, 0x002f002f002f002f};  // 47
  __m256i const_0x12 = {0x0009000900090009, 0x0009000900090009,
                        0x0009000900090009, 0x0009000900090009};  // 9
  __m256i control = {0x0000000400000000, 0x0000000500000001,
                     0x0000000600000002, 0x0000000700000003};
  __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080,
                          0x8080808080808080, 0x8080808080808080};

  for (x = 0; x < len; x++) {
    DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64,
              src_argb0, 96, src0, src1, src2, src3);
    DUP4_ARG2(__lasx_xvld, src_argb1, 0, src_argb1, 32, src_argb1, 64,
              src_argb1, 96, src4, src5, src6, src7);
    // Vertical sums of even bytes (B,R interleaved as halfwords).
    vec0 = __lasx_xvaddwev_h_bu(src0, src4);
    vec1 = __lasx_xvaddwev_h_bu(src1, src5);
    vec2 = __lasx_xvaddwev_h_bu(src2, src6);
    vec3 = __lasx_xvaddwev_h_bu(src3, src7);
    tmp0 = __lasx_xvpickev_h(vec1, vec0);  // B sums
    tmp1 = __lasx_xvpickev_h(vec3, vec2);
    tmp2 = __lasx_xvpickod_h(vec1, vec0);  // R sums
    tmp3 = __lasx_xvpickod_h(vec3, vec2);
    // Vertical sums of odd bytes (G,A); only G is used below.
    vec0 = __lasx_xvaddwod_h_bu(src0, src4);
    vec1 = __lasx_xvaddwod_h_bu(src1, src5);
    vec2 = __lasx_xvaddwod_h_bu(src2, src6);
    vec3 = __lasx_xvaddwod_h_bu(src3, src7);
    tmp4 = __lasx_xvpickev_h(vec1, vec0);  // G sums
    tmp5 = __lasx_xvpickev_h(vec3, vec2);
    // Horizontal 2x average of adjacent columns: src0 = B, src1 = R, src2 = G.
    vec0 = __lasx_xvpickev_h(tmp1, tmp0);
    vec1 = __lasx_xvpickod_h(tmp1, tmp0);
    src0 = __lasx_xvavgr_h(vec0, vec1);
    vec0 = __lasx_xvpickev_h(tmp3, tmp2);
    vec1 = __lasx_xvpickod_h(tmp3, tmp2);
    src1 = __lasx_xvavgr_h(vec0, vec1);
    vec0 = __lasx_xvpickev_h(tmp5, tmp4);
    vec1 = __lasx_xvpickod_h(tmp5, tmp4);
    src2 = __lasx_xvavgr_h(vec0, vec1);
    dst0 = __lasx_xvmadd_h(const_0x8080, src0, const_0x70);  // U: +56*B
    dst0 = __lasx_xvmsub_h(dst0, src2, const_0x4A);          //    -37*G
    dst0 = __lasx_xvmsub_h(dst0, src1, const_0x26);          //    -19*R
    dst1 = __lasx_xvmadd_h(const_0x8080, src1, const_0x70);  // V: +56*R
    dst1 = __lasx_xvmsub_h(dst1, src2, const_0x5E);          //    -47*G
    dst1 = __lasx_xvmsub_h(dst1, src0, const_0x12);          //    -9*B
    dst0 = __lasx_xvperm_w(dst0, control);
    dst1 = __lasx_xvperm_w(dst1, control);
    dst0 = __lasx_xvssrani_b_h(dst0, dst0, 8);
    dst1 = __lasx_xvssrani_b_h(dst1, dst1, 8);
    __lasx_xvstelm_d(dst0, dst_u, 0, 0);
    __lasx_xvstelm_d(dst0, dst_u, 8, 2);
    __lasx_xvstelm_d(dst1, dst_v, 0, 0);
    __lasx_xvstelm_d(dst1, dst_v, 8, 2);
    src_argb0 += 128;
    src_argb1 += 128;
    dst_u += 16;
    dst_v += 16;
  }
}

// Drops the alpha byte: ARGB (4 bpp) -> RGB24 (3 bpp).  Each xvst writes a
// full 32 bytes at 24-byte strides, so consecutive stores deliberately
// overlap by 8 bytes; the final 32 pixels are peeled out of the loop and
// finished with exact 8-byte element stores so the last write does not run
// past the end of dst_rgb.  Hence `len = (width / 32) - 1` plus a tail.
void ARGBToRGB24Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  int len = (width / 32) - 1;
  __m256i src0, src1, src2, src3;
  __m256i tmp0, tmp1, tmp2, tmp3;
  // Per-128-bit-lane byte gather of B,G,R triples (alpha skipped).
  __m256i shuf = {0x0908060504020100, 0x000000000E0D0C0A,
                  0x0908060504020100, 0x000000000E0D0C0A};
  // Compacts the 24 valid bytes of each register to the front.
  __m256i control = {0x0000000100000000, 0x0000000400000002,
                     0x0000000600000005, 0x0000000700000003};
  for (x = 0; x < len; x++) {
    DUP4_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src_argb, 64,
              src_argb, 96, src0, src1, src2, src3);
    tmp0 = __lasx_xvshuf_b(src0, src0, shuf);
    tmp1 = __lasx_xvshuf_b(src1, src1, shuf);
    tmp2 = __lasx_xvshuf_b(src2, src2, shuf);
    tmp3 = __lasx_xvshuf_b(src3, src3, shuf);
    tmp0 = __lasx_xvperm_w(tmp0, control);
    tmp1 = __lasx_xvperm_w(tmp1, control);
    tmp2 = __lasx_xvperm_w(tmp2, control);
    tmp3 = __lasx_xvperm_w(tmp3, control);
    __lasx_xvst(tmp0, dst_rgb, 0);
    __lasx_xvst(tmp1, dst_rgb, 24);
    __lasx_xvst(tmp2, dst_rgb, 48);
    __lasx_xvst(tmp3, dst_rgb, 72);
    dst_rgb += 96;
    src_argb += 128;
  }
  // Tail: last batch of 32 pixels, bounded final stores (24 bytes exactly).
  DUP4_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src_argb, 64,
            src_argb, 96, src0, src1, src2, src3);
  tmp0 = __lasx_xvshuf_b(src0, src0, shuf);
  tmp1 = __lasx_xvshuf_b(src1, src1, shuf);
  tmp2 = __lasx_xvshuf_b(src2, src2, shuf);
  tmp3 = __lasx_xvshuf_b(src3, src3, shuf);
  tmp0 = __lasx_xvperm_w(tmp0, control);
  tmp1 = __lasx_xvperm_w(tmp1, control);
  tmp2 = __lasx_xvperm_w(tmp2, control);
  tmp3 = __lasx_xvperm_w(tmp3, control);
  __lasx_xvst(tmp0, dst_rgb, 0);
  __lasx_xvst(tmp1, dst_rgb, 24);
  __lasx_xvst(tmp2, dst_rgb, 48);
  dst_rgb += 72;
  __lasx_xvstelm_d(tmp3, dst_rgb, 0, 0);
  __lasx_xvstelm_d(tmp3, dst_rgb, 8, 1);
  __lasx_xvstelm_d(tmp3, dst_rgb, 16, 2);
}

// ARGB -> RAW (R,G,B byte order, 3 bpp).  Identical structure to
// ARGBToRGB24Row_LASX above (overlapping 32-byte stores, peeled tail);
// only the byte-shuffle pattern differs, swapping B and R.
void ARGBToRAWRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  int len = (width / 32) - 1;
  __m256i src0, src1, src2, src3;
  __m256i tmp0, tmp1, tmp2, tmp3;
  __m256i shuf = {0x090A040506000102, 0x000000000C0D0E08,
                  0x090A040506000102, 0x000000000C0D0E08};
  __m256i control = {0x0000000100000000, 0x0000000400000002,
                     0x0000000600000005, 0x0000000700000003};
  for (x = 0; x < len; x++) {
    DUP4_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src_argb, 64,
              src_argb, 96, src0, src1, src2, src3);
    tmp0 = __lasx_xvshuf_b(src0, src0, shuf);
    tmp1 = __lasx_xvshuf_b(src1, src1, shuf);
    tmp2 = __lasx_xvshuf_b(src2, src2, shuf);
    tmp3 = __lasx_xvshuf_b(src3, src3, shuf);
    tmp0 = __lasx_xvperm_w(tmp0, control);
    tmp1 = __lasx_xvperm_w(tmp1, control);
    tmp2 = __lasx_xvperm_w(tmp2, control);
    tmp3 = __lasx_xvperm_w(tmp3, control);
    __lasx_xvst(tmp0, dst_rgb, 0);
    __lasx_xvst(tmp1, dst_rgb, 24);
    __lasx_xvst(tmp2, dst_rgb, 48);
    __lasx_xvst(tmp3, dst_rgb, 72);
    dst_rgb += 96;
    src_argb += 128;
  }
  // Tail batch with exact-width final stores.
  DUP4_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src_argb, 64,
            src_argb, 96, src0, src1, src2, src3);
  tmp0 = __lasx_xvshuf_b(src0, src0, shuf);
  tmp1 = __lasx_xvshuf_b(src1, src1, shuf);
  tmp2 = __lasx_xvshuf_b(src2, src2, shuf);
  tmp3 = __lasx_xvshuf_b(src3, src3, shuf);
  tmp0 = __lasx_xvperm_w(tmp0, control);
  tmp1 = __lasx_xvperm_w(tmp1, control);
  tmp2 = __lasx_xvperm_w(tmp2, control);
  tmp3 = __lasx_xvperm_w(tmp3, control);
  __lasx_xvst(tmp0, dst_rgb, 0);
  __lasx_xvst(tmp1, dst_rgb, 24);
  __lasx_xvst(tmp2, dst_rgb, 48);
  dst_rgb += 72;
  __lasx_xvstelm_d(tmp3, dst_rgb, 0, 0);
  __lasx_xvstelm_d(tmp3, dst_rgb, 8, 1);
  __lasx_xvstelm_d(tmp3, dst_rgb, 16, 2);
}

// ARGB -> RGB565 (5-bit B/R, 6-bit G, little-endian halfword per pixel).
// 16 pixels per iteration.  The per-byte variable shift `shift` is 0 for B
// bytes and 3 for R bytes so R lands in bits 11..15 of its halfword.
void ARGBToRGB565Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  int len = width / 16;
  __m256i zero = __lasx_xvldi(0);
  __m256i src0, src1, tmp0, tmp1, dst0;
  __m256i shift = {0x0300030003000300, 0x0300030003000300,
                   0x0300030003000300, 0x0300030003000300};

  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src0, src1);
    tmp0 = __lasx_xvpickev_b(src1, src0);  // B,R interleaved
    tmp1 = __lasx_xvpickod_b(src1, src0);  // G,A interleaved
    tmp0 = __lasx_xvsrli_b(tmp0, 3);       // B>>3, R>>3
    tmp1 = __lasx_xvpackev_b(zero, tmp1);  // widen G to halfwords
    tmp1 = __lasx_xvsrli_h(tmp1, 2);       // G>>2 (6 bits)
    tmp0 = __lasx_xvsll_b(tmp0, shift);    // B<<0, R<<3 within their bytes
    tmp1 = __lasx_xvslli_h(tmp1, 5);       // G into bits 5..10
    dst0 = __lasx_xvor_v(tmp0, tmp1);
    dst0 = __lasx_xvpermi_d(dst0, 0xD8);   // restore memory order
    __lasx_xvst(dst0, dst_rgb, 0);
    dst_rgb += 32;
    src_argb += 64;
  }
}

// ARGB -> ARGB1555 (1-bit A in bit 15, 5 bits each for R,G,B).
// 16 pixels per iteration; shift1/shift2 hold per-byte shift amounts for
// the interleaved G,A and B,R byte pairs respectively.
void ARGBToARGB1555Row_LASX(const uint8_t* src_argb,
                            uint8_t* dst_rgb,
                            int width) {
  int x;
  int len = width / 16;
  __m256i zero = __lasx_xvldi(0);
  __m256i src0, src1, tmp0, tmp1, tmp2, tmp3, dst0;
  __m256i shift1 = {0x0703070307030703, 0x0703070307030703,
                    0x0703070307030703, 0x0703070307030703};
  __m256i shift2 = {0x0200020002000200, 0x0200020002000200,
                    0x0200020002000200, 0x0200020002000200};

  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src0, src1);
    tmp0 = __lasx_xvpickev_b(src1, src0);   // B,R
    tmp1 = __lasx_xvpickod_b(src1, src0);   // G,A
    tmp0 = __lasx_xvsrli_b(tmp0, 3);        // 5-bit B and R
    tmp1 = __lasx_xvsrl_b(tmp1, shift1);    // G>>3, A>>7 (alpha -> 1 bit)
    tmp0 = __lasx_xvsll_b(tmp0, shift2);    // B<<0, R<<2 (R -> bits 10..14)
    tmp2 = __lasx_xvpackev_b(zero, tmp1);   // widen G
    tmp3 = __lasx_xvpackod_b(zero, tmp1);   // widen A
    tmp2 = __lasx_xvslli_h(tmp2, 5);        // G -> bits 5..9
    tmp3 = __lasx_xvslli_h(tmp3, 15);       // A -> bit 15
    dst0 = __lasx_xvor_v(tmp0, tmp2);
    dst0 = __lasx_xvor_v(dst0, tmp3);
    dst0 = __lasx_xvpermi_d(dst0, 0xD8);
    __lasx_xvst(dst0, dst_rgb, 0);
    dst_rgb += 32;
    src_argb += 64;
  }
}

// ARGB -> ARGB4444: keep the high nibble of G/A, the high nibble of B/R is
// shifted into the low nibble.  16 pixels per iteration.
void ARGBToARGB4444Row_LASX(const uint8_t* src_argb,
                            uint8_t* dst_rgb,
                            int width) {
  int x;
  int len = width / 16;
  __m256i src0, src1, tmp0, tmp1, dst0;

  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src0, src1);
    tmp0 = __lasx_xvpickev_b(src1, src0);  // B,R
    tmp1 = __lasx_xvpickod_b(src1, src0);  // G,A
    tmp1 = __lasx_xvandi_b(tmp1, 0xF0);    // top nibble of G/A
    tmp0 = __lasx_xvsrli_b(tmp0, 4);       // top nibble of B/R -> low nibble
    dst0 = __lasx_xvor_v(tmp1, tmp0);
    dst0 = __lasx_xvpermi_d(dst0, 0xD8);
    __lasx_xvst(dst0, dst_rgb, 0);
    dst_rgb += 32;
    src_argb += 64;
  }
}

// Full-resolution (4:4:4) ARGB -> U,V conversion, one chroma sample per
// pixel, full-strength coefficients:
//   U = (0x8080 + 112*B - 74*G - 38*R) >> 8
//   V = (0x8080 + 112*R - 94*G - 18*B) >> 8
// 32 pixels per iteration.
void ARGBToUV444Row_LASX(const uint8_t* src_argb,
                         uint8_t* dst_u,
                         uint8_t* dst_v,
                         int32_t width) {
  int x;
  int len = width / 32;
  __m256i src0, src1, src2, src3;
  __m256i tmp0, tmp1, tmp2, tmp3;
  __m256i reg0, reg1, reg2, reg3, dst0, dst1;
  __m256i const_112 = __lasx_xvldi(112);
  __m256i const_74 = __lasx_xvldi(74);
  __m256i const_38 = __lasx_xvldi(38);
  __m256i const_94 = __lasx_xvldi(94);
  __m256i const_18 = __lasx_xvldi(18);
  __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080,
                          0x8080808080808080, 0x8080808080808080};
  __m256i control = {0x0000000400000000, 0x0000000500000001,
                     0x0000000600000002, 0x0000000700000003};
  for (x = 0; x < len; x++) {
    DUP4_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src_argb, 64,
              src_argb, 96, src0, src1, src2, src3);
    // Halfword picks: tmp0/tmp2 hold the B,G byte pairs, tmp1/tmp3 the R,A.
    tmp0 = __lasx_xvpickev_h(src1, src0);
    tmp1 = __lasx_xvpickod_h(src1, src0);
    tmp2 = __lasx_xvpickev_h(src3, src2);
    tmp3 = __lasx_xvpickod_h(src3, src2);
    // U channel.
    reg0 = __lasx_xvmaddwev_h_bu(const_0x8080, tmp0, const_112);  // +112*B
    reg1 = __lasx_xvmaddwev_h_bu(const_0x8080, tmp2, const_112);
    reg2 = __lasx_xvmulwod_h_bu(tmp0, const_74);                  // 74*G
    reg3 = __lasx_xvmulwod_h_bu(tmp2, const_74);
    reg2 = __lasx_xvmaddwev_h_bu(reg2, tmp1, const_38);           // +38*R
    reg3 = __lasx_xvmaddwev_h_bu(reg3, tmp3, const_38);
    reg0 = __lasx_xvsub_h(reg0, reg2);
    reg1 = __lasx_xvsub_h(reg1, reg3);
    dst0 = __lasx_xvssrani_b_h(reg1, reg0, 8);
    dst0 = __lasx_xvperm_w(dst0, control);
    // V channel.
    reg0 = __lasx_xvmaddwev_h_bu(const_0x8080, tmp1, const_112);  // +112*R
    reg1 = __lasx_xvmaddwev_h_bu(const_0x8080, tmp3, const_112);
    reg2 = __lasx_xvmulwev_h_bu(tmp0, const_18);                  // 18*B
    reg3 = __lasx_xvmulwev_h_bu(tmp2, const_18);
    reg2 = __lasx_xvmaddwod_h_bu(reg2, tmp0, const_94);           // +94*G
    reg3 = __lasx_xvmaddwod_h_bu(reg3, tmp2, const_94);
    reg0 = __lasx_xvsub_h(reg0, reg2);
    reg1 = __lasx_xvsub_h(reg1, reg3);
    dst1 = __lasx_xvssrani_b_h(reg1, reg0, 8);
    dst1 = __lasx_xvperm_w(dst1, control);
    __lasx_xvst(dst0, dst_u, 0);
    __lasx_xvst(dst1, dst_v, 0);
    dst_u += 32;
    dst_v += 32;
    src_argb += 128;
  }
}

// Per-channel multiply of two ARGB rows: dst = (a * b) >> 8, computed by
// widening a to a*257-style duplicated halfwords and taking the high half
// of the 16x16 unsigned product (xvmuh_hu).  8 pixels per iteration.
void ARGBMultiplyRow_LASX(const uint8_t* src_argb0,
                          const uint8_t* src_argb1,
                          uint8_t* dst_argb,
                          int width) {
  int x;
  int len = width / 8;
  __m256i zero = __lasx_xvldi(0);
  __m256i src0, src1, dst0, dst1;
  __m256i tmp0, tmp1, tmp2, tmp3;

  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lasx_xvld, src_argb0, 0, src_argb1, 0, src0, src1);
    tmp0 = __lasx_xvilvl_b(src0, src0);  // duplicate src0 bytes -> halfwords
    tmp1 = __lasx_xvilvh_b(src0, src0);
    tmp2 = __lasx_xvilvl_b(zero, src1);  // zero-extend src1 bytes
    tmp3 = __lasx_xvilvh_b(zero, src1);
    dst0 = __lasx_xvmuh_hu(tmp0, tmp2);
    dst1 = __lasx_xvmuh_hu(tmp1, tmp3);
    dst0 = __lasx_xvpickev_b(dst1, dst0);
    __lasx_xvst(dst0, dst_argb, 0);
    src_argb0 += 32;
    src_argb1 += 32;
    dst_argb += 32;
  }
}

// Saturating per-byte add of two ARGB rows.  8 pixels per iteration.
void ARGBAddRow_LASX(const uint8_t* src_argb0,
                     const uint8_t* src_argb1,
                     uint8_t* dst_argb,
                     int width) {
  int x;
  int len = width / 8;
  __m256i src0, src1, dst0;

  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lasx_xvld, src_argb0, 0, src_argb1, 0, src0, src1);
    dst0 = __lasx_xvsadd_bu(src0, src1);
    __lasx_xvst(dst0, dst_argb, 0);
    src_argb0 += 32;
    src_argb1 += 32;
    dst_argb += 32;
  }
}

// Saturating per-byte subtract (src0 - src1) of two ARGB rows.
// 8 pixels per iteration.
void ARGBSubtractRow_LASX(const uint8_t* src_argb0,
                          const uint8_t* src_argb1,
                          uint8_t* dst_argb,
                          int width) {
  int x;
  int len = width / 8;
  __m256i src0, src1, dst0;

  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lasx_xvld, src_argb0, 0, src_argb1, 0, src0, src1);
    dst0 = __lasx_xvssub_bu(src0, src1);
    __lasx_xvst(dst0, dst_argb, 0);
    src_argb0 += 32;
    src_argb1 += 32;
    dst_argb += 32;
  }
}

// Premultiplies B, G and R by alpha ((c * a) >> 8 via a 32-bit product
// narrowed with >>24 on byte-duplicated operands); the alpha channel itself
// is passed through unchanged.  16 pixels per iteration.
void ARGBAttenuateRow_LASX(const uint8_t* src_argb,
                           uint8_t* dst_argb,
                           int width) {
  int x;
  int len = width / 16;
  __m256i src0, src1, tmp0, tmp1;
  __m256i reg0, reg1, reg2, reg3, reg4, reg5;
  __m256i b, g, r, a, dst0, dst1;
  // Re-interleaves the even/odd halves produced by ssrani back into order.
  __m256i control = {0x0005000100040000, 0x0007000300060002,
                     0x0005000100040000, 0x0007000300060002};

  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src0, src1);
    tmp0 = __lasx_xvpickev_b(src1, src0);
    tmp1 = __lasx_xvpickod_b(src1, src0);
    b = __lasx_xvpackev_b(tmp0, tmp0);  // each channel byte duplicated
    r = __lasx_xvpackod_b(tmp0, tmp0);
    g = __lasx_xvpackev_b(tmp1, tmp1);
    a = __lasx_xvpackod_b(tmp1, tmp1);
    reg0 = __lasx_xvmulwev_w_hu(b, a);
    reg1 = __lasx_xvmulwod_w_hu(b, a);
    reg2 = __lasx_xvmulwev_w_hu(r, a);
    reg3 = __lasx_xvmulwod_w_hu(r, a);
    reg4 = __lasx_xvmulwev_w_hu(g, a);
    reg5 = __lasx_xvmulwod_w_hu(g, a);
    reg0 = __lasx_xvssrani_h_w(reg1, reg0, 24);
    reg2 = __lasx_xvssrani_h_w(reg3, reg2, 24);
    reg4 = __lasx_xvssrani_h_w(reg5, reg4, 24);
    reg0 = __lasx_xvshuf_h(control, reg0, reg0);
    reg2 = __lasx_xvshuf_h(control, reg2, reg2);
    reg4 = __lasx_xvshuf_h(control, reg4, reg4);
    tmp0 = __lasx_xvpackev_b(reg4, reg0);  // B,G pairs
    tmp1 = __lasx_xvpackev_b(a, reg2);     // R + original A
    dst0 = __lasx_xvilvl_h(tmp1, tmp0);
    dst1 = __lasx_xvilvh_h(tmp1, tmp0);
    __lasx_xvst(dst0, dst_argb, 0);
    __lasx_xvst(dst1, dst_argb, 32);
    dst_argb += 64;
    src_argb += 64;
  }
}

// ARGB -> RGB565 with ordered dithering: the 4-byte dither pattern is
// replicated, widened, added to each channel, clipped to 255, then packed
// as in ARGBToRGB565Row.  16 pixels per iteration.
void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb,
                                uint8_t* dst_rgb,
                                const uint32_t dither4,
                                int width) {
  int x;
  int len = width / 16;
  __m256i src0, src1, tmp0, tmp1, dst0;
  __m256i b, g, r;
  __m256i zero = __lasx_xvldi(0);
  __m256i vec_dither = __lasx_xvldrepl_w(&dither4, 0);

  vec_dither = __lasx_xvilvl_b(zero, vec_dither);  // widen dither to halfwords
  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src0, src1);
    tmp0 = __lasx_xvpickev_b(src1, src0);  // B,R
    tmp1 = __lasx_xvpickod_b(src1, src0);  // G,A
    b = __lasx_xvpackev_b(zero, tmp0);
    r = __lasx_xvpackod_b(zero, tmp0);
    g = __lasx_xvpackev_b(zero, tmp1);
    b = __lasx_xvadd_h(b, vec_dither);
    g = __lasx_xvadd_h(g, vec_dither);
    r = __lasx_xvadd_h(r, vec_dither);
    DUP2_ARG1(__lasx_xvclip255_h, b, g, b, g);  // saturate after dither add
    r = __lasx_xvclip255_h(r);
    b = __lasx_xvsrai_h(b, 3);
    g = __lasx_xvsrai_h(g, 2);
    r = __lasx_xvsrai_h(r, 3);
    g = __lasx_xvslli_h(g, 5);
    r = __lasx_xvslli_h(r, 11);
    dst0 = __lasx_xvor_v(b, g);
    dst0 = __lasx_xvor_v(dst0, r);
    dst0 = __lasx_xvpermi_d(dst0, 0xD8);
    __lasx_xvst(dst0, dst_rgb, 0);
    src_argb += 64;
    dst_rgb += 32;
  }
}

// Reorders the 4 channels of each pixel.  Only the first 4 bytes of
// `shuffler` are read (xvldrepl_w) and broadcast as per-pixel offsets on a
// base index pattern.  NOTE(review): assumes the caller's 4-byte shuffle
// pattern repeats per pixel — confirm against ARGBShuffle callers.
// 16 pixels per iteration.
void ARGBShuffleRow_LASX(const uint8_t* src_argb,
                         uint8_t* dst_argb,
                         const uint8_t* shuffler,
                         int width) {
  int x;
  int len = width / 16;
  __m256i src0, src1, dst0, dst1;
  __m256i shuf = {0x0404040400000000, 0x0C0C0C0C08080808,
                  0x0404040400000000, 0x0C0C0C0C08080808};
  __m256i temp = __lasx_xvldrepl_w(shuffler, 0);

  shuf = __lasx_xvadd_b(shuf, temp);  // per-pixel base + channel offsets
  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src0, src1);
    dst0 = __lasx_xvshuf_b(src0, src0, shuf);
    dst1 = __lasx_xvshuf_b(src1, src1, shuf);
    __lasx_xvst(dst0, dst_argb, 0);
    __lasx_xvst(dst1, dst_argb, 32);
    src_argb += 64;
    dst_argb += 64;
  }
}

// Scales every channel by the packed per-channel factors in `value`:
// bytes are duplicated into halfwords and the high half of the 16x16
// unsigned product is taken (xvpickod_b of xvmuh_hu results).
// 8 pixels per iteration.
void ARGBShadeRow_LASX(const uint8_t* src_argb,
                       uint8_t* dst_argb,
                       int width,
                       uint32_t value) {
  int x;
  int len = width / 8;
  __m256i src0, dst0, tmp0, tmp1;
  __m256i vec_value = __lasx_xvreplgr2vr_w(value);

  vec_value = __lasx_xvilvl_b(vec_value, vec_value);  // duplicate value bytes
  for (x = 0; x < len; x++) {
    src0 = __lasx_xvld(src_argb, 0);
    tmp0 = __lasx_xvilvl_b(src0, src0);
    tmp1 = __lasx_xvilvh_b(src0, src0);
    tmp0 = __lasx_xvmuh_hu(tmp0, vec_value);
    tmp1 = __lasx_xvmuh_hu(tmp1, vec_value);
    dst0 = __lasx_xvpickod_b(tmp1, tmp0);
    __lasx_xvst(dst0, dst_argb, 0);
    src_argb += 32;
    dst_argb += 32;
  }
}

// Converts ARGB to grayscale in ARGB layout: from the constants below,
// gray = (29*B + 150*G + 77*R + 128) >> 8 (0x1D=29, 0x96=150, 0x4D=77);
// the >>8 is done by taking the odd (high) bytes of the halfword sums.
// Alpha is preserved.  16 pixels per iteration.
void ARGBGrayRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int x;
  int len = width / 16;
  __m256i src0, src1, tmp0, tmp1;
  __m256i reg0, reg1, reg2, dst0, dst1;
  // NOTE(review): 0x480 is a vldi mode immediate — presumably halfword 128
  // (the rounding bias); confirm against the LASX vldi encoding table.
  __m256i const_128 = __lasx_xvldi(0x480);
  __m256i const_150 = __lasx_xvldi(0x96);
  __m256i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D,
                      0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};

  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src0, src1);
    tmp0 = __lasx_xvpickev_b(src1, src0);          // B,R
    tmp1 = __lasx_xvpickod_b(src1, src0);          // G,A
    reg0 = __lasx_xvdp2_h_bu(tmp0, const_br);      // 29*B + 77*R (pair dot)
    reg1 = __lasx_xvmaddwev_h_bu(const_128, tmp1, const_150);  // 128 + 150*G
    reg2 = __lasx_xvadd_h(reg0, reg1);
    tmp0 = __lasx_xvpackod_b(reg2, reg2);  // gray duplicated (high bytes)
    tmp1 = __lasx_xvpackod_b(tmp1, reg2);  // gray paired with original A
    dst0 = __lasx_xvilvl_h(tmp1, tmp0);
    dst1 = __lasx_xvilvh_h(tmp1, tmp0);
    __lasx_xvst(dst0, dst_argb, 0);
    __lasx_xvst(dst1, dst_argb, 32);
    src_argb += 64;
    dst_argb += 64;
  }
}

// In-place sepia filter.  From the pair-dot constants (byte pairs apply to
// the interleaved B,R bytes) and the even-lane G multiplies:
//   b = (17*B + 68*G + 35*R) >> 7
//   g = (22*B + 88*G + 45*R) >> 7
//   r = (24*B + 98*G + 50*R) >> 7
// g and r are additionally saturated to 7 bits after the shift; alpha is
// preserved via the `shuff` gather from tmp1.  16 pixels per iteration.
void ARGBSepiaRow_LASX(uint8_t* dst_argb, int width) {
  int x;
  int len = width / 16;
  __m256i src0, src1, tmp0, tmp1;
  __m256i reg0, reg1, spb, spg, spr;
  __m256i dst0, dst1;
  __m256i spb_g = __lasx_xvldi(68);
  __m256i spg_g = __lasx_xvldi(88);
  __m256i spr_g = __lasx_xvldi(98);
  __m256i spb_br = {0x2311231123112311, 0x2311231123112311,
                    0x2311231123112311, 0x2311231123112311};  // 17,35
  __m256i spg_br = {0x2D162D162D162D16, 0x2D162D162D162D16,
                    0x2D162D162D162D16, 0x2D162D162D162D16};  // 22,45
  __m256i spr_br = {0x3218321832183218, 0x3218321832183218,
                    0x3218321832183218, 0x3218321832183218};  // 24,50
  __m256i shuff = {0x1706150413021100, 0x1F0E1D0C1B0A1908,
                   0x1706150413021100, 0x1F0E1D0C1B0A1908};

  for (x = 0; x < len; x++) {
    DUP2_ARG2(__lasx_xvld, dst_argb, 0, dst_argb, 32, src0, src1);
    tmp0 = __lasx_xvpickev_b(src1, src0);  // B,R
    tmp1 = __lasx_xvpickod_b(src1, src0);  // G,A
    DUP2_ARG2(__lasx_xvdp2_h_bu, tmp0, spb_br, tmp0, spg_br, spb, spg);
    spr = __lasx_xvdp2_h_bu(tmp0, spr_br);
    spb = __lasx_xvmaddwev_h_bu(spb, tmp1, spb_g);
    spg = __lasx_xvmaddwev_h_bu(spg, tmp1, spg_g);
    spr = __lasx_xvmaddwev_h_bu(spr, tmp1, spr_g);
    spb = __lasx_xvsrli_h(spb, 7);
    spg = __lasx_xvsrli_h(spg, 7);
    spr = __lasx_xvsrli_h(spr, 7);
    spg = __lasx_xvsat_hu(spg, 7);  // clamp g and r to <= 255... (7-bit sat
    spr = __lasx_xvsat_hu(spr, 7);  // on the shifted halfword value)
    reg0 = __lasx_xvpackev_b(spg, spb);
    reg1 = __lasx_xvshuf_b(tmp1, spr, shuff);  // merge r with original alpha
    dst0 = __lasx_xvilvl_h(reg1, reg0);
    dst1 = __lasx_xvilvh_h(reg1, reg0);
    __lasx_xvst(dst0, dst_argb, 0);
    __lasx_xvst(dst1, dst_argb, 32);
    dst_argb += 64;
  }
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv