diff options
author | Frank Barchard <fbarchard@google.com> | 2022-09-16 11:12:39 -0700 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2022-09-16 19:46:47 +0000 |
commit | f71c83552d373f0ff41833b17e2880632d8561d7 (patch) | |
tree | 09088188086a6b03d07a5ebaa8edf01658466ad8 | |
parent | 3e38ce50589d9319badc0501f96d6c5b2b177472 (diff) | |
download | libyuv-f71c83552d373f0ff41833b17e2880632d8561d7.tar.gz |
I420ToRGB24MatrixFilter function added
- Implemented as 3 steps: Upsample UV to 4:4:4, I444ToARGB, ARGBToRGB24
- Fix some build warnings for missing prototypes.
Pixel 4
I420ToRGB24_Opt (743 ms)
I420ToRGB24Filter_Opt (1331 ms)
Windows with skylake xeon:
x86 32 bit
I420ToRGB24_Opt (387 ms)
I420ToRGB24Filter_Opt (571 ms)
x64 64 bit
I420ToRGB24_Opt (384 ms)
I420ToRGB24Filter_Opt (582 ms)
Bug: libyuv:938, libyuv:830
Change-Id: Ie27f70816ec084437014f8a1c630ae011ee2348c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3900298
Reviewed-by: Wan-Teh Chang <wtc@google.com>
-rw-r--r-- | README.chromium | 2 | ||||
-rw-r--r-- | include/libyuv/convert_argb.h | 15 | ||||
-rw-r--r-- | include/libyuv/row.h | 26 | ||||
-rw-r--r-- | include/libyuv/version.h | 2 | ||||
-rw-r--r-- | source/convert.cc | 72 | ||||
-rw-r--r-- | source/convert_argb.cc | 387 | ||||
-rw-r--r-- | source/convert_from_argb.cc | 14 | ||||
-rw-r--r-- | source/cpu_id.cc | 2 | ||||
-rw-r--r-- | source/planar_functions.cc | 124 | ||||
-rw-r--r-- | source/rotate_argb.cc | 3 | ||||
-rw-r--r-- | source/row_common.cc | 10 | ||||
-rw-r--r-- | source/row_gcc.cc | 26 | ||||
-rw-r--r-- | source/row_neon.cc | 54 | ||||
-rw-r--r-- | source/row_neon64.cc | 46 | ||||
-rw-r--r-- | source/scale.cc | 12 | ||||
-rw-r--r-- | source/scale_argb.cc | 20 | ||||
-rw-r--r-- | source/scale_uv.cc | 14 | ||||
-rw-r--r-- | unit_test/convert_test.cc | 8 | ||||
-rw-r--r-- | util/yuvconvert.cc | 10 |
19 files changed, 549 insertions(+), 298 deletions(-)
diff --git a/README.chromium b/README.chromium index 1e46d348..681e9cc0 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1840 +Version: 1841 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index 8c92a9c6..4025050a 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -1975,6 +1975,21 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y, int height, enum FilterMode filter); +// Convert I420 to RGB24 with matrix and UV filter mode. +LIBYUV_API +int I420ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + // Convert I010 to AR30 with matrix and UV filter mode. LIBYUV_API int I010ToAR30MatrixFilter(const uint16_t* src_y, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 70d789e2..1c14ef3b 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -1824,6 +1824,11 @@ void RGBAToUVRow_C(const uint8_t* src_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); +void RGBAToUVJRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void RGB24ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, @@ -2044,11 +2049,11 @@ void DetileSplitUVRow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_v, int width); void DetileToYUY2_C(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width); + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width); void DetileToYUY2_SSE2(const uint8_t* src_y, ptrdiff_t src_y_tile_stride, const uint8_t* src_uv, @@ -5608,6 +5613,17 @@ 
void GaussCol_F32_C(const float* src0, float* dst, int width); +void GaussRow_C(const uint32_t* src, uint16_t* dst, int width); +void GaussCol_C(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, + int width); + +void ClampFloatToZero_SSE2(const float* src_x, float* dst_y, int width); + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 02b66d4e..afe00d4b 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1840 +#define LIBYUV_VERSION 1841 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index a740d5ca..8b745e7d 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -727,7 +727,7 @@ int MM21ToYUY2(const uint8_t* src_y, } DetileToYUY2(src_y, src_stride_y, src_uv, src_stride_uv, dst_yuy2, - dst_stride_yuy2, width, height, 32); + dst_stride_yuy2, width, height, 32); return 0; } @@ -2054,8 +2054,8 @@ int RGB24ToI420(const uint8_t* src_rgb24, { #if !defined(HAS_RGB24TOYROW) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -2065,10 +2065,10 @@ int RGB24ToI420(const uint8_t* src_rgb24, RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); #else RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_rgb24 += src_stride_rgb24 * 2; dst_y += dst_stride_y * 2; @@ -2208,8 +2208,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24, { #if !defined(HAS_RGB24TOYJROW) // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -2219,10 +2219,10 @@ int RGB24ToJ420(const uint8_t* src_rgb24, RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); #else RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); - ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width); + RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width); + ARGBToUVJRow(row, row_size, dst_u, dst_v, width); ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); #endif src_rgb24 += src_stride_rgb24 * 2; dst_y += dst_stride_y * 2; @@ -2382,8 +2382,8 @@ int RAWToI420(const uint8_t* src_raw, { #if !defined(HAS_RAWTOYROW) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -2393,10 +2393,10 @@ int RAWToI420(const uint8_t* src_raw, RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); #else RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; @@ -2536,8 +2536,8 @@ int RAWToJ420(const uint8_t* src_raw, { #if !defined(HAS_RAWTOYJROW) // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -2547,10 +2547,10 @@ int RAWToJ420(const uint8_t* src_raw, RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); #else RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width); + RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); + ARGBToUVJRow(row, row_size, dst_u, dst_v, width); ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); #endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; @@ -2714,8 +2714,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) // 
Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ @@ -2725,10 +2725,10 @@ int RGB565ToI420(const uint8_t* src_rgb565, RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); #else RGB565ToARGBRow(src_rgb565, row, width); - RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + row_size, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_rgb565 += src_stride_rgb565 * 2; dst_y += dst_stride_y * 2; @@ -2894,8 +2894,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -2907,11 +2907,11 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, width); #else ARGB1555ToARGBRow(src_argb1555, row, width); - ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize, + ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + row_size, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_argb1555 += src_stride_argb1555 * 2; dst_y += dst_stride_y * 2; @@ -3089,8 +3089,8 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, { #if !(defined(HAS_ARGB4444TOYROW_NEON)) // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -3101,11 +3101,11 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, width); #else ARGB4444ToARGBRow(src_argb4444, row, width); - ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize, + ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + row_size, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_argb4444 += src_stride_argb4444 * 2; dst_y += dst_stride_y * 2; diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 1ebb107a..5500fcb2 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -7,8 +7,10 @@ * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ + #include "libyuv/convert_argb.h" +#include "libyuv/convert_from_argb.h" #include "libyuv/cpu_id.h" #ifdef HAVE_JPEG #include "libyuv/mjpeg_decoder.h" @@ -5497,22 +5499,22 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4); uint8_t* temp_u_1 = row; - uint8_t* temp_u_2 = row + kRowSize; - uint8_t* temp_v_1 = row + kRowSize * 2; - uint8_t* temp_v_2 = row + kRowSize * 3; + uint8_t* temp_u_2 = row + row_size; + uint8_t* temp_v_1 = row + row_size * 2; + uint8_t* temp_v_2 = row + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5524,8 +5526,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); } @@ -5622,10 +5624,10 @@ static int I422ToARGBMatrixLinear(const 
uint8_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2); uint8_t* temp_u = row; - uint8_t* temp_v = row + kRowSize; + uint8_t* temp_v = row + row_size; for (y = 0; y < height; ++y) { ScaleRowUp(src_u, temp_u, width); @@ -5641,6 +5643,188 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, return 0; } +static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToARGBRow_C; + void (*ARGBToRGB24Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = + ARGBToRGB24Row_C; + void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = + ScaleRowUp2_Bilinear_Any_C; + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_I444TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToARGBRow = I444ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I444TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToARGBRow = I444ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I444ToARGBRow = I444ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_I444TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToARGBRow = I444ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_I444TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444ToARGBRow = I444ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_MSA; + } + } +#endif +#if defined(HAS_I444TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I444ToARGBRow = I444ToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I444ToARGBRow = I444ToARGBRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToRGB24Row = ARGBToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToRGB24Row = ARGBToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) + if (TestCpuFlag(kCpuHasAVX512VBMI)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX512VBMI; + if (IS_ALIGNED(width, 32)) { + ARGBToRGB24Row = ARGBToRGB24Row_AVX512VBMI; + } + } +#endif +#if defined(HAS_ARGBTORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToRGB24Row = ARGBToRGB24Row_NEON; + } + } +#endif +#if 
defined(HAS_ARGBTORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToRGB24Row = ARGBToRGB24Row_MSA; + } + } +#endif +#if defined(HAS_ARGBTORGB24ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToRGB24Row = ARGBToRGB24Row_LASX; + } + } +#endif +// TODO: Fix HAS macros to match function names +#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + } +#endif + + // alloc 4 lines temp + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 8); + uint8_t* temp_u_1 = row; + uint8_t* temp_u_2 = row + row_size; + uint8_t* temp_v_1 = row + row_size * 2; + uint8_t* temp_v_2 = row + row_size * 3; + uint8_t* temp_argb = row + row_size * 4; + + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); + ARGBToRGB24Row(temp_argb, dst_rgb24, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + + for (y = 0; y < height - 2; y += 2) { + Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); + ARGBToRGB24Row(temp_argb, dst_rgb24, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + I444ToARGBRow(src_y, temp_u_2, temp_v_2, temp_argb, yuvconstants, width); 
+ ARGBToRGB24Row(temp_argb, dst_rgb24, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + if (!(height & 1)) { + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); + ARGBToRGB24Row(temp_argb, dst_rgb24, width); + } + + free_aligned_buffer_64(row); + return 0; +} + static int I010ToAR30MatrixBilinear(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, @@ -5705,22 +5889,22 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4 * sizeof(uint16_t)); uint16_t* temp_u_1 = (uint16_t*)(row); - uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; - uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; - uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; + uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; + uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; + uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ 
-5732,8 +5916,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); } @@ -5803,10 +5987,10 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); - uint16_t* temp_v = (uint16_t*)(row) + kRowSize; + uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { ScaleRowUp(src_u, temp_u, width); @@ -5885,22 +6069,22 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4 * sizeof(uint16_t)); uint16_t* temp_u_1 = (uint16_t*)(row); - uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; - uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; - uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; + uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; + uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; + uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, 
kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5912,8 +6096,8 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); } @@ -5982,10 +6166,10 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); - uint16_t* temp_v = (uint16_t*)(row) + kRowSize; + uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { ScaleRowUp(src_u, temp_u, width); @@ -6134,15 +6318,15 @@ static int I420AlphaToARGBMatrixBilinear( #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4); uint8_t* temp_u_1 = row; - uint8_t* temp_u_2 = row + kRowSize; - uint8_t* temp_v_1 = row + kRowSize * 2; - uint8_t* temp_v_2 = row + kRowSize * 3; + uint8_t* temp_u_2 = row + row_size; + uint8_t* temp_v_1 = row + row_size * 2; + uint8_t* temp_v_2 = row + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I444AlphaToARGBRow(src_y, 
temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6153,8 +6337,8 @@ static int I420AlphaToARGBMatrixBilinear( src_a += src_stride_a; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6176,8 +6360,8 @@ static int I420AlphaToARGBMatrixBilinear( } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6317,10 +6501,10 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2); uint8_t* temp_u = row; - uint8_t* temp_v = row + kRowSize; + uint8_t* temp_v = row + row_size; for (y = 0; y < height; ++y) { ScaleRowUp(src_u, temp_u, width); @@ -6445,15 +6629,15 @@ static int I010AlphaToARGBMatrixBilinear( #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4 * sizeof(uint16_t)); uint16_t* temp_u_1 = (uint16_t*)(row); - uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; - uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; - uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; + uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; + uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; + 
uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6464,8 +6648,8 @@ static int I010AlphaToARGBMatrixBilinear( src_a += src_stride_a; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6487,8 +6671,8 @@ static int I010AlphaToARGBMatrixBilinear( } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6600,10 +6784,10 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); - uint16_t* temp_v = (uint16_t*)(row) + kRowSize; + uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { ScaleRowUp(src_u, temp_u, width); @@ -6684,18 +6868,18 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (2 * width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (2 
* width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_uv_1 = (uint16_t*)(row); - uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize; + uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; - Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width); + Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6706,7 +6890,7 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); } @@ -6773,8 +6957,8 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, } #endif - const int kRowSize = (2 * width + 31) & ~31; - align_buffer_64(row, kRowSize * sizeof(uint16_t)); + const int row_size = (2 * width + 31) & ~31; + align_buffer_64(row, row_size * sizeof(uint16_t)); uint16_t* temp_uv = (uint16_t*)(row); for (y = 0; y < height; ++y) { @@ -6850,18 +7034,18 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (2 * width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (2 * width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_uv_1 = (uint16_t*)(row); - uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize; + uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; - Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, 
width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width); + Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -6872,7 +7056,7 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); } @@ -6939,8 +7123,8 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, } #endif - const int kRowSize = (2 * width + 31) & ~31; - align_buffer_64(row, kRowSize * sizeof(uint16_t)); + const int row_size = (2 * width + 31) & ~31; + align_buffer_64(row, row_size * sizeof(uint16_t)); uint16_t* temp_uv = (uint16_t*)(row); for (y = 0; y < height; ++y) { @@ -7016,6 +7200,37 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y, } LIBYUV_API +int I420ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter) { + switch (filter) { + case kFilterNone: + return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + yuvconstants, width, height); + case kFilterBilinear: + case kFilterBox: + return I420ToRGB24MatrixBilinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); + case kFilterLinear: + // TODO: Implement Linear using Bilinear with Scale2RowUp stride 0 + return -1; + } + + return -1; +} + +LIBYUV_API int I010ToAR30MatrixFilter(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, 
diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 2f38a488..f7eab0c6 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -1866,7 +1866,7 @@ int ARGBToJ420(const uint8_t* src_argb, int height) { int y; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_uj, uint8_t* dst_vj, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; @@ -2238,7 +2238,7 @@ int ABGRToJ420(const uint8_t* src_abgr, int height) { int y; void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, - uint8_t* dst_uj, uint8_t* dst_vj, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ABGRToUVJRow_C; void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = ABGRToYJRow_C; @@ -2804,8 +2804,8 @@ int RAWToJNV21(const uint8_t* src_raw, uint8_t* row_vj = row_uj + ((halfwidth + 31) & ~31); #if !defined(HAS_RAWTOYJROW) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -2816,11 +2816,11 @@ int RAWToJNV21(const uint8_t* src_raw, RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); #else RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVJRow(row, kRowSize, row_uj, row_vj, width); + RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); + ARGBToUVJRow(row, row_size, row_uj, row_vj, width); MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); #endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 56fe60e4..7467c5d9 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -115,7 +115,7 @@ void CpuId(int eax, int ecx, int* cpu_info) { defined(__x86_64__)) && \ !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. -int GetXCR0() { +static int GetXCR0() { int xcr0 = 0; #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) xcr0 = (int)_xgetbv(0); // VS2010 SP1 required. 
NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 1de71dbb..45c34d30 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1035,20 +1035,20 @@ void DetileSplitUVPlane(const uint8_t* src_uv, LIBYUV_API void DetileToYUY2(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_uv, - int src_stride_uv, - uint8_t* dst_yuy2, - int dst_stride_yuy2, - int width, - int height, - int tile_height) { + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height, + int tile_height) { const ptrdiff_t src_y_tile_stride = 16 * tile_height; const ptrdiff_t src_uv_tile_stride = src_y_tile_stride / 2; int y; void (*DetileToYUY2)(const uint8_t* src_y, ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, int width) = DetileToYUY2_C; + const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, int width) = DetileToYUY2_C; assert(src_stride_y >= 0); assert(src_stride_y > 0); assert(src_stride_uv >= 0); @@ -1085,8 +1085,8 @@ void DetileToYUY2(const uint8_t* src_y, // Detile plane for (y = 0; y < height; ++y) { - DetileToYUY2(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, - dst_yuy2, width); + DetileToYUY2(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2, + width); dst_yuy2 += dst_stride_yuy2; src_y += 16; @@ -1224,18 +1224,18 @@ void MergeRGBPlane(const uint8_t* src_r, } LIBYUV_NOINLINE -void SplitARGBPlaneAlpha(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_r, - int dst_stride_r, - uint8_t* dst_g, - int dst_stride_g, - uint8_t* dst_b, - int dst_stride_b, - uint8_t* dst_a, - int dst_stride_a, - int width, - int height) { +static void SplitARGBPlaneAlpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + uint8_t* dst_a, + int dst_stride_a, + 
int width, + int height) { int y; void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, uint8_t* dst_a, int width) = @@ -1295,16 +1295,16 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb, } LIBYUV_NOINLINE -void SplitARGBPlaneOpaque(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_r, - int dst_stride_r, - uint8_t* dst_g, - int dst_stride_g, - uint8_t* dst_b, - int dst_stride_b, - int width, - int height) { +static void SplitARGBPlaneOpaque(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height) { int y; void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) = SplitXRGBRow_C; @@ -1396,18 +1396,18 @@ void SplitARGBPlane(const uint8_t* src_argb, } LIBYUV_NOINLINE -void MergeARGBPlaneAlpha(const uint8_t* src_r, - int src_stride_r, - const uint8_t* src_g, - int src_stride_g, - const uint8_t* src_b, - int src_stride_b, - const uint8_t* src_a, - int src_stride_a, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height) { +static void MergeARGBPlaneAlpha(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, const uint8_t* src_a, @@ -1458,16 +1458,16 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r, } LIBYUV_NOINLINE -void MergeARGBPlaneOpaque(const uint8_t* src_r, - int src_stride_r, - const uint8_t* src_g, - int src_stride_g, - const uint8_t* src_b, - int src_stride_b, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height) { +static void MergeARGBPlaneOpaque(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + 
int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_argb, int width) = @@ -4545,16 +4545,16 @@ static int ARGBSobelize(const uint8_t* src_argb, #endif { // 3 rows with edges before/after. - const int kRowSize = (width + kEdge + 31) & ~31; - align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); + const int row_size = (width + kEdge + 31) & ~31; + align_buffer_64(rows, row_size * 2 + (kEdge + row_size * 3 + kEdge)); uint8_t* row_sobelx = rows; - uint8_t* row_sobely = rows + kRowSize; - uint8_t* row_y = rows + kRowSize * 2; + uint8_t* row_sobely = rows + row_size; + uint8_t* row_y = rows + row_size * 2; // Convert first row. uint8_t* row_y0 = row_y + kEdge; - uint8_t* row_y1 = row_y0 + kRowSize; - uint8_t* row_y2 = row_y1 + kRowSize; + uint8_t* row_y1 = row_y0 + row_size; + uint8_t* row_y2 = row_y1 + row_size; ARGBToYJRow(src_argb, row_y0, width); row_y0[-1] = row_y0[0]; memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc index 539cf98d..28226210 100644 --- a/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -8,11 +8,12 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "libyuv/rotate.h" +#include "libyuv/rotate_argb.h" #include "libyuv/convert.h" #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" +#include "libyuv/rotate.h" #include "libyuv/row.h" #include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */ diff --git a/source/row_common.cc b/source/row_common.cc index 9d94ab28..4e1141f7 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -2749,11 +2749,11 @@ void DetileSplitUVRow_C(const uint8_t* src_uv, } void DetileToYUY2_C(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width) { + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { for (int x = 0; x < width - 15; x += 16) { for (int i = 0; i < 8; i++) { dst_yuy2[0] = src_y[0]; diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 8d0f477c..3bda4482 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -4977,19 +4977,19 @@ void DetileToYUY2_SSE2(const uint8_t* src_y, uint8_t* dst_yuy2, int width) { asm volatile( - "1: \n" - "movdqu (%0),%%xmm0 \n" // Load 16 Y - "sub $0x10,%3 \n" - "lea (%0,%4),%0 \n" - "movdqu (%1),%%xmm1 \n" // Load 8 UV - "lea (%1,%5),%1 \n" - "movdqu %%xmm0,%%xmm2 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm2 \n" - "movdqu %%xmm0,(%2) \n" - "movdqu %%xmm2,0x10(%2) \n" - "lea 0x20(%2),%2 \n" - "jg 1b \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" // Load 16 Y + "sub $0x10,%3 \n" + "lea (%0,%4),%0 \n" + "movdqu (%1),%%xmm1 \n" // Load 8 UV + "lea (%1,%5),%1 \n" + "movdqu %%xmm0,%%xmm2 \n" + "punpcklbw %%xmm1,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm2 \n" + "movdqu %%xmm0,(%2) \n" + "movdqu %%xmm2,0x10(%2) \n" + "lea 0x20(%2),%2 \n" + "jg 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_yuy2), // %2 diff --git a/source/row_neon.cc b/source/row_neon.cc index 82039e9f..3f5c5de1 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -625,20 
+625,20 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv, #if LIBYUV_USE_ST2 // Read 16 Y, 8 UV, and write 8 YUYV. void DetileToYUY2_NEON(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width) { + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { asm volatile( - "1: \n" - "vld1.8 q0, [%0], %4 \n" // Load 16 Y - "pld [%0, 1792] \n" - "vld1.8 q1, [%1], %5 \n" // Load 8 UV - "pld [%1, 1792] \n" - "subs %3, %3, #16 \n" - "vst2.8 {q0, q1}, [%2]! \n" - "bgt 1b \n" + "1: \n" + "vld1.8 q0, [%0], %4 \n" // Load 16 Y + "pld [%0, 1792] \n" + "vld1.8 q1, [%1], %5 \n" // Load 8 UV + "pld [%1, 1792] \n" + "subs %3, %3, #16 \n" + "vst2.8 {q0, q1}, [%2]! \n" + "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_yuy2), // %2 @@ -651,21 +651,21 @@ void DetileToYUY2_NEON(const uint8_t* src_y, #else // Read 16 Y, 8 UV, and write 8 YUYV. void DetileToYUY2_NEON(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width) { + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { asm volatile( - "1: \n" - "vld1.8 q0, [%0], %4 \n" // Load 16 Y - "vld1.8 q1, [%1], %5 \n" // Load 8 UV - "subs %3, %3, #16 \n" - "pld [%0, 1792] \n" - "vzip.8 q0, q1 \n" - "pld [%1, 1792] \n" - "vst1.8 {q0, q1}, [%2]! \n" - "bgt 1b \n" + "1: \n" + "vld1.8 q0, [%0], %4 \n" // Load 16 Y + "vld1.8 q1, [%1], %5 \n" // Load 8 UV + "subs %3, %3, #16 \n" + "pld [%0, 1792] \n" + "vzip.8 q0, q1 \n" + "pld [%1, 1792] \n" + "vst1.8 {q0, q1}, [%2]! 
\n" + "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_yuy2), // %2 diff --git a/source/row_neon64.cc b/source/row_neon64.cc index e166ce04..37962378 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -653,11 +653,11 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv, #if LIBYUV_USE_ST2 // Read 16 Y, 8 UV, and write 8 YUY2 void DetileToYUY2_NEON(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width) { + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { asm volatile( "1: \n" "ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys @@ -667,23 +667,23 @@ void DetileToYUY2_NEON(const uint8_t* src_y, "subs %w3, %w3, #16 \n" // store 8 YUY2 "st2 {v0.16b,v1.16b}, [%2], #32 \n" "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_yuy2), // %2 - "+r"(width) // %3 - : "r"(src_y_tile_stride), // %4 - "r"(src_uv_tile_stride) // %5 + : "+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_yuy2), // %2 + "+r"(width) // %3 + : "r"(src_y_tile_stride), // %4 + "r"(src_uv_tile_stride) // %5 : "cc", "memory", "v0", "v1" // Clobber list ); } #else // Read 16 Y, 8 UV, and write 8 YUY2 void DetileToYUY2_NEON(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width) { + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { asm volatile( "1: \n" "ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys @@ -694,13 +694,13 @@ void DetileToYUY2_NEON(const uint8_t* src_y, "prfm pldl1keep, [%1, 1792] \n" "zip2 v3.16b, v0.16b, v1.16b \n" "st1 {v2.16b,v3.16b}, [%2], #32 \n" // store 8 YUY2 - "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_yuy2), // %2 - "+r"(width) // %3 - : "r"(src_y_tile_stride), // %4 - "r"(src_uv_tile_stride) // %5 + "b.gt 1b \n" + : 
"+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_yuy2), // %2 + "+r"(width) // %3 + : "r"(src_y_tile_stride), // %4 + "r"(src_uv_tile_stride) // %5 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber list ); } diff --git a/source/scale.cc b/source/scale.cc index e1335f1e..16854c45 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -1315,11 +1315,11 @@ void ScalePlaneBilinearUp(int src_width, const uint8_t* src = src_ptr + yi * (int64_t)src_stride; // Allocate 2 row buffers. - const int kRowSize = (dst_width + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width + 31) & ~31; + align_buffer_64(row, row_size * 2); uint8_t* rowptr = row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; ScaleFilterCols(rowptr, src, dst_width, x, dx); @@ -1766,11 +1766,11 @@ void ScalePlaneBilinearUp_16(int src_width, const uint16_t* src = src_ptr + yi * (int64_t)src_stride; // Allocate 2 row buffers. - const int kRowSize = (dst_width + 31) & ~31; - align_buffer_64(row, kRowSize * 4); + const int row_size = (dst_width + 31) & ~31; + align_buffer_64(row, row_size * 4); uint16_t* rowptr = (uint16_t*)row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; ScaleFilterCols(rowptr, src, dst_width, x, dx); diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 9c3acf7f..07840d65 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -155,8 +155,8 @@ static void ScaleARGBDown4Box(int src_width, int dy) { int j; // Allocate 2 rows of ARGB. 
- const int kRowSize = (dst_width * 2 * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 2 * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); int row_stride = src_stride * (dy >> 16); void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) = @@ -187,9 +187,9 @@ static void ScaleARGBDown4Box(int src_width, for (j = 0; j < dst_height; ++j) { ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); - ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize, + ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + row_size, dst_width * 2); - ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width); + ScaleARGBRowDown2(row, row_size, dst_argb, dst_width); src_argb += row_stride; dst_argb += dst_stride; } @@ -548,11 +548,11 @@ static void ScaleARGBBilinearUp(int src_width, const uint8_t* src = src_argb + yi * (int64_t)src_stride; // Allocate 2 rows of ARGB. - const int kRowSize = (dst_width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); uint8_t* rowptr = row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); @@ -798,14 +798,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width, const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v; // Allocate 2 rows of ARGB. - const int kRowSize = (dst_width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); // Allocate 1 row of ARGB for source conversion. align_buffer_64(argb_row, src_width * 4); uint8_t* rowptr = row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; // TODO(fbarchard): Convert first 2 rows of YUV to ARGB. 
diff --git a/source/scale_uv.cc b/source/scale_uv.cc index 3b3d7b8e..8bd6b586 100644 --- a/source/scale_uv.cc +++ b/source/scale_uv.cc @@ -193,8 +193,8 @@ static void ScaleUVDown4Box(int src_width, int dy) { int j; // Allocate 2 rows of UV. - const int kRowSize = (dst_width * 2 * 2 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 2 * 2 + 15) & ~15; + align_buffer_64(row, row_size * 2); int row_stride = src_stride * (dy >> 16); void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) = @@ -234,9 +234,9 @@ static void ScaleUVDown4Box(int src_width, for (j = 0; j < dst_height; ++j) { ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2); - ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + kRowSize, + ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + row_size, dst_width * 2); - ScaleUVRowDown2(row, kRowSize, dst_uv, dst_width); + ScaleUVRowDown2(row, row_size, dst_uv, dst_width); src_uv += row_stride; dst_uv += dst_stride; } @@ -574,11 +574,11 @@ static void ScaleUVBilinearUp(int src_width, const uint8_t* src = src_uv + yi * (int64_t)src_stride; // Allocate 2 rows of UV. 
- const int kRowSize = (dst_width * 2 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 2 + 15) & ~15; + align_buffer_64(row, row_size * 2); uint8_t* rowptr = row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; ScaleUVFilterCols(rowptr, src, dst_width, x, dx); diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index ea4a99ac..bb06e4ed 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -680,6 +680,9 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) #define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ kFilterBilinear) +#define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ + I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) #define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) @@ -816,6 +819,7 @@ TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1) #endif TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) #else TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) @@ -832,13 +836,13 @@ TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1) TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) -TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) @@ -1412,7 +1416,7 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2) EPP_B, STRIDE_B, HEIGHT_B) 
#else #define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ - EPP_B, STRIDE_B, HEIGHT_B, INPLACE) \ + EPP_B, STRIDE_B, HEIGHT_B) \ TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0) #endif diff --git a/util/yuvconvert.cc b/util/yuvconvert.cc index 332699e3..93b52668 100644 --- a/util/yuvconvert.cc +++ b/util/yuvconvert.cc @@ -42,9 +42,9 @@ static __inline uint32_t Abs(int32_t v) { } // Parse PYUV format. ie name.1920x800_24Hz_P420.yuv -bool ExtractResolutionFromFilename(const char* name, - int* width_ptr, - int* height_ptr) { +static bool ExtractResolutionFromFilename(const char* name, + int* width_ptr, + int* height_ptr) { // Isolate the .width_height. section of the filename by searching for a // dot or underscore followed by a digit. for (int i = 0; name[i]; ++i) { @@ -59,7 +59,7 @@ bool ExtractResolutionFromFilename(const char* name, return false; } -void PrintHelp(const char* program) { +static void PrintHelp(const char* program) { printf("%s [-options] src_argb.raw dst_yuv.raw\n", program); printf( " -s <width> <height> .... specify source resolution. " @@ -78,7 +78,7 @@ void PrintHelp(const char* program) { exit(0); } -void ParseOptions(int argc, const char* argv[]) { +static void ParseOptions(int argc, const char* argv[]) { if (argc <= 1) { PrintHelp(argv[0]); } |