Casting for scale functions

- MT2T support for source strides added, but it only works for positive stride values.
- Reduced casting in row_common: one cast per assignment.
- Scaling functions use intptr_t for intermediate calculations, then cast strides to ptrdiff_t.

Bug: libyuv:948, b/257266635, b/262468594
Change-Id: I0409a0ce916b777da2a01c0ab0b56dccefed3b33
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4102203
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Reviewed-by: Justin Green <greenjustin@google.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Ernest Hua <ernesthua@google.com>
author: Frank Barchard <fbarchard@google.com> 2022-12-15 14:11:52 -0800
committer: libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2022-12-15 22:34:22 +0000
commit: 3abd6f36b6e4f5a2e0ce236580a8bc1da3c7cf7e (patch)
tree: 3fc173f8288f4457858919c806bac14593dc3d1b
parent: 610e0cdead3fcd3288693d18eab8c7323805ad9e (diff)
download: libyuv-3abd6f36b6e4f5a2e0ce236580a8bc1da3c7cf7e.tar.gz
12 files changed, 702 insertions, 623 deletions
diff --git a/README.chromium b/README.chromium
index ccd2ca06..a33b46b9 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1852
+Version: 1854
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h
index 5c5231bb..ae9b954f 100644
--- a/include/libyuv/convert.h
+++ b/include/libyuv/convert.h
@@ -163,10 +163,13 @@ int MM21ToYUY2(const uint8_t* src_y,
                int height);
 
 // Convert MT2T to P010
+// Note that src_y and src_uv point to packed 10-bit values, so the Y plane will
+// be 10 / 8 times the dimensions of the image. Also for this reason,
+// src_stride_y and src_stride_uv are given in bytes.
 LIBYUV_API
-int MT2TToP010(const uint16_t* src_y,
+int MT2TToP010(const uint8_t* src_y,
                int src_stride_y,
-               const uint16_t* src_uv,
+               const uint8_t* src_uv,
                int src_stride_uv,
                uint16_t* dst_y,
                int dst_stride_y,
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 27f468ad..45b172b2 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -2123,8 +2123,8 @@ void DetileToYUY2_Any_NEON(const uint8_t* src_y,
                            ptrdiff_t src_uv_tile_stride,
                            uint8_t* dst_yuy2,
                            int width);
-void UnpackMT2T_C(const uint16_t* src, uint16_t* dst, size_t size);
-void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size);
+void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size);
+void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size);
 void MergeUVRow_C(const uint8_t* src_u,
                   const uint8_t* src_v,
                   uint8_t* dst_uv,
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 6254d978..c72f7461 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1852
+#define LIBYUV_VERSION 1854
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
diff --git a/source/convert.cc b/source/convert.cc
index 65d4ba16..b62e513a 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -735,12 +735,10 @@ int MM21ToYUY2(const uint8_t* src_y,
 // Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format
 // documentation.
 // TODO(greenjustin): Add an MT2T to I420 conversion.
-// TODO(greenjustin): Investigate if there are valid stride parameters other
-// than width.
 LIBYUV_API
-int MT2TToP010(const uint16_t* src_y,
+int MT2TToP010(const uint8_t* src_y,
                int src_stride_y,
-               const uint16_t* src_uv,
+               const uint8_t* src_uv,
                int src_stride_uv,
                uint16_t* dst_y,
                int dst_stride_y,
@@ -748,48 +746,75 @@ int MT2TToP010(const uint16_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
-  if (width <= 0 || height <= 0 || !src_y || !src_uv || !dst_y || !dst_uv) {
+  if (width <= 0 || !height || !src_uv || !dst_uv) {
     return -1;
   }
 
-  // TODO(greenjustin): Investigate if we can allow arbitrary sizes. This may
-  // not be semantically meaningful in this format, but we do not have samples
-  // of unaligned data to conclude that yet. This format is 16x32 tiled, so we
-  // must pad the width and height to reflect that.
-  int aligned_width = (width + 15) & ~15;
-  int aligned_height = (height + 31) & ~31;
-
   {
-    size_t y_size = aligned_width * aligned_height * 10 / 8;
-    size_t uv_size = aligned_width * ((aligned_height + 1) / 2) * 10 / 8;
-    size_t tmp_y_size = aligned_width * aligned_height * sizeof(uint16_t);
-    size_t tmp_uv_size =
-        aligned_width * ((aligned_height + 1) / 2) * sizeof(uint16_t);
-    void (*UnpackMT2T)(const uint16_t* src, uint16_t* dst, size_t size) =
+    int u_width = (width + 1) / 2;
+    int uv_width = 2 * u_width;
+    int y = 0;
+    int uv_height = uv_height = (height + 1) / 2;
+    const int tile_width = 16;
+    const int y_tile_height = 32;
+    const int uv_tile_height = 16;
+    int padded_width = (width + tile_width - 1) & ~(tile_width - 1);
+    int y_tile_row_size = padded_width * y_tile_height * 10 / 8;
+    int uv_tile_row_size = padded_width * uv_tile_height * 10 / 8;
+    size_t row_buf_size = padded_width * y_tile_height * sizeof(uint16_t);
+    void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) =
         UnpackMT2T_C;
-    align_buffer_64(tmp_y, tmp_y_size);
-    align_buffer_64(tmp_uv, tmp_uv_size);
+    align_buffer_64(row_buf, row_buf_size);
 
 #if defined(HAS_UNPACKMT2T_NEON)
     if (TestCpuFlag(kCpuHasNEON)) {
       UnpackMT2T = UnpackMT2T_NEON;
     }
 #endif
+    // Negative height means invert the image.
+    if (height < 0) {
+      height = -height;
+      uv_height = (height + 1) / 2;
+      if (dst_y) {
+        dst_y = dst_y + (height - 1) * dst_stride_y;
+        dst_stride_y = -dst_stride_y;
+      }
+      dst_uv = dst_uv + (uv_height - 1) * dst_stride_uv;
+      dst_stride_uv = -dst_stride_uv;
+    }
+
+    // Unpack and detile Y in rows of tiles
+    if (src_y && dst_y) {
+      for (y = 0; y < (height & ~(y_tile_height - 1)); y += y_tile_height) {
+        UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
+        DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
+                       width, y_tile_height, y_tile_height);
+        src_y += src_stride_y * y_tile_height;
+        dst_y += dst_stride_y * y_tile_height;
+      }
+      if (height & (y_tile_height - 1)) {
+        UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
+        DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
+                       width, height & (y_tile_height - 1), y_tile_height);
+      }
+    }
 
-    // TODO(greenjustin): Unpack and detile in rows rather than planes to keep
-    // the caches hot.
-    UnpackMT2T(src_y, (uint16_t*)tmp_y, y_size);
-    UnpackMT2T(src_uv, (uint16_t*)tmp_uv, uv_size);
-
-    DetilePlane_16((uint16_t*)tmp_y, src_stride_y, dst_y, dst_stride_y, width,
-                   height, 32);
-    DetilePlane_16((uint16_t*)tmp_uv, src_stride_uv, dst_uv, dst_stride_uv,
-                   width, (height + 1) / 2, 16);
-
-    free_aligned_buffer_64(tmp_y);
-    free_aligned_buffer_64(tmp_uv);
+    // Unpack and detile UV plane
+    for (y = 0; y < (uv_height & ~(uv_tile_height - 1)); y += uv_tile_height) {
+      UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
+      DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
+                     uv_width, uv_tile_height, uv_tile_height);
+      src_uv += src_stride_uv * uv_tile_height;
+      dst_uv += dst_stride_uv * uv_tile_height;
+    }
+    if (uv_height & (uv_tile_height - 1)) {
+      UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
+      DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
+                     uv_width, uv_height & (uv_tile_height - 1),
+                     uv_tile_height);
+    }
+    free_aligned_buffer_64(row_buf);
   }
-
   return 0;
 }
 
diff --git a/source/row_common.cc b/source/row_common.cc
index 7d084e76..3d1e705e 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -21,6 +21,12 @@ namespace libyuv {
 extern "C" {
 #endif
 
+#ifdef __cplusplus
+#define STATIC_CAST(type, expr) static_cast<type>(expr)
+#else
+#define STATIC_CAST(type, expr) (type)(expr)
+#endif
+
 // This macro controls YUV to RGB using unsigned math to extend range of
 // YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
 // LIBYUV_UNLIMITED_DATA
@@ -182,12 +188,13 @@ void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
                        int width) {
   int x;
   for (x = 0; x < width; ++x) {
-    uint8_t b = src_rgb565[0] & 0x1f;
-    uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
-    uint8_t r = src_rgb565[1] >> 3;
-    dst_argb[0] = (b << 3) | (b >> 2);
-    dst_argb[1] = (g << 2) | (g >> 4);
-    dst_argb[2] = (r << 3) | (r >> 2);
+    uint8_t b = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
+    uint8_t g = STATIC_CAST(
+        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
+    uint8_t r = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
+    dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
+    dst_argb[1] = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
+    dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
     dst_argb[3] = 255u;
     dst_argb += 4;
     src_rgb565 += 2;
@@ -199,13 +206,14 @@ void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
                          int width) {
   int x;
   for (x = 0; x < width; ++x) {
-    uint8_t b = src_argb1555[0] & 0x1f;
-    uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
-    uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
-    uint8_t a = src_argb1555[1] >> 7;
-    dst_argb[0] = (b << 3) | (b >> 2);
-    dst_argb[1] = (g << 3) | (g >> 2);
-    dst_argb[2] = (r << 3) | (r >> 2);
+    uint8_t b = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
+    uint8_t g = STATIC_CAST(
+        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
+    uint8_t r = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
+    uint8_t a = STATIC_CAST(uint8_t, src_argb1555[1] >> 7);
+    dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
+    dst_argb[1] = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
+    dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
     dst_argb[3] = -a;
     dst_argb += 4;
     src_argb1555 += 2;
@@ -217,14 +225,14 @@ void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
                          int width) {
   int x;
   for (x = 0; x < width; ++x) {
-    uint8_t b = src_argb4444[0] & 0x0f;
-    uint8_t g = src_argb4444[0] >> 4;
-    uint8_t r = src_argb4444[1] & 0x0f;
-    uint8_t a = src_argb4444[1] >> 4;
-    dst_argb[0] = (b << 4) | b;
-    dst_argb[1] = (g << 4) | g;
-    dst_argb[2] = (r << 4) | r;
-    dst_argb[3] = (a << 4) | a;
+    uint8_t b = STATIC_CAST(uint8_t, src_argb4444[0] & 0x0f);
+    uint8_t g = STATIC_CAST(uint8_t, src_argb4444[0] >> 4);
+    uint8_t r = STATIC_CAST(uint8_t, src_argb4444[1] & 0x0f);
+    uint8_t a = STATIC_CAST(uint8_t, src_argb4444[1] >> 4);
+    dst_argb[0] = STATIC_CAST(uint8_t, (b << 4) | b);
+    dst_argb[1] = STATIC_CAST(uint8_t, (g << 4) | g);
+    dst_argb[2] = STATIC_CAST(uint8_t, (r << 4) | r);
+    dst_argb[3] = STATIC_CAST(uint8_t, (a << 4) | a);
     dst_argb += 4;
     src_argb4444 += 2;
   }
@@ -320,7 +328,7 @@ void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
     uint8_t b0 = src_argb[0] >> 3;
     uint8_t g0 = src_argb[1] >> 2;
     uint8_t r0 = src_argb[2] >> 3;
-    *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
+    *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
   }
 }
 
@@ -340,23 +348,25 @@ void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
   for (x = 0; x < width - 1; x += 2) {
     int dither0 = ((const unsigned char*)(&dither4))[x & 3];
     int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
-    uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
-    uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
-    uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
-    uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3;
-    uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2;
-    uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3;
-    *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 11);
-    *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 11);
+    uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3);
+    uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2);
+    uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3);
+    uint8_t b1 = STATIC_CAST(uint8_t, clamp255(src_argb[4] + dither1) >> 3);
+    uint8_t g1 = STATIC_CAST(uint8_t, clamp255(src_argb[5] + dither1) >> 2);
+    uint8_t r1 = STATIC_CAST(uint8_t, clamp255(src_argb[6] + dither1) >> 3);
+    *(uint16_t*)(dst_rgb + 0) =
+        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
+    *(uint16_t*)(dst_rgb + 2) =
+        STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
     dst_rgb += 4;
     src_argb += 8;
   }
   if (width & 1) {
     int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
-    uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
-    uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
-    uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
-    *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
+    uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3);
+    uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2);
+    uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3);
+    *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
   }
 }
 
@@ -371,8 +381,10 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
     uint8_t g1 = src_argb[5] >> 3;
     uint8_t r1 = src_argb[6] >> 3;
     uint8_t a1 = src_argb[7] >> 7;
-    *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
-    *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 10) | (a1 << 15);
+    *(uint16_t*)(dst_rgb + 0) =
+        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
+    *(uint16_t*)(dst_rgb + 2) =
+        STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | (a1 << 15));
     dst_rgb += 4;
     src_argb += 8;
   }
@@ -381,7 +393,8 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
     uint8_t g0 = src_argb[1] >> 3;
     uint8_t r0 = src_argb[2] >> 3;
     uint8_t a0 = src_argb[3] >> 7;
-    *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
+    *(uint16_t*)(dst_rgb) =
+        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
   }
 }
 
@@ -396,8 +409,10 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
     uint8_t g1 = src_argb[5] >> 4;
     uint8_t r1 = src_argb[6] >> 4;
     uint8_t a1 = src_argb[7] >> 4;
-    *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
-    *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 4) | (r1 << 8) | (a1 << 12);
+    *(uint16_t*)(dst_rgb + 0) =
+        STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
+    *(uint16_t*)(dst_rgb + 2) =
+        STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | (a1 << 12));
     dst_rgb += 4;
     src_argb += 8;
   }
@@ -406,7 +421,8 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
     uint8_t g0 = src_argb[1] >> 4;
     uint8_t r0 = src_argb[2] >> 4;
     uint8_t a0 = src_argb[3] >> 4;
-    *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
+    *(uint16_t*)(dst_rgb) =
+        STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
   }
 }
 
@@ -417,7 +433,8 @@ void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
     uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
     uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
     uint32_t a0 = (src_abgr[3] >> 6);
-    *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30);
+    *(uint32_t*)(dst_ar30) =
+        STATIC_CAST(uint32_t, r0 | (g0 << 10) | (b0 << 20) | (a0 << 30));
     dst_ar30 += 4;
     src_abgr += 4;
   }
@@ -430,7 +447,8 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
     uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
     uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
     uint32_t a0 = (src_argb[3] >> 6);
-    *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
+    *(uint32_t*)(dst_ar30) =
+        STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30));
     dst_ar30 += 4;
     src_argb += 4;
   }
@@ -530,8 +548,8 @@ void AR64ShuffleRow_C(const uint8_t* src_ar64,
 
 #ifdef LIBYUV_RGB7
 // Old 7 bit math for compatibility on unsupported platforms.
-static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
-  return ((33 * r + 65 * g + 13 * b) >> 7) + 16;
+static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
+  return STATIC_CAST(uint8_t, ((33 * r + 65 * g + 13 * b) >> 7) + 16);
 }
 #else
 // 8 bit
@@ -540,8 +558,8 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
 //  return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
 //  0x7e80) >> 8;
 
-static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
-  return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
+static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
+  return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8);
 }
 #endif
 
@@ -549,29 +567,31 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
 
 // LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round.
 #ifdef LIBYUV_RGBTOU_TRUNCATE
-static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
-  return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
+static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
+  return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8);
 }
-static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
-  return (112 * r - 94 * g - 18 * b + 0x8000) >> 8;
+static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
+  return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8);
 }
 #else
 // TODO(fbarchard): Add rounding to x86 SIMD and use this
-static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
-  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
+static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
+  return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8);
 }
-static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
-  return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
+static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
+  return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8);
 }
 #endif
 
 // LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
 #if !defined(LIBYUV_ARGBTOUV_PAVGB)
 static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
-  return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8;
+  return STATIC_CAST(
+      uint8_t, ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8);
 }
 static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
-  return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8;
+  return STATIC_CAST(
+      uint8_t, ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8);
 }
 #endif
 
@@ -690,28 +710,28 @@ MAKEROWY(RAW, 0, 1, 2, 3)
 
 #ifdef LIBYUV_RGB7
 // Old 7 bit math for compatibility on unsupported platforms.
-static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
+static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
   return (38 * r + 75 * g + 15 * b + 64) >> 7;
 }
 #else
 // 8 bit
-static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
+static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
   return (77 * r + 150 * g + 29 * b + 128) >> 8;
 }
 #endif
 
 #if defined(LIBYUV_ARGBTOUV_PAVGB)
-static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
+static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
   return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
 }
-static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
+static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
   return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
 }
 #else
-static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
+static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
   return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
 }
-static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
+static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
   return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
 }
 #endif
@@ -808,11 +828,12 @@ void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
   int x;
   for (x = 0; x < width; ++x) {
     uint8_t b = src_rgb565[0] & 0x1f;
-    uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+    uint8_t g = STATIC_CAST(
+        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
     uint8_t r = src_rgb565[1] >> 3;
-    b = (b << 3) | (b >> 2);
-    g = (g << 2) | (g >> 4);
-    r = (r << 3) | (r >> 2);
+    b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
+    g = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
+    r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
     dst_y[0] = RGBToY(r, g, b);
     src_rgb565 += 2;
     dst_y += 1;
@@ -823,11 +844,12 @@ void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
   int x;
   for (x = 0; x < width; ++x) {
     uint8_t b = src_argb1555[0] & 0x1f;
-    uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+    uint8_t g = STATIC_CAST(
+        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
     uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
-    b = (b << 3) | (b >> 2);
-    g = (g << 3) | (g >> 2);
-    r = (r << 3) | (r >> 2);
+    b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
+    g = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
+    r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
     dst_y[0] = RGBToY(r, g, b);
     src_argb1555 += 2;
     dst_y += 1;
@@ -840,9 +862,9 @@ void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
     uint8_t b = src_argb4444[0] & 0x0f;
     uint8_t g = src_argb4444[0] >> 4;
     uint8_t r = src_argb4444[1] & 0x0f;
-    b = (b << 4) | b;
-    g = (g << 4) | g;
-    r = (r << 4) | r;
+    b = STATIC_CAST(uint8_t, (b << 4) | b);
+    g = STATIC_CAST(uint8_t, (g << 4) | g);
+    r = STATIC_CAST(uint8_t, (r << 4) | r);
     dst_y[0] = RGBToY(r, g, b);
     src_argb4444 += 2;
     dst_y += 1;
@@ -857,31 +879,35 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
   const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
   int x;
   for (x = 0; x < width - 1; x += 2) {
-    uint8_t b0 = src_rgb565[0] & 0x1f;
-    uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
-    uint8_t r0 = src_rgb565[1] >> 3;
-    uint8_t b1 = src_rgb565[2] & 0x1f;
-    uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
-    uint8_t r1 = src_rgb565[3] >> 3;
-    uint8_t b2 = next_rgb565[0] & 0x1f;
-    uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
-    uint8_t r2 = next_rgb565[1] >> 3;
-    uint8_t b3 = next_rgb565[2] & 0x1f;
-    uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
-    uint8_t r3 = next_rgb565[3] >> 3;
-
-    b0 = (b0 << 3) | (b0 >> 2);
-    g0 = (g0 << 2) | (g0 >> 4);
-    r0 = (r0 << 3) | (r0 >> 2);
-    b1 = (b1 << 3) | (b1 >> 2);
-    g1 = (g1 << 2) | (g1 >> 4);
-    r1 = (r1 << 3) | (r1 >> 2);
-    b2 = (b2 << 3) | (b2 >> 2);
-    g2 = (g2 << 2) | (g2 >> 4);
-    r2 = (r2 << 3) | (r2 >> 2);
-    b3 = (b3 << 3) | (b3 >> 2);
-    g3 = (g3 << 2) | (g3 >> 4);
-    r3 = (r3 << 3) | (r3 >> 2);
+    uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
+    uint8_t g0 = STATIC_CAST(
+        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
+    uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
+    uint8_t b1 = STATIC_CAST(uint8_t, src_rgb565[2] & 0x1f);
+    uint8_t g1 = STATIC_CAST(
+        uint8_t, (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3));
+    uint8_t r1 = STATIC_CAST(uint8_t, src_rgb565[3] >> 3);
+    uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
+    uint8_t g2 = STATIC_CAST(
+        uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
+    uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
+    uint8_t b3 = STATIC_CAST(uint8_t, next_rgb565[2] & 0x1f);
+    uint8_t g3 = STATIC_CAST(
+        uint8_t, (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3));
+    uint8_t r3 = STATIC_CAST(uint8_t, next_rgb565[3] >> 3);
+
+    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
+    g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
+    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
+    b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
+    g1 = STATIC_CAST(uint8_t, (g1 << 2) | (g1 >> 4));
+    r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
+    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
+    g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
+    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
+    b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
+    g3 = STATIC_CAST(uint8_t, (g3 << 2) | (g3 >> 4));
+    r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));
 
 #if LIBYUV_ARGBTOUV_PAVGB
     uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
@@ -903,19 +929,20 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
     dst_v += 1;
   }
   if (width & 1) {
-    uint8_t b0 = src_rgb565[0] & 0x1f;
-    uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
-    uint8_t r0 = src_rgb565[1] >> 3;
-    uint8_t b2 = next_rgb565[0] & 0x1f;
-    uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
-    uint8_t r2 = next_rgb565[1] >> 3;
-
-    b0 = (b0 << 3) | (b0 >> 2);
-    g0 = (g0 << 2) | (g0 >> 4);
-    r0 = (r0 << 3) | (r0 >> 2);
-    b2 = (b2 << 3) | (b2 >> 2);
-    g2 = (g2 << 2) | (g2 >> 4);
-    r2 = (r2 << 3) | (r2 >> 2);
+    uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
+    uint8_t g0 = STATIC_CAST(
+        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
+    uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
+    uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
+    uint8_t g2 = STATIC_CAST(
+        uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
+    uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
+    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
+    g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
+    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
+    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
+    g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
+    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
 
 #if LIBYUV_ARGBTOUV_PAVGB
     uint8_t ab = AVGB(b0, b2);
@@ -941,31 +968,35 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
   const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
   int x;
   for (x = 0; x < width - 1; x += 2) {
-    uint8_t b0 = src_argb1555[0] & 0x1f;
-    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
-    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
-    uint8_t b1 = src_argb1555[2] & 0x1f;
-    uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
-    uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
-    uint8_t b2 = next_argb1555[0] & 0x1f;
-    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
-    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
-    uint8_t b3 = next_argb1555[2] & 0x1f;
-    uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
-    uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;
-
-    b0 = (b0 << 3) | (b0 >> 2);
-    g0 = (g0 << 3) | (g0 >> 2);
-    r0 = (r0 << 3) | (r0 >> 2);
-    b1 = (b1 << 3) | (b1 >> 2);
-    g1 = (g1 << 3) | (g1 >> 2);
-    r1 = (r1 << 3) | (r1 >> 2);
-    b2 = (b2 << 3) | (b2 >> 2);
-    g2 = (g2 << 3) | (g2 >> 2);
-    r2 = (r2 << 3) | (r2 >> 2);
-    b3 = (b3 << 3) | (b3 >> 2);
-    g3 = (g3 << 3) | (g3 >> 2);
-    r3 = (r3 << 3) | (r3 >> 2);
+    uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
+    uint8_t g0 = STATIC_CAST(
+        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
+    uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
+    uint8_t b1 = STATIC_CAST(uint8_t, src_argb1555[2] & 0x1f);
+    uint8_t g1 = STATIC_CAST(
+        uint8_t, (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3));
+    uint8_t r1 = STATIC_CAST(uint8_t, (src_argb1555[3] & 0x7c) >> 2);
+    uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
+    uint8_t g2 = STATIC_CAST(
+        uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
+    uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);
+    uint8_t b3 = STATIC_CAST(uint8_t, next_argb1555[2] & 0x1f);
+    uint8_t g3 = STATIC_CAST(
+        uint8_t, (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3));
+    uint8_t r3 = STATIC_CAST(uint8_t, (next_argb1555[3] & 0x7c) >> 2);
+
+    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
+    g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
+    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
+    b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
+    g1 = STATIC_CAST(uint8_t, (g1 << 3) | (g1 >> 2));
+    r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
+    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
+    g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
+    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
+    b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
+    g3 = STATIC_CAST(uint8_t, (g3 << 3) | (g3 >> 2));
+    r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));
 
 #if LIBYUV_ARGBTOUV_PAVGB
     uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
@@ -987,19 +1018,21 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
     dst_v += 1;
   }
   if (width & 1) {
-    uint8_t b0 = src_argb1555[0] & 0x1f;
-    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
-    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
-    uint8_t b2 = next_argb1555[0] & 0x1f;
-    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
-    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
-
-    b0 = (b0 << 3) | (b0 >> 2);
-    g0 = (g0 << 3) | (g0 >> 2);
-    r0 = (r0 << 3) | (r0 >> 2);
-    b2 = (b2 << 3) | (b2 >> 2);
-    g2 = (g2 << 3) | (g2 >> 2);
-    r2 = (r2 << 3) | (r2 >> 2);
+    uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
+    uint8_t g0 = STATIC_CAST(
+        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
+    uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
+    uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
+    uint8_t g2 = STATIC_CAST(
+        uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
+    uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);
+
+    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
+    g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
+    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
+    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
+    g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
+    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
 
 #if LIBYUV_ARGBTOUV_PAVGB
     uint8_t ab = AVGB(b0, b2);
@@ -1038,18 +1071,18 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
     uint8_t g3 = next_argb4444[2] >> 4;
     uint8_t r3 = next_argb4444[3] & 0x0f;
 
-    b0 = (b0 << 4) | b0;
-    g0 = (g0 << 4) | g0;
-    r0 = (r0 << 4) | r0;
-    b1 = (b1 << 4) | b1;
-    g1 = (g1 << 4) | g1;
-    r1 = (r1 << 4) | r1;
-    b2 = (b2 << 4) | b2;
-    g2 = (g2 << 4) | g2;
-    r2 = (r2 << 4) | r2;
-    b3 = (b3 << 4) | b3;
-    g3 = (g3 << 4) | g3;
-    r3 = (r3 << 4) | r3;
+    b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
+    g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
+    r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
+    b1 = STATIC_CAST(uint8_t, (b1 << 4) | b1);
+    g1 = STATIC_CAST(uint8_t, (g1 << 4) | g1);
+    r1 = STATIC_CAST(uint8_t, (r1 << 4) | r1);
+    b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
+    g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
+    r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
+    b3 = STATIC_CAST(uint8_t, (b3 << 4) | b3);
+    g3 = STATIC_CAST(uint8_t, (g3 << 4) | g3);
+    r3 = STATIC_CAST(uint8_t, (r3 << 4) | r3);
 
 #if LIBYUV_ARGBTOUV_PAVGB
     uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
@@ -1078,12 +1111,12 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
     uint8_t g2 = next_argb4444[0] >> 4;
     uint8_t r2 = next_argb4444[1] & 0x0f;
 
-    b0 = (b0 << 4) | b0;
-    g0 = (g0 << 4) | g0;
-    r0 = (r0 << 4) | r0;
-    b2 = (b2 << 4) | b2;
-    g2 = (g2 << 4) | g2;
-    r2 = (r2 << 4) | r2;
+    b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
+    g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
+    r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
+    b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
+    g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
+    r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
 
 #if LIBYUV_ARGBTOUV_PAVGB
     uint8_t ab = AVGB(b0, b2);
@@ -1140,9 +1173,9 @@ void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
     int sg = (b * 22 + g * 88 + r * 45) >> 7;
     int sr = (b * 24 + g * 98 + r * 50) >> 7;
     // b does not over flow. a is preserved from original.
-    dst_argb[0] = sb;
-    dst_argb[1] = clamp255(sg);
-    dst_argb[2] = clamp255(sr);
+    dst_argb[0] = STATIC_CAST(uint8_t, sb);
+    dst_argb[1] = STATIC_CAST(uint8_t, clamp255(sg));
+    dst_argb[2] = STATIC_CAST(uint8_t, clamp255(sr));
     dst_argb += 4;
   }
 }
@@ -1171,10 +1204,10 @@ void ARGBColorMatrixRow_C(const uint8_t* src_argb,
     int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
               a * matrix_argb[15]) >>
              6;
-    dst_argb[0] = Clamp(sb);
-    dst_argb[1] = Clamp(sg);
-    dst_argb[2] = Clamp(sr);
-    dst_argb[3] = Clamp(sa);
+    dst_argb[0] = STATIC_CAST(uint8_t, Clamp(sb));
+    dst_argb[1] = STATIC_CAST(uint8_t, Clamp(sg));
+    dst_argb[2] = STATIC_CAST(uint8_t, Clamp(sr));
+    dst_argb[3] = STATIC_CAST(uint8_t, Clamp(sa));
     src_argb += 4;
     dst_argb += 4;
   }
@@ -1224,9 +1257,12 @@ void ARGBQuantizeRow_C(uint8_t* dst_argb,
     int b = dst_argb[0];
     int g = dst_argb[1];
     int r = dst_argb[2];
-    dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
-    dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
-    dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
+    dst_argb[0] = STATIC_CAST(
+        uint8_t, (b * scale >> 16) * interval_size + interval_offset);
+    dst_argb[1] = STATIC_CAST(
+        uint8_t, (g * scale >> 16) * interval_size + interval_offset);
+    dst_argb[2] = STATIC_CAST(
+        uint8_t, (r * scale >> 16) * interval_size + interval_offset);
     dst_argb += 4;
   }
 }
@@ -1277,10 +1313,10 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb,
     const uint32_t g_scale = src_argb1[1];
     const uint32_t r_scale = src_argb1[2];
     const uint32_t a_scale = src_argb1[3];
-    dst_argb[0] = SHADE(b, b_scale);
-    dst_argb[1] = SHADE(g, g_scale);
-    dst_argb[2] = SHADE(r, r_scale);
-    dst_argb[3] = SHADE(a, a_scale);
+    dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_scale));
+    dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_scale));
+    dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_scale));
+    dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_scale));
     src_argb += 4;
     src_argb1 += 4;
     dst_argb += 4;
@@ -1305,10 +1341,10 @@ void ARGBAddRow_C(const uint8_t* src_argb,
     const int g_add = src_argb1[1];
     const int r_add = src_argb1[2];
     const int a_add = src_argb1[3];
-    dst_argb[0] = SHADE(b, b_add);
-    dst_argb[1] = SHADE(g, g_add);
-    dst_argb[2] = SHADE(r, r_add);
-    dst_argb[3] = SHADE(a, a_add);
+    dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_add));
+    dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_add));
+    dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_add));
+    dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_add));
     src_argb += 4;
     src_argb1 += 4;
     dst_argb += 4;
@@ -1332,10 +1368,10 @@ void ARGBSubtractRow_C(const uint8_t* src_argb,
     const int g_sub = src_argb1[1];
     const int r_sub = src_argb1[2];
     const int a_sub = src_argb1[3];
-    dst_argb[0] = SHADE(b, b_sub);
-    dst_argb[1] = SHADE(g, g_sub);
-    dst_argb[2] = SHADE(r, r_sub);
-    dst_argb[3] = SHADE(a, a_sub);
+    dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_sub));
+    dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_sub));
+    dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_sub));
+    dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_sub));
     src_argb += 4;
     src_argb1 += 4;
     dst_argb += 4;
@@ -1692,9 +1728,9 @@ static __inline void YuvPixel(uint8_t y,
   LOAD_YUV_CONSTANTS;
   uint32_t y32 = y * 0x0101;
   CALC_RGB16;
-  *b = Clamp((int32_t)(b16) >> 6);
-  *g = Clamp((int32_t)(g16) >> 6);
-  *r = Clamp((int32_t)(r16) >> 6);
+  *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
+  *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
+  *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
 }
 
 // Reads 8 bit YUV and leaves result as 16 bit.
@@ -1724,8 +1760,8 @@ static __inline void YuvPixel10_16(uint16_t y,
                                    const struct YuvConstants* yuvconstants) {
   LOAD_YUV_CONSTANTS;
   uint32_t y32 = (y << 6) | (y >> 4);
-  u = clamp255(u >> 2);
-  v = clamp255(v >> 2);
+  u = STATIC_CAST(uint8_t, clamp255(u >> 2));
+  v = STATIC_CAST(uint8_t, clamp255(v >> 2));
   CALC_RGB16;
   *b = b16;
   *g = g16;
@@ -1743,8 +1779,8 @@ static __inline void YuvPixel12_16(int16_t y,
                                    const struct YuvConstants* yuvconstants) {
   LOAD_YUV_CONSTANTS;
   uint32_t y32 = (y << 4) | (y >> 8);
-  u = clamp255(u >> 4);
-  v = clamp255(v >> 4);
+  u = STATIC_CAST(uint8_t, clamp255(u >> 4));
+  v = STATIC_CAST(uint8_t, clamp255(v >> 4));
   CALC_RGB16;
   *b = b16;
   *g = g16;
@@ -1764,9 +1800,9 @@ static __inline void YuvPixel10(uint16_t y,
   int g16;
   int r16;
   YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants);
-  *b = Clamp(b16 >> 6);
-  *g = Clamp(g16 >> 6);
-  *r = Clamp(r16 >> 6);
+  *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
+  *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
+  *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
 }
 
 // C reference code that mimics the YUV 12 bit assembly.
@@ -1782,9 +1818,9 @@ static __inline void YuvPixel12(uint16_t y,
   int g16;
   int r16;
   YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants);
-  *b = Clamp(b16 >> 6);
-  *g = Clamp(g16 >> 6);
-  *r = Clamp(r16 >> 6);
+  *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
+  *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
+  *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
 }
 
 // C reference code that mimics the YUV 16 bit assembly.
@@ -1798,12 +1834,12 @@ static __inline void YuvPixel16_8(uint16_t y,
                                   const struct YuvConstants* yuvconstants) {
   LOAD_YUV_CONSTANTS;
   uint32_t y32 = y;
-  u = clamp255(u >> 8);
-  v = clamp255(v >> 8);
+  u = STATIC_CAST(uint16_t, clamp255(u >> 8));
+  v = STATIC_CAST(uint16_t, clamp255(v >> 8));
   CALC_RGB16;
-  *b = Clamp((int32_t)(b16) >> 6);
-  *g = Clamp((int32_t)(g16) >> 6);
-  *r = Clamp((int32_t)(r16) >> 6);
+  *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
+  *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
+  *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
 }
 
 // C reference code that mimics the YUV 16 bit assembly.
@@ -1817,8 +1853,8 @@ static __inline void YuvPixel16_16(uint16_t y,
                                    const struct YuvConstants* yuvconstants) {
   LOAD_YUV_CONSTANTS;
   uint32_t y32 = y;
-  u = clamp255(u >> 8);
-  v = clamp255(v >> 8);
+  u = STATIC_CAST(uint16_t, clamp255(u >> 8));
+  v = STATIC_CAST(uint16_t, clamp255(v >> 8));
   CALC_RGB16;
   *b = b16;
   *g = g16;
@@ -1840,9 +1876,9 @@ static __inline void YPixel(uint8_t y,
   int yg = yuvconstants->kYToRgb[0];
 #endif
   uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
-  *b = Clamp(((int32_t)(y1) + ygb) >> 6);
-  *g = Clamp(((int32_t)(y1) + ygb) >> 6);
-  *r = Clamp(((int32_t)(y1) + ygb) >> 6);
+  *b = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
+  *g = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
+  *r = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
 }
 
 void I444ToARGBRow_C(const uint8_t* src_y,
@@ -1963,10 +1999,10 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y,
   for (x = 0; x < width - 1; x += 2) {
     YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
                rgb_buf + 2, yuvconstants);
-    rgb_buf[3] = clamp255(src_a[0] >> 2);
+    rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
     YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
                rgb_buf + 6, yuvconstants);
-    rgb_buf[7] = clamp255(src_a[1] >> 2);
+    rgb_buf[7] = STATIC_CAST(uint8_t, clamp255(src_a[1] >> 2));
     src_y += 2;
     src_u += 1;
     src_v += 1;
@@ -1976,7 +2012,7 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y,
   if (width & 1) {
     YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
                rgb_buf + 2, yuvconstants);
-    rgb_buf[3] = clamp255(src_a[0] >> 2);
+    rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
   }
 }
 
@@ -1991,7 +2027,7 @@ void I410AlphaToARGBRow_C(const uint16_t* src_y,
   for (x = 0; x < width; ++x) {
     YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
                rgb_buf + 2, yuvconstants);
-    rgb_buf[3] = clamp255(src_a[0] >> 2);
+    rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
     src_y += 1;
     src_u += 1;
     src_v += 1;
@@ -2317,8 +2353,10 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
     b1 = b1 >> 4;
     g1 = g1 >> 4;
     r1 = r1 >> 4;
-    *(uint16_t*)(dst_argb4444 + 0) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
-    *(uint16_t*)(dst_argb4444 + 2) = b1 | (g1 << 4) | (r1 << 8) | 0xf000;
+    *(uint16_t*)(dst_argb4444 + 0) =
+        STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
+    *(uint16_t*)(dst_argb4444 + 2) =
+        STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | 0xf000);
     src_y += 2;
     src_u += 1;
     src_v += 1;
@@ -2329,7 +2367,8 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
     b0 = b0 >> 4;
     g0 = g0 >> 4;
     r0 = r0 >> 4;
-    *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
+    *(uint16_t*)(dst_argb4444) =
+        STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
   }
 }
 
@@ -2355,8 +2394,10 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
     b1 = b1 >> 3;
     g1 = g1 >> 3;
     r1 = r1 >> 3;
-    *(uint16_t*)(dst_argb1555 + 0) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
-    *(uint16_t*)(dst_argb1555 + 2) = b1 | (g1 << 5) | (r1 << 10) | 0x8000;
+    *(uint16_t*)(dst_argb1555 + 0) =
+        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
+    *(uint16_t*)(dst_argb1555 + 2) =
+        STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | 0x8000);
     src_y += 2;
     src_u += 1;
     src_v += 1;
@@ -2367,7 +2408,8 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
     b0 = b0 >> 3;
     g0 = g0 >> 3;
     r0 = r0 >> 3;
-    *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
+    *(uint16_t*)(dst_argb1555) =
+        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
   }
 }
 
@@ -2393,8 +2435,10 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
     b1 = b1 >> 3;
     g1 = g1 >> 2;
     r1 = r1 >> 3;
-    *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11);  // for ubsan
-    *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11);
+    *(uint16_t*)(dst_rgb565 + 0) =
+        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
+    *(uint16_t*)(dst_rgb565 + 2) =
+        STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
     src_y += 2;
     src_u += 1;
     src_v += 1;
@@ -2405,7 +2449,8 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
     b0 = b0 >> 3;
     g0 = g0 >> 2;
     r0 = r0 >> 3;
-    *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+    *(uint16_t*)(dst_rgb565 + 0) =
+        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
   }
 }
 
@@ -2520,8 +2565,12 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
     b1 = b1 >> 3;
     g1 = g1 >> 2;
     r1 = r1 >> 3;
-    *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11);
-    *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11);
+    *(uint16_t*)(dst_rgb565 + 0) = STATIC_CAST(uint16_t, b0) |
+                                   STATIC_CAST(uint16_t, g0 << 5) |
+                                   STATIC_CAST(uint16_t, r0 << 11);
+    *(uint16_t*)(dst_rgb565 + 2) = STATIC_CAST(uint16_t, b1) |
+                                   STATIC_CAST(uint16_t, g1 << 5) |
+                                   STATIC_CAST(uint16_t, r1 << 11);
     src_y += 2;
     src_uv += 2;
     dst_rgb565 += 4;  // Advance 2 pixels.
@@ -2531,7 +2580,9 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
     b0 = b0 >> 3;
     g0 = g0 >> 2;
     r0 = r0 >> 3;
-    *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+    *(uint16_t*)(dst_rgb565) = STATIC_CAST(uint16_t, b0) |
+                               STATIC_CAST(uint16_t, g0 << 5) |
+                               STATIC_CAST(uint16_t, r0 << 11);
   }
 }
 
@@ -2805,10 +2856,10 @@ void DetileToYUY2_C(const uint8_t* src_y,
 // in 80 byte blocks representing 64 pixels each. The first 16 bytes of the
 // block contain all of the lower 2 bits of each pixel packed together, and the
 // next 64 bytes represent all the upper 8 bits of the pixel.
-void UnpackMT2T_C(const uint16_t* src, uint16_t* dst, size_t size) {
+void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size) {
   for (size_t i = 0; i < size; i += 80) {
-    const uint8_t* src_lower_bits = (uint8_t*)src;
-    const uint8_t* src_upper_bits = ((uint8_t*)src) + 16;
+    const uint8_t* src_lower_bits = src;
+    const uint8_t* src_upper_bits = src + 16;
 
     for (int j = 0; j < 16; j++) {
       uint8_t lower_bits = src_lower_bits[j];
@@ -2824,7 +2875,7 @@ void UnpackMT2T_C(const uint16_t* src, uint16_t* dst, size_t size) {
                (uint16_t)src_upper_bits[j * 4 + 3] >> 2;
     }
 
-    src += 40;
+    src += 80;
   }
 }
 
@@ -2920,10 +2971,10 @@ void MergeAR64Row_C(const uint16_t* src_r,
   int shift = 16 - depth;
   int max = (1 << depth) - 1;
   for (x = 0; x < width; ++x) {
-    dst_ar64[0] = ClampMax(src_b[x], max) << shift;
-    dst_ar64[1] = ClampMax(src_g[x], max) << shift;
-    dst_ar64[2] = ClampMax(src_r[x], max) << shift;
-    dst_ar64[3] = ClampMax(src_a[x], max) << shift;
+    dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift);
+    dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift);
+    dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift);
+    dst_ar64[3] = STATIC_CAST(uint16_t, ClampMax(src_a[x], max) << shift);
     dst_ar64 += 4;
   }
 }
@@ -2940,10 +2991,10 @@ void MergeARGB16To8Row_C(const uint16_t* src_r,
   int x;
   int shift = depth - 8;
   for (x = 0; x < width; ++x) {
-    dst_argb[0] = clamp255(src_b[x] >> shift);
-    dst_argb[1] = clamp255(src_g[x] >> shift);
-    dst_argb[2] = clamp255(src_r[x] >> shift);
-    dst_argb[3] = clamp255(src_a[x] >> shift);
+    dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift));
+    dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift));
+    dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift));
+    dst_argb[3] = STATIC_CAST(uint8_t, clamp255(src_a[x] >> shift));
     dst_argb += 4;
   }
 }
@@ -2960,9 +3011,9 @@ void MergeXR64Row_C(const uint16_t* src_r,
   int shift = 16 - depth;
   int max = (1 << depth) - 1;
   for (x = 0; x < width; ++x) {
-    dst_ar64[0] = ClampMax(src_b[x], max) << shift;
-    dst_ar64[1] = ClampMax(src_g[x], max) << shift;
-    dst_ar64[2] = ClampMax(src_r[x], max) << shift;
+    dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift);
+    dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift);
+    dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift);
     dst_ar64[3] = 0xffff;
     dst_ar64 += 4;
   }
@@ -2979,9 +3030,9 @@ void MergeXRGB16To8Row_C(const uint16_t* src_r,
   int x;
   int shift = depth - 8;
   for (x = 0; x < width; ++x) {
-    dst_argb[0] = clamp255(src_b[x] >> shift);
-    dst_argb[1] = clamp255(src_g[x] >> shift);
-    dst_argb[2] = clamp255(src_r[x] >> shift);
+    dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift));
+    dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift));
+    dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift));
     dst_argb[3] = 0xff;
     dst_argb += 4;
   }
@@ -3027,8 +3078,8 @@ void MergeUVRow_16_C(const uint16_t* src_u,
   assert(depth <= 16);
   int x;
   for (x = 0; x < width; ++x) {
-    dst_uv[0] = src_u[x] << shift;
-    dst_uv[1] = src_v[x] << shift;
+    dst_uv[0] = STATIC_CAST(uint16_t, src_u[x] << shift);
+    dst_uv[1] = STATIC_CAST(uint16_t, src_v[x] << shift);
     dst_uv += 2;
   }
 }
@@ -3056,7 +3107,7 @@ void MultiplyRow_16_C(const uint16_t* src_y,
                       int width) {
   int x;
   for (x = 0; x < width; ++x) {
-    dst_y[x] = src_y[x] * scale;
+    dst_y[x] = STATIC_CAST(uint16_t, src_y[x] * scale);
   }
 }
 
@@ -3087,7 +3138,7 @@ void Convert16To8Row_C(const uint16_t* src_y,
   assert(scale <= 32768);
 
   for (x = 0; x < width; ++x) {
-    dst_y[x] = C16TO8(src_y[x], scale);
+    dst_y[x] = STATIC_CAST(uint8_t, C16TO8(src_y[x], scale));
   }
 }
 
@@ -3250,9 +3301,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb,
     uint32_t bb = src_argb1[0];
     uint32_t bg = src_argb1[1];
     uint32_t br = src_argb1[2];
-    dst_argb[0] = BLEND(fb, bb, a);
-    dst_argb[1] = BLEND(fg, bg, a);
-    dst_argb[2] = BLEND(fr, br, a);
+    dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
+    dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
+    dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
     dst_argb[3] = 255u;
 
     fb = src_argb[4 + 0];
@@ -3262,9 +3313,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb,
     bb = src_argb1[4 + 0];
     bg = src_argb1[4 + 1];
     br = src_argb1[4 + 2];
-    dst_argb[4 + 0] = BLEND(fb, bb, a);
-    dst_argb[4 + 1] = BLEND(fg, bg, a);
-    dst_argb[4 + 2] = BLEND(fr, br, a);
+    dst_argb[4 + 0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
+    dst_argb[4 + 1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
+    dst_argb[4 + 2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
     dst_argb[4 + 3] = 255u;
     src_argb += 8;
     src_argb1 += 8;
@@ -3279,9 +3330,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb,
     uint32_t bb = src_argb1[0];
     uint32_t bg = src_argb1[1];
     uint32_t br = src_argb1[2];
-    dst_argb[0] = BLEND(fb, bb, a);
-    dst_argb[1] = BLEND(fg, bg, a);
-    dst_argb[2] = BLEND(fr, br, a);
+    dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
+    dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
+    dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
     dst_argb[3] = 255u;
   }
 }
@@ -3326,7 +3377,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
     dst_argb[0] = ATTENUATE(b, a);
     dst_argb[1] = ATTENUATE(g, a);
     dst_argb[2] = ATTENUATE(r, a);
-    dst_argb[3] = a;
+    dst_argb[3] = STATIC_CAST(uint8_t, a);
     b = src_argb[4];
     g = src_argb[5];
     r = src_argb[6];
@@ -3334,7 +3385,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
     dst_argb[4] = ATTENUATE(b, a);
     dst_argb[5] = ATTENUATE(g, a);
     dst_argb[6] = ATTENUATE(r, a);
-    dst_argb[7] = a;
+    dst_argb[7] = STATIC_CAST(uint8_t, a);
     src_argb += 8;
     dst_argb += 8;
   }
@@ -3347,7 +3398,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
     dst_argb[0] = ATTENUATE(b, a);
     dst_argb[1] = ATTENUATE(g, a);
     dst_argb[2] = ATTENUATE(r, a);
-    dst_argb[3] = a;
+    dst_argb[3] = STATIC_CAST(uint8_t, a);
   }
 }
 #undef ATTENUATE
@@ -3419,10 +3470,10 @@ void ARGBUnattenuateRow_C(const uint8_t* src_argb,
     const uint32_t ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
 
     // Clamping should not be necessary but is free in assembly.
-    dst_argb[0] = UNATTENUATE(b, ia);
-    dst_argb[1] = UNATTENUATE(g, ia);
-    dst_argb[2] = UNATTENUATE(r, ia);
-    dst_argb[3] = a;
+    dst_argb[0] = STATIC_CAST(uint8_t, UNATTENUATE(b, ia));
+    dst_argb[1] = STATIC_CAST(uint8_t, UNATTENUATE(g, ia));
+    dst_argb[2] = STATIC_CAST(uint8_t, UNATTENUATE(r, ia));
+    dst_argb[3] = STATIC_CAST(uint8_t, a);
     src_argb += 4;
     dst_argb += 4;
   }
@@ -3456,12 +3507,20 @@ void CumulativeSumToAverageRow_C(const int32_t* tl,
   int i;
   assert(area != 0);
 
-  ooa = 1.0f / area;
+  ooa = 1.0f / STATIC_CAST(float, area);
   for (i = 0; i < count; ++i) {
-    dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
-    dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
-    dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
-    dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
+    dst[0] =
+        (uint8_t)(STATIC_CAST(float, bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) *
+                  ooa);
+    dst[1] =
+        (uint8_t)(STATIC_CAST(float, bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) *
+                  ooa);
+    dst[2] =
+        (uint8_t)(STATIC_CAST(float, bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) *
+                  ooa);
+    dst[3] =
+        (uint8_t)(STATIC_CAST(float, bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) *
+                  ooa);
     dst += 4;
     tl += 4;
     bl += 4;
@@ -3519,7 +3578,9 @@ static void HalfRow_16To8_C(const uint16_t* src_uv,
                             int width) {
   int x;
   for (x = 0; x < width; ++x) {
-    dst_uv[x] = C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale);
+    dst_uv[x] = STATIC_CAST(
+        uint8_t,
+        C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale));
   }
 }
 
@@ -3545,8 +3606,9 @@ void InterpolateRow_C(uint8_t* dst_ptr,
     return;
   }
   for (x = 0; x < width; ++x) {
-    dst_ptr[0] =
-        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
+    dst_ptr[0] = STATIC_CAST(
+        uint8_t,
+        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
     ++src_ptr;
     ++src_ptr1;
     ++dst_ptr;
@@ -3575,8 +3637,9 @@ void InterpolateRow_16_C(uint16_t* dst_ptr,
     return;
   }
   for (x = 0; x < width; ++x) {
-    dst_ptr[0] =
-        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
+    dst_ptr[0] = STATIC_CAST(
+        uint16_t,
+        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
     ++src_ptr;
     ++src_ptr1;
     ++dst_ptr;
@@ -3613,9 +3676,11 @@ void InterpolateRow_16To8_C(uint8_t* dst_ptr,
     return;
   }
   for (x = 0; x < width; ++x) {
-    dst_ptr[0] = C16TO8(
-        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8,
-        scale);
+    dst_ptr[0] = STATIC_CAST(
+        uint8_t,
+        C16TO8(
+            (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8,
+            scale));
     src_ptr += 1;
     src_ptr1 += 1;
     dst_ptr += 1;
@@ -3727,10 +3792,10 @@ void ARGBPolynomialRow_C(const uint8_t* src_argb,
     dr += poly[14] * r3;
     da += poly[15] * a3;
 
-    dst_argb[0] = Clamp((int32_t)(db));
-    dst_argb[1] = Clamp((int32_t)(dg));
-    dst_argb[2] = Clamp((int32_t)(dr));
-    dst_argb[3] = Clamp((int32_t)(da));
+    dst_argb[0] = STATIC_CAST(uint8_t, Clamp((int32_t)(db)));
+    dst_argb[1] = STATIC_CAST(uint8_t, Clamp((int32_t)(dg)));
+    dst_argb[2] = STATIC_CAST(uint8_t, Clamp((int32_t)(dr)));
+    dst_argb[3] = STATIC_CAST(uint8_t, Clamp((int32_t)(da)));
     src_argb += 4;
     dst_argb += 4;
   }
@@ -4302,8 +4367,9 @@ void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
 void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
   int i;
   for (i = 0; i < width; ++i) {
-    *dst++ =
-        (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8;
+    *dst++ = STATIC_CAST(
+        uint16_t,
+        (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8);
     ++src;
   }
 }
@@ -4463,7 +4529,9 @@ void HalfMergeUVRow_C(const uint8_t* src_u,
   }
 }
 
+#undef STATIC_CAST
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
-#endif
+#endif
+\ No newline at end of file
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 0c6065f8..0611b9aa 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -720,9 +720,9 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
 }
 #endif
 
-void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) {
-  const uint16_t* src_lower_bits = src;
-  const uint16_t* src_upper_bits = src + 8;
+void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
+  const uint8_t* src_lower_bits = src;
+  const uint8_t* src_upper_bits = src + 16;
   asm volatile(
       "1:                                        \n"
       "vld4.8      {d1, d3, d5, d7}, [%1]!       \n"  // Load 32 bytes of upper
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index e0a4ea19..41289fe9 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -751,9 +751,9 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
 
 // Unpack MT2T into tiled P010 64 pixels at a time. See
 // tinyurl.com/mtk-10bit-video-format for format documentation.
-void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) {
-  const uint16_t* src_lower_bits = src;
-  const uint16_t* src_upper_bits = src + 8;
+void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
+  const uint8_t* src_lower_bits = src;
+  const uint8_t* src_upper_bits = src + 16;
   asm volatile(
       "1:                                        \n"
       "ld4         {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
diff --git a/source/scale.cc b/source/scale.cc
index 16854c45..2a7e308d 100644
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -775,9 +775,9 @@ static void ScaleAddCols2_C(int dst_width,
     int ix = x >> 16;
     x += dx;
     boxwidth = MIN1((x >> 16) - ix);
-    *dst_ptr++ =
-        SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
-        16;
+    *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) *
+                               scaletbl[boxwidth - minboxwidth] >>
+                           16);
   }
 }
 
@@ -814,7 +814,7 @@ static void ScaleAddCols0_C(int dst_width,
   (void)dx;
   src_ptr += (x >> 16);
   for (i = 0; i < dst_width; ++i) {
-    *dst_ptr++ = src_ptr[i] * scaleval >> 16;
+    *dst_ptr++ = (uint8_t)(src_ptr[i] * scaleval >> 16);
   }
 }
 
@@ -829,7 +829,7 @@ static void ScaleAddCols1_C(int dst_width,
   int i;
   x >>= 16;
   for (i = 0; i < dst_width; ++i) {
-    *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
+    *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + x) * scaleval >> 16);
     x += boxwidth;
   }
 }
diff --git a/source/scale_argb.cc b/source/scale_argb.cc
index 07840d65..48c10896 100644
--- a/source/scale_argb.cc
+++ b/source/scale_argb.cc
@@ -58,9 +58,9 @@ static void ScaleARGBDown2(int src_width,
   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
   // Advance to odd row, even column.
   if (filtering == kFilterBilinear) {
-    src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
+    src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
   } else {
-    src_argb += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 4;
+    src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4;
   }
 
 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
@@ -162,7 +162,7 @@ static void ScaleARGBDown4Box(int src_width,
                             uint8_t* dst_argb, int dst_width) =
       ScaleARGBRowDown2Box_C;
   // Advance to odd row, even column.
-  src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
+  src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
   (void)src_width;
   (void)src_height;
   (void)dx;
@@ -214,7 +214,7 @@ static void ScaleARGBDownEven(int src_width,
                               enum FilterMode filtering) {
   int j;
   int col_step = dx >> 16;
-  int row_stride = (dy >> 16) * (int64_t)src_stride;
+  ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
   void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
                                int src_step, uint8_t* dst_argb, int dst_width) =
       filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
@@ -222,7 +222,7 @@ static void ScaleARGBDownEven(int src_width,
   (void)src_height;
   assert(IS_ALIGNED(src_width, 2));
   assert(IS_ALIGNED(src_height, 2));
-  src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
+  src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
   if (TestCpuFlag(kCpuHasSSE2)) {
     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
@@ -388,7 +388,7 @@ static void ScaleARGBBilinearDown(int src_width,
     }
     for (j = 0; j < dst_height; ++j) {
       int yi = y >> 16;
-      const uint8_t* src = src_argb + yi * (int64_t)src_stride;
+      const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
       if (filtering == kFilterLinear) {
         ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
       } else {
@@ -545,7 +545,7 @@ static void ScaleARGBBilinearUp(int src_width,
 
   {
     int yi = y >> 16;
-    const uint8_t* src = src_argb + yi * (int64_t)src_stride;
+    const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
 
     // Allocate 2 rows of ARGB.
     const int row_size = (dst_width * 4 + 31) & ~31;
@@ -570,7 +570,7 @@ static void ScaleARGBBilinearUp(int src_width,
         if (y > max_y) {
           y = max_y;
           yi = y >> 16;
-          src = src_argb + yi * (int64_t)src_stride;
+          src = src_argb + yi * (intptr_t)src_stride;
         }
         if (yi != lasty) {
           ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@@ -793,9 +793,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
   const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
   int yi = y >> 16;
   int uv_yi = yi >> kYShift;
-  const uint8_t* src_row_y = src_y + yi * (int64_t)src_stride_y;
-  const uint8_t* src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
-  const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
+  const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y;
+  const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
+  const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
 
   // Allocate 2 rows of ARGB.
   const int row_size = (dst_width * 4 + 31) & ~31;
@@ -833,9 +833,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
         y = max_y;
         yi = y >> 16;
         uv_yi = yi >> kYShift;
-        src_row_y = src_y + yi * (int64_t)src_stride_y;
-        src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
-        src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
+        src_row_y = src_y + yi * (intptr_t)src_stride_y;
+        src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
+        src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
       }
       if (yi != lasty) {
         // TODO(fbarchard): Convert the clipped region of row.
@@ -926,7 +926,7 @@ static void ScaleARGBSimple(int src_width,
   }
 
   for (j = 0; j < dst_height; ++j) {
-    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride,
+    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride,
                   dst_width, x, dx);
     dst_argb += dst_stride;
     y += dy;
@@ -962,7 +962,7 @@ static void ScaleARGB(const uint8_t* src,
   // Negative src_height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src = src + (src_height - 1) * (int64_t)src_stride;
+    src = src + (src_height - 1) * (intptr_t)src_stride;
     src_stride = -src_stride;
   }
   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -977,7 +977,7 @@ static void ScaleARGB(const uint8_t* src,
   if (clip_y) {
     int64_t clipf = (int64_t)(clip_y)*dy;
     y += (clipf & 0xffff);
-    src += (clipf >> 16) * (int64_t)src_stride;
+    src += (clipf >> 16) * (intptr_t)src_stride;
     dst += clip_y * dst_stride;
   }
 
@@ -1011,7 +1011,7 @@ static void ScaleARGB(const uint8_t* src,
         filtering = kFilterNone;
         if (dx == 0x10000 && dy == 0x10000) {
           // Straight copy.
-          ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4,
+          ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4,
                    src_stride, dst, dst_stride, clip_width, clip_height);
           return;
         }
diff --git a/source/scale_uv.cc b/source/scale_uv.cc
index 8bd6b586..ecda769e 100644
--- a/source/scale_uv.cc
+++ b/source/scale_uv.cc
@@ -83,9 +83,9 @@ static void ScaleUVDown2(int src_width,
   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
   // Advance to odd row, even column.
   if (filtering == kFilterBilinear) {
-    src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
+    src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
   } else {
-    src_uv += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 2;
+    src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2;
   }
 
 #if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
@@ -200,7 +200,7 @@ static void ScaleUVDown4Box(int src_width,
                           uint8_t* dst_uv, int dst_width) =
       ScaleUVRowDown2Box_C;
   // Advance to odd row, even column.
-  src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
+  src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
   (void)src_width;
   (void)src_height;
   (void)dx;
@@ -263,7 +263,7 @@ static void ScaleUVDownEven(int src_width,
                             enum FilterMode filtering) {
   int j;
   int col_step = dx >> 16;
-  int row_stride = (dy >> 16) * (int64_t)src_stride;
+  ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
   void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
                              int src_step, uint8_t* dst_uv, int dst_width) =
       filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
@@ -271,7 +271,7 @@ static void ScaleUVDownEven(int src_width,
   (void)src_height;
   assert(IS_ALIGNED(src_width, 2));
   assert(IS_ALIGNED(src_height, 2));
-  src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
+  src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
 #if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
@@ -429,7 +429,7 @@ static void ScaleUVBilinearDown(int src_width,
     }
     for (j = 0; j < dst_height; ++j) {
       int yi = y >> 16;
-      const uint8_t* src = src_uv + yi * (int64_t)src_stride;
+      const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
       if (filtering == kFilterLinear) {
         ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
       } else {
@@ -571,7 +571,7 @@ static void ScaleUVBilinearUp(int src_width,
 
   {
     int yi = y >> 16;
-    const uint8_t* src = src_uv + yi * (int64_t)src_stride;
+    const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
 
     // Allocate 2 rows of UV.
     const int row_size = (dst_width * 2 + 15) & ~15;
@@ -596,7 +596,7 @@ static void ScaleUVBilinearUp(int src_width,
         if (y > max_y) {
           y = max_y;
           yi = y >> 16;
-          src = src_uv + yi * (int64_t)src_stride;
+          src = src_uv + yi * (intptr_t)src_stride;
         }
         if (yi != lasty) {
           ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@@ -663,13 +663,13 @@ void ScaleUVLinearUp2(int src_width,
 #endif
 
   if (dst_height == 1) {
-    ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
+    ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
                dst_width);
   } else {
     dy = FixedDiv(src_height - 1, dst_height - 1);
     y = (1 << 15) - 1;
     for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
+      ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
       dst_uv += dst_stride;
       y += dy;
     }
@@ -770,13 +770,13 @@ void ScaleUVLinearUp2_16(int src_width,
 #endif
 
   if (dst_height == 1) {
-    ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
+    ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
                dst_width);
   } else {
     dy = FixedDiv(src_height - 1, dst_height - 1);
     y = (1 << 15) - 1;
     for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
+      ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
       dst_uv += dst_stride;
       y += dy;
     }
@@ -889,7 +889,7 @@ static void ScaleUVSimple(int src_width,
   }
 
   for (j = 0; j < dst_height; ++j) {
-    ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x,
+    ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x,
                 dx);
     dst_uv += dst_stride;
     y += dy;
@@ -910,7 +910,7 @@ static int UVCopy(const uint8_t* src_uv,
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;
-    src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
+    src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
     src_stride_uv = -src_stride_uv;
   }
 
@@ -930,7 +930,7 @@ static int UVCopy_16(const uint16_t* src_uv,
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;
-    src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
+    src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
     src_stride_uv = -src_stride_uv;
   }
 
@@ -968,7 +968,7 @@ static void ScaleUV(const uint8_t* src,
   // Negative src_height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src = src + (src_height - 1) * (int64_t)src_stride;
+    src = src + (src_height - 1) * (intptr_t)src_stride;
     src_stride = -src_stride;
   }
   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -983,7 +983,7 @@ static void ScaleUV(const uint8_t* src,
   if (clip_y) {
     int64_t clipf = (int64_t)(clip_y)*dy;
     y += (clipf & 0xffff);
-    src += (clipf >> 16) * (int64_t)src_stride;
+    src += (clipf >> 16) * (intptr_t)src_stride;
     dst += clip_y * dst_stride;
   }
 
@@ -1024,7 +1024,7 @@ static void ScaleUV(const uint8_t* src,
 #ifdef HAS_UVCOPY
         if (dx == 0x10000 && dy == 0x10000) {
           // Straight copy.
-          UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2,
+          UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2,
                  src_stride, dst, dst_stride, clip_width, clip_height);
           return;
         }
@@ -1118,7 +1118,7 @@ int UVScale_16(const uint16_t* src_uv,
   // Negative src_height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src_uv = src_uv + (src_height - 1) * (int64_t)src_stride_uv;
+    src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv;
     src_stride_uv = -src_stride_uv;
   }
   src_width = Abs(src_width);
@@ -1126,13 +1126,13 @@ int UVScale_16(const uint16_t* src_uv,
 #ifdef HAS_UVCOPY
   if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
     if (dst_height == 1) {
-      UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv,
+      UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv,
                 src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
     } else {
       dy = src_height / dst_height;
-      UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv,
-                dy * (int64_t)src_stride_uv, dst_uv, dst_stride_uv, dst_width,
-                dst_height);
+      UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv,
+                (int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv,
+                dst_width, dst_height);
     }
 
     return 0;
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index ad34cec7..89ff99ba 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -417,151 +417,136 @@ TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
 TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
 TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
 
-#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,       \
-                          SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,           \
-                          DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF,    \
-                          DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)             \
-  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {               \
-    static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported");        \
-    static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");        \
-    static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2,                    \
-                  "SRC_SUBSAMP_X unsupported");                                \
-    static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2,                    \
-                  "SRC_SUBSAMP_Y unsupported");                                \
-    static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2,                    \
-                  "DST_SUBSAMP_X unsupported");                                \
-    static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2,                    \
-                  "DST_SUBSAMP_Y unsupported");                                \
-    const int kWidth = W1280;                                                  \
-    const int kHeight = benchmark_height_;                                     \
-    const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);                \
-    const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);                \
-    const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);              \
-    const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1);  \
-    const int kPaddedHeight =                                                  \
-        (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1);                    \
-    const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X);    \
-    const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y);  \
-    align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF);  \
-    align_buffer_page_end(                                                     \
-        src_uv,                                                                \
-        2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF);       \
-    align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC);                  \
-    align_buffer_page_end(dst_uv_c,                                            \
-                          2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);       \
-    align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC);                \
-    align_buffer_page_end(dst_uv_opt,                                          \
-                          2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);       \
-    SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF);                    \
-    SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF);                  \
-    for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) {                   \
-      src_y_p[i] =                                                             \
-          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));       \
-    }                                                                          \
-    for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
-      src_uv_p[i] =                                                            \
-          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));       \
-    }                                                                          \
-    memset(dst_y_c, 1, kWidth* kHeight* DST_BPC);                              \
-    memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);         \
-    memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC);                          \
-    memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);     \
-    MaskCpuFlags(disable_cpu_flags_);                                          \
-    SRC_FMT_PLANAR##To##FMT_PLANAR(                                            \
-        src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth,                          \
-        DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth,                \
-        reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth,         \
-        NEG kHeight);                                                          \
-    MaskCpuFlags(benchmark_cpu_info_);                                         \
-    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
-      SRC_FMT_PLANAR##To##FMT_PLANAR(                                          \
-          src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth,                        \
-          DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth,            \
-          reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth,     \
-          NEG kHeight);                                                        \
-    }                                                                          \
-    if (DOY) {                                                                 \
-      for (int i = 0; i < kHeight; ++i) {                                      \
-        for (int j = 0; j < kWidth; ++j) {                                     \
-          EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]);       \
-        }                                                                      \
-      }                                                                        \
-    }                                                                          \
-    for (int i = 0; i < kDstHalfHeight; ++i) {                                 \
-      for (int j = 0; j < 2 * kDstHalfWidth; ++j) {                            \
-        EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j],                         \
-                  dst_uv_opt[i * 2 * kDstHalfWidth + j]);                      \
-      }                                                                        \
-    }                                                                          \
-    free_aligned_buffer_page_end(dst_y_c);                                     \
-    free_aligned_buffer_page_end(dst_uv_c);                                    \
-    free_aligned_buffer_page_end(dst_y_opt);                                   \
-    free_aligned_buffer_page_end(dst_uv_opt);                                  \
-    free_aligned_buffer_page_end(src_y);                                       \
-    free_aligned_buffer_page_end(src_uv);                                      \
+#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
+                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+                    DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH,        \
+                    TILE_WIDTH, TILE_HEIGHT)                                  \
+  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {              \
+    static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");       \
+    static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2,                   \
+                  "SRC_SUBSAMP_X unsupported");                               \
+    static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2,                   \
+                  "SRC_SUBSAMP_Y unsupported");                               \
+    static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2,                   \
+                  "DST_SUBSAMP_X unsupported");                               \
+    static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2,                   \
+                  "DST_SUBSAMP_Y unsupported");                               \
+    const int kWidth = W1280;                                                 \
+    const int kHeight = benchmark_height_;                                    \
+    const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);               \
+    const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);               \
+    const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);             \
+    const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
+    const int kPaddedHeight =                                                 \
+        (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1);                   \
+    const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X);   \
+    const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
+    align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
+    align_buffer_page_end(                                                    \
+        src_uv,                                                               \
+        2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF);      \
+    align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC);                 \
+    align_buffer_page_end(dst_uv_c,                                           \
+                          2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);      \
+    align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC);               \
+    align_buffer_page_end(dst_uv_opt,                                         \
+                          2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);      \
+    SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF);                   \
+    SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF);                 \
+    for (int i = 0;                                                           \
+         i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T);     \
+         ++i) {                                                               \
+      src_y_p[i] =                                                            \
+          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));      \
+    }                                                                         \
+    for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 *      \
+                            SRC_BPC / (int)sizeof(SRC_T);                     \
+         ++i) {                                                               \
+      src_uv_p[i] =                                                           \
+          (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH)));      \
+    }                                                                         \
+    memset(dst_y_c, 1, kWidth* kHeight* DST_BPC);                             \
+    memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);        \
+    memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC);                         \
+    memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC);    \
+    MaskCpuFlags(disable_cpu_flags_);                                         \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(                                           \
+        src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p,              \
+        2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T),                     \
+        DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth,               \
+        reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth,        \
+        NEG kHeight);                                                         \
+    MaskCpuFlags(benchmark_cpu_info_);                                        \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
+      SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
+          src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p,            \
+          2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T),                   \
+          DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth,           \
+          reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth,    \
+          NEG kHeight);                                                       \
+    }                                                                         \
+    if (DOY) {                                                                \
+      for (int i = 0; i < kHeight; ++i) {                                     \
+        for (int j = 0; j < kWidth; ++j) {                                    \
+          EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]);      \
+        }                                                                     \
+      }                                                                       \
+    }                                                                         \
+    for (int i = 0; i < kDstHalfHeight; ++i) {                                \
+      for (int j = 0; j < 2 * kDstHalfWidth; ++j) {                           \
+        EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j],                        \
+                  dst_uv_opt[i * 2 * kDstHalfWidth + j]);                     \
+      }                                                                       \
+    }                                                                         \
+    free_aligned_buffer_page_end(dst_y_c);                                    \
+    free_aligned_buffer_page_end(dst_uv_c);                                   \
+    free_aligned_buffer_page_end(dst_y_opt);                                  \
+    free_aligned_buffer_page_end(dst_uv_opt);                                 \
+    free_aligned_buffer_page_end(src_y);                                      \
+    free_aligned_buffer_page_end(src_uv);                                     \
   }
 
-#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,        \
-                         SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,            \
-                         DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH,  \
-                         TILE_HEIGHT)                                          \
-  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
-                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
-                    DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1,        \
-                    SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                        \
-  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
-                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
-                    DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1,      \
-                    SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                        \
-  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
-                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
-                    DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1,         \
-                    SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                        \
-  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
-                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
-                    DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \
-                    TILE_WIDTH, TILE_HEIGHT)                                   \
-  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,             \
-                    SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,  \
-                    DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0,          \
-                    SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
-
-TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
-TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
-TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
-
-// TODO (greenjustin): Test all variants.
-TESTBIPLANARTOBPI(MT2T,
-                  uint16_t,
-                  2,
-                  2,
-                  2,
-                  P010,
-                  uint16_t,
-                  2,
-                  2,
-                  2,
-                  benchmark_width_,
-                  _Opt,
-                  +,
-                  0,
-                  1,
-                  10,
-                  16,
-                  32)
-
-#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,        \
-                         SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,            \
-                         DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF,     \
-                         SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                   \
+#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
+                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+                   DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
+  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+              benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH,    \
+              TILE_HEIGHT)                                                   \
+  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+              benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH,  \
+              TILE_HEIGHT)                                                   \
+  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+              benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH,     \
+              TILE_HEIGHT)                                                   \
+  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+              benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH,        \
+              TILE_HEIGHT)                                                   \
+  TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+              FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+              benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH,      \
+              TILE_HEIGHT)
+
+TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
+TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
+TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
+TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
+
+#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
+                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
+                   DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH,   \
+                   TILE_HEIGHT)                                                \
   TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {               \
     static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported");        \
     static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported");        \
@@ -641,30 +626,28 @@ TESTBIPLANARTOBPI(MT2T,
     free_aligned_buffer_page_end(src_uv);                                      \
   }
 
-#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,         \
-                        SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC,             \
-                        DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH,   \
-                        TILE_HEIGHT)                                           \
-  TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
-                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
-                   DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \
-                   TILE_WIDTH, TILE_HEIGHT)                                    \
-  TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
-                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
-                   DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2,          \
-                   SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)                         \
-  TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
-                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
-                   DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH,  \
-                   TILE_WIDTH, TILE_HEIGHT)                                    \
-  TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,              \
-                   SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X,   \
-                   DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH,     \
-                   TILE_WIDTH, TILE_HEIGHT)
-
-TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
+#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X,            \
+                  SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+                  DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)        \
+  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+             benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH,       \
+             TILE_HEIGHT)                                                   \
+  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+             benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH,     \
+             TILE_HEIGHT)                                                   \
+  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+             benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH,        \
+             TILE_HEIGHT)                                                   \
+  TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,  \
+             FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y,      \
+             benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
+
+TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
 
 // Provide matrix wrappers for full range bt.709
 #define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
@@ -1089,8 +1072,8 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
 TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
 #endif
 
-#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C,       \
-                         BPP_B, W1280, N, NEG, OFF)                            \
+#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
+                   W1280, N, NEG, OFF)                                         \
   TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
     const int kWidth = W1280;                                                  \
     const int kHeight = benchmark_height_;                                     \
@@ -1143,15 +1126,15 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
     free_aligned_buffer_page_end(dst_argb32_opt);                              \
   }
 
-#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
-  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
-                   benchmark_width_ + 1, _Any, +, 0)                           \
-  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
-                   benchmark_width_, _Unaligned, +, 2)                         \
-  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
-                   benchmark_width_, _Invert, -, 0)                            \
-  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
-                   benchmark_width_, _Opt, +, 0)
+#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
+  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
+             benchmark_width_ + 1, _Any, +, 0)                           \
+  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
+             benchmark_width_, _Unaligned, +, 2)                         \
+  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
+             benchmark_width_, _Invert, -, 0)                            \
+  TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,      \
+             benchmark_width_, _Opt, +, 0)
 
 #define JNV12ToARGB(a, b, c, d, e, f, g, h) \
   NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
@@ -1172,29 +1155,29 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
 #define JNV12ToRGB565(a, b, c, d, e, f, g, h) \
   NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
 
-TESTBIPLANARTOB(JNV12, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(JNV21, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(JNV12, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(JNV21, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(JNV12, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(JNV21, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(JNV12, 2, 2, RAW, RAW, 3)
-TESTBIPLANARTOB(JNV21, 2, 2, RAW, RAW, 3)
+TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3)
+TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3)
 #ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBIPLANARTOB(JNV12, 2, 2, RGB565, RGB565, 2)
+TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2)
 #endif
 
-TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3)
-TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3)
-TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3)
+TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(NV12, 2, 2, RAW, RAW, 3)
+TESTBPTOB(NV21, 2, 2, RAW, RAW, 3)
+TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3)
 #ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2)
+TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
 #endif
 
 #define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
@@ -1289,8 +1272,8 @@ TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
 TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
 TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
 
-#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X,          \
-                         SUBSAMP_Y, W1280, N, NEG, OFF)                       \
+#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,     \
+                   W1280, N, NEG, OFF)                                        \
   TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) {                       \
     const int kWidth = W1280;                                                 \
     const int kHeight = benchmark_height_;                                    \
@@ -1336,25 +1319,25 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
     free_aligned_buffer_page_end(src_argb);                                   \
   }
 
-#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
-  TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                   benchmark_width_ + 1, _Any, +, 0)                           \
-  TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                   benchmark_width_, _Unaligned, +, 2)                         \
-  TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                   benchmark_width_, _Invert, -, 0)                            \
-  TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
-                   benchmark_width_, _Opt, +, 0)
-
-TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
-TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
-TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2)
-TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2)
-TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2)
-TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
-TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
-TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
-TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
+#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+             benchmark_width_ + 1, _Any, +, 0)                           \
+  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+             benchmark_width_, _Unaligned, +, 2)                         \
+  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+             benchmark_width_, _Invert, -, 0)                            \
+  TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
+             benchmark_width_, _Opt, +, 0)
+
+TESTATOBP(ARGB, 1, 4, NV12, 2, 2)
+TESTATOBP(ARGB, 1, 4, NV21, 2, 2)
+TESTATOBP(ABGR, 1, 4, NV12, 2, 2)
+TESTATOBP(ABGR, 1, 4, NV21, 2, 2)
+TESTATOBP(RAW, 1, 3, JNV21, 2, 2)
+TESTATOBP(YUY2, 2, 4, NV12, 2, 2)
+TESTATOBP(UYVY, 2, 4, NV12, 2, 2)
+TESTATOBP(AYUV, 1, 4, NV12, 2, 2)
+TESTATOBP(AYUV, 1, 4, NV21, 2, 2)
 
 #define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,     \
                   EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF)               \
@@ -3935,8 +3918,8 @@ TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10)
 TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
 #endif  // DISABLE_SLOW_TESTS
 
-#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,     \
-                           ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH)  \
+#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,    \
+                     YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH)               \
   TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
     const int kWidth = W1280;                                                  \
     const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
@@ -3979,16 +3962,16 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
     free_aligned_buffer_page_end(dst_argb_opt);                                \
   }
 
-#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,    \
-                          ALIGN, YALIGN, S_DEPTH)                            \
-  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
-                     YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH)   \
-  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
-                     YALIGN, benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \
-  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
-                     YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH)    \
-  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
-                     YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,    \
+                    YALIGN, S_DEPTH)                                          \
+  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+               benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH)                  \
+  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+               benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH)                \
+  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+               benchmark_width_, _Invert, -, 0, 0, S_DEPTH)                   \
+  TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+               benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
 
 #define P010ToARGB(a, b, c, d, e, f, g, h) \
   P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
@@ -4031,23 +4014,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
                          kFilterBilinear)
 
 #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
-TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
+TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
+TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
+TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
+TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
+TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
+TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
 #ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
+TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
+TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
+TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
+TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
+TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
+TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
 #endif  // LITTLE_ENDIAN_ONLY_TEST
 #endif  // DISABLE_SLOW_TESTS
author	Frank Barchard <fbarchard@google.com>	2022-12-15 14:11:52 -0800
committer	libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com>	2022-12-15 22:34:22 +0000
commit	3abd6f36b6e4f5a2e0ce236580a8bc1da3c7cf7e (patch)
tree	3fc173f8288f4457858919c806bac14593dc3d1b
parent	610e0cdead3fcd3288693d18eab8c7323805ad9e (diff)
download	libyuv-3abd6f36b6e4f5a2e0ce236580a8bc1da3c7cf7e.tar.gz