1 files changed, 438 insertions, 545 deletions
diff --git a/files/source/row_win.cc b/files/source/row_win.cc
index 202f2b8d..27e3da7b 100644
--- a/files/source/row_win.cc
+++ b/files/source/row_win.cc
@@ -28,27 +28,27 @@ extern "C" {
 #if defined(_M_X64)
 
 // Read 4 UV from 422, upsample to 8 UV.
-#define READYUV422                                      \
-  xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);            \
-  xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
-  xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);                 \
-  xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);                \
-  u_buf += 4;                                           \
-  xmm4 = _mm_loadl_epi64((__m128i*)y_buf);              \
-  xmm4 = _mm_unpacklo_epi8(xmm4, xmm4);                 \
+#define READYUV422                                        \
+  xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf);            \
+  xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \
+  xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);                   \
+  xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);                  \
+  u_buf += 4;                                             \
+  xmm4 = _mm_loadl_epi64((__m128i*)y_buf);                \
+  xmm4 = _mm_unpacklo_epi8(xmm4, xmm4);                   \
   y_buf += 8;
 
 // Read 4 UV from 422, upsample to 8 UV.  With 8 Alpha.
-#define READYUVA422                                     \
-  xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);            \
-  xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
-  xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);                 \
-  xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);                \
-  u_buf += 4;                                           \
-  xmm4 = _mm_loadl_epi64((__m128i*)y_buf);              \
-  xmm4 = _mm_unpacklo_epi8(xmm4, xmm4);                 \
-  y_buf += 8;                                           \
-  xmm5 = _mm_loadl_epi64((__m128i*)a_buf);              \
+#define READYUVA422                                       \
+  xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf);            \
+  xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \
+  xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);                   \
+  xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);                  \
+  u_buf += 4;                                             \
+  xmm4 = _mm_loadl_epi64((__m128i*)y_buf);                \
+  xmm4 = _mm_unpacklo_epi8(xmm4, xmm4);                   \
+  y_buf += 8;                                             \
+  xmm5 = _mm_loadl_epi64((__m128i*)a_buf);                \
   a_buf += 8;
 
 // Convert 8 pixels: 8 UV and 8 Y.
@@ -84,15 +84,15 @@ extern "C" {
   dst_argb += 32;
 
 #if defined(HAS_I422TOARGBROW_SSSE3)
-void I422ToARGBRow_SSSE3(const uint8* y_buf,
-                         const uint8* u_buf,
-                         const uint8* v_buf,
-                         uint8* dst_argb,
+void I422ToARGBRow_SSSE3(const uint8_t* y_buf,
+                         const uint8_t* u_buf,
+                         const uint8_t* v_buf,
+                         uint8_t* dst_argb,
                          const struct YuvConstants* yuvconstants,
                          int width) {
   __m128i xmm0, xmm1, xmm2, xmm4;
   const __m128i xmm5 = _mm_set1_epi8(-1);
-  const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
+  const ptrdiff_t offset = (uint8_t*)v_buf - (uint8_t*)u_buf;
   while (width > 0) {
     READYUV422
     YUVTORGB(yuvconstants)
@@ -103,15 +103,15 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
 #endif
 
 #if defined(HAS_I422ALPHATOARGBROW_SSSE3)
-void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              const uint8* a_buf,
-                              uint8* dst_argb,
+void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
+                              const uint8_t* u_buf,
+                              const uint8_t* v_buf,
+                              const uint8_t* a_buf,
+                              uint8_t* dst_argb,
                               const struct YuvConstants* yuvconstants,
                               int width) {
   __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
-  const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
+  const ptrdiff_t offset = (uint8_t*)v_buf - (uint8_t*)u_buf;
   while (width > 0) {
     READYUVA422
     YUVTORGB(yuvconstants)
@@ -255,8 +255,8 @@ static const lvec8 kShuffleNV21 = {
 };
 
 // Duplicates gray value 3 times and fills in alpha opaque.
-__declspec(naked) void J400ToARGBRow_SSE2(const uint8* src_y,
-                                          uint8* dst_argb,
+__declspec(naked) void J400ToARGBRow_SSE2(const uint8_t* src_y,
+                                          uint8_t* dst_argb,
                                           int width) {
   __asm {
     mov        eax, [esp + 4]  // src_y
@@ -285,8 +285,8 @@ __declspec(naked) void J400ToARGBRow_SSE2(const uint8* src_y,
 
 #ifdef HAS_J400TOARGBROW_AVX2
 // Duplicates gray value 3 times and fills in alpha opaque.
-__declspec(naked) void J400ToARGBRow_AVX2(const uint8* src_y,
-                                          uint8* dst_argb,
+__declspec(naked) void J400ToARGBRow_AVX2(const uint8_t* src_y,
+                                          uint8_t* dst_argb,
                                           int width) {
   __asm {
     mov         eax, [esp + 4]  // src_y
@@ -316,8 +316,8 @@ __declspec(naked) void J400ToARGBRow_AVX2(const uint8* src_y,
 }
 #endif  // HAS_J400TOARGBROW_AVX2
 
-__declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24,
-                                            uint8* dst_argb,
+__declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
+                                            uint8_t* dst_argb,
                                             int width) {
   __asm {
     mov       eax, [esp + 4]  // src_rgb24
@@ -355,8 +355,8 @@ __declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24,
   }
 }
 
-__declspec(naked) void RAWToARGBRow_SSSE3(const uint8* src_raw,
-                                          uint8* dst_argb,
+__declspec(naked) void RAWToARGBRow_SSSE3(const uint8_t* src_raw,
+                                          uint8_t* dst_argb,
                                           int width) {
   __asm {
     mov       eax, [esp + 4]  // src_raw
@@ -394,8 +394,8 @@ __declspec(naked) void RAWToARGBRow_SSSE3(const uint8* src_raw,
   }
 }
 
-__declspec(naked) void RAWToRGB24Row_SSSE3(const uint8* src_raw,
-                                           uint8* dst_rgb24,
+__declspec(naked) void RAWToRGB24Row_SSSE3(const uint8_t* src_raw,
+                                           uint8_t* dst_rgb24,
                                            int width) {
   __asm {
     mov       eax, [esp + 4]  // src_raw
@@ -430,8 +430,8 @@ __declspec(naked) void RAWToRGB24Row_SSSE3(const uint8* src_raw,
 // v * (256 + 8)
 // G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
 // 20 instructions.
-__declspec(naked) void RGB565ToARGBRow_SSE2(const uint8* src_rgb565,
-                                            uint8* dst_argb,
+__declspec(naked) void RGB565ToARGBRow_SSE2(const uint8_t* src_rgb565,
+                                            uint8_t* dst_argb,
                                             int width) {
   __asm {
     mov       eax, 0x01080108  // generate multiplier to repeat 5 bits
@@ -486,8 +486,8 @@ __declspec(naked) void RGB565ToARGBRow_SSE2(const uint8* src_rgb565,
 // v * 256 + v * 8
 // v * (256 + 8)
 // G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
-__declspec(naked) void RGB565ToARGBRow_AVX2(const uint8* src_rgb565,
-                                            uint8* dst_argb,
+__declspec(naked) void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565,
+                                            uint8_t* dst_argb,
                                             int width) {
   __asm {
     mov        eax, 0x01080108  // generate multiplier to repeat 5 bits
@@ -537,8 +537,8 @@ __declspec(naked) void RGB565ToARGBRow_AVX2(const uint8* src_rgb565,
 #endif  // HAS_RGB565TOARGBROW_AVX2
 
 #ifdef HAS_ARGB1555TOARGBROW_AVX2
-__declspec(naked) void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555,
-                                              uint8* dst_argb,
+__declspec(naked) void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555,
+                                              uint8_t* dst_argb,
                                               int width) {
   __asm {
     mov        eax, 0x01080108  // generate multiplier to repeat 5 bits
@@ -589,8 +589,8 @@ __declspec(naked) void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555,
 #endif  // HAS_ARGB1555TOARGBROW_AVX2
 
 #ifdef HAS_ARGB4444TOARGBROW_AVX2
-__declspec(naked) void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444,
-                                              uint8* dst_argb,
+__declspec(naked) void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444,
+                                              uint8_t* dst_argb,
                                               int width) {
   __asm {
     mov       eax,  0x0f0f0f0f  // generate mask 0x0f0f0f0f
@@ -627,8 +627,8 @@ __declspec(naked) void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444,
 #endif  // HAS_ARGB4444TOARGBROW_AVX2
 
 // 24 instructions
-__declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555,
-                                              uint8* dst_argb,
+__declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8_t* src_argb1555,
+                                              uint8_t* dst_argb,
                                               int width) {
   __asm {
     mov       eax, 0x01080108  // generate multiplier to repeat 5 bits
@@ -680,8 +680,8 @@ __declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555,
 }
 
 // 18 instructions.
-__declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444,
-                                              uint8* dst_argb,
+__declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8_t* src_argb4444,
+                                              uint8_t* dst_argb,
                                               int width) {
   __asm {
     mov       eax, 0x0f0f0f0f  // generate mask 0x0f0f0f0f
@@ -718,8 +718,8 @@ __declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444,
   }
 }
 
-__declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8* src_argb,
-                                            uint8* dst_rgb,
+__declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8_t* src_argb,
+                                            uint8_t* dst_rgb,
                                             int width) {
   __asm {
     mov       eax, [esp + 4]  // src_argb
@@ -757,8 +757,8 @@ __declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8* src_argb,
   }
 }
 
-__declspec(naked) void ARGBToRAWRow_SSSE3(const uint8* src_argb,
-                                          uint8* dst_rgb,
+__declspec(naked) void ARGBToRAWRow_SSSE3(const uint8_t* src_argb,
+                                          uint8_t* dst_rgb,
                                           int width) {
   __asm {
     mov       eax, [esp + 4]  // src_argb
@@ -796,8 +796,8 @@ __declspec(naked) void ARGBToRAWRow_SSSE3(const uint8* src_argb,
   }
 }
 
-__declspec(naked) void ARGBToRGB565Row_SSE2(const uint8* src_argb,
-                                            uint8* dst_rgb,
+__declspec(naked) void ARGBToRGB565Row_SSE2(const uint8_t* src_argb,
+                                            uint8_t* dst_rgb,
                                             int width) {
   __asm {
     mov       eax, [esp + 4]  // src_argb
@@ -834,9 +834,9 @@ __declspec(naked) void ARGBToRGB565Row_SSE2(const uint8* src_argb,
   }
 }
 
-__declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb,
-                                                  uint8* dst_rgb,
-                                                  const uint32 dither4,
+__declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb,
+                                                  uint8_t* dst_rgb,
+                                                  const uint32_t dither4,
                                                   int width) {
   __asm {
 
@@ -881,9 +881,9 @@ __declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb,
 }
 
 #ifdef HAS_ARGBTORGB565DITHERROW_AVX2
-__declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb,
-                                                  uint8* dst_rgb,
-                                                  const uint32 dither4,
+__declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8_t* src_argb,
+                                                  uint8_t* dst_rgb,
+                                                  const uint32_t dither4,
                                                   int width) {
   __asm {
     mov        eax, [esp + 4]  // src_argb
@@ -925,8 +925,8 @@ __declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb,
 #endif  // HAS_ARGBTORGB565DITHERROW_AVX2
 
 // TODO(fbarchard): Improve sign extension/packing.
-__declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8* src_argb,
-                                              uint8* dst_rgb,
+__declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8_t* src_argb,
+                                              uint8_t* dst_rgb,
                                               int width) {
   __asm {
     mov       eax, [esp + 4]  // src_argb
@@ -967,8 +967,8 @@ __declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8* src_argb,
   }
 }
 
-__declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8* src_argb,
-                                              uint8* dst_rgb,
+__declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8_t* src_argb,
+                                              uint8_t* dst_rgb,
                                               int width) {
   __asm {
     mov       eax, [esp + 4]  // src_argb
@@ -998,8 +998,8 @@ __declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8* src_argb,
 }
 
 #ifdef HAS_ARGBTORGB565ROW_AVX2
-__declspec(naked) void ARGBToRGB565Row_AVX2(const uint8* src_argb,
-                                            uint8* dst_rgb,
+__declspec(naked) void ARGBToRGB565Row_AVX2(const uint8_t* src_argb,
+                                            uint8_t* dst_rgb,
                                             int width) {
   __asm {
     mov        eax, [esp + 4]  // src_argb
@@ -1036,8 +1036,8 @@ __declspec(naked) void ARGBToRGB565Row_AVX2(const uint8* src_argb,
 #endif  // HAS_ARGBTORGB565ROW_AVX2
 
 #ifdef HAS_ARGBTOARGB1555ROW_AVX2
-__declspec(naked) void ARGBToARGB1555Row_AVX2(const uint8* src_argb,
-                                              uint8* dst_rgb,
+__declspec(naked) void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb,
+                                              uint8_t* dst_rgb,
                                               int width) {
   __asm {
     mov        eax, [esp + 4]  // src_argb
@@ -1077,8 +1077,8 @@ __declspec(naked) void ARGBToARGB1555Row_AVX2(const uint8* src_argb,
 #endif  // HAS_ARGBTOARGB1555ROW_AVX2
 
 #ifdef HAS_ARGBTOARGB4444ROW_AVX2
-__declspec(naked) void ARGBToARGB4444Row_AVX2(const uint8* src_argb,
-                                              uint8* dst_rgb,
+__declspec(naked) void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb,
+                                              uint8_t* dst_rgb,
                                               int width) {
   __asm {
     mov        eax, [esp + 4]  // src_argb
@@ -1109,8 +1109,8 @@ __declspec(naked) void ARGBToARGB4444Row_AVX2(const uint8* src_argb,
 #endif  // HAS_ARGBTOARGB4444ROW_AVX2
 
 // Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-__declspec(naked) void ARGBToYRow_SSSE3(const uint8* src_argb,
-                                        uint8* dst_y,
+__declspec(naked) void ARGBToYRow_SSSE3(const uint8_t* src_argb,
+                                        uint8_t* dst_y,
                                         int width) {
   __asm {
     mov        eax, [esp + 4] /* src_argb */
@@ -1145,8 +1145,8 @@ __declspec(naked) void ARGBToYRow_SSSE3(const uint8* src_argb,
 
 // Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
 // Same as ARGBToYRow but different coefficients, no add 16, but do rounding.
-__declspec(naked) void ARGBToYJRow_SSSE3(const uint8* src_argb,
-                                         uint8* dst_y,
+__declspec(naked) void ARGBToYJRow_SSSE3(const uint8_t* src_argb,
+                                         uint8_t* dst_y,
                                          int width) {
   __asm {
     mov        eax, [esp + 4] /* src_argb */
@@ -1185,8 +1185,8 @@ __declspec(naked) void ARGBToYJRow_SSSE3(const uint8* src_argb,
 static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7};
 
 // Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-__declspec(naked) void ARGBToYRow_AVX2(const uint8* src_argb,
-                                       uint8* dst_y,
+__declspec(naked) void ARGBToYRow_AVX2(const uint8_t* src_argb,
+                                       uint8_t* dst_y,
                                        int width) {
   __asm {
     mov        eax, [esp + 4] /* src_argb */
@@ -1225,8 +1225,8 @@ __declspec(naked) void ARGBToYRow_AVX2(const uint8* src_argb,
 
 #ifdef HAS_ARGBTOYJROW_AVX2
 // Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-__declspec(naked) void ARGBToYJRow_AVX2(const uint8* src_argb,
-                                        uint8* dst_y,
+__declspec(naked) void ARGBToYJRow_AVX2(const uint8_t* src_argb,
+                                        uint8_t* dst_y,
                                         int width) {
   __asm {
     mov        eax, [esp + 4] /* src_argb */
@@ -1265,8 +1265,8 @@ __declspec(naked) void ARGBToYJRow_AVX2(const uint8* src_argb,
 }
 #endif  //  HAS_ARGBTOYJROW_AVX2
 
-__declspec(naked) void BGRAToYRow_SSSE3(const uint8* src_argb,
-                                        uint8* dst_y,
+__declspec(naked) void BGRAToYRow_SSSE3(const uint8_t* src_argb,
+                                        uint8_t* dst_y,
                                         int width) {
   __asm {
     mov        eax, [esp + 4] /* src_argb */
@@ -1299,8 +1299,8 @@ __declspec(naked) void BGRAToYRow_SSSE3(const uint8* src_argb,
   }
 }
 
-__declspec(naked) void ABGRToYRow_SSSE3(const uint8* src_argb,
-                                        uint8* dst_y,
+__declspec(naked) void ABGRToYRow_SSSE3(const uint8_t* src_argb,
+                                        uint8_t* dst_y,
                                         int width) {
   __asm {
     mov        eax, [esp + 4] /* src_argb */
@@ -1333,8 +1333,8 @@ __declspec(naked) void ABGRToYRow_SSSE3(const uint8* src_argb,
   }
 }
 
-__declspec(naked) void RGBAToYRow_SSSE3(const uint8* src_argb,
-                                        uint8* dst_y,
+__declspec(naked) void RGBAToYRow_SSSE3(const uint8_t* src_argb,
+                                        uint8_t* dst_y,
                                         int width) {
   __asm {
     mov        eax, [esp + 4] /* src_argb */
@@ -1367,10 +1367,10 @@ __declspec(naked) void RGBAToYRow_SSSE3(const uint8* src_argb,
   }
 }
 
-__declspec(naked) void ARGBToUVRow_SSSE3(const uint8* src_argb0,
+__declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
                                          int src_stride_argb,
-                                         uint8* dst_u,
-                                         uint8* dst_v,
+                                         uint8_t* dst_u,
+                                         uint8_t* dst_v,
                                          int width) {
   __asm {
     push       esi
@@ -1410,9 +1410,9 @@ __declspec(naked) void ARGBToUVRow_SSSE3(const uint8* src_argb0,
     shufps     xmm4, xmm3, 0xdd
     pavgb      xmm2, xmm4
 
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 16 different pixels, its 8 pixels of U and 8 of V
     movdqa     xmm1, xmm0
     movdqa     xmm3, xmm2
     pmaddubsw  xmm0, xmm7  // U
@@ -1426,7 +1426,7 @@ __declspec(naked) void ARGBToUVRow_SSSE3(const uint8* src_argb0,
     packsswb   xmm0, xmm1
     paddb      xmm0, xmm5  // -> unsigned
 
-    // step 3 - store 8 U and 8 V values
+        // step 3 - store 8 U and 8 V values
     movlps     qword ptr [edx], xmm0  // U
     movhps     qword ptr [edx + edi], xmm0  // V
     lea        edx, [edx + 8]
@@ -1439,10 +1439,10 @@ __declspec(naked) void ARGBToUVRow_SSSE3(const uint8* src_argb0,
   }
 }
 
-__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8* src_argb0,
+__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
                                           int src_stride_argb,
-                                          uint8* dst_u,
-                                          uint8* dst_v,
+                                          uint8_t* dst_u,
+                                          uint8_t* dst_v,
                                           int width) {
   __asm {
     push       esi
@@ -1482,9 +1482,9 @@ __declspec(naked) void ARGBToUVJRow_SSSE3(const uint8* src_argb0,
     shufps     xmm4, xmm3, 0xdd
     pavgb      xmm2, xmm4
 
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 16 different pixels, its 8 pixels of U and 8 of V
     movdqa     xmm1, xmm0
     movdqa     xmm3, xmm2
     pmaddubsw  xmm0, xmm7  // U
@@ -1499,7 +1499,7 @@ __declspec(naked) void ARGBToUVJRow_SSSE3(const uint8* src_argb0,
     psraw      xmm1, 8
     packsswb   xmm0, xmm1
 
-    // step 3 - store 8 U and 8 V values
+        // step 3 - store 8 U and 8 V values
     movlps     qword ptr [edx], xmm0  // U
     movhps     qword ptr [edx + edi], xmm0  // V
     lea        edx, [edx + 8]
@@ -1513,10 +1513,10 @@ __declspec(naked) void ARGBToUVJRow_SSSE3(const uint8* src_argb0,
 }
 
 #ifdef HAS_ARGBTOUVROW_AVX2
-__declspec(naked) void ARGBToUVRow_AVX2(const uint8* src_argb0,
+__declspec(naked) void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
                                         int src_stride_argb,
-                                        uint8* dst_u,
-                                        uint8* dst_v,
+                                        uint8_t* dst_u,
+                                        uint8_t* dst_v,
                                         int width) {
   __asm {
     push       esi
@@ -1549,9 +1549,9 @@ __declspec(naked) void ARGBToUVRow_AVX2(const uint8* src_argb0,
     vshufps    ymm2, ymm2, ymm3, 0xdd
     vpavgb     ymm2, ymm2, ymm4  // mutated by vshufps
 
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 32 different pixels, its 16 pixels of U and 16 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 32 different pixels, its 16 pixels of U and 16 of V
     vpmaddubsw ymm1, ymm0, ymm7  // U
     vpmaddubsw ymm3, ymm2, ymm7
     vpmaddubsw ymm0, ymm0, ymm6  // V
@@ -1565,7 +1565,7 @@ __declspec(naked) void ARGBToUVRow_AVX2(const uint8* src_argb0,
     vpshufb    ymm0, ymm0, ymmword ptr kShufARGBToUV_AVX  // for vshufps/vphaddw
     vpaddb     ymm0, ymm0, ymm5  // -> unsigned
 
-    // step 3 - store 16 U and 16 V values
+        // step 3 - store 16 U and 16 V values
     vextractf128 [edx], ymm0, 0  // U
     vextractf128 [edx + edi], ymm0, 1  // V
     lea        edx, [edx + 16]
@@ -1581,10 +1581,10 @@ __declspec(naked) void ARGBToUVRow_AVX2(const uint8* src_argb0,
 #endif  // HAS_ARGBTOUVROW_AVX2
 
 #ifdef HAS_ARGBTOUVJROW_AVX2
-__declspec(naked) void ARGBToUVJRow_AVX2(const uint8* src_argb0,
+__declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
                                          int src_stride_argb,
-                                         uint8* dst_u,
-                                         uint8* dst_v,
+                                         uint8_t* dst_u,
+                                         uint8_t* dst_v,
                                          int width) {
   __asm {
     push       esi
@@ -1617,9 +1617,9 @@ __declspec(naked) void ARGBToUVJRow_AVX2(const uint8* src_argb0,
     vshufps    ymm2, ymm2, ymm3, 0xdd
     vpavgb     ymm2, ymm2, ymm4  // mutated by vshufps
 
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 32 different pixels, its 16 pixels of U and 16 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 32 different pixels, its 16 pixels of U and 16 of V
     vpmaddubsw ymm1, ymm0, ymm7  // U
     vpmaddubsw ymm3, ymm2, ymm7
     vpmaddubsw ymm0, ymm0, ymm6  // V
@@ -1634,7 +1634,7 @@ __declspec(naked) void ARGBToUVJRow_AVX2(const uint8* src_argb0,
     vpermq     ymm0, ymm0, 0xd8  // For vpacksswb
     vpshufb    ymm0, ymm0, ymmword ptr kShufARGBToUV_AVX  // for vshufps/vphaddw
 
-    // step 3 - store 16 U and 16 V values
+        // step 3 - store 16 U and 16 V values
     vextractf128 [edx], ymm0, 0  // U
     vextractf128 [edx + edi], ymm0, 1  // V
     lea        edx, [edx + 16]
@@ -1649,9 +1649,9 @@ __declspec(naked) void ARGBToUVJRow_AVX2(const uint8* src_argb0,
 }
 #endif  // HAS_ARGBTOUVJROW_AVX2
 
-__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
-                                            uint8* dst_u,
-                                            uint8* dst_v,
+__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8_t* src_argb0,
+                                            uint8_t* dst_u,
+                                            uint8_t* dst_v,
                                             int width) {
   __asm {
     push       edi
@@ -1707,10 +1707,10 @@ __declspec(naked) void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
   }
 }
 
-__declspec(naked) void BGRAToUVRow_SSSE3(const uint8* src_argb0,
+__declspec(naked) void BGRAToUVRow_SSSE3(const uint8_t* src_argb0,
                                          int src_stride_argb,
-                                         uint8* dst_u,
-                                         uint8* dst_v,
+                                         uint8_t* dst_u,
+                                         uint8_t* dst_v,
                                          int width) {
   __asm {
     push       esi
@@ -1750,9 +1750,9 @@ __declspec(naked) void BGRAToUVRow_SSSE3(const uint8* src_argb0,
     shufps     xmm4, xmm3, 0xdd
     pavgb      xmm2, xmm4
 
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 16 different pixels, its 8 pixels of U and 8 of V
     movdqa     xmm1, xmm0
     movdqa     xmm3, xmm2
     pmaddubsw  xmm0, xmm7  // U
@@ -1766,7 +1766,7 @@ __declspec(naked) void BGRAToUVRow_SSSE3(const uint8* src_argb0,
     packsswb   xmm0, xmm1
     paddb      xmm0, xmm5  // -> unsigned
 
-    // step 3 - store 8 U and 8 V values
+        // step 3 - store 8 U and 8 V values
     movlps     qword ptr [edx], xmm0  // U
     movhps     qword ptr [edx + edi], xmm0  // V
     lea        edx, [edx + 8]
@@ -1779,10 +1779,10 @@ __declspec(naked) void BGRAToUVRow_SSSE3(const uint8* src_argb0,
   }
 }
 
-__declspec(naked) void ABGRToUVRow_SSSE3(const uint8* src_argb0,
+__declspec(naked) void ABGRToUVRow_SSSE3(const uint8_t* src_argb0,
                                          int src_stride_argb,
-                                         uint8* dst_u,
-                                         uint8* dst_v,
+                                         uint8_t* dst_u,
+                                         uint8_t* dst_v,
                                          int width) {
   __asm {
     push       esi
@@ -1822,9 +1822,9 @@ __declspec(naked) void ABGRToUVRow_SSSE3(const uint8* src_argb0,
     shufps     xmm4, xmm3, 0xdd
     pavgb      xmm2, xmm4
 
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 16 different pixels, its 8 pixels of U and 8 of V
     movdqa     xmm1, xmm0
     movdqa     xmm3, xmm2
     pmaddubsw  xmm0, xmm7  // U
@@ -1838,7 +1838,7 @@ __declspec(naked) void ABGRToUVRow_SSSE3(const uint8* src_argb0,
     packsswb   xmm0, xmm1
     paddb      xmm0, xmm5  // -> unsigned
 
-    // step 3 - store 8 U and 8 V values
+        // step 3 - store 8 U and 8 V values
     movlps     qword ptr [edx], xmm0  // U
     movhps     qword ptr [edx + edi], xmm0  // V
     lea        edx, [edx + 8]
@@ -1851,10 +1851,10 @@ __declspec(naked) void ABGRToUVRow_SSSE3(const uint8* src_argb0,
   }
 }
 
-__declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0,
+__declspec(naked) void RGBAToUVRow_SSSE3(const uint8_t* src_argb0,
                                          int src_stride_argb,
-                                         uint8* dst_u,
-                                         uint8* dst_v,
+                                         uint8_t* dst_u,
+                                         uint8_t* dst_v,
                                          int width) {
   __asm {
     push       esi
@@ -1894,9 +1894,9 @@ __declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0,
     shufps     xmm4, xmm3, 0xdd
     pavgb      xmm2, xmm4
 
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 16 different pixels, its 8 pixels of U and 8 of V
     movdqa     xmm1, xmm0
     movdqa     xmm3, xmm2
     pmaddubsw  xmm0, xmm7  // U
@@ -1910,7 +1910,7 @@ __declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0,
     packsswb   xmm0, xmm1
     paddb      xmm0, xmm5  // -> unsigned
 
-    // step 3 - store 8 U and 8 V values
+        // step 3 - store 8 U and 8 V values
     movlps     qword ptr [edx], xmm0  // U
     movhps     qword ptr [edx + edi], xmm0  // V
     lea        edx, [edx + 8]
@@ -2065,10 +2065,10 @@ __declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0,
 // 16 pixels
 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
 __declspec(naked) void I422ToARGBRow_AVX2(
-    const uint8* y_buf,
-    const uint8* u_buf,
-    const uint8* v_buf,
-    uint8* dst_argb,
+    const uint8_t* y_buf,
+    const uint8_t* u_buf,
+    const uint8_t* v_buf,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2105,11 +2105,11 @@ __declspec(naked) void I422ToARGBRow_AVX2(
 // 16 pixels
 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB.
 __declspec(naked) void I422AlphaToARGBRow_AVX2(
-    const uint8* y_buf,
-    const uint8* u_buf,
-    const uint8* v_buf,
-    const uint8* a_buf,
-    uint8* dst_argb,
+    const uint8_t* y_buf,
+    const uint8_t* u_buf,
+    const uint8_t* v_buf,
+    const uint8_t* a_buf,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2148,10 +2148,10 @@ __declspec(naked) void I422AlphaToARGBRow_AVX2(
 // 16 pixels
 // 16 UV values with 16 Y producing 16 ARGB (64 bytes).
 __declspec(naked) void I444ToARGBRow_AVX2(
-    const uint8* y_buf,
-    const uint8* u_buf,
-    const uint8* v_buf,
-    uint8* dst_argb,
+    const uint8_t* y_buf,
+    const uint8_t* u_buf,
+    const uint8_t* v_buf,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2187,9 +2187,9 @@ __declspec(naked) void I444ToARGBRow_AVX2(
 // 16 pixels.
 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
 __declspec(naked) void NV12ToARGBRow_AVX2(
-    const uint8* y_buf,
-    const uint8* uv_buf,
-    uint8* dst_argb,
+    const uint8_t* y_buf,
+    const uint8_t* uv_buf,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2222,9 +2222,9 @@ __declspec(naked) void NV12ToARGBRow_AVX2(
 // 16 pixels.
 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
 __declspec(naked) void NV21ToARGBRow_AVX2(
-    const uint8* y_buf,
-    const uint8* vu_buf,
-    uint8* dst_argb,
+    const uint8_t* y_buf,
+    const uint8_t* vu_buf,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2257,8 +2257,8 @@ __declspec(naked) void NV21ToARGBRow_AVX2(
 // 16 pixels.
 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
 __declspec(naked) void YUY2ToARGBRow_AVX2(
-    const uint8* src_yuy2,
-    uint8* dst_argb,
+    const uint8_t* src_yuy2,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2288,8 +2288,8 @@ __declspec(naked) void YUY2ToARGBRow_AVX2(
 // 16 pixels.
 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
 __declspec(naked) void UYVYToARGBRow_AVX2(
-    const uint8* src_uyvy,
-    uint8* dst_argb,
+    const uint8_t* src_uyvy,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2319,10 +2319,10 @@ __declspec(naked) void UYVYToARGBRow_AVX2(
 // 16 pixels
 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
 __declspec(naked) void I422ToRGBARow_AVX2(
-    const uint8* y_buf,
-    const uint8* u_buf,
-    const uint8* v_buf,
-    uint8* dst_argb,
+    const uint8_t* y_buf,
+    const uint8_t* u_buf,
+    const uint8_t* v_buf,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2551,10 +2551,10 @@ __declspec(naked) void I422ToRGBARow_AVX2(
 // 8 pixels.
 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
 __declspec(naked) void I444ToARGBRow_SSSE3(
-    const uint8* y_buf,
-    const uint8* u_buf,
-    const uint8* v_buf,
-    uint8* dst_argb,
+    const uint8_t* y_buf,
+    const uint8_t* u_buf,
+    const uint8_t* v_buf,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2588,10 +2588,10 @@ __declspec(naked) void I444ToARGBRow_SSSE3(
 // 8 pixels.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes).
 __declspec(naked) void I422ToRGB24Row_SSSE3(
-    const uint8* y_buf,
-    const uint8* u_buf,
-    const uint8* v_buf,
-    uint8* dst_rgb24,
+    const uint8_t* y_buf,
+    const uint8_t* u_buf,
+    const uint8_t* v_buf,
+    uint8_t* dst_rgb24,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2626,10 +2626,10 @@ __declspec(naked) void I422ToRGB24Row_SSSE3(
 // 8 pixels
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
 __declspec(naked) void I422ToRGB565Row_SSSE3(
-    const uint8* y_buf,
-    const uint8* u_buf,
-    const uint8* v_buf,
-    uint8* rgb565_buf,
+    const uint8_t* y_buf,
+    const uint8_t* u_buf,
+    const uint8_t* v_buf,
+    uint8_t* rgb565_buf,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2669,10 +2669,10 @@ __declspec(naked) void I422ToRGB565Row_SSSE3(
 // 8 pixels.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
 __declspec(naked) void I422ToARGBRow_SSSE3(
-    const uint8* y_buf,
-    const uint8* u_buf,
-    const uint8* v_buf,
-    uint8* dst_argb,
+    const uint8_t* y_buf,
+    const uint8_t* u_buf,
+    const uint8_t* v_buf,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2706,11 +2706,11 @@ __declspec(naked) void I422ToARGBRow_SSSE3(
 // 8 pixels.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB.
 __declspec(naked) void I422AlphaToARGBRow_SSSE3(
-    const uint8* y_buf,
-    const uint8* u_buf,
-    const uint8* v_buf,
-    const uint8* a_buf,
-    uint8* dst_argb,
+    const uint8_t* y_buf,
+    const uint8_t* u_buf,
+    const uint8_t* v_buf,
+    const uint8_t* a_buf,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2746,9 +2746,9 @@ __declspec(naked) void I422AlphaToARGBRow_SSSE3(
 // 8 pixels.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
 __declspec(naked) void NV12ToARGBRow_SSSE3(
-    const uint8* y_buf,
-    const uint8* uv_buf,
-    uint8* dst_argb,
+    const uint8_t* y_buf,
+    const uint8_t* uv_buf,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2778,9 +2778,9 @@ __declspec(naked) void NV12ToARGBRow_SSSE3(
 // 8 pixels.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
 __declspec(naked) void NV21ToARGBRow_SSSE3(
-    const uint8* y_buf,
-    const uint8* vu_buf,
-    uint8* dst_argb,
+    const uint8_t* y_buf,
+    const uint8_t* vu_buf,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2810,8 +2810,8 @@ __declspec(naked) void NV21ToARGBRow_SSSE3(
 // 8 pixels.
 // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
 __declspec(naked) void YUY2ToARGBRow_SSSE3(
-    const uint8* src_yuy2,
-    uint8* dst_argb,
+    const uint8_t* src_yuy2,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2838,8 +2838,8 @@ __declspec(naked) void YUY2ToARGBRow_SSSE3(
 // 8 pixels.
 // 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
 __declspec(naked) void UYVYToARGBRow_SSSE3(
-    const uint8* src_uyvy,
-    uint8* dst_argb,
+    const uint8_t* src_uyvy,
+    uint8_t* dst_argb,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2864,10 +2864,10 @@ __declspec(naked) void UYVYToARGBRow_SSSE3(
 }
 
 __declspec(naked) void I422ToRGBARow_SSSE3(
-    const uint8* y_buf,
-    const uint8* u_buf,
-    const uint8* v_buf,
-    uint8* dst_rgba,
+    const uint8_t* y_buf,
+    const uint8_t* u_buf,
+    const uint8_t* v_buf,
+    uint8_t* dst_rgba,
     const struct YuvConstants* yuvconstants,
     int width) {
   __asm {
@@ -2900,8 +2900,8 @@ __declspec(naked) void I422ToRGBARow_SSSE3(
 
 #ifdef HAS_I400TOARGBROW_SSE2
 // 8 pixels of Y converted to 8 pixels of ARGB (32 bytes).
-__declspec(naked) void I400ToARGBRow_SSE2(const uint8* y_buf,
-                                          uint8* rgb_buf,
+__declspec(naked) void I400ToARGBRow_SSE2(const uint8_t* y_buf,
+                                          uint8_t* rgb_buf,
                                           int width) {
   __asm {
     mov        eax, 0x4a354a35  // 4a35 = 18997 = round(1.164 * 64 * 256)
@@ -2927,7 +2927,7 @@ __declspec(naked) void I400ToARGBRow_SSE2(const uint8* y_buf,
     psrlw      xmm0, 6
     packuswb   xmm0, xmm0        // G
 
-    // Step 2: Weave into ARGB
+        // Step 2: Weave into ARGB
     punpcklbw  xmm0, xmm0  // GG
     movdqa     xmm1, xmm0
     punpcklwd  xmm0, xmm0  // BGRA first 4 pixels
@@ -2947,8 +2947,8 @@ __declspec(naked) void I400ToARGBRow_SSE2(const uint8* y_buf,
 #ifdef HAS_I400TOARGBROW_AVX2
 // 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
 // note: vpunpcklbw mutates and vpackuswb unmutates.
-__declspec(naked) void I400ToARGBRow_AVX2(const uint8* y_buf,
-                                          uint8* rgb_buf,
+__declspec(naked) void I400ToARGBRow_AVX2(const uint8_t* y_buf,
+                                          uint8_t* rgb_buf,
                                           int width) {
   __asm {
     mov        eax, 0x4a354a35  // 4a35 = 18997 = round(1.164 * 64 * 256)
@@ -2975,8 +2975,8 @@ __declspec(naked) void I400ToARGBRow_AVX2(const uint8* y_buf,
     vpsrlw     ymm0, ymm0, 6
     vpackuswb  ymm0, ymm0, ymm0        // G.  still mutated: 3120
 
-    // TODO(fbarchard): Weave alpha with unpack.
-    // Step 2: Weave into ARGB
+        // TODO(fbarchard): Weave alpha with unpack.
+        // Step 2: Weave into ARGB
     vpunpcklbw ymm1, ymm0, ymm0  // GG - mutates
     vpermq     ymm1, ymm1, 0xd8
     vpunpcklwd ymm0, ymm1, ymm1  // GGGG first 8 pixels
@@ -3000,8 +3000,8 @@ static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u,
                                      7u,  6u,  5u,  4u,  3u,  2u,  1u, 0u};
 
 // TODO(fbarchard): Replace lea with -16 offset.
-__declspec(naked) void MirrorRow_SSSE3(const uint8* src,
-                                       uint8* dst,
+__declspec(naked) void MirrorRow_SSSE3(const uint8_t* src,
+                                       uint8_t* dst,
                                        int width) {
   __asm {
     mov       eax, [esp + 4]  // src
@@ -3022,7 +3022,9 @@ __declspec(naked) void MirrorRow_SSSE3(const uint8* src,
 #endif  // HAS_MIRRORROW_SSSE3
 
 #ifdef HAS_MIRRORROW_AVX2
-__declspec(naked) void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
+__declspec(naked) void MirrorRow_AVX2(const uint8_t* src,
+                                      uint8_t* dst,
+                                      int width) {
   __asm {
     mov       eax, [esp + 4]  // src
     mov       edx, [esp + 8]  // dst
@@ -3048,9 +3050,9 @@ __declspec(naked) void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
 static const uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u,
                                        15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u};
 
-__declspec(naked) void MirrorUVRow_SSSE3(const uint8* src,
-                                         uint8* dst_u,
-                                         uint8* dst_v,
+__declspec(naked) void MirrorUVRow_SSSE3(const uint8_t* src,
+                                         uint8_t* dst_u,
+                                         uint8_t* dst_v,
                                          int width) {
   __asm {
     push      edi
@@ -3079,8 +3081,8 @@ __declspec(naked) void MirrorUVRow_SSSE3(const uint8* src,
 #endif  // HAS_MIRRORUVROW_SSSE3
 
 #ifdef HAS_ARGBMIRRORROW_SSE2
-__declspec(naked) void ARGBMirrorRow_SSE2(const uint8* src,
-                                          uint8* dst,
+__declspec(naked) void ARGBMirrorRow_SSE2(const uint8_t* src,
+                                          uint8_t* dst,
                                           int width) {
   __asm {
     mov       eax, [esp + 4]  // src
@@ -3105,8 +3107,8 @@ __declspec(naked) void ARGBMirrorRow_SSE2(const uint8* src,
 // Shuffle table for reversing the bytes.
 static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
 
-__declspec(naked) void ARGBMirrorRow_AVX2(const uint8* src,
-                                          uint8* dst,
+__declspec(naked) void ARGBMirrorRow_AVX2(const uint8_t* src,
+                                          uint8_t* dst,
                                           int width) {
   __asm {
     mov       eax, [esp + 4]  // src
@@ -3127,9 +3129,9 @@ __declspec(naked) void ARGBMirrorRow_AVX2(const uint8* src,
 #endif  // HAS_ARGBMIRRORROW_AVX2
 
 #ifdef HAS_SPLITUVROW_SSE2
-__declspec(naked) void SplitUVRow_SSE2(const uint8* src_uv,
-                                       uint8* dst_u,
-                                       uint8* dst_v,
+__declspec(naked) void SplitUVRow_SSE2(const uint8_t* src_uv,
+                                       uint8_t* dst_u,
+                                       uint8_t* dst_v,
                                        int width) {
   __asm {
     push       edi
@@ -3167,9 +3169,9 @@ __declspec(naked) void SplitUVRow_SSE2(const uint8* src_uv,
 #endif  // HAS_SPLITUVROW_SSE2
 
 #ifdef HAS_SPLITUVROW_AVX2
-__declspec(naked) void SplitUVRow_AVX2(const uint8* src_uv,
-                                       uint8* dst_u,
-                                       uint8* dst_v,
+__declspec(naked) void SplitUVRow_AVX2(const uint8_t* src_uv,
+                                       uint8_t* dst_u,
+                                       uint8_t* dst_v,
                                        int width) {
   __asm {
     push       edi
@@ -3207,9 +3209,9 @@ __declspec(naked) void SplitUVRow_AVX2(const uint8* src_uv,
 #endif  // HAS_SPLITUVROW_AVX2
 
 #ifdef HAS_MERGEUVROW_SSE2
-__declspec(naked) void MergeUVRow_SSE2(const uint8* src_u,
-                                       const uint8* src_v,
-                                       uint8* dst_uv,
+__declspec(naked) void MergeUVRow_SSE2(const uint8_t* src_u,
+                                       const uint8_t* src_v,
+                                       uint8_t* dst_uv,
                                        int width) {
   __asm {
     push       edi
@@ -3239,9 +3241,9 @@ __declspec(naked) void MergeUVRow_SSE2(const uint8* src_u,
 #endif  //  HAS_MERGEUVROW_SSE2
 
 #ifdef HAS_MERGEUVROW_AVX2
-__declspec(naked) void MergeUVRow_AVX2(const uint8* src_u,
-                                       const uint8* src_v,
-                                       uint8* dst_uv,
+__declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u,
+                                       const uint8_t* src_v,
+                                       uint8_t* dst_uv,
                                        int width) {
   __asm {
     push       edi
@@ -3273,12 +3275,14 @@ __declspec(naked) void MergeUVRow_AVX2(const uint8* src_u,
 #endif  //  HAS_MERGEUVROW_AVX2
 
 #ifdef HAS_COPYROW_SSE2
-// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time.
-__declspec(naked) void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
+// CopyRow copys 'width' bytes using a 16 byte load/store, 32 bytes at time.
+__declspec(naked) void CopyRow_SSE2(const uint8_t* src,
+                                    uint8_t* dst,
+                                    int width) {
   __asm {
     mov        eax, [esp + 4]  // src
     mov        edx, [esp + 8]  // dst
-    mov        ecx, [esp + 12]  // count
+    mov        ecx, [esp + 12]  // width
     test       eax, 15
     jne        convertloopu
     test       edx, 15
@@ -3310,12 +3314,14 @@ __declspec(naked) void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
 #endif  // HAS_COPYROW_SSE2
 
 #ifdef HAS_COPYROW_AVX
-// CopyRow copys 'count' bytes using a 32 byte load/store, 64 bytes at time.
-__declspec(naked) void CopyRow_AVX(const uint8* src, uint8* dst, int count) {
+// CopyRow copys 'width' bytes using a 32 byte load/store, 64 bytes at time.
+__declspec(naked) void CopyRow_AVX(const uint8_t* src,
+                                   uint8_t* dst,
+                                   int width) {
   __asm {
     mov        eax, [esp + 4]  // src
     mov        edx, [esp + 8]  // dst
-    mov        ecx, [esp + 12]  // count
+    mov        ecx, [esp + 12]  // width
 
   convertloop:
     vmovdqu    ymm0, [eax]
@@ -3334,13 +3340,15 @@ __declspec(naked) void CopyRow_AVX(const uint8* src, uint8* dst, int count) {
 #endif  // HAS_COPYROW_AVX
 
 // Multiple of 1.
-__declspec(naked) void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
+__declspec(naked) void CopyRow_ERMS(const uint8_t* src,
+                                    uint8_t* dst,
+                                    int width) {
   __asm {
     mov        eax, esi
     mov        edx, edi
     mov        esi, [esp + 4]  // src
     mov        edi, [esp + 8]  // dst
-    mov        ecx, [esp + 12]  // count
+    mov        ecx, [esp + 12]  // width
     rep movsb
     mov        edi, edx
     mov        esi, eax
@@ -3350,13 +3358,13 @@ __declspec(naked) void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
 
 #ifdef HAS_ARGBCOPYALPHAROW_SSE2
 // width in pixels
-__declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8* src,
-                                             uint8* dst,
+__declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8_t* src,
+                                             uint8_t* dst,
                                              int width) {
   __asm {
     mov        eax, [esp + 4]  // src
     mov        edx, [esp + 8]  // dst
-    mov        ecx, [esp + 12]  // count
+    mov        ecx, [esp + 12]  // width
     pcmpeqb    xmm0, xmm0  // generate mask 0xff000000
     pslld      xmm0, 24
     pcmpeqb    xmm1, xmm1  // generate mask 0x00ffffff
@@ -3387,13 +3395,13 @@ __declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8* src,
 
 #ifdef HAS_ARGBCOPYALPHAROW_AVX2
 // width in pixels
-__declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8* src,
-                                             uint8* dst,
+__declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8_t* src,
+                                             uint8_t* dst,
                                              int width) {
   __asm {
     mov        eax, [esp + 4]  // src
     mov        edx, [esp + 8]  // dst
-    mov        ecx, [esp + 12]  // count
+    mov        ecx, [esp + 12]  // width
     vpcmpeqb   ymm0, ymm0, ymm0
     vpsrld     ymm0, ymm0, 8  // generate mask 0x00ffffff
 
@@ -3417,8 +3425,8 @@ __declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8* src,
 
 #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
 // width in pixels
-__declspec(naked) void ARGBExtractAlphaRow_SSE2(const uint8* src_argb,
-                                                uint8* dst_a,
+__declspec(naked) void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
+                                                uint8_t* dst_a,
                                                 int width) {
   __asm {
     mov        eax, [esp + 4]  // src_argb
@@ -3445,8 +3453,8 @@ __declspec(naked) void ARGBExtractAlphaRow_SSE2(const uint8* src_argb,
 
 #ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
 // width in pixels
-__declspec(naked) void ARGBExtractAlphaRow_AVX2(const uint8* src_argb,
-                                                uint8* dst_a,
+__declspec(naked) void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb,
+                                                uint8_t* dst_a,
                                                 int width) {
   __asm {
     mov        eax, [esp + 4]  // src_argb
@@ -3481,13 +3489,13 @@ __declspec(naked) void ARGBExtractAlphaRow_AVX2(const uint8* src_argb,
 
 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
 // width in pixels
-__declspec(naked) void ARGBCopyYToAlphaRow_SSE2(const uint8* src,
-                                                uint8* dst,
+__declspec(naked) void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src,
+                                                uint8_t* dst,
                                                 int width) {
   __asm {
     mov        eax, [esp + 4]  // src
     mov        edx, [esp + 8]  // dst
-    mov        ecx, [esp + 12]  // count
+    mov        ecx, [esp + 12]  // width
     pcmpeqb    xmm0, xmm0  // generate mask 0xff000000
     pslld      xmm0, 24
     pcmpeqb    xmm1, xmm1  // generate mask 0x00ffffff
@@ -3520,13 +3528,13 @@ __declspec(naked) void ARGBCopyYToAlphaRow_SSE2(const uint8* src,
 
 #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
 // width in pixels
-__declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8* src,
-                                                uint8* dst,
+__declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src,
+                                                uint8_t* dst,
                                                 int width) {
   __asm {
     mov        eax, [esp + 4]  // src
     mov        edx, [esp + 8]  // dst
-    mov        ecx, [esp + 12]  // count
+    mov        ecx, [esp + 12]  // width
     vpcmpeqb   ymm0, ymm0, ymm0
     vpsrld     ymm0, ymm0, 8  // generate mask 0x00ffffff
 
@@ -3551,16 +3559,16 @@ __declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8* src,
 #endif  // HAS_ARGBCOPYYTOALPHAROW_AVX2
 
 #ifdef HAS_SETROW_X86
-// Write 'count' bytes using an 8 bit value repeated.
-// Count should be multiple of 4.
-__declspec(naked) void SetRow_X86(uint8* dst, uint8 v8, int count) {
+// Write 'width' bytes using an 8 bit value repeated.
+// width should be multiple of 4.
+__declspec(naked) void SetRow_X86(uint8_t* dst, uint8_t v8, int width) {
   __asm {
     movzx      eax, byte ptr [esp + 8]  // v8
     mov        edx, 0x01010101  // Duplicate byte to all bytes.
     mul        edx  // overwrites edx with upper part of result.
     mov        edx, edi
     mov        edi, [esp + 4]  // dst
-    mov        ecx, [esp + 12]  // count
+    mov        ecx, [esp + 12]  // width
     shr        ecx, 2
     rep stosd
     mov        edi, edx
@@ -3568,26 +3576,28 @@ __declspec(naked) void SetRow_X86(uint8* dst, uint8 v8, int count) {
   }
 }
 
-// Write 'count' bytes using an 8 bit value repeated.
-__declspec(naked) void SetRow_ERMS(uint8* dst, uint8 v8, int count) {
+// Write 'width' bytes using an 8 bit value repeated.
+__declspec(naked) void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) {
   __asm {
     mov        edx, edi
     mov        edi, [esp + 4]  // dst
     mov        eax, [esp + 8]  // v8
-    mov        ecx, [esp + 12]  // count
+    mov        ecx, [esp + 12]  // width
     rep stosb
     mov        edi, edx
     ret
   }
 }
 
-// Write 'count' 32 bit values.
-__declspec(naked) void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) {
+// Write 'width' 32 bit values.
+__declspec(naked) void ARGBSetRow_X86(uint8_t* dst_argb,
+                                      uint32_t v32,
+                                      int width) {
   __asm {
     mov        edx, edi
     mov        edi, [esp + 4]  // dst
     mov        eax, [esp + 8]  // v32
-    mov        ecx, [esp + 12]  // count
+    mov        ecx, [esp + 12]  // width
     rep stosd
     mov        edi, edx
     ret
@@ -3596,8 +3606,8 @@ __declspec(naked) void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) {
 #endif  // HAS_SETROW_X86
 
 #ifdef HAS_YUY2TOYROW_AVX2
-__declspec(naked) void YUY2ToYRow_AVX2(const uint8* src_yuy2,
-                                       uint8* dst_y,
+__declspec(naked) void YUY2ToYRow_AVX2(const uint8_t* src_yuy2,
+                                       uint8_t* dst_y,
                                        int width) {
   __asm {
     mov        eax, [esp + 4]  // src_yuy2
@@ -3623,10 +3633,10 @@ __declspec(naked) void YUY2ToYRow_AVX2(const uint8* src_yuy2,
   }
 }
 
-__declspec(naked) void YUY2ToUVRow_AVX2(const uint8* src_yuy2,
+__declspec(naked) void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2,
                                         int stride_yuy2,
-                                        uint8* dst_u,
-                                        uint8* dst_v,
+                                        uint8_t* dst_u,
+                                        uint8_t* dst_v,
                                         int width) {
   __asm {
     push       esi
@@ -3669,9 +3679,9 @@ __declspec(naked) void YUY2ToUVRow_AVX2(const uint8* src_yuy2,
   }
 }
 
-__declspec(naked) void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
-                                           uint8* dst_u,
-                                           uint8* dst_v,
+__declspec(naked) void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2,
+                                           uint8_t* dst_u,
+                                           uint8_t* dst_v,
                                            int width) {
   __asm {
     push       edi
@@ -3709,8 +3719,8 @@ __declspec(naked) void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
   }
 }
 
-__declspec(naked) void UYVYToYRow_AVX2(const uint8* src_uyvy,
-                                       uint8* dst_y,
+__declspec(naked) void UYVYToYRow_AVX2(const uint8_t* src_uyvy,
+                                       uint8_t* dst_y,
                                        int width) {
   __asm {
     mov        eax, [esp + 4]  // src_uyvy
@@ -3734,10 +3744,10 @@ __declspec(naked) void UYVYToYRow_AVX2(const uint8* src_uyvy,
   }
 }
 
-__declspec(naked) void UYVYToUVRow_AVX2(const uint8* src_uyvy,
+__declspec(naked) void UYVYToUVRow_AVX2(const uint8_t* src_uyvy,
                                         int stride_uyvy,
-                                        uint8* dst_u,
-                                        uint8* dst_v,
+                                        uint8_t* dst_u,
+                                        uint8_t* dst_v,
                                         int width) {
   __asm {
     push       esi
@@ -3780,9 +3790,9 @@ __declspec(naked) void UYVYToUVRow_AVX2(const uint8* src_uyvy,
   }
 }
 
-__declspec(naked) void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
-                                           uint8* dst_u,
-                                           uint8* dst_v,
+__declspec(naked) void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy,
+                                           uint8_t* dst_u,
+                                           uint8_t* dst_v,
                                            int width) {
   __asm {
     push       edi
@@ -3822,8 +3832,8 @@ __declspec(naked) void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
 #endif  // HAS_YUY2TOYROW_AVX2
 
 #ifdef HAS_YUY2TOYROW_SSE2
-__declspec(naked) void YUY2ToYRow_SSE2(const uint8* src_yuy2,
-                                       uint8* dst_y,
+__declspec(naked) void YUY2ToYRow_SSE2(const uint8_t* src_yuy2,
+                                       uint8_t* dst_y,
                                        int width) {
   __asm {
     mov        eax, [esp + 4]  // src_yuy2
@@ -3847,10 +3857,10 @@ __declspec(naked) void YUY2ToYRow_SSE2(const uint8* src_yuy2,
   }
 }
 
-__declspec(naked) void YUY2ToUVRow_SSE2(const uint8* src_yuy2,
+__declspec(naked) void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2,
                                         int stride_yuy2,
-                                        uint8* dst_u,
-                                        uint8* dst_v,
+                                        uint8_t* dst_u,
+                                        uint8_t* dst_v,
                                         int width) {
   __asm {
     push       esi
@@ -3892,9 +3902,9 @@ __declspec(naked) void YUY2ToUVRow_SSE2(const uint8* src_yuy2,
   }
 }
 
-__declspec(naked) void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
-                                           uint8* dst_u,
-                                           uint8* dst_v,
+__declspec(naked) void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2,
+                                           uint8_t* dst_u,
+                                           uint8_t* dst_v,
                                            int width) {
   __asm {
     push       edi
@@ -3929,8 +3939,8 @@ __declspec(naked) void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
   }
 }
 
-__declspec(naked) void UYVYToYRow_SSE2(const uint8* src_uyvy,
-                                       uint8* dst_y,
+__declspec(naked) void UYVYToYRow_SSE2(const uint8_t* src_uyvy,
+                                       uint8_t* dst_y,
                                        int width) {
   __asm {
     mov        eax, [esp + 4]  // src_uyvy
@@ -3952,10 +3962,10 @@ __declspec(naked) void UYVYToYRow_SSE2(const uint8* src_uyvy,
   }
 }
 
-__declspec(naked) void UYVYToUVRow_SSE2(const uint8* src_uyvy,
+__declspec(naked) void UYVYToUVRow_SSE2(const uint8_t* src_uyvy,
                                         int stride_uyvy,
-                                        uint8* dst_u,
-                                        uint8* dst_v,
+                                        uint8_t* dst_u,
+                                        uint8_t* dst_v,
                                         int width) {
   __asm {
     push       esi
@@ -3997,9 +4007,9 @@ __declspec(naked) void UYVYToUVRow_SSE2(const uint8* src_uyvy,
   }
 }
 
-__declspec(naked) void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
-                                           uint8* dst_u,
-                                           uint8* dst_v,
+__declspec(naked) void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy,
+                                           uint8_t* dst_u,
+                                           uint8_t* dst_v,
                                            int width) {
   __asm {
     push       edi
@@ -4041,10 +4051,10 @@ __declspec(naked) void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
 // =((A2*C2)+(B2*(255-C2))+255)/256
 // signed version of math
 // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
-__declspec(naked) void BlendPlaneRow_SSSE3(const uint8* src0,
-                                           const uint8* src1,
-                                           const uint8* alpha,
-                                           uint8* dst,
+__declspec(naked) void BlendPlaneRow_SSSE3(const uint8_t* src0,
+                                           const uint8_t* src1,
+                                           const uint8_t* alpha,
+                                           uint8_t* dst,
                                            int width) {
   __asm {
     push       esi
@@ -4067,7 +4077,7 @@ __declspec(naked) void BlendPlaneRow_SSSE3(const uint8* src0,
     sub        edx, esi
     sub        edi, esi
 
-    // 8 pixel loop.
+        // 8 pixel loop.
   convertloop8:
     movq       xmm0, qword ptr [esi]  // alpha
     punpcklbw  xmm0, xmm0
@@ -4098,10 +4108,10 @@ __declspec(naked) void BlendPlaneRow_SSSE3(const uint8* src0,
 // =((A2*C2)+(B2*(255-C2))+255)/256
 // signed version of math
 // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
-__declspec(naked) void BlendPlaneRow_AVX2(const uint8* src0,
-                                          const uint8* src1,
-                                          const uint8* alpha,
-                                          uint8* dst,
+__declspec(naked) void BlendPlaneRow_AVX2(const uint8_t* src0,
+                                          const uint8_t* src1,
+                                          const uint8_t* alpha,
+                                          uint8_t* dst,
                                           int width) {
   __asm {
     push        esi
@@ -4123,7 +4133,7 @@ __declspec(naked) void BlendPlaneRow_AVX2(const uint8* src0,
     sub         edx, esi
     sub         edi, esi
 
-    // 32 pixel loop.
+        // 32 pixel loop.
   convertloop32:
     vmovdqu     ymm0, [esi]  // alpha
     vpunpckhbw  ymm3, ymm0, ymm0  // 8..15, 24..31
@@ -4162,9 +4172,9 @@ static const uvec8 kShuffleAlpha = {3u,  0x80, 3u,  0x80, 7u,  0x80, 7u,  0x80,
                                     11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80};
 
 // Blend 8 pixels at a time.
-__declspec(naked) void ARGBBlendRow_SSSE3(const uint8* src_argb0,
-                                          const uint8* src_argb1,
-                                          uint8* dst_argb,
+__declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
+                                          const uint8_t* src_argb1,
+                                          uint8_t* dst_argb,
                                           int width) {
   __asm {
     push       esi
@@ -4183,7 +4193,7 @@ __declspec(naked) void ARGBBlendRow_SSSE3(const uint8* src_argb0,
     sub        ecx, 4
     jl         convertloop4b  // less than 4 pixels?
 
-    // 4 pixel loop.
+        // 4 pixel loop.
   convertloop4:
     movdqu     xmm3, [eax]  // src argb
     lea        eax, [eax + 16]
@@ -4212,7 +4222,7 @@ __declspec(naked) void ARGBBlendRow_SSSE3(const uint8* src_argb0,
     add        ecx, 4 - 1
     jl         convertloop1b
 
-    // 1 pixel loop.
+            // 1 pixel loop.
   convertloop1:
     movd       xmm3, [eax]  // src argb
     lea        eax, [eax + 4]
@@ -4253,8 +4263,8 @@ static const uvec8 kShuffleAlpha1 = {
     11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
     15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
 };
-__declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8* src_argb,
-                                              uint8* dst_argb,
+__declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
+                                              uint8_t* dst_argb,
                                               int width) {
   __asm {
     mov        eax, [esp + 4]  // src_argb0
@@ -4298,8 +4308,8 @@ __declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8* src_argb,
 static const uvec8 kShuffleAlpha_AVX2 = {6u,   7u,   6u,   7u,  6u,  7u,
                                          128u, 128u, 14u,  15u, 14u, 15u,
                                          14u,  15u,  128u, 128u};
-__declspec(naked) void ARGBAttenuateRow_AVX2(const uint8* src_argb,
-                                             uint8* dst_argb,
+__declspec(naked) void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
+                                             uint8_t* dst_argb,
                                              int width) {
   __asm {
     mov        eax, [esp + 4]  // src_argb0
@@ -4336,8 +4346,8 @@ __declspec(naked) void ARGBAttenuateRow_AVX2(const uint8* src_argb,
 
 #ifdef HAS_ARGBUNATTENUATEROW_SSE2
 // Unattenuate 4 pixels at a time.
-__declspec(naked) void ARGBUnattenuateRow_SSE2(const uint8* src_argb,
-                                               uint8* dst_argb,
+__declspec(naked) void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
+                                               uint8_t* dst_argb,
                                                int width) {
   __asm {
     push       ebx
@@ -4392,8 +4402,8 @@ static const uvec8 kUnattenShuffleAlpha_AVX2 = {
 // TODO(fbarchard): Enable USE_GATHER for future hardware if faster.
 // USE_GATHER is not on by default, due to being a slow instruction.
 #ifdef USE_GATHER
-__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb,
-                                               uint8* dst_argb,
+__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
+                                               uint8_t* dst_argb,
                                                int width) {
   __asm {
     mov        eax, [esp + 4]  // src_argb0
@@ -4426,8 +4436,8 @@ __declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb,
   }
 }
 #else   // USE_GATHER
-__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb,
-                                               uint8* dst_argb,
+__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
+                                               uint8_t* dst_argb,
                                                int width) {
   __asm {
 
@@ -4495,8 +4505,8 @@ __declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8* src_argb,
 
 #ifdef HAS_ARGBGRAYROW_SSSE3
 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels.
-__declspec(naked) void ARGBGrayRow_SSSE3(const uint8* src_argb,
-                                         uint8* dst_argb,
+__declspec(naked) void ARGBGrayRow_SSSE3(const uint8_t* src_argb,
+                                         uint8_t* dst_argb,
                                          int width) {
   __asm {
     mov        eax, [esp + 4] /* src_argb */
@@ -4552,7 +4562,7 @@ static const vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0,
                                    24, 98, 50, 0, 24, 98, 50, 0};
 
 // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
-__declspec(naked) void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
+__declspec(naked) void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width) {
   __asm {
     mov        eax, [esp + 4] /* dst_argb */
     mov        ecx, [esp + 8] /* width */
@@ -4608,9 +4618,9 @@ __declspec(naked) void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
 // Same as Sepia except matrix is provided.
 // TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
 // and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
-__declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8* src_argb,
-                                                uint8* dst_argb,
-                                                const int8* matrix_argb,
+__declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb,
+                                                uint8_t* dst_argb,
+                                                const int8_t* matrix_argb,
                                                 int width) {
   __asm {
     mov        eax, [esp + 4] /* src_argb */
@@ -4670,7 +4680,7 @@ __declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8* src_argb,
 
 #ifdef HAS_ARGBQUANTIZEROW_SSE2
 // Quantize 4 ARGB pixels (16 bytes).
-__declspec(naked) void ARGBQuantizeRow_SSE2(uint8* dst_argb,
+__declspec(naked) void ARGBQuantizeRow_SSE2(uint8_t* dst_argb,
                                             int scale,
                                             int interval_size,
                                             int interval_offset,
@@ -4717,10 +4727,10 @@ __declspec(naked) void ARGBQuantizeRow_SSE2(uint8* dst_argb,
 
 #ifdef HAS_ARGBSHADEROW_SSE2
 // Shade 4 pixels at a time by specified value.
-__declspec(naked) void ARGBShadeRow_SSE2(const uint8* src_argb,
-                                         uint8* dst_argb,
+__declspec(naked) void ARGBShadeRow_SSE2(const uint8_t* src_argb,
+                                         uint8_t* dst_argb,
                                          int width,
-                                         uint32 value) {
+                                         uint32_t value) {
   __asm {
     mov        eax, [esp + 4]  // src_argb
     mov        edx, [esp + 8]  // dst_argb
@@ -4752,9 +4762,9 @@ __declspec(naked) void ARGBShadeRow_SSE2(const uint8* src_argb,
 
 #ifdef HAS_ARGBMULTIPLYROW_SSE2
 // Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
-__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8* src_argb0,
-                                            const uint8* src_argb1,
-                                            uint8* dst_argb,
+__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
+                                            const uint8_t* src_argb1,
+                                            uint8_t* dst_argb,
                                             int width) {
   __asm {
     push       esi
@@ -4792,9 +4802,9 @@ __declspec(naked) void ARGBMultiplyRow_SSE2(const uint8* src_argb0,
 #ifdef HAS_ARGBADDROW_SSE2
 // Add 2 rows of ARGB pixels together, 4 pixels at a time.
 // TODO(fbarchard): Port this to posix, neon and other math functions.
-__declspec(naked) void ARGBAddRow_SSE2(const uint8* src_argb0,
-                                       const uint8* src_argb1,
-                                       uint8* dst_argb,
+__declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0,
+                                       const uint8_t* src_argb1,
+                                       uint8_t* dst_argb,
                                        int width) {
   __asm {
     push       esi
@@ -4841,9 +4851,9 @@ __declspec(naked) void ARGBAddRow_SSE2(const uint8* src_argb0,
 
 #ifdef HAS_ARGBSUBTRACTROW_SSE2
 // Subtract 2 rows of ARGB pixels together, 4 pixels at a time.
-__declspec(naked) void ARGBSubtractRow_SSE2(const uint8* src_argb0,
-                                            const uint8* src_argb1,
-                                            uint8* dst_argb,
+__declspec(naked) void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
+                                            const uint8_t* src_argb1,
+                                            uint8_t* dst_argb,
                                             int width) {
   __asm {
     push       esi
@@ -4871,9 +4881,9 @@ __declspec(naked) void ARGBSubtractRow_SSE2(const uint8* src_argb0,
 
 #ifdef HAS_ARGBMULTIPLYROW_AVX2
 // Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8* src_argb0,
-                                            const uint8* src_argb1,
-                                            uint8* dst_argb,
+__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
+                                            const uint8_t* src_argb1,
+                                            uint8_t* dst_argb,
                                             int width) {
   __asm {
     push       esi
@@ -4909,9 +4919,9 @@ __declspec(naked) void ARGBMultiplyRow_AVX2(const uint8* src_argb0,
 
 #ifdef HAS_ARGBADDROW_AVX2
 // Add 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) void ARGBAddRow_AVX2(const uint8* src_argb0,
-                                       const uint8* src_argb1,
-                                       uint8* dst_argb,
+__declspec(naked) void ARGBAddRow_AVX2(const uint8_t* src_argb0,
+                                       const uint8_t* src_argb1,
+                                       uint8_t* dst_argb,
                                        int width) {
   __asm {
     push       esi
@@ -4939,9 +4949,9 @@ __declspec(naked) void ARGBAddRow_AVX2(const uint8* src_argb0,
 
 #ifdef HAS_ARGBSUBTRACTROW_AVX2
 // Subtract 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) void ARGBSubtractRow_AVX2(const uint8* src_argb0,
-                                            const uint8* src_argb1,
-                                            uint8* dst_argb,
+__declspec(naked) void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
+                                            const uint8_t* src_argb1,
+                                            uint8_t* dst_argb,
                                             int width) {
   __asm {
     push       esi
@@ -4972,10 +4982,10 @@ __declspec(naked) void ARGBSubtractRow_AVX2(const uint8* src_argb0,
 // -1  0  1
 // -2  0  2
 // -1  0  1
-__declspec(naked) void SobelXRow_SSE2(const uint8* src_y0,
-                                      const uint8* src_y1,
-                                      const uint8* src_y2,
-                                      uint8* dst_sobelx,
+__declspec(naked) void SobelXRow_SSE2(const uint8_t* src_y0,
+                                      const uint8_t* src_y1,
+                                      const uint8_t* src_y2,
+                                      uint8_t* dst_sobelx,
                                       int width) {
   __asm {
     push       esi
@@ -5030,9 +5040,9 @@ __declspec(naked) void SobelXRow_SSE2(const uint8* src_y0,
 // -1 -2 -1
 //  0  0  0
 //  1  2  1
-__declspec(naked) void SobelYRow_SSE2(const uint8* src_y0,
-                                      const uint8* src_y1,
-                                      uint8* dst_sobely,
+__declspec(naked) void SobelYRow_SSE2(const uint8_t* src_y0,
+                                      const uint8_t* src_y1,
+                                      uint8_t* dst_sobely,
                                       int width) {
   __asm {
     push       esi
@@ -5084,9 +5094,9 @@ __declspec(naked) void SobelYRow_SSE2(const uint8* src_y0,
 // R = Sobel
 // G = Sobel
 // B = Sobel
-__declspec(naked) void SobelRow_SSE2(const uint8* src_sobelx,
-                                     const uint8* src_sobely,
-                                     uint8* dst_argb,
+__declspec(naked) void SobelRow_SSE2(const uint8_t* src_sobelx,
+                                     const uint8_t* src_sobely,
+                                     uint8_t* dst_argb,
                                      int width) {
   __asm {
     push       esi
@@ -5132,9 +5142,9 @@ __declspec(naked) void SobelRow_SSE2(const uint8* src_sobelx,
 
 #ifdef HAS_SOBELTOPLANEROW_SSE2
 // Adds Sobel X and Sobel Y and stores Sobel into a plane.
-__declspec(naked) void SobelToPlaneRow_SSE2(const uint8* src_sobelx,
-                                            const uint8* src_sobely,
-                                            uint8* dst_y,
+__declspec(naked) void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx,
+                                            const uint8_t* src_sobely,
+                                            uint8_t* dst_y,
                                             int width) {
   __asm {
     push       esi
@@ -5166,9 +5176,9 @@ __declspec(naked) void SobelToPlaneRow_SSE2(const uint8* src_sobelx,
 // R = Sobel X
 // G = Sobel
 // B = Sobel Y
-__declspec(naked) void SobelXYRow_SSE2(const uint8* src_sobelx,
-                                       const uint8* src_sobely,
-                                       uint8* dst_argb,
+__declspec(naked) void SobelXYRow_SSE2(const uint8_t* src_sobelx,
+                                       const uint8_t* src_sobely,
+                                       uint8_t* dst_argb,
                                        int width) {
   __asm {
     push       esi
@@ -5225,11 +5235,11 @@ __declspec(naked) void SobelXYRow_SSE2(const uint8* src_sobelx,
 // count is number of averaged pixels to produce.
 // Does 4 pixels at a time.
 // This function requires alignment on accumulation buffer pointers.
-void CumulativeSumToAverageRow_SSE2(const int32* topleft,
-                                    const int32* botleft,
+void CumulativeSumToAverageRow_SSE2(const int32_t* topleft,
+                                    const int32_t* botleft,
                                     int width,
                                     int area,
-                                    uint8* dst,
+                                    uint8_t* dst,
                                     int count) {
   __asm {
     mov        eax, topleft  // eax topleft
@@ -5256,7 +5266,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft,
     cvtps2dq   xmm5, xmm5  // 0.16 fixed point
     packssdw   xmm5, xmm5  // 16 bit shorts
 
-    // 4 pixel loop small blocks.
+        // 4 pixel loop small blocks.
   s4:
         // top left
     movdqu     xmm0, [eax]
@@ -5298,7 +5308,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft,
 
     jmp        l4b
 
-    // 4 pixel loop
+            // 4 pixel loop
   l4:
         // top left
     movdqu     xmm0, [eax]
@@ -5350,7 +5360,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft,
     add        ecx, 4 - 1
     jl         l1b
 
-    // 1 pixel loop
+            // 1 pixel loop
   l1:
     movdqu     xmm0, [eax]
     psubd      xmm0, [eax + edx * 4]
@@ -5375,9 +5385,9 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft,
 #ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
 // Creates a table of cumulative sums where each value is a sum of all values
 // above and to the left of the value.
-void ComputeCumulativeSumRow_SSE2(const uint8* row,
-                                  int32* cumsum,
-                                  const int32* previous_cumsum,
+void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
+                                  int32_t* cumsum,
+                                  const int32_t* previous_cumsum,
                                   int width) {
   __asm {
     mov        eax, row
@@ -5392,7 +5402,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row,
     test       edx, 15
     jne        l4b
 
-    // 4 pixel loop
+        // 4 pixel loop
   l4:
     movdqu     xmm2, [eax]  // 4 argb pixels 16 bytes.
     lea        eax, [eax + 16]
@@ -5438,9 +5448,9 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row,
     add        ecx, 4 - 1
     jl         l1b
 
-    // 1 pixel loop
+            // 1 pixel loop
   l1:
-    movd       xmm2, dword ptr [eax]  // 1 argb pixel 4 bytes.
+    movd       xmm2, dword ptr [eax]  // 1 argb pixel, 4 bytes.
     lea        eax, [eax + 4]
     punpcklbw  xmm2, xmm1
     punpcklwd  xmm2, xmm1
@@ -5460,9 +5470,9 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row,
 
 #ifdef HAS_ARGBAFFINEROW_SSE2
 // Copy ARGB pixels from source image with slope to a row of destination.
-__declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb,
+__declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb,
                                                      int src_argb_stride,
-                                                     uint8* dst_argb,
+                                                     uint8_t* dst_argb,
                                                      const float* uv_dudv,
                                                      int width) {
   __asm {
@@ -5481,7 +5491,7 @@ __declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb,
     sub        ecx, 4
     jl         l4b
 
-    // setup for 4 pixel loop
+        // setup for 4 pixel loop
     pshufd     xmm7, xmm7, 0x44  // dup dudv
     pshufd     xmm5, xmm5, 0  // dup 4, stride
     movdqa     xmm0, xmm2  // x0, y0, x1, y1
@@ -5493,7 +5503,7 @@ __declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb,
     addps      xmm3, xmm4
     addps      xmm4, xmm4  // dudv *= 4
 
-    // 4 pixel loop
+        // 4 pixel loop
   l4:
     cvttps2dq  xmm0, xmm2  // x, y float to int first 2
     cvttps2dq  xmm1, xmm3  // x, y float to int next 2
@@ -5524,7 +5534,7 @@ __declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb,
     add        ecx, 4 - 1
     jl         l1b
 
-    // 1 pixel loop
+            // 1 pixel loop
   l1:
     cvttps2dq  xmm0, xmm2  // x, y float to int
     packssdw   xmm0, xmm0  // x, y as shorts
@@ -5546,8 +5556,8 @@ __declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb,
 
 #ifdef HAS_INTERPOLATEROW_AVX2
 // Bilinear filter 32x2 -> 32x1
-__declspec(naked) void InterpolateRow_AVX2(uint8* dst_ptr,
-                                           const uint8* src_ptr,
+__declspec(naked) void InterpolateRow_AVX2(uint8_t* dst_ptr,
+                                           const uint8_t* src_ptr,
                                            ptrdiff_t src_stride,
                                            int dst_width,
                                            int source_y_fraction) {
@@ -5598,7 +5608,7 @@ __declspec(naked) void InterpolateRow_AVX2(uint8* dst_ptr,
     jg         xloop
     jmp        xloop99
 
-    // Blend 50 / 50.
+        // Blend 50 / 50.
  xloop50:
    vmovdqu    ymm0, [esi]
    vpavgb     ymm0, ymm0, [esi + edx]
@@ -5608,7 +5618,7 @@ __declspec(naked) void InterpolateRow_AVX2(uint8* dst_ptr,
    jg         xloop50
    jmp        xloop99
 
-    // Blend 100 / 0 - Copy row unchanged.
+        // Blend 100 / 0 - Copy row unchanged.
  xloop100:
    rep movsb
 
@@ -5623,8 +5633,8 @@ __declspec(naked) void InterpolateRow_AVX2(uint8* dst_ptr,
 
 // Bilinear filter 16x2 -> 16x1
 // TODO(fbarchard): Consider allowing 256 using memcpy.
-__declspec(naked) void InterpolateRow_SSSE3(uint8* dst_ptr,
-                                            const uint8* src_ptr,
+__declspec(naked) void InterpolateRow_SSSE3(uint8_t* dst_ptr,
+                                            const uint8_t* src_ptr,
                                             ptrdiff_t src_stride,
                                             int dst_width,
                                             int source_y_fraction) {
@@ -5638,7 +5648,7 @@ __declspec(naked) void InterpolateRow_SSSE3(uint8* dst_ptr,
     mov        ecx, [esp + 8 + 16]  // dst_width
     mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
     sub        edi, esi
-    // Dispatch to specialized filters if applicable.
+        // Dispatch to specialized filters if applicable.
     cmp        eax, 0
     je         xloop100  // 0 /256.  Blend 100 / 0.
     cmp        eax, 128
@@ -5678,7 +5688,7 @@ __declspec(naked) void InterpolateRow_SSSE3(uint8* dst_ptr,
     jg         xloop
     jmp        xloop99
 
-    // Blend 50 / 50.
+        // Blend 50 / 50.
   xloop50:
     movdqu     xmm0, [esi]
     movdqu     xmm1, [esi + edx]
@@ -5689,7 +5699,7 @@ __declspec(naked) void InterpolateRow_SSSE3(uint8* dst_ptr,
     jg         xloop50
     jmp        xloop99
 
-    // Blend 100 / 0 - Copy row unchanged.
+        // Blend 100 / 0 - Copy row unchanged.
   xloop100:
     movdqu     xmm0, [esi]
     movdqu     [esi + edi], xmm0
@@ -5705,9 +5715,9 @@ __declspec(naked) void InterpolateRow_SSSE3(uint8* dst_ptr,
 }
 
 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-__declspec(naked) void ARGBShuffleRow_SSSE3(const uint8* src_argb,
-                                            uint8* dst_argb,
-                                            const uint8* shuffler,
+__declspec(naked) void ARGBShuffleRow_SSSE3(const uint8_t* src_argb,
+                                            uint8_t* dst_argb,
+                                            const uint8_t* shuffler,
                                             int width) {
   __asm {
     mov        eax, [esp + 4]  // src_argb
@@ -5732,9 +5742,9 @@ __declspec(naked) void ARGBShuffleRow_SSSE3(const uint8* src_argb,
 }
 
 #ifdef HAS_ARGBSHUFFLEROW_AVX2
-__declspec(naked) void ARGBShuffleRow_AVX2(const uint8* src_argb,
-                                           uint8* dst_argb,
-                                           const uint8* shuffler,
+__declspec(naked) void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
+                                           uint8_t* dst_argb,
+                                           const uint8_t* shuffler,
                                            int width) {
   __asm {
     mov        eax, [esp + 4]  // src_argb
@@ -5761,133 +5771,16 @@ __declspec(naked) void ARGBShuffleRow_AVX2(const uint8* src_argb,
 }
 #endif  // HAS_ARGBSHUFFLEROW_AVX2
 
-__declspec(naked) void ARGBShuffleRow_SSE2(const uint8* src_argb,
-                                           uint8* dst_argb,
-                                           const uint8* shuffler,
-                                           int width) {
-  __asm {
-    push       ebx
-    push       esi
-    mov        eax, [esp + 8 + 4]  // src_argb
-    mov        edx, [esp + 8 + 8]  // dst_argb
-    mov        esi, [esp + 8 + 12]  // shuffler
-    mov        ecx, [esp + 8 + 16]  // width
-    pxor       xmm5, xmm5
-
-    mov        ebx, [esi]  // shuffler
-    cmp        ebx, 0x03000102
-    je         shuf_3012
-    cmp        ebx, 0x00010203
-    je         shuf_0123
-    cmp        ebx, 0x00030201
-    je         shuf_0321
-    cmp        ebx, 0x02010003
-    je         shuf_2103
-
-    // TODO(fbarchard): Use one source pointer and 3 offsets.
-  shuf_any1:
-    movzx      ebx, byte ptr [esi]
-    movzx      ebx, byte ptr [eax + ebx]
-    mov        [edx], bl
-    movzx      ebx, byte ptr [esi + 1]
-    movzx      ebx, byte ptr [eax + ebx]
-    mov        [edx + 1], bl
-    movzx      ebx, byte ptr [esi + 2]
-    movzx      ebx, byte ptr [eax + ebx]
-    mov        [edx + 2], bl
-    movzx      ebx, byte ptr [esi + 3]
-    movzx      ebx, byte ptr [eax + ebx]
-    mov        [edx + 3], bl
-    lea        eax, [eax + 4]
-    lea        edx, [edx + 4]
-    sub        ecx, 1
-    jg         shuf_any1
-    jmp        shuf99
-
-  shuf_0123:
-    movdqu     xmm0, [eax]
-    lea        eax, [eax + 16]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm5
-    punpckhbw  xmm1, xmm5
-    pshufhw    xmm0, xmm0, 01Bh  // 1B = 00011011 = 0x0123 = BGRAToARGB
-    pshuflw    xmm0, xmm0, 01Bh
-    pshufhw    xmm1, xmm1, 01Bh
-    pshuflw    xmm1, xmm1, 01Bh
-    packuswb   xmm0, xmm1
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    sub        ecx, 4
-    jg         shuf_0123
-    jmp        shuf99
-
-  shuf_0321:
-    movdqu     xmm0, [eax]
-    lea        eax, [eax + 16]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm5
-    punpckhbw  xmm1, xmm5
-    pshufhw    xmm0, xmm0, 039h  // 39 = 00111001 = 0x0321 = RGBAToARGB
-    pshuflw    xmm0, xmm0, 039h
-    pshufhw    xmm1, xmm1, 039h
-    pshuflw    xmm1, xmm1, 039h
-    packuswb   xmm0, xmm1
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    sub        ecx, 4
-    jg         shuf_0321
-    jmp        shuf99
-
-  shuf_2103:
-    movdqu     xmm0, [eax]
-    lea        eax, [eax + 16]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm5
-    punpckhbw  xmm1, xmm5
-    pshufhw    xmm0, xmm0, 093h  // 93 = 10010011 = 0x2103 = ARGBToRGBA
-    pshuflw    xmm0, xmm0, 093h
-    pshufhw    xmm1, xmm1, 093h
-    pshuflw    xmm1, xmm1, 093h
-    packuswb   xmm0, xmm1
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    sub        ecx, 4
-    jg         shuf_2103
-    jmp        shuf99
-
-  shuf_3012:
-    movdqu     xmm0, [eax]
-    lea        eax, [eax + 16]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm5
-    punpckhbw  xmm1, xmm5
-    pshufhw    xmm0, xmm0, 0C6h  // C6 = 11000110 = 0x3012 = ABGRToARGB
-    pshuflw    xmm0, xmm0, 0C6h
-    pshufhw    xmm1, xmm1, 0C6h
-    pshuflw    xmm1, xmm1, 0C6h
-    packuswb   xmm0, xmm1
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    sub        ecx, 4
-    jg         shuf_3012
-
-  shuf99:
-    pop        esi
-    pop        ebx
-    ret
-  }
-}
-
 // YUY2 - Macro-pixel = 2 image pixels
 // Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
 
 // UYVY - Macro-pixel = 2 image pixels
 // U0Y0V0Y1
 
-__declspec(naked) void I422ToYUY2Row_SSE2(const uint8* src_y,
-                                          const uint8* src_u,
-                                          const uint8* src_v,
-                                          uint8* dst_frame,
+__declspec(naked) void I422ToYUY2Row_SSE2(const uint8_t* src_y,
+                                          const uint8_t* src_u,
+                                          const uint8_t* src_v,
+                                          uint8_t* dst_frame,
                                           int width) {
   __asm {
     push       esi
@@ -5921,10 +5814,10 @@ __declspec(naked) void I422ToYUY2Row_SSE2(const uint8* src_y,
   }
 }
 
-__declspec(naked) void I422ToUYVYRow_SSE2(const uint8* src_y,
-                                          const uint8* src_u,
-                                          const uint8* src_v,
-                                          uint8* dst_frame,
+__declspec(naked) void I422ToUYVYRow_SSE2(const uint8_t* src_y,
+                                          const uint8_t* src_u,
+                                          const uint8_t* src_v,
+                                          uint8_t* dst_frame,
                                           int width) {
   __asm {
     push       esi
@@ -5959,8 +5852,8 @@ __declspec(naked) void I422ToUYVYRow_SSE2(const uint8* src_y,
 }
 
 #ifdef HAS_ARGBPOLYNOMIALROW_SSE2
-__declspec(naked) void ARGBPolynomialRow_SSE2(const uint8* src_argb,
-                                              uint8* dst_argb,
+__declspec(naked) void ARGBPolynomialRow_SSE2(const uint8_t* src_argb,
+                                              uint8_t* dst_argb,
                                               const float* poly,
                                               int width) {
   __asm {
@@ -5971,7 +5864,7 @@ __declspec(naked) void ARGBPolynomialRow_SSE2(const uint8* src_argb,
     mov        ecx, [esp + 4 + 16] /* width */
     pxor       xmm3, xmm3  // 0 constant for zero extending bytes to ints.
 
-    // 2 pixel loop.
+        // 2 pixel loop.
  convertloop:
         //    pmovzxbd  xmm0, dword ptr [eax]  // BGRA pixel
         //    pmovzxbd  xmm4, dword ptr [eax + 4]  // BGRA pixel
@@ -6018,8 +5911,8 @@ __declspec(naked) void ARGBPolynomialRow_SSE2(const uint8* src_argb,
 #endif  // HAS_ARGBPOLYNOMIALROW_SSE2
 
 #ifdef HAS_ARGBPOLYNOMIALROW_AVX2
-__declspec(naked) void ARGBPolynomialRow_AVX2(const uint8* src_argb,
-                                              uint8* dst_argb,
+__declspec(naked) void ARGBPolynomialRow_AVX2(const uint8_t* src_argb,
+                                              uint8_t* dst_argb,
                                               const float* poly,
                                               int width) {
   __asm {
@@ -6058,8 +5951,8 @@ __declspec(naked) void ARGBPolynomialRow_AVX2(const uint8* src_argb,
 
 #ifdef HAS_HALFFLOATROW_SSE2
 static float kExpBias = 1.9259299444e-34f;
-__declspec(naked) void HalfFloatRow_SSE2(const uint16* src,
-                                         uint16* dst,
+__declspec(naked) void HalfFloatRow_SSE2(const uint16_t* src,
+                                         uint16_t* dst,
                                          float scale,
                                          int width) {
   __asm {
@@ -6072,7 +5965,7 @@ __declspec(naked) void HalfFloatRow_SSE2(const uint16* src,
     pxor       xmm5, xmm5
     sub        edx, eax
 
-    // 8 pixel loop.
+        // 8 pixel loop.
  convertloop:
     movdqu      xmm2, xmmword ptr [eax]  // 8 shorts
     add         eax, 16
@@ -6095,8 +5988,8 @@ __declspec(naked) void HalfFloatRow_SSE2(const uint16* src,
 #endif  // HAS_HALFFLOATROW_SSE2
 
 #ifdef HAS_HALFFLOATROW_AVX2
-__declspec(naked) void HalfFloatRow_AVX2(const uint16* src,
-                                         uint16* dst,
+__declspec(naked) void HalfFloatRow_AVX2(const uint16_t* src,
+                                         uint16_t* dst,
                                          float scale,
                                          int width) {
   __asm {
@@ -6110,7 +6003,7 @@ __declspec(naked) void HalfFloatRow_AVX2(const uint16* src,
     vpxor      ymm5, ymm5, ymm5
     sub        edx, eax
 
-    // 16 pixel loop.
+        // 16 pixel loop.
  convertloop:
     vmovdqu     ymm2, [eax]  // 16 shorts
     add         eax, 32
@@ -6133,8 +6026,8 @@ __declspec(naked) void HalfFloatRow_AVX2(const uint16* src,
 #endif  // HAS_HALFFLOATROW_AVX2
 
 #ifdef HAS_HALFFLOATROW_F16C
-__declspec(naked) void HalfFloatRow_F16C(const uint16* src,
-                                         uint16* dst,
+__declspec(naked) void HalfFloatRow_F16C(const uint16_t* src,
+                                         uint16_t* dst,
                                          float scale,
                                          int width) {
   __asm {
@@ -6144,7 +6037,7 @@ __declspec(naked) void HalfFloatRow_F16C(const uint16* src,
     mov        ecx, [esp + 16] /* width */
     sub        edx, eax
 
-    // 16 pixel loop.
+        // 16 pixel loop.
  convertloop:
     vpmovzxwd   ymm2, xmmword ptr [eax]  // 8 shorts -> 8 ints
     vpmovzxwd   ymm3, xmmword ptr [eax + 16]  // 8 more shorts
@@ -6167,8 +6060,8 @@ __declspec(naked) void HalfFloatRow_F16C(const uint16* src,
 
 #ifdef HAS_ARGBCOLORTABLEROW_X86
 // Tranform ARGB pixels with color table.
-__declspec(naked) void ARGBColorTableRow_X86(uint8* dst_argb,
-                                             const uint8* table_argb,
+__declspec(naked) void ARGBColorTableRow_X86(uint8_t* dst_argb,
+                                             const uint8_t* table_argb,
                                              int width) {
   __asm {
     push       esi
@@ -6201,8 +6094,8 @@ __declspec(naked) void ARGBColorTableRow_X86(uint8* dst_argb,
 
 #ifdef HAS_RGBCOLORTABLEROW_X86
 // Tranform RGB pixels with color table.
-__declspec(naked) void RGBColorTableRow_X86(uint8* dst_argb,
-                                            const uint8* table_argb,
+__declspec(naked) void RGBColorTableRow_X86(uint8_t* dst_argb,
+                                            const uint8_t* table_argb,
                                             int width) {
   __asm {
     push       esi
@@ -6233,11 +6126,11 @@ __declspec(naked) void RGBColorTableRow_X86(uint8* dst_argb,
 
 #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
 // Tranform RGB pixels with luma table.
-__declspec(naked) void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
-                                                   uint8* dst_argb,
+__declspec(naked) void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
+                                                   uint8_t* dst_argb,
                                                    int width,
-                                                   const uint8* luma,
-                                                   uint32 lumacoeff) {
+                                                   const uint8_t* luma,
+                                                   uint32_t lumacoeff) {
   __asm {
     push       esi
     push       edi
@@ -6252,7 +6145,7 @@ __declspec(naked) void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
     psllw      xmm4, 8
     pxor       xmm5, xmm5
 
-    // 4 pixel loop.
+        // 4 pixel loop.
   convertloop:
     movdqu     xmm0, xmmword ptr [eax]  // generate luma ptr
     pmaddubsw  xmm0, xmm3