diff options
author | Frank Barchard <fbarchard@google.com> | 2022-12-15 14:11:52 -0800 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-12-15 22:34:22 +0000 |
commit | 3abd6f36b6e4f5a2e0ce236580a8bc1da3c7cf7e (patch) | |
tree | 3fc173f8288f4457858919c806bac14593dc3d1b | |
parent | 610e0cdead3fcd3288693d18eab8c7323805ad9e (diff) | |
download | libyuv-3abd6f36b6e4f5a2e0ce236580a8bc1da3c7cf7e.tar.gz |
Casting for scale functions
- MT2T support for source strides added, but only works for positive values.
- Reduced casting in row_common - one cast per assignment.
- scaling functions use intptr_t for intermediate calculations, then cast strides to ptrdiff_t
Bug: libyuv:948, b/257266635, b/262468594
Change-Id: I0409a0ce916b777da2a01c0ab0b56dccefed3b33
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4102203
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Reviewed-by: Justin Green <greenjustin@google.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Ernest Hua <ernesthua@google.com>
-rw-r--r-- | README.chromium | 2 | ||||
-rw-r--r-- | include/libyuv/convert.h | 7 | ||||
-rw-r--r-- | include/libyuv/row.h | 4 | ||||
-rw-r--r-- | include/libyuv/version.h | 2 | ||||
-rw-r--r-- | source/convert.cc | 91 | ||||
-rw-r--r-- | source/row_common.cc | 640 | ||||
-rw-r--r-- | source/row_neon.cc | 6 | ||||
-rw-r--r-- | source/row_neon64.cc | 6 | ||||
-rw-r--r-- | source/scale.cc | 10 | ||||
-rw-r--r-- | source/scale_argb.cc | 36 | ||||
-rw-r--r-- | source/scale_uv.cc | 46 | ||||
-rw-r--r-- | unit_test/convert_test.cc | 475 |
12 files changed, 702 insertions, 623 deletions
diff --git a/README.chromium b/README.chromium index ccd2ca06..a33b46b9 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1852 +Version: 1854 License: BSD License File: LICENSE diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index 5c5231bb..ae9b954f 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -163,10 +163,13 @@ int MM21ToYUY2(const uint8_t* src_y, int height); // Convert MT2T to P010 +// Note that src_y and src_uv point to packed 10-bit values, so the Y plane will +// be 10 / 8 times the dimensions of the image. Also for this reason, +// src_stride_y and src_stride_uv are given in bytes. LIBYUV_API -int MT2TToP010(const uint16_t* src_y, +int MT2TToP010(const uint8_t* src_y, int src_stride_y, - const uint16_t* src_uv, + const uint8_t* src_uv, int src_stride_uv, uint16_t* dst_y, int dst_stride_y, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 27f468ad..45b172b2 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -2123,8 +2123,8 @@ void DetileToYUY2_Any_NEON(const uint8_t* src_y, ptrdiff_t src_uv_tile_stride, uint8_t* dst_yuy2, int width); -void UnpackMT2T_C(const uint16_t* src, uint16_t* dst, size_t size); -void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size); +void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size); +void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size); void MergeUVRow_C(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 6254d978..c72f7461 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1852 +#define LIBYUV_VERSION 1854 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index 65d4ba16..b62e513a 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -735,12 +735,10 @@ int MM21ToYUY2(const uint8_t* src_y, // Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format // documentation. // TODO(greenjustin): Add an MT2T to I420 conversion. -// TODO(greenjustin): Investigate if there are valid stride parameters other -// than width. LIBYUV_API -int MT2TToP010(const uint16_t* src_y, +int MT2TToP010(const uint8_t* src_y, int src_stride_y, - const uint16_t* src_uv, + const uint8_t* src_uv, int src_stride_uv, uint16_t* dst_y, int dst_stride_y, @@ -748,48 +746,75 @@ int MT2TToP010(const uint16_t* src_y, int dst_stride_uv, int width, int height) { - if (width <= 0 || height <= 0 || !src_y || !src_uv || !dst_y || !dst_uv) { + if (width <= 0 || !height || !src_uv || !dst_uv) { return -1; } - // TODO(greenjustin): Investigate if we can allow arbitrary sizes. This may - // not be semantically meaningful in this format, but we do not have samples - // of unaligned data to conclude that yet. This format is 16x32 tiled, so we - // must pad the width and height to reflect that. - int aligned_width = (width + 15) & ~15; - int aligned_height = (height + 31) & ~31; - { - size_t y_size = aligned_width * aligned_height * 10 / 8; - size_t uv_size = aligned_width * ((aligned_height + 1) / 2) * 10 / 8; - size_t tmp_y_size = aligned_width * aligned_height * sizeof(uint16_t); - size_t tmp_uv_size = - aligned_width * ((aligned_height + 1) / 2) * sizeof(uint16_t); - void (*UnpackMT2T)(const uint16_t* src, uint16_t* dst, size_t size) = + int u_width = (width + 1) / 2; + int uv_width = 2 * u_width; + int y = 0; + int uv_height = uv_height = (height + 1) / 2; + const int tile_width = 16; + const int y_tile_height = 32; + const int uv_tile_height = 16; + int padded_width = (width + tile_width - 1) & ~(tile_width - 1); + int y_tile_row_size = padded_width * y_tile_height * 10 / 8; + int uv_tile_row_size = padded_width * uv_tile_height * 10 / 8; + size_t row_buf_size = padded_width * y_tile_height * sizeof(uint16_t); + void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) = UnpackMT2T_C; - align_buffer_64(tmp_y, tmp_y_size); - align_buffer_64(tmp_uv, tmp_uv_size); + align_buffer_64(row_buf, row_buf_size); #if defined(HAS_UNPACKMT2T_NEON) if (TestCpuFlag(kCpuHasNEON)) { UnpackMT2T = UnpackMT2T_NEON; } #endif + // Negative height means invert the image. + if (height < 0) { + height = -height; + uv_height = (height + 1) / 2; + if (dst_y) { + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + dst_uv = dst_uv + (uv_height - 1) * dst_stride_uv; + dst_stride_uv = -dst_stride_uv; + } + + // Unpack and detile Y in rows of tiles + if (src_y && dst_y) { + for (y = 0; y < (height & ~(y_tile_height - 1)); y += y_tile_height) { + UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size); + DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y, + width, y_tile_height, y_tile_height); + src_y += src_stride_y * y_tile_height; + dst_y += dst_stride_y * y_tile_height; + } + if (height & (y_tile_height - 1)) { + UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size); + DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y, + width, height & (y_tile_height - 1), y_tile_height); + } + } - // TODO(greenjustin): Unpack and detile in rows rather than planes to keep - // the caches hot. - UnpackMT2T(src_y, (uint16_t*)tmp_y, y_size); - UnpackMT2T(src_uv, (uint16_t*)tmp_uv, uv_size); - - DetilePlane_16((uint16_t*)tmp_y, src_stride_y, dst_y, dst_stride_y, width, - height, 32); - DetilePlane_16((uint16_t*)tmp_uv, src_stride_uv, dst_uv, dst_stride_uv, - width, (height + 1) / 2, 16); - - free_aligned_buffer_64(tmp_y); - free_aligned_buffer_64(tmp_uv); + // Unpack and detile UV plane + for (y = 0; y < (uv_height & ~(uv_tile_height - 1)); y += uv_tile_height) { + UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size); + DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv, + uv_width, uv_tile_height, uv_tile_height); + src_uv += src_stride_uv * uv_tile_height; + dst_uv += dst_stride_uv * uv_tile_height; + } + if (uv_height & (uv_tile_height - 1)) { + UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size); + DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv, + uv_width, uv_height & (uv_tile_height - 1), + uv_tile_height); + } + free_aligned_buffer_64(row_buf); } - return 0; } diff --git a/source/row_common.cc b/source/row_common.cc index 7d084e76..3d1e705e 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -21,6 +21,12 @@ namespace libyuv { extern "C" { #endif +#ifdef __cplusplus +#define STATIC_CAST(type, expr) static_cast<type>(expr) +#else +#define STATIC_CAST(type, expr) (type)(expr) +#endif + // This macro controls YUV to RGB using unsigned math to extend range of // YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B: // LIBYUV_UNLIMITED_DATA @@ -182,12 +188,13 @@ void RGB565ToARGBRow_C(const uint8_t* src_rgb565, int width) { int x; for (x = 0; x < width; ++x) { - uint8_t b = src_rgb565[0] & 0x1f; - uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8_t r = src_rgb565[1] >> 3; - dst_argb[0] = (b << 3) | (b >> 2); - dst_argb[1] = (g << 2) | (g >> 4); - dst_argb[2] = (r << 3) | (r >> 2); + uint8_t b = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f); + uint8_t g = STATIC_CAST( + uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3)); + uint8_t r = STATIC_CAST(uint8_t, src_rgb565[1] >> 3); + dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2)); + dst_argb[1] = STATIC_CAST(uint8_t, (g << 2) | (g >> 4)); + dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2)); dst_argb[3] = 255u; dst_argb += 4; src_rgb565 += 2; @@ -199,13 +206,14 @@ void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555, int width) { int x; for (x = 0; x < width; ++x) { - uint8_t b = src_argb1555[0] & 0x1f; - uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8_t r = (src_argb1555[1] & 0x7c) >> 2; - uint8_t a = src_argb1555[1] >> 7; - dst_argb[0] = (b << 3) | (b >> 2); - dst_argb[1] = (g << 3) | (g >> 2); - dst_argb[2] = (r << 3) | (r >> 2); + uint8_t b = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f); + uint8_t g = STATIC_CAST( + uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)); + uint8_t r = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2); + uint8_t a = STATIC_CAST(uint8_t, src_argb1555[1] >> 7); + dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2)); + dst_argb[1] = STATIC_CAST(uint8_t, (g << 3) | (g >> 2)); + dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2)); dst_argb[3] = -a; dst_argb += 4; src_argb1555 += 2; @@ -217,14 +225,14 @@ void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444, int width) { int x; for (x = 0; x < width; ++x) { - uint8_t b = src_argb4444[0] & 0x0f; - uint8_t g = src_argb4444[0] >> 4; - uint8_t r = src_argb4444[1] & 0x0f; - uint8_t a = src_argb4444[1] >> 4; - dst_argb[0] = (b << 4) | b; - dst_argb[1] = (g << 4) | g; - dst_argb[2] = (r << 4) | r; - dst_argb[3] = (a << 4) | a; + uint8_t b = STATIC_CAST(uint8_t, src_argb4444[0] & 0x0f); + uint8_t g = STATIC_CAST(uint8_t, src_argb4444[0] >> 4); + uint8_t r = STATIC_CAST(uint8_t, src_argb4444[1] & 0x0f); + uint8_t a = STATIC_CAST(uint8_t, src_argb4444[1] >> 4); + dst_argb[0] = STATIC_CAST(uint8_t, (b << 4) | b); + dst_argb[1] = STATIC_CAST(uint8_t, (g << 4) | g); + dst_argb[2] = STATIC_CAST(uint8_t, (r << 4) | r); + dst_argb[3] = STATIC_CAST(uint8_t, (a << 4) | a); dst_argb += 4; src_argb4444 += 2; } @@ -320,7 +328,7 @@ void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t b0 = src_argb[0] >> 3; uint8_t g0 = src_argb[1] >> 2; uint8_t r0 = src_argb[2] >> 3; - *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); + *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); } } @@ -340,23 +348,25 @@ void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, for (x = 0; x < width - 1; x += 2) { int dither0 = ((const unsigned char*)(&dither4))[x & 3]; int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3]; - uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3; - uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2; - uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3; - uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3; - uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2; - uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3; - *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 11); - *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 11); + uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3); + uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2); + uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3); + uint8_t b1 = STATIC_CAST(uint8_t, clamp255(src_argb[4] + dither1) >> 3); + uint8_t g1 = STATIC_CAST(uint8_t, clamp255(src_argb[5] + dither1) >> 2); + uint8_t r1 = STATIC_CAST(uint8_t, clamp255(src_argb[6] + dither1) >> 3); + *(uint16_t*)(dst_rgb + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); + *(uint16_t*)(dst_rgb + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11)); dst_rgb += 4; src_argb += 8; } if (width & 1) { int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3]; - uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3; - uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2; - uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3; - *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); + uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3); + uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2); + uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3); + *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); } } @@ -371,8 +381,10 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t g1 = src_argb[5] >> 3; uint8_t r1 = src_argb[6] >> 3; uint8_t a1 = src_argb[7] >> 7; - *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); - *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 10) | (a1 << 15); + *(uint16_t*)(dst_rgb + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15)); + *(uint16_t*)(dst_rgb + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | (a1 << 15)); dst_rgb += 4; src_argb += 8; } @@ -381,7 +393,8 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t g0 = src_argb[1] >> 3; uint8_t r0 = src_argb[2] >> 3; uint8_t a0 = src_argb[3] >> 7; - *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); + *(uint16_t*)(dst_rgb) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15)); } } @@ -396,8 +409,10 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t g1 = src_argb[5] >> 4; uint8_t r1 = src_argb[6] >> 4; uint8_t a1 = src_argb[7] >> 4; - *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); - *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 4) | (r1 << 8) | (a1 << 12); + *(uint16_t*)(dst_rgb + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12)); + *(uint16_t*)(dst_rgb + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | (a1 << 12)); dst_rgb += 4; src_argb += 8; } @@ -406,7 +421,8 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t g0 = src_argb[1] >> 4; uint8_t r0 = src_argb[2] >> 4; uint8_t a0 = src_argb[3] >> 4; - *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); + *(uint16_t*)(dst_rgb) = + STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12)); } } @@ -417,7 +433,8 @@ void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) { uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2); uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2); uint32_t a0 = (src_abgr[3] >> 6); - *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30); + *(uint32_t*)(dst_ar30) = + STATIC_CAST(uint32_t, r0 | (g0 << 10) | (b0 << 20) | (a0 << 30)); dst_ar30 += 4; src_abgr += 4; } @@ -430,7 +447,8 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) { uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2); uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2); uint32_t a0 = (src_argb[3] >> 6); - *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30); + *(uint32_t*)(dst_ar30) = + STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30)); dst_ar30 += 4; src_argb += 4; } @@ -530,8 +548,8 @@ void AR64ShuffleRow_C(const uint8_t* src_ar64, #ifdef LIBYUV_RGB7 // Old 7 bit math for compatibility on unsupported platforms. -static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { - return ((33 * r + 65 * g + 13 * b) >> 7) + 16; +static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, ((33 * r + 65 * g + 13 * b) >> 7) + 16); } #else // 8 bit @@ -540,8 +558,8 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { // return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) + // 0x7e80) >> 8; -static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { - return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; +static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8); } #endif @@ -549,29 +567,31 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { // LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round. #ifdef LIBYUV_RGBTOU_TRUNCATE -static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) { - return (112 * b - 74 * g - 38 * r + 0x8000) >> 8; +static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8); } -static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) { - return (112 * r - 94 * g - 18 * b + 0x8000) >> 8; +static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8); } #else // TODO(fbarchard): Add rounding to x86 SIMD and use this -static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) { - return (112 * b - 74 * g - 38 * r + 0x8080) >> 8; +static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8); } -static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) { - return (112 * r - 94 * g - 18 * b + 0x8080) >> 8; +static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8); } #endif // LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb. #if !defined(LIBYUV_ARGBTOUV_PAVGB) static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) { - return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8; + return STATIC_CAST( + uint8_t, ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8); } static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) { - return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8; + return STATIC_CAST( + uint8_t, ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8); } #endif @@ -690,28 +710,28 @@ MAKEROWY(RAW, 0, 1, 2, 3) #ifdef LIBYUV_RGB7 // Old 7 bit math for compatibility on unsupported platforms. -static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { +static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { return (38 * r + 75 * g + 15 * b + 64) >> 7; } #else // 8 bit -static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { +static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { return (77 * r + 150 * g + 29 * b + 128) >> 8; } #endif #if defined(LIBYUV_ARGBTOUV_PAVGB) -static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) { +static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) { return (127 * b - 84 * g - 43 * r + 0x8080) >> 8; } -static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) { +static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) { return (127 * r - 107 * g - 20 * b + 0x8080) >> 8; } #else -static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) { +static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) { return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8; } -static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) { +static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) { return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8; } #endif @@ -808,11 +828,12 @@ void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_rgb565[0] & 0x1f; - uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8_t g = STATIC_CAST( + uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3)); uint8_t r = src_rgb565[1] >> 3; - b = (b << 3) | (b >> 2); - g = (g << 2) | (g >> 4); - r = (r << 3) | (r >> 2); + b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2)); + g = STATIC_CAST(uint8_t, (g << 2) | (g >> 4)); + r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2)); dst_y[0] = RGBToY(r, g, b); src_rgb565 += 2; dst_y += 1; @@ -823,11 +844,12 @@ void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_argb1555[0] & 0x1f; - uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8_t g = STATIC_CAST( + uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)); uint8_t r = (src_argb1555[1] & 0x7c) >> 2; - b = (b << 3) | (b >> 2); - g = (g << 3) | (g >> 2); - r = (r << 3) | (r >> 2); + b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2)); + g = STATIC_CAST(uint8_t, (g << 3) | (g >> 2)); + r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2)); dst_y[0] = RGBToY(r, g, b); src_argb1555 += 2; dst_y += 1; @@ -840,9 +862,9 @@ void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) { uint8_t b = src_argb4444[0] & 0x0f; uint8_t g = src_argb4444[0] >> 4; uint8_t r = src_argb4444[1] & 0x0f; - b = (b << 4) | b; - g = (g << 4) | g; - r = (r << 4) | r; + b = STATIC_CAST(uint8_t, (b << 4) | b); + g = STATIC_CAST(uint8_t, (g << 4) | g); + r = STATIC_CAST(uint8_t, (r << 4) | r); dst_y[0] = RGBToY(r, g, b); src_argb4444 += 2; dst_y += 1; @@ -857,31 +879,35 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565, const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565; int x; for (x = 0; x < width - 1; x += 2) { - uint8_t b0 = src_rgb565[0] & 0x1f; - uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8_t r0 = src_rgb565[1] >> 3; - uint8_t b1 = src_rgb565[2] & 0x1f; - uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); - uint8_t r1 = src_rgb565[3] >> 3; - uint8_t b2 = next_rgb565[0] & 0x1f; - uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); - uint8_t r2 = next_rgb565[1] >> 3; - uint8_t b3 = next_rgb565[2] & 0x1f; - uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); - uint8_t r3 = next_rgb565[3] >> 3; - - b0 = (b0 << 3) | (b0 >> 2); - g0 = (g0 << 2) | (g0 >> 4); - r0 = (r0 << 3) | (r0 >> 2); - b1 = (b1 << 3) | (b1 >> 2); - g1 = (g1 << 2) | (g1 >> 4); - r1 = (r1 << 3) | (r1 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - g2 = (g2 << 2) | (g2 >> 4); - r2 = (r2 << 3) | (r2 >> 2); - b3 = (b3 << 3) | (b3 >> 2); - g3 = (g3 << 2) | (g3 >> 4); - r3 = (r3 << 3) | (r3 >> 2); + uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f); + uint8_t g0 = STATIC_CAST( + uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3)); + uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3); + uint8_t b1 = STATIC_CAST(uint8_t, src_rgb565[2] & 0x1f); + uint8_t g1 = STATIC_CAST( + uint8_t, (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3)); + uint8_t r1 = STATIC_CAST(uint8_t, src_rgb565[3] >> 3); + uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f); + uint8_t g2 = STATIC_CAST( + uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3)); + uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3); + uint8_t b3 = STATIC_CAST(uint8_t, next_rgb565[2] & 0x1f); + uint8_t g3 = STATIC_CAST( + uint8_t, (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3)); + uint8_t r3 = STATIC_CAST(uint8_t, next_rgb565[3] >> 3); + + b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2)); + g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4)); + r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2)); + b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2)); + g1 = STATIC_CAST(uint8_t, (g1 << 2) | (g1 >> 4)); + r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2)); + b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2)); + g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4)); + r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2)); + b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2)); + g3 = STATIC_CAST(uint8_t, (g3 << 2) | (g3 >> 4)); + r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2)); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3)); @@ -903,19 +929,20 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565, dst_v += 1; } if (width & 1) { - uint8_t b0 = src_rgb565[0] & 0x1f; - uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8_t r0 = src_rgb565[1] >> 3; - uint8_t b2 = next_rgb565[0] & 0x1f; - uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); - uint8_t r2 = next_rgb565[1] >> 3; - - b0 = (b0 << 3) | (b0 >> 2); - g0 = (g0 << 2) | (g0 >> 4); - r0 = (r0 << 3) | (r0 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - g2 = (g2 << 2) | (g2 >> 4); - r2 = (r2 << 3) | (r2 >> 2); + uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f); + uint8_t g0 = STATIC_CAST( + uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3)); + uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3); + uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f); + uint8_t g2 = STATIC_CAST( + uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3)); + uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3); + b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2)); + g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4)); + r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2)); + b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2)); + g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4)); + r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2)); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(b0, b2); @@ -941,31 +968,35 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555, const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555; int x; for (x = 0; x < width - 1; x += 2) { - uint8_t b0 = src_argb1555[0] & 0x1f; - uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2; - uint8_t b1 = src_argb1555[2] & 0x1f; - uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3); - uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2; - uint8_t b2 = next_argb1555[0] & 0x1f; - uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); - uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2; - uint8_t b3 = next_argb1555[2] & 0x1f; - uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3); - uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2; - - b0 = (b0 << 3) | (b0 >> 2); - g0 = (g0 << 3) | (g0 >> 2); - r0 = (r0 << 3) | (r0 >> 2); - b1 = (b1 << 3) | (b1 >> 2); - g1 = (g1 << 3) | (g1 >> 2); - r1 = (r1 << 3) | (r1 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - g2 = (g2 << 3) | (g2 >> 2); - r2 = (r2 << 3) | (r2 >> 2); - b3 = (b3 << 3) | (b3 >> 2); - g3 = (g3 << 3) | (g3 >> 2); - r3 = (r3 << 3) | (r3 >> 2); + uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f); + uint8_t g0 = STATIC_CAST( + uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)); + uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2); + uint8_t b1 = STATIC_CAST(uint8_t, src_argb1555[2] & 0x1f); + uint8_t g1 = STATIC_CAST( + uint8_t, (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3)); + uint8_t r1 = STATIC_CAST(uint8_t, (src_argb1555[3] & 0x7c) >> 2); + uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f); + uint8_t g2 = STATIC_CAST( + uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3)); + uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2); + uint8_t b3 = STATIC_CAST(uint8_t, next_argb1555[2] & 0x1f); + uint8_t g3 = STATIC_CAST( + uint8_t, (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3)); + uint8_t r3 = STATIC_CAST(uint8_t, (next_argb1555[3] & 0x7c) >> 2); + + b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2)); + g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2)); + r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2)); + b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2)); + g1 = STATIC_CAST(uint8_t, (g1 << 3) | (g1 >> 2)); + r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2)); + b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2)); + g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2)); + r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2)); + b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2)); + g3 = STATIC_CAST(uint8_t, (g3 << 3) | (g3 >> 2)); + r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2)); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3)); @@ -987,19 +1018,21 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555, dst_v += 1; } if (width & 1) { - uint8_t b0 = src_argb1555[0] & 0x1f; - uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2; - uint8_t b2 = next_argb1555[0] & 0x1f; - uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); - uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2; - - b0 = (b0 << 3) | (b0 >> 2); - g0 = (g0 << 3) | (g0 >> 2); - r0 = (r0 << 3) | (r0 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - g2 = (g2 << 3) | (g2 >> 2); - r2 = (r2 << 3) | (r2 >> 2); + uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f); + uint8_t g0 = STATIC_CAST( + uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)); + uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2); + uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f); + uint8_t g2 = STATIC_CAST( + uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3)); + uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2); + + b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2)); + g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2)); + r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2)); + b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2)); + g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2)); + r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2)); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(b0, b2); @@ -1038,18 +1071,18 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444, uint8_t g3 = next_argb4444[2] >> 4; uint8_t r3 = next_argb4444[3] & 0x0f; - b0 = (b0 << 4) | b0; - g0 = (g0 << 4) | g0; - r0 = (r0 << 4) | r0; - b1 = (b1 << 4) | b1; - g1 = (g1 << 4) | g1; - r1 = (r1 << 4) | r1; - b2 = (b2 << 4) | b2; - g2 = (g2 << 4) | g2; - r2 = (r2 << 4) | r2; - b3 = (b3 << 4) | b3; - g3 = (g3 << 4) | g3; - r3 = (r3 << 4) | r3; + b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0); + g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0); + r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0); + b1 = STATIC_CAST(uint8_t, (b1 << 4) | b1); + g1 = STATIC_CAST(uint8_t, (g1 << 4) | g1); + r1 = STATIC_CAST(uint8_t, (r1 << 4) | r1); + b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2); + g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2); + r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2); + b3 = STATIC_CAST(uint8_t, (b3 << 4) | b3); + g3 = STATIC_CAST(uint8_t, (g3 << 4) | g3); + r3 = STATIC_CAST(uint8_t, (r3 << 4) | r3); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3)); @@ -1078,12 +1111,12 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444, uint8_t g2 = next_argb4444[0] >> 4; uint8_t r2 = next_argb4444[1] & 0x0f; - b0 = (b0 << 4) | b0; - g0 = (g0 << 4) | g0; - r0 = (r0 << 4) | r0; - b2 = (b2 << 4) | b2; - g2 = (g2 << 4) | g2; - r2 = (r2 << 4) | r2; + b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0); + g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0); + r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0); + b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2); + g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2); + r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(b0, b2); @@ -1140,9 +1173,9 @@ void ARGBSepiaRow_C(uint8_t* dst_argb, int width) { int sg = (b * 22 + g * 88 + r * 45) >> 7; int sr = (b * 24 + g * 98 + r * 50) >> 7; // b does not over flow. a is preserved from original. - dst_argb[0] = sb; - dst_argb[1] = clamp255(sg); - dst_argb[2] = clamp255(sr); + dst_argb[0] = STATIC_CAST(uint8_t, sb); + dst_argb[1] = STATIC_CAST(uint8_t, clamp255(sg)); + dst_argb[2] = STATIC_CAST(uint8_t, clamp255(sr)); dst_argb += 4; } } @@ -1171,10 +1204,10 @@ void ARGBColorMatrixRow_C(const uint8_t* src_argb, int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] + a * matrix_argb[15]) >> 6; - dst_argb[0] = Clamp(sb); - dst_argb[1] = Clamp(sg); - dst_argb[2] = Clamp(sr); - dst_argb[3] = Clamp(sa); + dst_argb[0] = STATIC_CAST(uint8_t, Clamp(sb)); + dst_argb[1] = STATIC_CAST(uint8_t, Clamp(sg)); + dst_argb[2] = STATIC_CAST(uint8_t, Clamp(sr)); + dst_argb[3] = STATIC_CAST(uint8_t, Clamp(sa)); src_argb += 4; dst_argb += 4; } @@ -1224,9 +1257,12 @@ void ARGBQuantizeRow_C(uint8_t* dst_argb, int b = dst_argb[0]; int g = dst_argb[1]; int r = dst_argb[2]; - dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; - dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset; - dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset; + dst_argb[0] = STATIC_CAST( + uint8_t, (b * scale >> 16) * interval_size + interval_offset); + dst_argb[1] = STATIC_CAST( + uint8_t, (g * scale >> 16) * interval_size + interval_offset); + dst_argb[2] = STATIC_CAST( + uint8_t, (r * scale >> 16) * interval_size + interval_offset); dst_argb += 4; } } @@ -1277,10 +1313,10 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb, const uint32_t g_scale = src_argb1[1]; const uint32_t r_scale = src_argb1[2]; const uint32_t a_scale = src_argb1[3]; - dst_argb[0] = SHADE(b, b_scale); - dst_argb[1] = SHADE(g, g_scale); - dst_argb[2] = SHADE(r, r_scale); - dst_argb[3] = SHADE(a, a_scale); + dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_scale)); + dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_scale)); + dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_scale)); + dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_scale)); src_argb += 4; src_argb1 += 4; dst_argb += 4; @@ -1305,10 +1341,10 @@ void ARGBAddRow_C(const uint8_t* src_argb, const int g_add = src_argb1[1]; const int r_add = src_argb1[2]; const int a_add = src_argb1[3]; - dst_argb[0] = SHADE(b, b_add); - dst_argb[1] = SHADE(g, g_add); - dst_argb[2] = SHADE(r, r_add); - dst_argb[3] = SHADE(a, a_add); + dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_add)); + dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_add)); + dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_add)); + dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_add)); src_argb += 4; src_argb1 += 4; dst_argb += 4; @@ -1332,10 +1368,10 @@ void ARGBSubtractRow_C(const uint8_t* src_argb, const int g_sub = src_argb1[1]; const int r_sub = src_argb1[2]; const int a_sub = src_argb1[3]; - dst_argb[0] = SHADE(b, b_sub); - dst_argb[1] = SHADE(g, g_sub); - dst_argb[2] = SHADE(r, r_sub); - dst_argb[3] = SHADE(a, a_sub); + dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_sub)); + dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_sub)); + dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_sub)); + dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_sub)); src_argb += 4; src_argb1 += 4; dst_argb += 4; @@ -1692,9 +1728,9 @@ static __inline void YuvPixel(uint8_t y, LOAD_YUV_CONSTANTS; uint32_t y32 = y * 0x0101; CALC_RGB16; - *b = Clamp((int32_t)(b16) >> 6); - *g = Clamp((int32_t)(g16) >> 6); - *r = Clamp((int32_t)(r16) >> 6); + *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6)); + *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6)); + *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6)); } // Reads 8 bit YUV and leaves result as 16 bit. @@ -1724,8 +1760,8 @@ static __inline void YuvPixel10_16(uint16_t y, const struct YuvConstants* yuvconstants) { LOAD_YUV_CONSTANTS; uint32_t y32 = (y << 6) | (y >> 4); - u = clamp255(u >> 2); - v = clamp255(v >> 2); + u = STATIC_CAST(uint8_t, clamp255(u >> 2)); + v = STATIC_CAST(uint8_t, clamp255(v >> 2)); CALC_RGB16; *b = b16; *g = g16; @@ -1743,8 +1779,8 @@ static __inline void YuvPixel12_16(int16_t y, const struct YuvConstants* yuvconstants) { LOAD_YUV_CONSTANTS; uint32_t y32 = (y << 4) | (y >> 8); - u = clamp255(u >> 4); - v = clamp255(v >> 4); + u = STATIC_CAST(uint8_t, clamp255(u >> 4)); + v = STATIC_CAST(uint8_t, clamp255(v >> 4)); CALC_RGB16; *b = b16; *g = g16; @@ -1764,9 +1800,9 @@ static __inline void YuvPixel10(uint16_t y, int g16; int r16; YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants); - *b = Clamp(b16 >> 6); - *g = Clamp(g16 >> 6); - *r = Clamp(r16 >> 6); + *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6)); + *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6)); + *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6)); } // C reference code that mimics the YUV 12 bit assembly. @@ -1782,9 +1818,9 @@ static __inline void YuvPixel12(uint16_t y, int g16; int r16; YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants); - *b = Clamp(b16 >> 6); - *g = Clamp(g16 >> 6); - *r = Clamp(r16 >> 6); + *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6)); + *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6)); + *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6)); } // C reference code that mimics the YUV 16 bit assembly. @@ -1798,12 +1834,12 @@ static __inline void YuvPixel16_8(uint16_t y, const struct YuvConstants* yuvconstants) { LOAD_YUV_CONSTANTS; uint32_t y32 = y; - u = clamp255(u >> 8); - v = clamp255(v >> 8); + u = STATIC_CAST(uint16_t, clamp255(u >> 8)); + v = STATIC_CAST(uint16_t, clamp255(v >> 8)); CALC_RGB16; - *b = Clamp((int32_t)(b16) >> 6); - *g = Clamp((int32_t)(g16) >> 6); - *r = Clamp((int32_t)(r16) >> 6); + *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6)); + *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6)); + *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6)); } // C reference code that mimics the YUV 16 bit assembly. @@ -1817,8 +1853,8 @@ static __inline void YuvPixel16_16(uint16_t y, const struct YuvConstants* yuvconstants) { LOAD_YUV_CONSTANTS; uint32_t y32 = y; - u = clamp255(u >> 8); - v = clamp255(v >> 8); + u = STATIC_CAST(uint16_t, clamp255(u >> 8)); + v = STATIC_CAST(uint16_t, clamp255(v >> 8)); CALC_RGB16; *b = b16; *g = g16; @@ -1840,9 +1876,9 @@ static __inline void YPixel(uint8_t y, int yg = yuvconstants->kYToRgb[0]; #endif uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; - *b = Clamp(((int32_t)(y1) + ygb) >> 6); - *g = Clamp(((int32_t)(y1) + ygb) >> 6); - *r = Clamp(((int32_t)(y1) + ygb) >> 6); + *b = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6)); + *g = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6)); + *r = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6)); } void I444ToARGBRow_C(const uint8_t* src_y, @@ -1963,10 +1999,10 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y, for (x = 0; x < width - 1; x += 2) { YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = clamp255(src_a[0] >> 2); + rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2)); YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); - rgb_buf[7] = clamp255(src_a[1] >> 2); + rgb_buf[7] = STATIC_CAST(uint8_t, clamp255(src_a[1] >> 2)); src_y += 2; src_u += 1; src_v += 1; @@ -1976,7 +2012,7 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y, if (width & 1) { YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = clamp255(src_a[0] >> 2); + rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2)); } } @@ -1991,7 +2027,7 @@ void I410AlphaToARGBRow_C(const uint16_t* src_y, for (x = 0; x < width; ++x) { YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = clamp255(src_a[0] >> 2); + rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2)); src_y += 1; src_u += 1; src_v += 1; @@ -2317,8 +2353,10 @@ void I422ToARGB4444Row_C(const uint8_t* src_y, b1 = b1 >> 4; g1 = g1 >> 4; r1 = r1 >> 4; - *(uint16_t*)(dst_argb4444 + 0) = b0 | (g0 << 4) | (r0 << 8) | 0xf000; - *(uint16_t*)(dst_argb4444 + 2) = b1 | (g1 << 4) | (r1 << 8) | 0xf000; + *(uint16_t*)(dst_argb4444 + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000); + *(uint16_t*)(dst_argb4444 + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | 0xf000); src_y += 2; src_u += 1; src_v += 1; @@ -2329,7 +2367,8 @@ void I422ToARGB4444Row_C(const uint8_t* src_y, b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; - *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000; + *(uint16_t*)(dst_argb4444) = + STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000); } } @@ -2355,8 +2394,10 @@ void I422ToARGB1555Row_C(const uint8_t* src_y, b1 = b1 >> 3; g1 = g1 >> 3; r1 = r1 >> 3; - *(uint16_t*)(dst_argb1555 + 0) = b0 | (g0 << 5) | (r0 << 10) | 0x8000; - *(uint16_t*)(dst_argb1555 + 2) = b1 | (g1 << 5) | (r1 << 10) | 0x8000; + *(uint16_t*)(dst_argb1555 + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000); + *(uint16_t*)(dst_argb1555 + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | 0x8000); src_y += 2; src_u += 1; src_v += 1; @@ -2367,7 +2408,8 @@ void I422ToARGB1555Row_C(const uint8_t* src_y, b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; - *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000; + *(uint16_t*)(dst_argb1555) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000); } } @@ -2393,8 +2435,10 @@ void I422ToRGB565Row_C(const uint8_t* src_y, b1 = b1 >> 3; g1 = g1 >> 2; r1 = r1 >> 3; - *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11); // for ubsan - *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11); + *(uint16_t*)(dst_rgb565 + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); + *(uint16_t*)(dst_rgb565 + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11)); src_y += 2; src_u += 1; src_v += 1; @@ -2405,7 +2449,8 @@ void I422ToRGB565Row_C(const uint8_t* src_y, b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; - *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); + *(uint16_t*)(dst_rgb565 + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); } } @@ -2520,8 +2565,12 @@ void NV12ToRGB565Row_C(const uint8_t* src_y, b1 = b1 >> 3; g1 = g1 >> 2; r1 = r1 >> 3; - *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11); - *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11); + *(uint16_t*)(dst_rgb565 + 0) = STATIC_CAST(uint16_t, b0) | + STATIC_CAST(uint16_t, g0 << 5) | + STATIC_CAST(uint16_t, r0 << 11); + *(uint16_t*)(dst_rgb565 + 2) = STATIC_CAST(uint16_t, b1) | + STATIC_CAST(uint16_t, g1 << 5) | + STATIC_CAST(uint16_t, r1 << 11); src_y += 2; src_uv += 2; dst_rgb565 += 4; // Advance 2 pixels. @@ -2531,7 +2580,9 @@ void NV12ToRGB565Row_C(const uint8_t* src_y, b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; - *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); + *(uint16_t*)(dst_rgb565) = STATIC_CAST(uint16_t, b0) | + STATIC_CAST(uint16_t, g0 << 5) | + STATIC_CAST(uint16_t, r0 << 11); } } @@ -2805,10 +2856,10 @@ void DetileToYUY2_C(const uint8_t* src_y, // in 80 byte blocks representing 64 pixels each. The first 16 bytes of the // block contain all of the lower 2 bits of each pixel packed together, and the // next 64 bytes represent all the upper 8 bits of the pixel. -void UnpackMT2T_C(const uint16_t* src, uint16_t* dst, size_t size) { +void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size) { for (size_t i = 0; i < size; i += 80) { - const uint8_t* src_lower_bits = (uint8_t*)src; - const uint8_t* src_upper_bits = ((uint8_t*)src) + 16; + const uint8_t* src_lower_bits = src; + const uint8_t* src_upper_bits = src + 16; for (int j = 0; j < 16; j++) { uint8_t lower_bits = src_lower_bits[j]; @@ -2824,7 +2875,7 @@ void UnpackMT2T_C(const uint16_t* src, uint16_t* dst, size_t size) { (uint16_t)src_upper_bits[j * 4 + 3] >> 2; } - src += 40; + src += 80; } } @@ -2920,10 +2971,10 @@ void MergeAR64Row_C(const uint16_t* src_r, int shift = 16 - depth; int max = (1 << depth) - 1; for (x = 0; x < width; ++x) { - dst_ar64[0] = ClampMax(src_b[x], max) << shift; - dst_ar64[1] = ClampMax(src_g[x], max) << shift; - dst_ar64[2] = ClampMax(src_r[x], max) << shift; - dst_ar64[3] = ClampMax(src_a[x], max) << shift; + dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift); + dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift); + dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift); + dst_ar64[3] = STATIC_CAST(uint16_t, ClampMax(src_a[x], max) << shift); dst_ar64 += 4; } } @@ -2940,10 +2991,10 @@ void MergeARGB16To8Row_C(const uint16_t* src_r, int x; int shift = depth - 8; for (x = 0; x < width; ++x) { - dst_argb[0] = clamp255(src_b[x] >> shift); - dst_argb[1] = clamp255(src_g[x] >> shift); - dst_argb[2] = clamp255(src_r[x] >> shift); - dst_argb[3] = clamp255(src_a[x] >> shift); + dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift)); + dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift)); + dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift)); + dst_argb[3] = STATIC_CAST(uint8_t, clamp255(src_a[x] >> shift)); dst_argb += 4; } } @@ -2960,9 +3011,9 @@ void MergeXR64Row_C(const uint16_t* src_r, int shift = 16 - depth; int max = (1 << depth) - 1; for (x = 0; x < width; ++x) { - dst_ar64[0] = ClampMax(src_b[x], max) << shift; - dst_ar64[1] = ClampMax(src_g[x], max) << shift; - dst_ar64[2] = ClampMax(src_r[x], max) << shift; + dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift); + dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift); + dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift); dst_ar64[3] = 0xffff; dst_ar64 += 4; } @@ -2979,9 +3030,9 @@ void MergeXRGB16To8Row_C(const uint16_t* src_r, int x; int shift = depth - 8; for (x = 0; x < width; ++x) { - dst_argb[0] = clamp255(src_b[x] >> shift); - dst_argb[1] = clamp255(src_g[x] >> shift); - dst_argb[2] = clamp255(src_r[x] >> shift); + dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift)); + dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift)); + dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift)); dst_argb[3] = 0xff; dst_argb += 4; } @@ -3027,8 +3078,8 @@ void MergeUVRow_16_C(const uint16_t* src_u, assert(depth <= 16); int x; for (x = 0; x < width; ++x) { - dst_uv[0] = src_u[x] << shift; - dst_uv[1] = src_v[x] << shift; + dst_uv[0] = STATIC_CAST(uint16_t, src_u[x] << shift); + dst_uv[1] = STATIC_CAST(uint16_t, src_v[x] << shift); dst_uv += 2; } } @@ -3056,7 +3107,7 @@ void MultiplyRow_16_C(const uint16_t* src_y, int width) { int x; for (x = 0; x < width; ++x) { - dst_y[x] = src_y[x] * scale; + dst_y[x] = STATIC_CAST(uint16_t, src_y[x] * scale); } } @@ -3087,7 +3138,7 @@ void Convert16To8Row_C(const uint16_t* src_y, assert(scale <= 32768); for (x = 0; x < width; ++x) { - dst_y[x] = C16TO8(src_y[x], scale); + dst_y[x] = STATIC_CAST(uint8_t, C16TO8(src_y[x], scale)); } } @@ -3250,9 +3301,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb, uint32_t bb = src_argb1[0]; uint32_t bg = src_argb1[1]; uint32_t br = src_argb1[2]; - dst_argb[0] = BLEND(fb, bb, a); - dst_argb[1] = BLEND(fg, bg, a); - dst_argb[2] = BLEND(fr, br, a); + dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a)); + dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a)); + dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a)); dst_argb[3] = 255u; fb = src_argb[4 + 0]; @@ -3262,9 +3313,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb, bb = src_argb1[4 + 0]; bg = src_argb1[4 + 1]; br = src_argb1[4 + 2]; - dst_argb[4 + 0] = BLEND(fb, bb, a); - dst_argb[4 + 1] = BLEND(fg, bg, a); - dst_argb[4 + 2] = BLEND(fr, br, a); + dst_argb[4 + 0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a)); + dst_argb[4 + 1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a)); + dst_argb[4 + 2] = STATIC_CAST(uint8_t, BLEND(fr, br, a)); dst_argb[4 + 3] = 255u; src_argb += 8; src_argb1 += 8; @@ -3279,9 +3330,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb, uint32_t bb = src_argb1[0]; uint32_t bg = src_argb1[1]; uint32_t br = src_argb1[2]; - dst_argb[0] = BLEND(fb, bb, a); - dst_argb[1] = BLEND(fg, bg, a); - dst_argb[2] = BLEND(fr, br, a); + dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a)); + dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a)); + dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a)); dst_argb[3] = 255u; } } @@ -3326,7 +3377,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { dst_argb[0] = ATTENUATE(b, a); dst_argb[1] = ATTENUATE(g, a); dst_argb[2] = ATTENUATE(r, a); - dst_argb[3] = a; + dst_argb[3] = STATIC_CAST(uint8_t, a); b = src_argb[4]; g = src_argb[5]; r = src_argb[6]; @@ -3334,7 +3385,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { dst_argb[4] = ATTENUATE(b, a); dst_argb[5] = ATTENUATE(g, a); dst_argb[6] = ATTENUATE(r, a); - dst_argb[7] = a; + dst_argb[7] = STATIC_CAST(uint8_t, a); src_argb += 8; dst_argb += 8; } @@ -3347,7 +3398,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { dst_argb[0] = ATTENUATE(b, a); dst_argb[1] = ATTENUATE(g, a); dst_argb[2] = ATTENUATE(r, a); - dst_argb[3] = a; + dst_argb[3] = STATIC_CAST(uint8_t, a); } } #undef ATTENUATE @@ -3419,10 +3470,10 @@ void ARGBUnattenuateRow_C(const uint8_t* src_argb, const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point // Clamping should not be necessary but is free in assembly. - dst_argb[0] = UNATTENUATE(b, ia); - dst_argb[1] = UNATTENUATE(g, ia); - dst_argb[2] = UNATTENUATE(r, ia); - dst_argb[3] = a; + dst_argb[0] = STATIC_CAST(uint8_t, UNATTENUATE(b, ia)); + dst_argb[1] = STATIC_CAST(uint8_t, UNATTENUATE(g, ia)); + dst_argb[2] = STATIC_CAST(uint8_t, UNATTENUATE(r, ia)); + dst_argb[3] = STATIC_CAST(uint8_t, a); src_argb += 4; dst_argb += 4; } @@ -3456,12 +3507,20 @@ void CumulativeSumToAverageRow_C(const int32_t* tl, int i; assert(area != 0); - ooa = 1.0f / area; + ooa = 1.0f / STATIC_CAST(float, area); for (i = 0; i < count; ++i) { - dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); - dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); - dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); - dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); + dst[0] = + (uint8_t)(STATIC_CAST(float, bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * + ooa); + dst[1] = + (uint8_t)(STATIC_CAST(float, bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * + ooa); + dst[2] = + (uint8_t)(STATIC_CAST(float, bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * + ooa); + dst[3] = + (uint8_t)(STATIC_CAST(float, bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * + ooa); dst += 4; tl += 4; bl += 4; @@ -3519,7 +3578,9 @@ static void HalfRow_16To8_C(const uint16_t* src_uv, int width) { int x; for (x = 0; x < width; ++x) { - dst_uv[x] = C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale); + dst_uv[x] = STATIC_CAST( + uint8_t, + C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale)); } } @@ -3545,8 +3606,9 @@ void InterpolateRow_C(uint8_t* dst_ptr, return; } for (x = 0; x < width; ++x) { - dst_ptr[0] = - (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8; + dst_ptr[0] = STATIC_CAST( + uint8_t, + (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8); ++src_ptr; ++src_ptr1; ++dst_ptr; @@ -3575,8 +3637,9 @@ void InterpolateRow_16_C(uint16_t* dst_ptr, return; } for (x = 0; x < width; ++x) { - dst_ptr[0] = - (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8; + dst_ptr[0] = STATIC_CAST( + uint16_t, + (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8); ++src_ptr; ++src_ptr1; ++dst_ptr; @@ -3613,9 +3676,11 @@ void InterpolateRow_16To8_C(uint8_t* dst_ptr, return; } for (x = 0; x < width; ++x) { - dst_ptr[0] = C16TO8( - (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8, - scale); + dst_ptr[0] = STATIC_CAST( + uint8_t, + C16TO8( + (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8, + scale)); src_ptr += 1; src_ptr1 += 1; dst_ptr += 1; @@ -3727,10 +3792,10 @@ void ARGBPolynomialRow_C(const uint8_t* src_argb, dr += poly[14] * r3; da += poly[15] * a3; - dst_argb[0] = Clamp((int32_t)(db)); - dst_argb[1] = Clamp((int32_t)(dg)); - dst_argb[2] = Clamp((int32_t)(dr)); - dst_argb[3] = Clamp((int32_t)(da)); + dst_argb[0] = STATIC_CAST(uint8_t, Clamp((int32_t)(db))); + dst_argb[1] = STATIC_CAST(uint8_t, Clamp((int32_t)(dg))); + dst_argb[2] = STATIC_CAST(uint8_t, Clamp((int32_t)(dr))); + dst_argb[3] = STATIC_CAST(uint8_t, Clamp((int32_t)(da))); src_argb += 4; dst_argb += 4; } @@ -4302,8 +4367,9 @@ void ScaleSamples_C(const float* src, float* dst, float scale, int width) { void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) { int i; for (i = 0; i < width; ++i) { - *dst++ = - (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8; + *dst++ = STATIC_CAST( + uint16_t, + (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8); ++src; } } @@ -4463,7 +4529,9 @@ void HalfMergeUVRow_C(const uint8_t* src_u, } } +#undef STATIC_CAST + #ifdef __cplusplus } // extern "C" } // namespace libyuv -#endif +#endif
\ No newline at end of file diff --git a/source/row_neon.cc b/source/row_neon.cc index 0c6065f8..0611b9aa 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -720,9 +720,9 @@ void DetileToYUY2_NEON(const uint8_t* src_y, } #endif -void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) { - const uint16_t* src_lower_bits = src; - const uint16_t* src_upper_bits = src + 8; +void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) { + const uint8_t* src_lower_bits = src; + const uint8_t* src_upper_bits = src + 16; asm volatile( "1: \n" "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // Load 32 bytes of upper diff --git a/source/row_neon64.cc b/source/row_neon64.cc index e0a4ea19..41289fe9 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -751,9 +751,9 @@ void DetileToYUY2_NEON(const uint8_t* src_y, // Unpack MT2T into tiled P010 64 pixels at a time. See // tinyurl.com/mtk-10bit-video-format for format documentation. -void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) { - const uint16_t* src_lower_bits = src; - const uint16_t* src_upper_bits = src + 8; +void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) { + const uint8_t* src_lower_bits = src; + const uint8_t* src_upper_bits = src + 16; asm volatile( "1: \n" "ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n" diff --git a/source/scale.cc b/source/scale.cc index 16854c45..2a7e308d 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -775,9 +775,9 @@ static void ScaleAddCols2_C(int dst_width, int ix = x >> 16; x += dx; boxwidth = MIN1((x >> 16) - ix); - *dst_ptr++ = - SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >> - 16; + *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) * + scaletbl[boxwidth - minboxwidth] >> + 16); } } @@ -814,7 +814,7 @@ static void ScaleAddCols0_C(int dst_width, (void)dx; src_ptr += (x >> 16); for (i = 0; i < dst_width; ++i) { - *dst_ptr++ = src_ptr[i] * scaleval >> 16; + *dst_ptr++ = (uint8_t)(src_ptr[i] * scaleval >> 16); } } @@ -829,7 +829,7 @@ static void ScaleAddCols1_C(int dst_width, int i; x >>= 16; for (i = 0; i < dst_width; ++i) { - *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16; + *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + x) * scaleval >> 16); x += boxwidth; } } diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 07840d65..48c10896 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -58,9 +58,9 @@ static void ScaleARGBDown2(int src_width, assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. // Advance to odd row, even column. if (filtering == kFilterBilinear) { - src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4; } else { - src_argb += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 4; + src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4; } #if defined(HAS_SCALEARGBROWDOWN2_SSE2) @@ -162,7 +162,7 @@ static void ScaleARGBDown4Box(int src_width, uint8_t* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C; // Advance to odd row, even column. - src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4; (void)src_width; (void)src_height; (void)dx; @@ -214,7 +214,7 @@ static void ScaleARGBDownEven(int src_width, enum FilterMode filtering) { int j; int col_step = dx >> 16; - int row_stride = (dy >> 16) * (int64_t)src_stride; + ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride); void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride, int src_step, uint8_t* dst_argb, int dst_width) = filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; @@ -222,7 +222,7 @@ static void ScaleARGBDownEven(int src_width, (void)src_height; assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); - src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 @@ -388,7 +388,7 @@ static void ScaleARGBBilinearDown(int src_width, } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_argb + yi * (int64_t)src_stride; + const uint8_t* src = src_argb + yi * (intptr_t)src_stride; if (filtering == kFilterLinear) { ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); } else { @@ -545,7 +545,7 @@ static void ScaleARGBBilinearUp(int src_width, { int yi = y >> 16; - const uint8_t* src = src_argb + yi * (int64_t)src_stride; + const uint8_t* src = src_argb + yi * (intptr_t)src_stride; // Allocate 2 rows of ARGB. const int row_size = (dst_width * 4 + 31) & ~31; @@ -570,7 +570,7 @@ static void ScaleARGBBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_argb + yi * (int64_t)src_stride; + src = src_argb + yi * (intptr_t)src_stride; } if (yi != lasty) { ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); @@ -793,9 +793,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width, const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate. int yi = y >> 16; int uv_yi = yi >> kYShift; - const uint8_t* src_row_y = src_y + yi * (int64_t)src_stride_y; - const uint8_t* src_row_u = src_u + uv_yi * (int64_t)src_stride_u; - const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v; + const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y; + const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u; + const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v; // Allocate 2 rows of ARGB. const int row_size = (dst_width * 4 + 31) & ~31; @@ -833,9 +833,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width, y = max_y; yi = y >> 16; uv_yi = yi >> kYShift; - src_row_y = src_y + yi * (int64_t)src_stride_y; - src_row_u = src_u + uv_yi * (int64_t)src_stride_u; - src_row_v = src_v + uv_yi * (int64_t)src_stride_v; + src_row_y = src_y + yi * (intptr_t)src_stride_y; + src_row_u = src_u + uv_yi * (intptr_t)src_stride_u; + src_row_v = src_v + uv_yi * (intptr_t)src_stride_v; } if (yi != lasty) { // TODO(fbarchard): Convert the clipped region of row. @@ -926,7 +926,7 @@ static void ScaleARGBSimple(int src_width, } for (j = 0; j < dst_height; ++j) { - ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride, + ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride, dst_width, x, dx); dst_argb += dst_stride; y += dy; @@ -962,7 +962,7 @@ static void ScaleARGB(const uint8_t* src, // Negative src_height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (int64_t)src_stride; + src = src + (src_height - 1) * (intptr_t)src_stride; src_stride = -src_stride; } ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -977,7 +977,7 @@ static void ScaleARGB(const uint8_t* src, if (clip_y) { int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); - src += (clipf >> 16) * (int64_t)src_stride; + src += (clipf >> 16) * (intptr_t)src_stride; dst += clip_y * dst_stride; } @@ -1011,7 +1011,7 @@ static void ScaleARGB(const uint8_t* src, filtering = kFilterNone; if (dx == 0x10000 && dy == 0x10000) { // Straight copy. - ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4, + ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4, src_stride, dst, dst_stride, clip_width, clip_height); return; } diff --git a/source/scale_uv.cc b/source/scale_uv.cc index 8bd6b586..ecda769e 100644 --- a/source/scale_uv.cc +++ b/source/scale_uv.cc @@ -83,9 +83,9 @@ static void ScaleUVDown2(int src_width, assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. // Advance to odd row, even column. if (filtering == kFilterBilinear) { - src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2; } else { - src_uv += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 2; + src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2; } #if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3) @@ -200,7 +200,7 @@ static void ScaleUVDown4Box(int src_width, uint8_t* dst_uv, int dst_width) = ScaleUVRowDown2Box_C; // Advance to odd row, even column. - src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2; (void)src_width; (void)src_height; (void)dx; @@ -263,7 +263,7 @@ static void ScaleUVDownEven(int src_width, enum FilterMode filtering) { int j; int col_step = dx >> 16; - int row_stride = (dy >> 16) * (int64_t)src_stride; + ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride); void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride, int src_step, uint8_t* dst_uv, int dst_width) = filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C; @@ -271,7 +271,7 @@ static void ScaleUVDownEven(int src_width, (void)src_height; assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); - src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2; #if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3 @@ -429,7 +429,7 @@ static void ScaleUVBilinearDown(int src_width, } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_uv + yi * (int64_t)src_stride; + const uint8_t* src = src_uv + yi * (intptr_t)src_stride; if (filtering == kFilterLinear) { ScaleUVFilterCols(dst_uv, src, dst_width, x, dx); } else { @@ -571,7 +571,7 @@ static void ScaleUVBilinearUp(int src_width, { int yi = y >> 16; - const uint8_t* src = src_uv + yi * (int64_t)src_stride; + const uint8_t* src = src_uv + yi * (intptr_t)src_stride; // Allocate 2 rows of UV. const int row_size = (dst_width * 2 + 15) & ~15; @@ -596,7 +596,7 @@ static void ScaleUVBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_uv + yi * (int64_t)src_stride; + src = src_uv + yi * (intptr_t)src_stride; } if (yi != lasty) { ScaleUVFilterCols(rowptr, src, dst_width, x, dx); @@ -663,13 +663,13 @@ void ScaleUVLinearUp2(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv, + ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width); dst_uv += dst_stride; y += dy; } @@ -770,13 +770,13 @@ void ScaleUVLinearUp2_16(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv, + ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width); dst_uv += dst_stride; y += dy; } @@ -889,7 +889,7 @@ static void ScaleUVSimple(int src_width, } for (j = 0; j < dst_height; ++j) { - ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x, + ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x, dx); dst_uv += dst_stride; y += dy; @@ -910,7 +910,7 @@ static int UVCopy(const uint8_t* src_uv, // Negative height means invert the image. if (height < 0) { height = -height; - src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv; + src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv; src_stride_uv = -src_stride_uv; } @@ -930,7 +930,7 @@ static int UVCopy_16(const uint16_t* src_uv, // Negative height means invert the image. if (height < 0) { height = -height; - src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv; + src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv; src_stride_uv = -src_stride_uv; } @@ -968,7 +968,7 @@ static void ScaleUV(const uint8_t* src, // Negative src_height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (int64_t)src_stride; + src = src + (src_height - 1) * (intptr_t)src_stride; src_stride = -src_stride; } ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -983,7 +983,7 @@ static void ScaleUV(const uint8_t* src, if (clip_y) { int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); - src += (clipf >> 16) * (int64_t)src_stride; + src += (clipf >> 16) * (intptr_t)src_stride; dst += clip_y * dst_stride; } @@ -1024,7 +1024,7 @@ static void ScaleUV(const uint8_t* src, #ifdef HAS_UVCOPY if (dx == 0x10000 && dy == 0x10000) { // Straight copy. - UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2, + UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2, src_stride, dst, dst_stride, clip_width, clip_height); return; } @@ -1118,7 +1118,7 @@ int UVScale_16(const uint16_t* src_uv, // Negative src_height means invert the image. if (src_height < 0) { src_height = -src_height; - src_uv = src_uv + (src_height - 1) * (int64_t)src_stride_uv; + src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv; src_stride_uv = -src_stride_uv; } src_width = Abs(src_width); @@ -1126,13 +1126,13 @@ int UVScale_16(const uint16_t* src_uv, #ifdef HAS_UVCOPY if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) { if (dst_height == 1) { - UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv, + UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv, src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height); } else { dy = src_height / dst_height; - UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv, - dy * (int64_t)src_stride_uv, dst_uv, dst_stride_uv, dst_width, - dst_height); + UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv, + (int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv, + dst_width, dst_height); } return 0; diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index ad34cec7..89ff99ba 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -417,151 +417,136 @@ TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10) TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12) TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) -#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ - DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ - static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ - static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ - "SRC_SUBSAMP_X unsupported"); \ - static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ - "SRC_SUBSAMP_Y unsupported"); \ - static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ - "DST_SUBSAMP_X unsupported"); \ - static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ - "DST_SUBSAMP_Y unsupported"); \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ - const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ - const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ - const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \ - const int kPaddedHeight = \ - (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ - const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ - const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ - align_buffer_page_end( \ - src_uv, \ - 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_uv_c, \ - 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_uv_opt, \ - 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ - SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \ - for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \ - src_y_p[i] = \ - (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ - } \ - for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \ - src_uv_p[i] = \ - (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ - } \ - memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ - memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ - memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - MaskCpuFlags(disable_cpu_flags_); \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \ - DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \ - reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \ - NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \ - DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \ - reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \ - NEG kHeight); \ - } \ - if (DOY) { \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ - } \ - } \ - } \ - for (int i = 0; i < kDstHalfHeight; ++i) { \ - for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \ - EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \ - dst_uv_opt[i * 2 * kDstHalfWidth + j]); \ - } \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_uv_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_uv_opt); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_uv); \ +#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH, \ + TILE_WIDTH, TILE_HEIGHT) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \ + const int kPaddedHeight = \ + (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ + const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ + align_buffer_page_end( \ + src_uv, \ + 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_c, \ + 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_opt, \ + 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ + SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \ + for (int i = 0; \ + i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T); \ + ++i) { \ + src_y_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 * \ + SRC_BPC / (int)sizeof(SRC_T); \ + ++i) { \ + src_uv_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ + 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ + DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \ + reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \ + NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ + 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ + DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \ + reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \ + NEG kHeight); \ + } \ + if (DOY) { \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + } \ + for (int i = 0; i < kDstHalfHeight; ++i) { \ + for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \ + EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \ + dst_uv_opt[i * 2 * kDstHalfWidth + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ } -#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) - -TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1) -TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1) -TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1) -TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1) -TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1) -TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1) -TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1) -TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1) -TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32) - -// TODO (greenjustin): Test all variants. -TESTBIPLANARTOBPI(MT2T, - uint16_t, - 2, - 2, - 2, - P010, - uint16_t, - 2, - 2, - 2, - benchmark_width_, - _Opt, - +, - 0, - 1, - 10, - 16, - 32) - -#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ +#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) + +TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1) +TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1) +TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32) +TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32) + +#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ @@ -641,30 +626,28 @@ TESTBIPLANARTOBPI(MT2T, free_aligned_buffer_page_end(src_uv); \ } -#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) - -TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) +#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) + +TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) // Provide matrix wrappers for full range bt.709 #define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \ @@ -1089,8 +1072,8 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) #endif -#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \ - BPP_B, W1280, N, NEG, OFF) \ +#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ const int kWidth = W1280; \ const int kHeight = benchmark_height_; \ @@ -1143,15 +1126,15 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) free_aligned_buffer_page_end(dst_argb32_opt); \ } -#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_, _Unaligned, +, 2) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_, _Invert, -, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_, _Opt, +, 0) +#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Invert, -, 0) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Opt, +, 0) #define JNV12ToARGB(a, b, c, d, e, f, g, h) \ NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) @@ -1172,29 +1155,29 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) #define JNV12ToRGB565(a, b, c, d, e, f, g, h) \ NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) -TESTBIPLANARTOB(JNV12, 2, 2, ARGB, ARGB, 4) -TESTBIPLANARTOB(JNV21, 2, 2, ARGB, ARGB, 4) -TESTBIPLANARTOB(JNV12, 2, 2, ABGR, ABGR, 4) -TESTBIPLANARTOB(JNV21, 2, 2, ABGR, ABGR, 4) -TESTBIPLANARTOB(JNV12, 2, 2, RGB24, RGB24, 3) -TESTBIPLANARTOB(JNV21, 2, 2, RGB24, RGB24, 3) -TESTBIPLANARTOB(JNV12, 2, 2, RAW, RAW, 3) -TESTBIPLANARTOB(JNV21, 2, 2, RAW, RAW, 3) +TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3) +TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3) #ifdef LITTLE_ENDIAN_ONLY_TEST -TESTBIPLANARTOB(JNV12, 2, 2, RGB565, RGB565, 2) +TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2) #endif -TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4) -TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4) -TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4) -TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4) -TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3) -TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3) -TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3) -TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3) -TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3) +TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(NV12, 2, 2, RAW, RAW, 3) +TESTBPTOB(NV21, 2, 2, RAW, RAW, 3) +TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3) #ifdef LITTLE_ENDIAN_ONLY_TEST -TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2) +TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2) #endif #define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ @@ -1289,8 +1272,8 @@ TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1) TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2) TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1) -#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \ - SUBSAMP_Y, W1280, N, NEG, OFF) \ +#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ const int kWidth = W1280; \ const int kHeight = benchmark_height_; \ @@ -1336,25 +1319,25 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1) free_aligned_buffer_page_end(src_argb); \ } -#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 2) \ - TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) - -TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2) -TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2) -TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2) -TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2) -TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2) -TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2) -TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) -TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2) -TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2) +#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) + +TESTATOBP(ARGB, 1, 4, NV12, 2, 2) +TESTATOBP(ARGB, 1, 4, NV21, 2, 2) +TESTATOBP(ABGR, 1, 4, NV12, 2, 2) +TESTATOBP(ABGR, 1, 4, NV21, 2, 2) +TESTATOBP(RAW, 1, 3, JNV21, 2, 2) +TESTATOBP(YUY2, 2, 4, NV12, 2, 2) +TESTATOBP(UYVY, 2, 4, NV12, 2, 2) +TESTATOBP(AYUV, 1, 4, NV12, 2, 2) +TESTATOBP(AYUV, 1, 4, NV21, 2, 2) #define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \ @@ -3935,8 +3918,8 @@ TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10) TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) #endif // DISABLE_SLOW_TESTS -#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \ +#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ const int kWidth = W1280; \ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ @@ -3979,16 +3962,16 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) free_aligned_buffer_page_end(dst_argb_opt); \ } -#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - ALIGN, YALIGN, S_DEPTH) \ - TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \ - TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \ - TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ - TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH) +#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Opt, +, 0, 0, S_DEPTH) #define P010ToARGB(a, b, c, d, e, f, g, h) \ P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) @@ -4031,23 +4014,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) kFilterBilinear) #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) -TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12) -TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12) -TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16) -TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16) -TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10) +TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10) +TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12) +TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12) +TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16) +TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16) +TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10) #ifdef LITTLE_ENDIAN_ONLY_TEST -TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12) -TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12) -TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16) -TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16) -TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10) +TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10) +TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12) +TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12) +TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16) +TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16) +TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10) #endif // LITTLE_ENDIAN_ONLY_TEST #endif // DISABLE_SLOW_TESTS |