diff options
author | Frank Barchard <fbarchard@google.com> | 2022-09-16 11:12:39 -0700 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2022-09-16 19:46:47 +0000 |
commit | f71c83552d373f0ff41833b17e2880632d8561d7 (patch) | |
tree | 09088188086a6b03d07a5ebaa8edf01658466ad8 | |
parent | 3e38ce50589d9319badc0501f96d6c5b2b177472 (diff) | |
download | libyuv-f71c83552d373f0ff41833b17e2880632d8561d7.tar.gz |
I420ToRGB24MatrixFilter function added
- Implemented as 3 steps: Upsample UV to 4:4:4, I444ToARGB, ARGBToRGB24
- Fix some build warnings for missing prototypes.
Pixel 4
I420ToRGB24_Opt (743 ms)
I420ToRGB24Filter_Opt (1331 ms)
Windows with skylake xeon:
x86 32 bit
I420ToRGB24_Opt (387 ms)
I420ToRGB24Filter_Opt (571 ms)
x64 64 bit
I420ToRGB24_Opt (384 ms)
I420ToRGB24Filter_Opt (582 ms)
Bug: libyuv:938, libyuv:830
Change-Id: Ie27f70816ec084437014f8a1c630ae011ee2348c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3900298
Reviewed-by: Wan-Teh Chang <wtc@google.com>
-rw-r--r-- | README.chromium | 2 | ||||
-rw-r--r-- | include/libyuv/convert_argb.h | 15 | ||||
-rw-r--r-- | include/libyuv/row.h | 26 | ||||
-rw-r--r-- | include/libyuv/version.h | 2 | ||||
-rw-r--r-- | source/convert.cc | 72 | ||||
-rw-r--r-- | source/convert_argb.cc | 387 | ||||
-rw-r--r-- | source/convert_from_argb.cc | 14 | ||||
-rw-r--r-- | source/cpu_id.cc | 2 | ||||
-rw-r--r-- | source/planar_functions.cc | 124 | ||||
-rw-r--r-- | source/rotate_argb.cc | 3 | ||||
-rw-r--r-- | source/row_common.cc | 10 | ||||
-rw-r--r-- | source/row_gcc.cc | 26 | ||||
-rw-r--r-- | source/row_neon.cc | 54 | ||||
-rw-r--r-- | source/row_neon64.cc | 46 | ||||
-rw-r--r-- | source/scale.cc | 12 | ||||
-rw-r--r-- | source/scale_argb.cc | 20 | ||||
-rw-r--r-- | source/scale_uv.cc | 14 | ||||
-rw-r--r-- | unit_test/convert_test.cc | 8 | ||||
-rw-r--r-- | util/yuvconvert.cc | 10 |
19 files changed, 549 insertions(+), 298 deletions(-)
diff --git a/README.chromium b/README.chromium index 1e46d348..681e9cc0 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1840 +Version: 1841 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index 8c92a9c6..4025050a 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -1975,6 +1975,21 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y, int height, enum FilterMode filter); +// Convert I420 to RGB24 with matrix and UV filter mode. +LIBYUV_API +int I420ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + // Convert I010 to AR30 with matrix and UV filter mode. LIBYUV_API int I010ToAR30MatrixFilter(const uint16_t* src_y, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 70d789e2..1c14ef3b 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -1824,6 +1824,11 @@ void RGBAToUVRow_C(const uint8_t* src_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); +void RGBAToUVJRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void RGB24ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, @@ -2044,11 +2049,11 @@ void DetileSplitUVRow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_v, int width); void DetileToYUY2_C(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width); + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width); void DetileToYUY2_SSE2(const uint8_t* src_y, ptrdiff_t src_y_tile_stride, const uint8_t* src_uv, @@ -5608,6 +5613,17 @@ 
void GaussCol_F32_C(const float* src0, float* dst, int width); +void GaussRow_C(const uint32_t* src, uint16_t* dst, int width); +void GaussCol_C(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, + int width); + +void ClampFloatToZero_SSE2(const float* src_x, float* dst_y, int width); + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 02b66d4e..afe00d4b 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1840 +#define LIBYUV_VERSION 1841 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index a740d5ca..8b745e7d 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -727,7 +727,7 @@ int MM21ToYUY2(const uint8_t* src_y, } DetileToYUY2(src_y, src_stride_y, src_uv, src_stride_uv, dst_yuy2, - dst_stride_yuy2, width, height, 32); + dst_stride_yuy2, width, height, 32); return 0; } @@ -2054,8 +2054,8 @@ int RGB24ToI420(const uint8_t* src_rgb24, { #if !defined(HAS_RGB24TOYROW) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -2065,10 +2065,10 @@ int RGB24ToI420(const uint8_t* src_rgb24, RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); #else RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_rgb24 += src_stride_rgb24 * 2; dst_y += dst_stride_y * 2; @@ -2208,8 +2208,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24, { #if !defined(HAS_RGB24TOYJROW) // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -2219,10 +2219,10 @@ int RGB24ToJ420(const uint8_t* src_rgb24, RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); #else RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); - ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width); + RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width); + ARGBToUVJRow(row, row_size, dst_u, dst_v, width); ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); #endif src_rgb24 += src_stride_rgb24 * 2; dst_y += dst_stride_y * 2; @@ -2382,8 +2382,8 @@ int RAWToI420(const uint8_t* src_raw, { #if !defined(HAS_RAWTOYROW) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -2393,10 +2393,10 @@ int RAWToI420(const uint8_t* src_raw, RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); #else RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; @@ -2536,8 +2536,8 @@ int RAWToJ420(const uint8_t* src_raw, { #if !defined(HAS_RAWTOYJROW) // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -2547,10 +2547,10 @@ int RAWToJ420(const uint8_t* src_raw, RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); #else RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width); + RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); + ARGBToUVJRow(row, row_size, dst_u, dst_v, width); ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); #endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; @@ -2714,8 +2714,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) // 
Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ @@ -2725,10 +2725,10 @@ int RGB565ToI420(const uint8_t* src_rgb565, RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); #else RGB565ToARGBRow(src_rgb565, row, width); - RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + row_size, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_rgb565 += src_stride_rgb565 * 2; dst_y += dst_stride_y * 2; @@ -2894,8 +2894,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -2907,11 +2907,11 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, width); #else ARGB1555ToARGBRow(src_argb1555, row, width); - ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize, + ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + row_size, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_argb1555 += src_stride_argb1555 * 2; dst_y += dst_stride_y * 2; @@ -3089,8 +3089,8 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, { #if !(defined(HAS_ARGB4444TOYROW_NEON)) // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -3101,11 +3101,11 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, width); #else ARGB4444ToARGBRow(src_argb4444, row, width); - ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize, + ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + row_size, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_argb4444 += src_stride_argb4444 * 2; dst_y += dst_stride_y * 2; diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 1ebb107a..5500fcb2 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -7,8 +7,10 @@ * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ + #include "libyuv/convert_argb.h" +#include "libyuv/convert_from_argb.h" #include "libyuv/cpu_id.h" #ifdef HAVE_JPEG #include "libyuv/mjpeg_decoder.h" @@ -5497,22 +5499,22 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4); uint8_t* temp_u_1 = row; - uint8_t* temp_u_2 = row + kRowSize; - uint8_t* temp_v_1 = row + kRowSize * 2; - uint8_t* temp_v_2 = row + kRowSize * 3; + uint8_t* temp_u_2 = row + row_size; + uint8_t* temp_v_1 = row + row_size * 2; + uint8_t* temp_v_2 = row + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5524,8 +5526,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); } @@ -5622,10 +5624,10 @@ static int I422ToARGBMatrixLinear(const 
uint8_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2); uint8_t* temp_u = row; - uint8_t* temp_v = row + kRowSize; + uint8_t* temp_v = row + row_size; for (y = 0; y < height; ++y) { ScaleRowUp(src_u, temp_u, width); @@ -5641,6 +5643,188 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, return 0; } +static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToARGBRow_C; + void (*ARGBToRGB24Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = + ARGBToRGB24Row_C; + void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = + ScaleRowUp2_Bilinear_Any_C; + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_I444TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToARGBRow = I444ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I444TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToARGBRow = I444ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I444ToARGBRow = I444ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_I444TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToARGBRow = I444ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_I444TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444ToARGBRow = I444ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_MSA; + } + } +#endif +#if defined(HAS_I444TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I444ToARGBRow = I444ToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I444ToARGBRow = I444ToARGBRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToRGB24Row = ARGBToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToRGB24Row = ARGBToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) + if (TestCpuFlag(kCpuHasAVX512VBMI)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX512VBMI; + if (IS_ALIGNED(width, 32)) { + ARGBToRGB24Row = ARGBToRGB24Row_AVX512VBMI; + } + } +#endif +#if defined(HAS_ARGBTORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToRGB24Row = ARGBToRGB24Row_NEON; + } + } +#endif +#if 
defined(HAS_ARGBTORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToRGB24Row = ARGBToRGB24Row_MSA; + } + } +#endif +#if defined(HAS_ARGBTORGB24ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToRGB24Row = ARGBToRGB24Row_LASX; + } + } +#endif +// TODO: Fix HAS macros to match function names +#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + } +#endif + + // alloc 4 lines temp + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 8); + uint8_t* temp_u_1 = row; + uint8_t* temp_u_2 = row + row_size; + uint8_t* temp_v_1 = row + row_size * 2; + uint8_t* temp_v_2 = row + row_size * 3; + uint8_t* temp_argb = row + row_size * 4; + + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); + ARGBToRGB24Row(temp_argb, dst_rgb24, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + + for (y = 0; y < height - 2; y += 2) { + Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); + ARGBToRGB24Row(temp_argb, dst_rgb24, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + I444ToARGBRow(src_y, temp_u_2, temp_v_2, temp_argb, yuvconstants, width); 
+ ARGBToRGB24Row(temp_argb, dst_rgb24, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + if (!(height & 1)) { + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); + ARGBToRGB24Row(temp_argb, dst_rgb24, width); + } + + free_aligned_buffer_64(row); + return 0; +} + static int I010ToAR30MatrixBilinear(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, @@ -5705,22 +5889,22 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4 * sizeof(uint16_t)); uint16_t* temp_u_1 = (uint16_t*)(row); - uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; - uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; - uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; + uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; + uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; + uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ 
-5732,8 +5916,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); } @@ -5803,10 +5987,10 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); - uint16_t* temp_v = (uint16_t*)(row) + kRowSize; + uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { ScaleRowUp(src_u, temp_u, width); @@ -5885,22 +6069,22 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4 * sizeof(uint16_t)); uint16_t* temp_u_1 = (uint16_t*)(row); - uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; - uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; - uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; + uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; + uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; + uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, 
kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5912,8 +6096,8 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); } @@ -5982,10 +6166,10 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); - uint16_t* temp_v = (uint16_t*)(row) + kRowSize; + uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { ScaleRowUp(src_u, temp_u, width); @@ -6134,15 +6318,15 @@ static int I420AlphaToARGBMatrixBilinear( #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4); uint8_t* temp_u_1 = row; - uint8_t* temp_u_2 = row + kRowSize; - uint8_t* temp_v_1 = row + kRowSize * 2; - uint8_t* temp_v_2 = row + kRowSize * 3; + uint8_t* temp_u_2 = row + row_size; + uint8_t* temp_v_1 = row + row_size * 2; + uint8_t* temp_v_2 = row + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I444AlphaToARGBRow(src_y, 
temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6153,8 +6337,8 @@ static int I420AlphaToARGBMatrixBilinear( src_a += src_stride_a; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6176,8 +6360,8 @@ static int I420AlphaToARGBMatrixBilinear( } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6317,10 +6501,10 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2); uint8_t* temp_u = row; - uint8_t* temp_v = row + kRowSize; + uint8_t* temp_v = row + row_size; for (y = 0; y < height; ++y) { ScaleRowUp(src_u, temp_u, width); @@ -6445,15 +6629,15 @@ static int I010AlphaToARGBMatrixBilinear( #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4 * sizeof(uint16_t)); uint16_t* temp_u_1 = (uint16_t*)(row); - uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; - uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; - uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; + uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; + uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; + 
uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6464,8 +6648,8 @@ static int I010AlphaToARGBMatrixBilinear( src_a += src_stride_a; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6487,8 +6671,8 @@ static int I010AlphaToARGBMatrixBilinear( } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + Scale2RowUp(src_u, 0, temp_u_1, row_size, width); + Scale2RowUp(src_v, 0, temp_v_1, row_size, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6600,10 +6784,10 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); - uint16_t* temp_v = (uint16_t*)(row) + kRowSize; + uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { ScaleRowUp(src_u, temp_u, width); @@ -6684,18 +6868,18 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (2 * width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (2 
* width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_uv_1 = (uint16_t*)(row); - uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize; + uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; - Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width); + Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6706,7 +6890,7 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); } @@ -6773,8 +6957,8 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, } #endif - const int kRowSize = (2 * width + 31) & ~31; - align_buffer_64(row, kRowSize * sizeof(uint16_t)); + const int row_size = (2 * width + 31) & ~31; + align_buffer_64(row, row_size * sizeof(uint16_t)); uint16_t* temp_uv = (uint16_t*)(row); for (y = 0; y < height; ++y) { @@ -6850,18 +7034,18 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, #endif // alloc 2 lines temp - const int kRowSize = (2 * width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (2 * width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_uv_1 = (uint16_t*)(row); - uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize; + uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; - Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, 
width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width); + Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -6872,7 +7056,7 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); } @@ -6939,8 +7123,8 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, } #endif - const int kRowSize = (2 * width + 31) & ~31; - align_buffer_64(row, kRowSize * sizeof(uint16_t)); + const int row_size = (2 * width + 31) & ~31; + align_buffer_64(row, row_size * sizeof(uint16_t)); uint16_t* temp_uv = (uint16_t*)(row); for (y = 0; y < height; ++y) { @@ -7016,6 +7200,37 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y, } LIBYUV_API +int I420ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter) { + switch (filter) { + case kFilterNone: + return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + yuvconstants, width, height); + case kFilterBilinear: + case kFilterBox: + return I420ToRGB24MatrixBilinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); + case kFilterLinear: + // TODO: Implement Linear using Bilinear with Scale2RowUp stride 0 + return -1; + } + + return -1; +} + +LIBYUV_API int I010ToAR30MatrixFilter(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, 
diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 2f38a488..f7eab0c6 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -1866,7 +1866,7 @@ int ARGBToJ420(const uint8_t* src_argb, int height) { int y; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_uj, uint8_t* dst_vj, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; @@ -2238,7 +2238,7 @@ int ABGRToJ420(const uint8_t* src_abgr, int height) { int y; void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, - uint8_t* dst_uj, uint8_t* dst_vj, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ABGRToUVJRow_C; void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = ABGRToYJRow_C; @@ -2804,8 +2804,8 @@ int RAWToJNV21(const uint8_t* src_raw, uint8_t* row_vj = row_uj + ((halfwidth + 31) & ~31); #if !defined(HAS_RAWTOYJROW) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -2816,11 +2816,11 @@ int RAWToJNV21(const uint8_t* src_raw, RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); #else RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVJRow(row, kRowSize, row_uj, row_vj, width); + RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); + ARGBToUVJRow(row, row_size, row_uj, row_vj, width); MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); #endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 56fe60e4..7467c5d9 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -115,7 +115,7 @@ void CpuId(int eax, int ecx, int* cpu_info) { defined(__x86_64__)) && \ !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. -int GetXCR0() { +static int GetXCR0() { int xcr0 = 0; #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) xcr0 = (int)_xgetbv(0); // VS2010 SP1 required. 
NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 1de71dbb..45c34d30 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1035,20 +1035,20 @@ void DetileSplitUVPlane(const uint8_t* src_uv, LIBYUV_API void DetileToYUY2(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_uv, - int src_stride_uv, - uint8_t* dst_yuy2, - int dst_stride_yuy2, - int width, - int height, - int tile_height) { + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height, + int tile_height) { const ptrdiff_t src_y_tile_stride = 16 * tile_height; const ptrdiff_t src_uv_tile_stride = src_y_tile_stride / 2; int y; void (*DetileToYUY2)(const uint8_t* src_y, ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, int width) = DetileToYUY2_C; + const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, int width) = DetileToYUY2_C; assert(src_stride_y >= 0); assert(src_stride_y > 0); assert(src_stride_uv >= 0); @@ -1085,8 +1085,8 @@ void DetileToYUY2(const uint8_t* src_y, // Detile plane for (y = 0; y < height; ++y) { - DetileToYUY2(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, - dst_yuy2, width); + DetileToYUY2(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2, + width); dst_yuy2 += dst_stride_yuy2; src_y += 16; @@ -1224,18 +1224,18 @@ void MergeRGBPlane(const uint8_t* src_r, } LIBYUV_NOINLINE -void SplitARGBPlaneAlpha(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_r, - int dst_stride_r, - uint8_t* dst_g, - int dst_stride_g, - uint8_t* dst_b, - int dst_stride_b, - uint8_t* dst_a, - int dst_stride_a, - int width, - int height) { +static void SplitARGBPlaneAlpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + uint8_t* dst_a, + int dst_stride_a, + 
int width, + int height) { int y; void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, uint8_t* dst_a, int width) = @@ -1295,16 +1295,16 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb, } LIBYUV_NOINLINE -void SplitARGBPlaneOpaque(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_r, - int dst_stride_r, - uint8_t* dst_g, - int dst_stride_g, - uint8_t* dst_b, - int dst_stride_b, - int width, - int height) { +static void SplitARGBPlaneOpaque(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height) { int y; void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) = SplitXRGBRow_C; @@ -1396,18 +1396,18 @@ void SplitARGBPlane(const uint8_t* src_argb, } LIBYUV_NOINLINE -void MergeARGBPlaneAlpha(const uint8_t* src_r, - int src_stride_r, - const uint8_t* src_g, - int src_stride_g, - const uint8_t* src_b, - int src_stride_b, - const uint8_t* src_a, - int src_stride_a, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height) { +static void MergeARGBPlaneAlpha(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, const uint8_t* src_a, @@ -1458,16 +1458,16 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r, } LIBYUV_NOINLINE -void MergeARGBPlaneOpaque(const uint8_t* src_r, - int src_stride_r, - const uint8_t* src_g, - int src_stride_g, - const uint8_t* src_b, - int src_stride_b, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height) { +static void MergeARGBPlaneOpaque(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + 
int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_argb, int width) = @@ -4545,16 +4545,16 @@ static int ARGBSobelize(const uint8_t* src_argb, #endif { // 3 rows with edges before/after. - const int kRowSize = (width + kEdge + 31) & ~31; - align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); + const int row_size = (width + kEdge + 31) & ~31; + align_buffer_64(rows, row_size * 2 + (kEdge + row_size * 3 + kEdge)); uint8_t* row_sobelx = rows; - uint8_t* row_sobely = rows + kRowSize; - uint8_t* row_y = rows + kRowSize * 2; + uint8_t* row_sobely = rows + row_size; + uint8_t* row_y = rows + row_size * 2; // Convert first row. uint8_t* row_y0 = row_y + kEdge; - uint8_t* row_y1 = row_y0 + kRowSize; - uint8_t* row_y2 = row_y1 + kRowSize; + uint8_t* row_y1 = row_y0 + row_size; + uint8_t* row_y2 = row_y1 + row_size; ARGBToYJRow(src_argb, row_y0, width); row_y0[-1] = row_y0[0]; memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc index 539cf98d..28226210 100644 --- a/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -8,11 +8,12 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "libyuv/rotate.h" +#include "libyuv/rotate_argb.h" #include "libyuv/convert.h" #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" +#include "libyuv/rotate.h" #include "libyuv/row.h" #include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */ diff --git a/source/row_common.cc b/source/row_common.cc index 9d94ab28..4e1141f7 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -2749,11 +2749,11 @@ void DetileSplitUVRow_C(const uint8_t* src_uv, } void DetileToYUY2_C(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width) { + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { for (int x = 0; x < width - 15; x += 16) { for (int i = 0; i < 8; i++) { dst_yuy2[0] = src_y[0]; diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 8d0f477c..3bda4482 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -4977,19 +4977,19 @@ void DetileToYUY2_SSE2(const uint8_t* src_y, uint8_t* dst_yuy2, int width) { asm volatile( - "1: \n" - "movdqu (%0),%%xmm0 \n" // Load 16 Y - "sub $0x10,%3 \n" - "lea (%0,%4),%0 \n" - "movdqu (%1),%%xmm1 \n" // Load 8 UV - "lea (%1,%5),%1 \n" - "movdqu %%xmm0,%%xmm2 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm2 \n" - "movdqu %%xmm0,(%2) \n" - "movdqu %%xmm2,0x10(%2) \n" - "lea 0x20(%2),%2 \n" - "jg 1b \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" // Load 16 Y + "sub $0x10,%3 \n" + "lea (%0,%4),%0 \n" + "movdqu (%1),%%xmm1 \n" // Load 8 UV + "lea (%1,%5),%1 \n" + "movdqu %%xmm0,%%xmm2 \n" + "punpcklbw %%xmm1,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm2 \n" + "movdqu %%xmm0,(%2) \n" + "movdqu %%xmm2,0x10(%2) \n" + "lea 0x20(%2),%2 \n" + "jg 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_yuy2), // %2 diff --git a/source/row_neon.cc b/source/row_neon.cc index 82039e9f..3f5c5de1 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -625,20 
+625,20 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv, #if LIBYUV_USE_ST2 // Read 16 Y, 8 UV, and write 8 YUYV. void DetileToYUY2_NEON(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width) { + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { asm volatile( - "1: \n" - "vld1.8 q0, [%0], %4 \n" // Load 16 Y - "pld [%0, 1792] \n" - "vld1.8 q1, [%1], %5 \n" // Load 8 UV - "pld [%1, 1792] \n" - "subs %3, %3, #16 \n" - "vst2.8 {q0, q1}, [%2]! \n" - "bgt 1b \n" + "1: \n" + "vld1.8 q0, [%0], %4 \n" // Load 16 Y + "pld [%0, 1792] \n" + "vld1.8 q1, [%1], %5 \n" // Load 8 UV + "pld [%1, 1792] \n" + "subs %3, %3, #16 \n" + "vst2.8 {q0, q1}, [%2]! \n" + "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_yuy2), // %2 @@ -651,21 +651,21 @@ void DetileToYUY2_NEON(const uint8_t* src_y, #else // Read 16 Y, 8 UV, and write 8 YUYV. void DetileToYUY2_NEON(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width) { + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { asm volatile( - "1: \n" - "vld1.8 q0, [%0], %4 \n" // Load 16 Y - "vld1.8 q1, [%1], %5 \n" // Load 8 UV - "subs %3, %3, #16 \n" - "pld [%0, 1792] \n" - "vzip.8 q0, q1 \n" - "pld [%1, 1792] \n" - "vst1.8 {q0, q1}, [%2]! \n" - "bgt 1b \n" + "1: \n" + "vld1.8 q0, [%0], %4 \n" // Load 16 Y + "vld1.8 q1, [%1], %5 \n" // Load 8 UV + "subs %3, %3, #16 \n" + "pld [%0, 1792] \n" + "vzip.8 q0, q1 \n" + "pld [%1, 1792] \n" + "vst1.8 {q0, q1}, [%2]! 
\n" + "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_yuy2), // %2 diff --git a/source/row_neon64.cc b/source/row_neon64.cc index e166ce04..37962378 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -653,11 +653,11 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv, #if LIBYUV_USE_ST2 // Read 16 Y, 8 UV, and write 8 YUY2 void DetileToYUY2_NEON(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width) { + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { asm volatile( "1: \n" "ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys @@ -667,23 +667,23 @@ void DetileToYUY2_NEON(const uint8_t* src_y, "subs %w3, %w3, #16 \n" // store 8 YUY2 "st2 {v0.16b,v1.16b}, [%2], #32 \n" "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_yuy2), // %2 - "+r"(width) // %3 - : "r"(src_y_tile_stride), // %4 - "r"(src_uv_tile_stride) // %5 + : "+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_yuy2), // %2 + "+r"(width) // %3 + : "r"(src_y_tile_stride), // %4 + "r"(src_uv_tile_stride) // %5 : "cc", "memory", "v0", "v1" // Clobber list ); } #else // Read 16 Y, 8 UV, and write 8 YUY2 void DetileToYUY2_NEON(const uint8_t* src_y, - ptrdiff_t src_y_tile_stride, - const uint8_t* src_uv, - ptrdiff_t src_uv_tile_stride, - uint8_t* dst_yuy2, - int width) { + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { asm volatile( "1: \n" "ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys @@ -694,13 +694,13 @@ void DetileToYUY2_NEON(const uint8_t* src_y, "prfm pldl1keep, [%1, 1792] \n" "zip2 v3.16b, v0.16b, v1.16b \n" "st1 {v2.16b,v3.16b}, [%2], #32 \n" // store 8 YUY2 - "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_yuy2), // %2 - "+r"(width) // %3 - : "r"(src_y_tile_stride), // %4 - "r"(src_uv_tile_stride) // %5 + "b.gt 1b \n" + : 
"+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_yuy2), // %2 + "+r"(width) // %3 + : "r"(src_y_tile_stride), // %4 + "r"(src_uv_tile_stride) // %5 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber list ); } diff --git a/source/scale.cc b/source/scale.cc index e1335f1e..16854c45 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -1315,11 +1315,11 @@ void ScalePlaneBilinearUp(int src_width, const uint8_t* src = src_ptr + yi * (int64_t)src_stride; // Allocate 2 row buffers. - const int kRowSize = (dst_width + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width + 31) & ~31; + align_buffer_64(row, row_size * 2); uint8_t* rowptr = row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; ScaleFilterCols(rowptr, src, dst_width, x, dx); @@ -1766,11 +1766,11 @@ void ScalePlaneBilinearUp_16(int src_width, const uint16_t* src = src_ptr + yi * (int64_t)src_stride; // Allocate 2 row buffers. - const int kRowSize = (dst_width + 31) & ~31; - align_buffer_64(row, kRowSize * 4); + const int row_size = (dst_width + 31) & ~31; + align_buffer_64(row, row_size * 4); uint16_t* rowptr = (uint16_t*)row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; ScaleFilterCols(rowptr, src, dst_width, x, dx); diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 9c3acf7f..07840d65 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -155,8 +155,8 @@ static void ScaleARGBDown4Box(int src_width, int dy) { int j; // Allocate 2 rows of ARGB. 
- const int kRowSize = (dst_width * 2 * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 2 * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); int row_stride = src_stride * (dy >> 16); void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) = @@ -187,9 +187,9 @@ static void ScaleARGBDown4Box(int src_width, for (j = 0; j < dst_height; ++j) { ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); - ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize, + ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + row_size, dst_width * 2); - ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width); + ScaleARGBRowDown2(row, row_size, dst_argb, dst_width); src_argb += row_stride; dst_argb += dst_stride; } @@ -548,11 +548,11 @@ static void ScaleARGBBilinearUp(int src_width, const uint8_t* src = src_argb + yi * (int64_t)src_stride; // Allocate 2 rows of ARGB. - const int kRowSize = (dst_width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); uint8_t* rowptr = row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); @@ -798,14 +798,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width, const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v; // Allocate 2 rows of ARGB. - const int kRowSize = (dst_width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); // Allocate 1 row of ARGB for source conversion. align_buffer_64(argb_row, src_width * 4); uint8_t* rowptr = row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; // TODO(fbarchard): Convert first 2 rows of YUV to ARGB. 
diff --git a/source/scale_uv.cc b/source/scale_uv.cc index 3b3d7b8e..8bd6b586 100644 --- a/source/scale_uv.cc +++ b/source/scale_uv.cc @@ -193,8 +193,8 @@ static void ScaleUVDown4Box(int src_width, int dy) { int j; // Allocate 2 rows of UV. - const int kRowSize = (dst_width * 2 * 2 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 2 * 2 + 15) & ~15; + align_buffer_64(row, row_size * 2); int row_stride = src_stride * (dy >> 16); void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) = @@ -234,9 +234,9 @@ static void ScaleUVDown4Box(int src_width, for (j = 0; j < dst_height; ++j) { ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2); - ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + kRowSize, + ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + row_size, dst_width * 2); - ScaleUVRowDown2(row, kRowSize, dst_uv, dst_width); + ScaleUVRowDown2(row, row_size, dst_uv, dst_width); src_uv += row_stride; dst_uv += dst_stride; } @@ -574,11 +574,11 @@ static void ScaleUVBilinearUp(int src_width, const uint8_t* src = src_uv + yi * (int64_t)src_stride; // Allocate 2 rows of UV. 
- const int kRowSize = (dst_width * 2 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 2 + 15) & ~15; + align_buffer_64(row, row_size * 2); uint8_t* rowptr = row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; ScaleUVFilterCols(rowptr, src, dst_width, x, dx); diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index ea4a99ac..bb06e4ed 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -680,6 +680,9 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) #define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ kFilterBilinear) +#define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ + I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) #define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) @@ -816,6 +819,7 @@ TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1) #endif TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) #else TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) @@ -832,13 +836,13 @@ TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1) TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) -TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) @@ -1412,7 +1416,7 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2) EPP_B, STRIDE_B, HEIGHT_B) 
#else #define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ - EPP_B, STRIDE_B, HEIGHT_B, INPLACE) \ + EPP_B, STRIDE_B, HEIGHT_B) \ TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0) #endif diff --git a/util/yuvconvert.cc b/util/yuvconvert.cc index 332699e3..93b52668 100644 --- a/util/yuvconvert.cc +++ b/util/yuvconvert.cc @@ -42,9 +42,9 @@ static __inline uint32_t Abs(int32_t v) { } // Parse PYUV format. ie name.1920x800_24Hz_P420.yuv -bool ExtractResolutionFromFilename(const char* name, - int* width_ptr, - int* height_ptr) { +static bool ExtractResolutionFromFilename(const char* name, + int* width_ptr, + int* height_ptr) { // Isolate the .width_height. section of the filename by searching for a // dot or underscore followed by a digit. for (int i = 0; name[i]; ++i) { @@ -59,7 +59,7 @@ bool ExtractResolutionFromFilename(const char* name, return false; } -void PrintHelp(const char* program) { +static void PrintHelp(const char* program) { printf("%s [-options] src_argb.raw dst_yuv.raw\n", program); printf( " -s <width> <height> .... specify source resolution. " @@ -78,7 +78,7 @@ void PrintHelp(const char* program) { exit(0); } -void ParseOptions(int argc, const char* argv[]) { +static void ParseOptions(int argc, const char* argv[]) { if (argc <= 1) { PrintHelp(argv[0]); } |