diff options
author | Frank Barchard <fbarchard@google.com> | 2015-11-02 10:24:12 -0800 |
---|---|---|
committer | Frank Barchard <fbarchard@google.com> | 2015-11-02 10:24:12 -0800 |
commit | 2c7aa0070a2944fa311367f3b5ce4c8b069f1104 (patch) | |
tree | 6880e8e291794a8e9b67b921885d9d33f39ad7b7 | |
parent | 5d97b9336922eaee34c342a00c8e370933938703 (diff) | |
download | libyuv-2c7aa0070a2944fa311367f3b5ce4c8b069f1104.tar.gz |
remove I422ToBGRA and use I422ToRGBA internally
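Removes the low-level row functions behind I420ToBGRA and I420ToRAW (I422ToBGRARow and I422ToRAWRow) and reimplements those conversions on top of the I420ToRGBA and I420ToRGB24 paths, swapping the U and V planes and using the transposed (Yvu) color matrix.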
Adds unit tests that compare a 1-step conversion against the equivalent 2-step conversion, to verify that the end-swapping versions match the direct conversions.
R=harryjin@google.com
BUG=libyuv:518
Review URL: https://codereview.chromium.org/1427993004 .
-rw-r--r-- | README.chromium | 2 | ||||
-rw-r--r-- | include/libyuv/row.h | 107 | ||||
-rw-r--r-- | include/libyuv/version.h | 2 | ||||
-rw-r--r-- | source/convert_from.cc | 216 | ||||
-rw-r--r-- | source/planar_functions.cc | 142 | ||||
-rw-r--r-- | source/row_any.cc | 10 | ||||
-rw-r--r-- | source/row_common.cc | 72 | ||||
-rw-r--r-- | source/row_gcc.cc | 172 | ||||
-rw-r--r-- | source/row_mips.cc | 64 | ||||
-rw-r--r-- | source/row_neon.cc | 61 | ||||
-rw-r--r-- | source/row_neon64.cc | 63 | ||||
-rw-r--r-- | source/row_win.cc | 149 | ||||
-rw-r--r-- | unit_test/convert_test.cc | 130 |
13 files changed, 222 insertions, 968 deletions
diff --git a/README.chromium b/README.chromium index a8c8ec6a..d6759ba7 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1527 +Version: 1528 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index f0b6f3ca..4c7fd663 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -103,7 +103,6 @@ extern "C" { #define HAS_BGRATOYROW_SSSE3 #define HAS_COPYROW_ERMS #define HAS_COPYROW_SSE2 -#define HAS_H422TOABGRROW_SSSE3 #define HAS_H422TOARGBROW_SSSE3 #define HAS_I400TOARGBROW_SSE2 #define HAS_I411TOARGBROW_SSSE3 @@ -115,8 +114,6 @@ extern "C" { #define HAS_I422TOARGB1555ROW_SSSE3 #define HAS_I422TOARGB4444ROW_SSSE3 #define HAS_I422TOARGBROW_SSSE3 -#define HAS_I422TOBGRAROW_SSSE3 -#define HAS_I422TORAWROW_SSSE3 #define HAS_I422TORGB24ROW_SSSE3 #define HAS_I422TORGB565ROW_SSSE3 #define HAS_I422TORGBAROW_SSSE3 @@ -124,7 +121,6 @@ extern "C" { #define HAS_I422TOYUY2ROW_SSE2 #define HAS_I444TOARGBROW_SSSE3 #define HAS_J400TOARGBROW_SSE2 -#define HAS_J422TOABGRROW_SSSE3 #define HAS_J422TOARGBROW_SSSE3 #define HAS_MERGEUVROW_SSE2 #define HAS_MIRRORROW_SSSE3 @@ -198,7 +194,6 @@ extern "C" { #define HAS_ARGBTOYJROW_AVX2 #define HAS_ARGBTOYROW_AVX2 #define HAS_COPYROW_AVX -#define HAS_H422TOABGRROW_AVX2 #define HAS_H422TOARGBROW_AVX2 #define HAS_I400TOARGBROW_AVX2 #if !(defined(_DEBUG) && defined(__i386__)) @@ -207,12 +202,9 @@ extern "C" { #define HAS_I422ALPHATOARGBROW_AVX2 #endif #define HAS_I422TOARGBROW_AVX2 -#define HAS_I422TOBGRAROW_AVX2 -#define HAS_I422TORAWROW_AVX2 #define HAS_I422TORGB24ROW_AVX2 #define HAS_I422TORGBAROW_AVX2 #define HAS_INTERPOLATEROW_AVX2 -#define HAS_J422TOABGRROW_AVX2 #define HAS_J422TOARGBROW_AVX2 #define HAS_MERGEUVROW_AVX2 #define HAS_MIRRORROW_AVX2 @@ -295,8 +287,6 @@ extern "C" { #define HAS_I422TOARGB1555ROW_NEON #define HAS_I422TOARGB4444ROW_NEON #define HAS_I422TOARGBROW_NEON -#define HAS_I422TOBGRAROW_NEON -#define 
HAS_I422TORAWROW_NEON #define HAS_I422TORGB24ROW_NEON #define HAS_I422TORGB565ROW_NEON #define HAS_I422TORGBAROW_NEON @@ -359,7 +349,6 @@ extern "C" { #define HAS_COPYROW_MIPS #if defined(__mips_dsp) && (__mips_dsp_rev >= 2) #define HAS_I422TOARGBROW_MIPS_DSPR2 -#define HAS_I422TOBGRAROW_MIPS_DSPR2 #define HAS_INTERPOLATEROW_MIPS_DSPR2 #define HAS_MIRRORROW_MIPS_DSPR2 #define HAS_MIRRORUVROW_MIPS_DSPR2 @@ -574,12 +563,6 @@ void I411ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToBGRARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -592,12 +575,6 @@ void I422ToRGB24Row_NEON(const uint8* src_y, uint8* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToRAWRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGB565Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1075,12 +1052,6 @@ void UYVYToARGBRow_C(const uint8* src_uyvy, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToBGRARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1093,12 +1064,6 @@ void I422ToRGB24Row_C(const uint8* src_y, uint8* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToRAWRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGB4444Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1129,12 +1094,6 @@ void I422ToARGBRow_AVX2(const uint8* 
src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToBGRARow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1249,12 +1208,6 @@ void UYVYToARGBRow_AVX2(const uint8* src_uyvy, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToBGRARow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1309,30 +1262,12 @@ void I422ToRGB24Row_AVX2(const uint8* src_y, uint8* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToRAWRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - const struct YuvConstants* yuvconstants, - int width); -void I422ToRAWRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToBGRARow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1429,12 +1364,6 @@ void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToBGRARow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* 
src_v, @@ -1489,18 +1418,6 @@ void I422ToRGB24Row_Any_AVX2(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToRAWRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToRAWRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width); void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); @@ -1615,12 +1532,6 @@ void I411ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToBGRARow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1633,12 +1544,6 @@ void I422ToRGB24Row_Any_NEON(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToRAWRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGB4444Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1686,24 +1591,12 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, 
- uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width); void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index c11017af..cd3bc860 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1527 +#define LIBYUV_VERSION 1528 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert_from.cc b/source/convert_from.cc index 436b19ca..0043b640 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -450,83 +450,13 @@ int I420ToNV21(const uint8* src_y, int src_stride_y, width, height); } -// Convert I420 to BGRA. -LIBYUV_API -int I420ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_bgra, int dst_stride_bgra, - int width, int height) { - int y; - void (*I422ToBGRARow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = I422ToBGRARow_C; - if (!src_y || !src_u || !src_v || !dst_bgra || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra; - dst_stride_bgra = -dst_stride_bgra; - } -#if defined(HAS_I422TOBGRAROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I422ToBGRARow = I422ToBGRARow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToBGRARow = I422ToBGRARow_SSSE3; - } - } -#endif -#if defined(HAS_I422TOBGRAROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - I422ToBGRARow = I422ToBGRARow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I422ToBGRARow = I422ToBGRARow_AVX2; - } - } -#endif -#if defined(HAS_I422TOBGRAROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I422ToBGRARow = I422ToBGRARow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToBGRARow = I422ToBGRARow_NEON; - } - } -#endif -#if defined(HAS_I422TOBGRAROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) { - I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2; - } -#endif - - for (y = 0; y < height; ++y) { - I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvIConstants, width); - dst_bgra += dst_stride_bgra; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to RGBA. 
-LIBYUV_API -int I420ToRGBA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height) { +// Convert I422 to RGBA with matrix +static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_rgba, int dst_stride_rgba, + const struct YuvConstants* yuvconstants, + int width, int height) { int y; void (*I422ToRGBARow)(const uint8* y_buf, const uint8* u_buf, @@ -568,9 +498,18 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TORGBAROW_MIPS_DSPR2) + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && + IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && + IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && + IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && + IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) { + I422ToRGBARow = I422ToRGBARow_MIPS_DSPR2; + } +#endif for (y = 0; y < height; ++y) { - I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvIConstants, width); + I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width); dst_rgba += dst_stride_rgba; src_y += src_stride_y; if (y & 1) { @@ -581,13 +520,43 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y, return 0; } -// Convert I420 to RGB24. +// Convert I420 to RGBA. 
LIBYUV_API -int I420ToRGB24(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height) { +int I420ToRGBA(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_rgba, int dst_stride_rgba, + int width, int height) { + return I420ToRGBAMatrix(src_y, src_stride_y, + src_u, src_stride_u, + src_v, src_stride_v, + dst_rgba, dst_stride_rgba, + &kYuvIConstants, + width, height); +} + +// Convert I420 to BGRA. +LIBYUV_API +int I420ToBGRA(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_bgra, int dst_stride_bgra, + int width, int height) { + return I420ToRGBAMatrix(src_y, src_stride_y, + src_v, src_stride_v, // Swap U and V + src_u, src_stride_u, + dst_bgra, dst_stride_bgra, + &kYvuIConstants, // Use Yvu matrix + width, height); +} + +// Convert I420 to RGB24 with matrix +static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_rgb24, int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, int height) { int y; void (*I422ToRGB24Row)(const uint8* y_buf, const uint8* u_buf, @@ -631,7 +600,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, &kYuvIConstants, width); + I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; if (y & 1) { @@ -642,65 +611,34 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y, return 0; } -// Convert I420 to RAW. +// Convert I420 to RGB24. 
LIBYUV_API -int I420ToRAW(const uint8* src_y, int src_stride_y, +int I420ToRGB24(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, - uint8* dst_raw, int dst_stride_raw, + uint8* dst_rgb24, int dst_stride_rgb24, int width, int height) { - int y; - void (*I422ToRAWRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = I422ToRAWRow_C; - if (!src_y || !src_u || !src_v || !dst_raw || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_raw = dst_raw + (height - 1) * dst_stride_raw; - dst_stride_raw = -dst_stride_raw; - } -#if defined(HAS_I422TORAWROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I422ToRAWRow = I422ToRAWRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToRAWRow = I422ToRAWRow_SSSE3; - } - } -#endif -#if defined(HAS_I422TORAWROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - I422ToRAWRow = I422ToRAWRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I422ToRAWRow = I422ToRAWRow_AVX2; - } - } -#endif -#if defined(HAS_I422TORAWROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I422ToRAWRow = I422ToRAWRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToRAWRow = I422ToRAWRow_NEON; - } - } -#endif + return I420ToRGB24Matrix(src_y, src_stride_y, + src_u, src_stride_u, + src_v, src_stride_v, + dst_rgb24, dst_stride_rgb24, + &kYuvIConstants, + width, height); +} - for (y = 0; y < height; ++y) { - I422ToRAWRow(src_y, src_u, src_v, dst_raw, &kYuvIConstants, width); - dst_raw += dst_stride_raw; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; +// Convert I420 to RAW. 
+LIBYUV_API +int I420ToRAW(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_raw, int dst_stride_raw, + int width, int height) { + return I420ToRGB24Matrix(src_y, src_stride_y, + src_v, src_stride_v, // Swap U and V + src_u, src_stride_u, + dst_raw, dst_stride_raw, + &kYvuIConstants, // Use Yvu matrix + width, height); } // Convert I420 to ARGB1555. diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 33b77faf..00b9287a 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -764,78 +764,67 @@ int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0, } return 0; } - -// Convert I422 to BGRA. -LIBYUV_API -int I422ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_bgra, int dst_stride_bgra, - int width, int height) { +// Convert I422 to RGBA with matrix +static int I422ToRGBAMatrix(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_rgba, int dst_stride_rgba, + const struct YuvConstants* yuvconstants, + int width, int height) { int y; - void (*I422ToBGRARow)(const uint8* y_buf, + void (*I422ToRGBARow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, const struct YuvConstants* yuvconstants, - int width) = I422ToBGRARow_C; - if (!src_y || !src_u || !src_v || - !dst_bgra || + int width) = I422ToRGBARow_C; + if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra; - dst_stride_bgra = -dst_stride_bgra; - } - // Coalesce rows. 
- if (src_stride_y == width && - src_stride_u * 2 == width && - src_stride_v * 2 == width && - dst_stride_bgra == width * 4) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0; + dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; + dst_stride_rgba = -dst_stride_rgba; } -#if defined(HAS_I422TOBGRAROW_SSSE3) +#if defined(HAS_I422TORGBAROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - I422ToBGRARow = I422ToBGRARow_Any_SSSE3; + I422ToRGBARow = I422ToRGBARow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - I422ToBGRARow = I422ToBGRARow_SSSE3; + I422ToRGBARow = I422ToRGBARow_SSSE3; } } #endif -#if defined(HAS_I422TOBGRAROW_AVX2) +#if defined(HAS_I422TORGBAROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - I422ToBGRARow = I422ToBGRARow_Any_AVX2; + I422ToRGBARow = I422ToRGBARow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - I422ToBGRARow = I422ToBGRARow_AVX2; + I422ToRGBARow = I422ToRGBARow_AVX2; } } #endif -#if defined(HAS_I422TOBGRAROW_NEON) +#if defined(HAS_I422TORGBAROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - I422ToBGRARow = I422ToBGRARow_Any_NEON; + I422ToRGBARow = I422ToRGBARow_Any_NEON; if (IS_ALIGNED(width, 8)) { - I422ToBGRARow = I422ToBGRARow_NEON; + I422ToRGBARow = I422ToRGBARow_NEON; } } #endif -#if defined(HAS_I422TOBGRAROW_MIPS_DSPR2) +#if defined(HAS_I422TORGBAROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) { - I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2; + IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) { + I422ToRGBARow = I422ToRGBARow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvIConstants, width); - dst_bgra += dst_stride_bgra; + I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width); + dst_rgba += 
dst_stride_rgba; src_y += src_stride_y; src_u += src_stride_u; src_v += src_stride_v; @@ -850,66 +839,27 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y, const uint8* src_v, int src_stride_v, uint8* dst_rgba, int dst_stride_rgba, int width, int height) { - int y; - void (*I422ToRGBARow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = I422ToRGBARow_C; - if (!src_y || !src_u || !src_v || - !dst_rgba || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; - dst_stride_rgba = -dst_stride_rgba; - } - // Coalesce rows. - if (src_stride_y == width && - src_stride_u * 2 == width && - src_stride_v * 2 == width && - dst_stride_rgba == width * 4) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0; - } -#if defined(HAS_I422TORGBAROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I422ToRGBARow = I422ToRGBARow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToRGBARow = I422ToRGBARow_NEON; - } - } -#endif -#if defined(HAS_I422TORGBAROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I422ToRGBARow = I422ToRGBARow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToRGBARow = I422ToRGBARow_SSSE3; - } - } -#endif -#if defined(HAS_I422TORGBAROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - I422ToRGBARow = I422ToRGBARow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I422ToRGBARow = I422ToRGBARow_AVX2; - } - } -#endif + return I422ToRGBAMatrix(src_y, src_stride_y, + src_u, src_stride_u, + src_v, src_stride_v, + dst_rgba, dst_stride_rgba, + &kYuvIConstants, + width, height); +} - for (y = 0; y < height; ++y) { - I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvIConstants, width); - dst_rgba += dst_stride_rgba; - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - } - return 0; +// Convert 
I422 to BGRA. +LIBYUV_API +int I422ToBGRA(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_bgra, int dst_stride_bgra, + int width, int height) { + return I422ToRGBAMatrix(src_y, src_stride_y, + src_v, src_stride_v, // Swap U and V + src_u, src_stride_u, + dst_bgra, dst_stride_bgra, + &kYvuIConstants, // Use Yvu matrix + width, height); } // Convert NV12 to RGB565. diff --git a/source/row_any.cc b/source/row_any.cc index 098e3b05..c6dc7fc1 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -115,26 +115,18 @@ ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) #ifdef HAS_I444TOARGBROW_SSSE3 ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) -ANY31C(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7) ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) -ANY31C(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7) #endif // HAS_I444TOARGBROW_SSSE3 #ifdef HAS_I422TORGB24ROW_AVX2 ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) #endif -#ifdef HAS_I422TORAWROW_AVX2 -ANY31C(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15) -#endif #ifdef HAS_I422TOARGBROW_AVX2 ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) #endif -#ifdef HAS_I422TOBGRAROW_AVX2 -ANY31C(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15) -#endif #ifdef HAS_I422TORGBAROW_AVX2 ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) #endif @@ -157,10 +149,8 @@ ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 
7) ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) -ANY31C(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7) ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) -ANY31C(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7) ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) diff --git a/source/row_common.cc b/source/row_common.cc index 2f989bbc..0587e3ff 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1443,29 +1443,6 @@ void I422ToRGB24Row_C(const uint8* src_y, } } -void I422ToRAWRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 5, rgb_buf + 4, rgb_buf + 3, yuvconstants); - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 6; // Advance 2 pixels. 
- } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); - } -} - void I422ToARGB4444Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1752,32 +1729,6 @@ void UYVYToARGBRow_C(const uint8* src_uyvy, } } -void I422ToBGRARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 3, rgb_buf + 2, rgb_buf + 1, yuvconstants); - rgb_buf[0] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 7, rgb_buf + 6, rgb_buf + 5, yuvconstants); - rgb_buf[4] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 3, rgb_buf + 2, rgb_buf + 1, yuvconstants); - rgb_buf[0] = 255; - } -} - void I422ToRGBARow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -2629,29 +2580,6 @@ void I422ToRGB24Row_AVX2(const uint8* src_y, } #endif -#if defined(HAS_I422TORAWROW_AVX2) -void I422ToRAWRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - const struct YuvConstants* yuvconstants, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); - // TODO(fbarchard): ARGBToRAWRow_AVX2 - ARGBToRAWRow_SSSE3(row, dst_raw, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_raw += twidth * 3; - width -= twidth; - } -} -#endif - #if defined(HAS_NV12TORGB565ROW_AVX2) void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv, diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 6efae28f..fa485f1f 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1663,50 +1663,6 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, ); } -void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_raw, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n" - "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n" - "sub %[u_buf],%[v_buf] \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB(yuvconstants) - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm2,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "palignr $0xc,%%xmm0,%%xmm1 \n" - "movq %%xmm0," MEMACCESS([dst_raw]) " \n" - "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n" - "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n" - "subl $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_raw]"+r"(dst_raw), // %[dst_raw] -#if defined(__i386__) && defined(__pic__) - [width]"+m"(width) // %[width] -#else - [width]"+rm"(width) // %[width] -#endif - : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] - [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), - [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); -} - void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, 
const uint8* u_buf, const uint8* v_buf, @@ -1892,60 +1848,6 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, ); } -void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_bgra, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB(yuvconstants) - STOREBGRA - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] - [width]"+rm"(width) // %[width] - : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); -} - -void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_abgr, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB(yuvconstants) - STOREABGR - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] - [width]"+rm"(width) // %[width] - : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); -} - void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -2088,48 +1990,6 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \ "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n" -#if defined(HAS_I422TOBGRAROW_AVX2) -// 16 pixels -// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). 
-void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_bgra, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - LABELALIGN - "1: \n" - READYUV422_AVX2 - YUVTORGB_AVX2(yuvconstants) - - // Step 3: Weave into BGRA - "vpunpcklbw %%ymm0,%%ymm1,%%ymm1 \n" // GB - "vpermq $0xd8,%%ymm1,%%ymm1 \n" - "vpunpcklbw %%ymm2,%%ymm5,%%ymm2 \n" // AR - "vpermq $0xd8,%%ymm2,%%ymm2 \n" - "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n" // ARGB first 8 pixels - "vpunpckhwd %%ymm1,%%ymm2,%%ymm2 \n" // ARGB next 8 pixels - "vmovdqu %%ymm0," MEMACCESS([dst_bgra]) "\n" - "vmovdqu %%ymm2," MEMACCESS2(0x20,[dst_bgra]) "\n" - "lea " MEMLEA(0x40,[dst_bgra]) ",%[dst_bgra] \n" - "sub $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] - [width]"+rm"(width) // %[width] - : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); -} -#endif // HAS_I422TOBGRAROW_AVX2 - #if defined(HAS_I422TOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). @@ -2199,38 +2059,6 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, } #endif // HAS_I422ALPHATOARGBROW_AVX2 -#if defined(HAS_I422TOABGRROW_AVX2) -// 16 pixels -// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). 
-void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_abgr, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - LABELALIGN - "1: \n" - READYUV422_AVX2 - YUVTORGB_AVX2(yuvconstants) - STOREABGR_AVX2 - "sub $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] - [width]"+rm"(width) // %[width] - : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); -} -#endif // HAS_I422TOABGRROW_AVX2 - #if defined(HAS_I422TORGBAROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). diff --git a/source/row_mips.cc b/source/row_mips.cc index 182979f6..d12cf6ab 100644 --- a/source/row_mips.cc +++ b/source/row_mips.cc @@ -715,70 +715,6 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, ); } -void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 | - "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25| - "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52| - "repl.ph $s3, 102 \n" // |VR|VR| = |102|102| - "repl.ph $s4, 16 \n" // |0|16|0|16| - "repl.ph $s5, 128 \n" // |128|128| - "lui $s6, 0xff \n" - "ori $s6, 0xff \n" // |00|ff|00|ff| - - "1: \n" - YUVTORGB - // Arranging into bgra format - "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1| - "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0| - "precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0| - "precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0| - - "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0| - "addiu 
%[width], -4 \n" - "addiu %[y_buf], 4 \n" - "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0| - "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0| - "sll $t1, $t1, 8 \n" // |R1|0 |R0|0 | - "sll $t2, $t2, 8 \n" // |r1|0 |r0|0 | - "or $t1, $t1, $s6 \n" // |R1|ff|R0|ff| - "or $t2, $t2, $s6 \n" // |r1|ff|r0|ff| - "precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff| - "precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff| - "sll $t1, $t1, 16 \n" - "sll $t2, $t2, 16 \n" - "packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff| - "packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff| -// Store results. - "sw $t2, 0(%[rgb_buf]) \n" - "sw $t0, 4(%[rgb_buf]) \n" - "sw $t1, 8(%[rgb_buf]) \n" - "sw $t3, 12(%[rgb_buf]) \n" - "bnez %[width], 1b \n" - " addiu %[rgb_buf], 16 \n" - "2: \n" - ".set pop \n" - :[y_buf] "+r" (y_buf), - [u_buf] "+r" (u_buf), - [v_buf] "+r" (v_buf), - [width] "+r" (width), - [rgb_buf] "+r" (rgb_buf) - : - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9", - "s0", "s1", "s2", "s3", - "s4", "s5", "s6" - ); -} - // Bilinear filter 8x2 -> 8x1 void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, diff --git a/source/row_neon.cc b/source/row_neon.cc index e2a6744d..b7a3be01 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -224,37 +224,6 @@ void I411ToARGBRow_NEON(const uint8* src_y, ); } -void I422ToBGRARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - YUVTORGB_SETUP - "1: \n" - READYUV422 - YUVTORGB - "subs %4, %4, #8 \n" - "vswp.u8 d20, d22 \n" - "vmov.u8 d19, #255 \n" - MEMACCESS(3) - "vst4.8 {d19, d20, d21, d22}, [%3]! 
\n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_bgra), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - void I422ToRGBARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -314,36 +283,6 @@ void I422ToRGB24Row_NEON(const uint8* src_y, ); } -void I422ToRAWRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - YUVTORGB_SETUP - "1: \n" - READYUV422 - YUVTORGB - "subs %4, %4, #8 \n" - "vswp.u8 d20, d22 \n" - MEMACCESS(3) - "vst3.8 {d20, d21, d22}, [%3]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_raw), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - #define ARGBTORGB565 \ "vshr.u8 d20, d20, #3 \n" /* B */ \ "vshr.u8 d21, d21, #2 \n" /* G */ \ diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 08e6e2e1..66dea24c 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -233,38 +233,6 @@ void I411ToARGBRow_NEON(const uint8* src_y, } #endif // HAS_I411TOARGBROW_NEON -#ifdef HAS_I422TOBGRAROW_NEON -void I422ToBGRARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - YUVTORGB_SETUP - "1: \n" - READYUV422 - YUVTORGB(v21, v22, v23) - "subs %w4, %w4, #8 \n" - "movi v20.8b, #255 \n" /* A */ - MEMACCESS(3) - "st4 
{v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" - "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_bgra), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", - "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" - ); -} -#endif // HAS_I422TOBGRAROW_NEON - #ifdef HAS_I422TORGBAROW_NEON void I422ToRGBARow_NEON(const uint8* src_y, const uint8* src_u, @@ -328,37 +296,6 @@ void I422ToRGB24Row_NEON(const uint8* src_y, } #endif // HAS_I422TORGB24ROW_NEON -#ifdef HAS_I422TORAWROW_NEON -void I422ToRAWRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - YUVTORGB_SETUP - "1: \n" - READYUV422 - YUVTORGB(v20, v21, v22) - "subs %w4, %w4, #8 \n" - MEMACCESS(3) - "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" - "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_raw), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", - "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" - ); -} -#endif // HAS_I422TORAWROW_NEON - #define ARGBTORGB565 \ "shll v0.8h, v22.8b, #8 \n" /* R */ \ "shll v20.8h, v20.8b, #8 \n" /* B */ \ diff --git a/source/row_win.cc b/source/row_win.cc index fbd76681..f80c17a2 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -2354,48 +2354,6 @@ void UYVYToARGBRow_AVX2(const uint8* src_uyvy, } } - -#ifdef HAS_I422TOBGRAROW_AVX2 -// 16 pixels -// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). 
-// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3. -__declspec(naked) -void I422ToBGRARow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - __asm { - push esi - push edi - push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U - mov edi, [esp + 12 + 12] // V - mov edx, [esp + 12 + 16] // abgr - mov ebx, [esp + 12 + 20] // yuvconstants - mov ecx, [esp + 12 + 24] // width - sub edi, esi - vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha - - convertloop: - READYUV422_AVX2 - YUVTORGB_AVX2(ebx) - STOREBGRA_AVX2 - - sub ecx, 16 - jg convertloop - - pop ebx - pop edi - pop esi - vzeroupper - ret - } -} -#endif // HAS_I422TOBGRAROW_AVX2 - #ifdef HAS_I422TORGBAROW_AVX2 // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). @@ -2749,44 +2707,6 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf, } } -// 8 pixels. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes). -__declspec(naked) -void I422ToRAWRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_raw, - const struct YuvConstants* yuvconstants, - int width) { - __asm { - push esi - push edi - push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U - mov edi, [esp + 12 + 12] // V - mov edx, [esp + 12 + 16] // argb - mov ebx, [esp + 12 + 20] // yuvconstants - mov ecx, [esp + 12 + 24] // width - sub edi, esi - movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0 - movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW - - convertloop: - READYUV422 - YUVTORGB(ebx) - STORERAW - - sub ecx, 8 - jg convertloop - - pop ebx - pop edi - pop esi - ret - } -} - // 8 pixels // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). 
__declspec(naked) @@ -3066,75 +2986,6 @@ void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, } __declspec(naked) -void I422ToBGRARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_bgra, - const struct YuvConstants* yuvconstants, - int width) { - __asm { - push esi - push edi - push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U - mov edi, [esp + 12 + 12] // V - mov edx, [esp + 12 + 16] // argb - mov ebx, [esp + 12 + 20] // yuvconstants - mov ecx, [esp + 12 + 24] // width - sub edi, esi - - convertloop: - READYUV422 - YUVTORGB(ebx) - STOREBGRA - - sub ecx, 8 - jg convertloop - - pop ebx - pop edi - pop esi - ret - } -} - -__declspec(naked) -void I422ToABGRRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_abgr, - const struct YuvConstants* yuvconstants, - int width) { - __asm { - push esi - push edi - push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U - mov edi, [esp + 12 + 12] // V - mov edx, [esp + 12 + 16] // argb - mov ebx, [esp + 12 + 20] // yuvconstants - mov ecx, [esp + 12 + 24] // width - sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - - convertloop: - READYUV422 - YUVTORGB(ebx) - STOREABGR - - sub ecx, 8 - jg convertloop - - pop ebx - pop edi - pop esi - ret - } -} - -__declspec(naked) void I422ToRGBARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 8ab007be..8ca05a19 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -1691,39 +1691,103 @@ TEST_F(LibYUVConvertTest, NAME) { \ TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12) TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12) -TEST_F(LibYUVConvertTest, TestI444ToABGRvsARGB) { - align_buffer_64(src_y, benchmark_width_ * benchmark_height_); - align_buffer_64(src_u, benchmark_width_ * benchmark_height_); - align_buffer_64(src_v, benchmark_width_ * benchmark_height_); - 
align_buffer_64(dst_argb, benchmark_width_ * benchmark_height_ * 4); - align_buffer_64(dst_abgr, benchmark_width_ * benchmark_height_ * 4); - MemRandomize(src_y, benchmark_width_ * benchmark_height_); - MemRandomize(src_u, benchmark_width_ * benchmark_height_); - MemRandomize(src_v, benchmark_width_ * benchmark_height_); - MemRandomize(dst_argb, benchmark_width_ * benchmark_height_ * 4); - MemRandomize(dst_abgr, benchmark_width_ * benchmark_height_ * 4); - libyuv::I444ToARGB(src_y, benchmark_width_, - src_u, benchmark_width_, - src_v, benchmark_width_, - dst_argb, benchmark_width_ * 4, - benchmark_width_, benchmark_height_); - libyuv::I444ToABGR(src_y, benchmark_width_, - src_u, benchmark_width_, - src_v, benchmark_width_, - dst_abgr, benchmark_width_ * 4, - benchmark_width_, benchmark_height_); - // swap in place. - libyuv::ABGRToARGB(dst_abgr, benchmark_width_ * 4, - dst_abgr, benchmark_width_ * 4, - benchmark_width_, benchmark_height_); - for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) { - EXPECT_EQ(dst_abgr[i], dst_argb[i]); - } - free_aligned_buffer_64(src_y); - free_aligned_buffer_64(src_u); - free_aligned_buffer_64(src_v); - free_aligned_buffer_64(dst_argb); - free_aligned_buffer_64(dst_abgr); +#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + W1280, N, NEG, OFF, FMT_C, BPP_C) \ +TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \ + const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ + const int kHeight = benchmark_height_; \ + const int kStrideB = kWidth * BPP_B; \ + const int kSizeUV = \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_64(src_y, kWidth * kHeight + OFF); \ + align_buffer_64(src_u, kSizeUV + OFF); \ + align_buffer_64(src_v, kSizeUV + OFF); \ + align_buffer_64(dst_argb_b, kStrideB * kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ + src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ + dst_argb_b + OFF, kStrideB, \ + kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + /* Convert to a 3rd format in 1 step and 2 steps and compare */ \ + const int kStrideC = kWidth * BPP_C; \ + align_buffer_64(dst_argb_c, kStrideC * kHeight + OFF); \ + align_buffer_64(dst_argb_bc, kStrideC * kHeight + OFF); \ + memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ + memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ + FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, \ + src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ + dst_argb_c + OFF, kStrideC, \ + kWidth, NEG kHeight); \ + /* Convert B to C */ \ + FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, \ + dst_argb_bc + OFF, kStrideC, \ + kWidth, kHeight); \ + for (int i = 0; i < kStrideC * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \ + } \ + free_aligned_buffer_64(src_y); \ + free_aligned_buffer_64(src_u); \ + free_aligned_buffer_64(src_v); \ + free_aligned_buffer_64(dst_argb_b); \ + free_aligned_buffer_64(dst_argb_c); \ + free_aligned_buffer_64(dst_argb_bc); \ } +#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, 
SUBSAMP_Y, FMT_B, BPP_B, \ + FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + benchmark_width_ - 4, _Any, +, 0, FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + benchmark_width_, _Unaligned, +, 1, FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \ + TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) + +TESTPLANARTOE(I420, 2, 2, ARGB, 4, ABGR, 4) +TESTPLANARTOE(J420, 2, 2, ARGB, 4, ARGB, 4) +TESTPLANARTOE(J420, 2, 2, ABGR, 4, ARGB, 4) +TESTPLANARTOE(H420, 2, 2, ARGB, 4, ARGB, 4) +TESTPLANARTOE(H420, 2, 2, ABGR, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, BGRA, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, ABGR, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RGBA, 4, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, RGB24, 3, ARGB, 4) +// TESTPLANARTOE(I420, 2, 2, RGB24, 3, RAW, 3) +TESTPLANARTOE(I420, 2, 2, ARGB, 4, RAW, 3) +TESTPLANARTOE(I420, 2, 2, RAW, 3, ARGB, 4) +TESTPLANARTOE(I420, 2, 2, ARGB, 4, RGB565, 2) +TESTPLANARTOE(I420, 2, 2, ARGB, 4, ARGB1555, 2) +TESTPLANARTOE(I420, 2, 2, ARGB, 4, ARGB4444, 2) +TESTPLANARTOE(I422, 2, 1, ARGB, 4, ARGB, 4) +TESTPLANARTOE(J422, 2, 1, ARGB, 4, ARGB, 4) +TESTPLANARTOE(J422, 2, 1, ABGR, 4, ARGB, 4) +TESTPLANARTOE(H422, 2, 1, ARGB, 4, ARGB, 4) +TESTPLANARTOE(H422, 2, 1, ABGR, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, BGRA, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, ABGR, 4, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, RGBA, 4, ARGB, 4) +TESTPLANARTOE(I411, 4, 1, ARGB, 4, ARGB, 4) +TESTPLANARTOE(I444, 1, 1, ARGB, 4, ARGB, 4) +TESTPLANARTOE(J444, 1, 1, ARGB, 4, ARGB, 4) +TESTPLANARTOE(I444, 1, 1, ABGR, 4, ARGB, 4) +// TESTPLANARTOE(I420, 2, 2, YUY2, 2, ARGB, 4) +// TESTPLANARTOE(I420, 2, 2, UYVY, 2, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, YUY2, 2, ARGB, 4) +TESTPLANARTOE(I422, 2, 1, UYVY, 2, ARGB, 4) +// TESTPLANARTOE(I420, 2, 2, ARGB, 4, I400, 1) +// 
TESTPLANARTOE(J420, 2, 2, ARGB, 4, J400, 1) + } // namespace libyuv |