diff options
Diffstat (limited to 'files/source/planar_functions.cc')
-rw-r--r-- | files/source/planar_functions.cc | 2375 |
1 files changed, 1895 insertions, 480 deletions
diff --git a/files/source/planar_functions.cc b/files/source/planar_functions.cc index 9cab230f..169d4a8f 100644 --- a/files/source/planar_functions.cc +++ b/files/source/planar_functions.cc @@ -10,6 +10,7 @@ #include "libyuv/planar_functions.h" +#include <assert.h> #include <string.h> // for memset() #include "libyuv/cpu_id.h" @@ -34,6 +35,9 @@ void CopyPlane(const uint8_t* src_y, int height) { int y; void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -80,8 +84,6 @@ void CopyPlane(const uint8_t* src_y, } } -// TODO(fbarchard): Consider support for negative height. -// TODO(fbarchard): Consider stride measured in bytes. LIBYUV_API void CopyPlane_16(const uint16_t* src_y, int src_stride_y, @@ -89,36 +91,8 @@ void CopyPlane_16(const uint16_t* src_y, int dst_stride_y, int width, int height) { - int y; - void (*CopyRow)(const uint16_t* src, uint16_t* dst, int width) = CopyRow_16_C; - // Coalesce rows. - if (src_stride_y == width && dst_stride_y == width) { - width *= height; - height = 1; - src_stride_y = dst_stride_y = 0; - } -#if defined(HAS_COPYROW_16_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) { - CopyRow = CopyRow_16_SSE2; - } -#endif -#if defined(HAS_COPYROW_16_ERMS) - if (TestCpuFlag(kCpuHasERMS)) { - CopyRow = CopyRow_16_ERMS; - } -#endif -#if defined(HAS_COPYROW_16_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { - CopyRow = CopyRow_16_NEON; - } -#endif - - // Copy plane - for (y = 0; y < height; ++y) { - CopyRow(src_y, dst_y, width); - src_y += src_stride_y; - dst_y += dst_stride_y; - } + CopyPlane((const uint8_t*)src_y, src_stride_y * 2, (uint8_t*)dst_y, + dst_stride_y * 2, width * 2, height); } // Convert a plane of 16 bit data to 8 bit @@ -134,6 +108,9 @@ void Convert16To8Plane(const uint16_t* src_y, void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale, int width) = Convert16To8Row_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -146,6 +123,14 @@ void Convert16To8Plane(const uint16_t* src_y, height = 1; src_stride_y = dst_stride_y = 0; } +#if defined(HAS_CONVERT16TO8ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Convert16To8Row = Convert16To8Row_Any_NEON; + if (IS_ALIGNED(width, 16)) { + Convert16To8Row = Convert16To8Row_NEON; + } + } +#endif #if defined(HAS_CONVERT16TO8ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { Convert16To8Row = Convert16To8Row_Any_SSSE3; @@ -184,6 +169,9 @@ void Convert8To16Plane(const uint8_t* src_y, void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale, int width) = Convert8To16Row_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -238,9 +226,12 @@ int I422Copy(const uint8_t* src_y, int width, int height) { int halfwidth = (width + 1) >> 1; - if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || + height == 0) { return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; @@ -276,7 +267,8 @@ int I444Copy(const uint8_t* src_y, int dst_stride_v, int width, int height) { - if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || + height == 0) { return -1; } // Negative height means invert the image. @@ -298,6 +290,49 @@ int I444Copy(const uint8_t* src_y, return 0; } +// Copy I210. +LIBYUV_API +int I210Copy(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height) { + int halfwidth = (width + 1) >> 1; + + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || + height == 0) { + return -1; + } + + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + if (dst_y) { + CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + // Copy UV planes. + CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height); + CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height); + return 0; +} + // Copy I400. LIBYUV_API int I400ToI400(const uint8_t* src_y, @@ -349,6 +384,54 @@ int I420ToI400(const uint8_t* src_y, return 0; } +// Copy NV12. Supports inverting. +int NV12Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + + if (!src_y || !dst_y || !src_uv || !dst_uv || width <= 0 || height == 0) { + return -1; + } + + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_uv = src_uv + (halfheight - 1) * src_stride_uv; + src_stride_y = -src_stride_y; + src_stride_uv = -src_stride_uv; + } + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, halfwidth * 2, + halfheight); + return 0; +} + +// Copy NV21. Supports inverting. +int NV21Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + return NV12Copy(src_y, src_stride_y, src_vu, src_stride_vu, dst_y, + dst_stride_y, dst_vu, dst_stride_vu, width, height); +} + // Support function for NV12 etc UV channels. // Width and height are plane sizes (typically half pixel width). LIBYUV_API @@ -363,6 +446,9 @@ void SplitUVPlane(const uint8_t* src_uv, int y; void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -410,11 +496,11 @@ void SplitUVPlane(const uint8_t* src_uv, } } #endif -#if defined(HAS_SPLITUVROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - SplitUVRow = SplitUVRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - SplitUVRow = SplitUVRow_MMI; +#if defined(HAS_SPLITUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SplitUVRow = SplitUVRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + SplitUVRow = SplitUVRow_LSX; } } #endif @@ -440,6 +526,9 @@ void MergeUVPlane(const uint8_t* src_u, int y; void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -485,11 +574,11 @@ void MergeUVPlane(const uint8_t* src_u, } } #endif -#if defined(HAS_MERGEUVROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - MergeUVRow = MergeUVRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - MergeUVRow = MergeUVRow_MMI; +#if defined(HAS_MERGEUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MergeUVRow = MergeUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + MergeUVRow = MergeUVRow_LSX; } } #endif @@ -503,6 +592,289 @@ void MergeUVPlane(const uint8_t* src_u, } } +// Support function for P010 etc UV channels. +// Width and height are plane sizes (typically half pixel width). +LIBYUV_API +void SplitUVPlane_16(const uint16_t* src_uv, + int src_stride_uv, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + int depth) { + int y; + void (*SplitUVRow_16)(const uint16_t* src_uv, uint16_t* dst_u, + uint16_t* dst_v, int depth, int width) = + SplitUVRow_16_C; + if (width <= 0 || height == 0) { + return; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_u = dst_u + (height - 1) * dst_stride_u; + dst_v = dst_v + (height - 1) * dst_stride_v; + dst_stride_u = -dst_stride_u; + dst_stride_v = -dst_stride_v; + } + // Coalesce rows. + if (src_stride_uv == width * 2 && dst_stride_u == width && + dst_stride_v == width) { + width *= height; + height = 1; + src_stride_uv = dst_stride_u = dst_stride_v = 0; + } +#if defined(HAS_SPLITUVROW_16_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + SplitUVRow_16 = SplitUVRow_16_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + SplitUVRow_16 = SplitUVRow_16_AVX2; + } + } +#endif +#if defined(HAS_SPLITUVROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + SplitUVRow_16 = SplitUVRow_16_Any_NEON; + if (IS_ALIGNED(width, 8)) { + SplitUVRow_16 = SplitUVRow_16_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + // Copy a row of UV. + SplitUVRow_16(src_uv, dst_u, dst_v, depth, width); + dst_u += dst_stride_u; + dst_v += dst_stride_v; + src_uv += src_stride_uv; + } +} + +LIBYUV_API +void MergeUVPlane_16(const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height, + int depth) { + int y; + void (*MergeUVRow_16)(const uint16_t* src_u, const uint16_t* src_v, + uint16_t* dst_uv, int depth, int width) = + MergeUVRow_16_C; + assert(depth >= 8); + assert(depth <= 16); + if (width <= 0 || height == 0) { + return; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_uv = dst_uv + (height - 1) * dst_stride_uv; + dst_stride_uv = -dst_stride_uv; + } + // Coalesce rows. + if (src_stride_u == width && src_stride_v == width && + dst_stride_uv == width * 2) { + width *= height; + height = 1; + src_stride_u = src_stride_v = dst_stride_uv = 0; + } +#if defined(HAS_MERGEUVROW_16_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeUVRow_16 = MergeUVRow_16_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + MergeUVRow_16 = MergeUVRow_16_AVX2; + } + } +#endif +#if defined(HAS_MERGEUVROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeUVRow_16 = MergeUVRow_16_Any_NEON; + if (IS_ALIGNED(width, 8)) { + MergeUVRow_16 = MergeUVRow_16_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + // Merge a row of U and V into a row of UV. + MergeUVRow_16(src_u, src_v, dst_uv, depth, width); + src_u += src_stride_u; + src_v += src_stride_v; + dst_uv += dst_stride_uv; + } +} + +// Convert plane from lsb to msb +LIBYUV_API +void ConvertToMSBPlane_16(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int width, + int height, + int depth) { + int y; + int scale = 1 << (16 - depth); + void (*MultiplyRow_16)(const uint16_t* src_y, uint16_t* dst_y, int scale, + int width) = MultiplyRow_16_C; + if (width <= 0 || height == 0) { + return; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + // Coalesce rows. + if (src_stride_y == width && dst_stride_y == width) { + width *= height; + height = 1; + src_stride_y = dst_stride_y = 0; + } + +#if defined(HAS_MULTIPLYROW_16_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MultiplyRow_16 = MultiplyRow_16_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + MultiplyRow_16 = MultiplyRow_16_AVX2; + } + } +#endif +#if defined(HAS_MULTIPLYROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MultiplyRow_16 = MultiplyRow_16_Any_NEON; + if (IS_ALIGNED(width, 16)) { + MultiplyRow_16 = MultiplyRow_16_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + MultiplyRow_16(src_y, dst_y, scale, width); + src_y += src_stride_y; + dst_y += dst_stride_y; + } +} + +// Convert plane from msb to lsb +LIBYUV_API +void ConvertToLSBPlane_16(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int width, + int height, + int depth) { + int y; + int scale = 1 << depth; + void (*DivideRow)(const uint16_t* src_y, uint16_t* dst_y, int scale, + int width) = DivideRow_16_C; + if (width <= 0 || height == 0) { + return; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + // Coalesce rows. + if (src_stride_y == width && dst_stride_y == width) { + width *= height; + height = 1; + src_stride_y = dst_stride_y = 0; + } + +#if defined(HAS_DIVIDEROW_16_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + DivideRow = DivideRow_16_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + DivideRow = DivideRow_16_AVX2; + } + } +#endif +#if defined(HAS_DIVIDEROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + DivideRow = DivideRow_16_Any_NEON; + if (IS_ALIGNED(width, 16)) { + DivideRow = DivideRow_16_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + DivideRow(src_y, dst_y, scale, width); + src_y += src_stride_y; + dst_y += dst_stride_y; + } +} + +// Swap U and V channels in interleaved UV plane. +LIBYUV_API +void SwapUVPlane(const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + int y; + void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) = + SwapUVRow_C; + if (width <= 0 || height == 0) { + return; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_uv = src_uv + (height - 1) * src_stride_uv; + src_stride_uv = -src_stride_uv; + } + // Coalesce rows. + if (src_stride_uv == width * 2 && dst_stride_vu == width * 2) { + width *= height; + height = 1; + src_stride_uv = dst_stride_vu = 0; + } + +#if defined(HAS_SWAPUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + SwapUVRow = SwapUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + SwapUVRow = SwapUVRow_SSSE3; + } + } +#endif +#if defined(HAS_SWAPUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + SwapUVRow = SwapUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + SwapUVRow = SwapUVRow_AVX2; + } + } +#endif +#if defined(HAS_SWAPUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + SwapUVRow = SwapUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + SwapUVRow = SwapUVRow_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + SwapUVRow(src_uv, dst_vu, width); + src_uv += src_stride_uv; + dst_vu += dst_stride_vu; + } +} + // Convert NV21 to NV12. LIBYUV_API int NV21ToNV12(const uint8_t* src_y, @@ -515,49 +887,150 @@ int NV21ToNV12(const uint8_t* src_y, int dst_stride_uv, int width, int height) { - int y; - void (*UVToVURow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) = - UVToVURow_C; - int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; + if (!src_vu || !dst_uv || width <= 0 || height == 0) { return -1; } + + if (dst_y) { + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (height - 1) * src_stride_y; src_vu = src_vu + (halfheight - 1) * src_stride_vu; - src_stride_y = -src_stride_y; src_stride_vu = -src_stride_vu; } - // Coalesce rows. - if (src_stride_vu == halfwidth * 2 && dst_stride_uv == halfwidth * 2) { - halfwidth *= halfheight; - halfheight = 1; - src_stride_vu = dst_stride_uv = 0; + + SwapUVPlane(src_vu, src_stride_vu, dst_uv, dst_stride_uv, halfwidth, + halfheight); + return 0; +} + +// Detile a plane of data +// tile width is 16 and assumed. +// tile_height is 16 or 32 for MM21. +// src_stride_y is bytes per row of source ignoring tiling. e.g. 640 +// TODO: More detile row functions. + +LIBYUV_API +void DetilePlane(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height, + int tile_height) { + const ptrdiff_t src_tile_stride = 16 * tile_height; + int y; + void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, + int width) = DetileRow_C; + assert(src_stride_y >= 0); + assert(tile_height > 0); + assert(src_stride_y > 0); + + if (width <= 0 || height == 0) { + return; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; } -#if defined(HAS_UVToVUROW_NEON) +#if defined(HAS_DETILEROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + DetileRow = DetileRow_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + DetileRow = DetileRow_SSE2; + } + } +#endif +#if defined(HAS_DETILEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - UVToVURow = UVToVURow_Any_NEON; - if (IS_ALIGNED(halfwidth, 16)) { - UVToVURow = UVToVURow_NEON; + DetileRow = DetileRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + DetileRow = DetileRow_NEON; } } #endif - if (dst_y) { - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + + // Detile plane + for (y = 0; y < height; ++y) { + DetileRow(src_y, src_tile_stride, dst_y, width); + dst_y += dst_stride_y; + src_y += 16; + // Advance to next row of tiles. + if ((y & (tile_height - 1)) == (tile_height - 1)) { + src_y = src_y - src_tile_stride + src_stride_y * tile_height; + } } +} - for (y = 0; y < halfheight; ++y) { - UVToVURow(src_vu, dst_uv, halfwidth); - src_vu += src_stride_vu; - dst_uv += dst_stride_uv; +LIBYUV_API +void DetileSplitUVPlane(const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + int tile_height) { + const ptrdiff_t src_tile_stride = 16 * tile_height; + int y; + void (*DetileSplitUVRow)(const uint8_t* src, ptrdiff_t src_tile_stride, + uint8_t* dst_u, uint8_t* dst_v, int width) = + DetileSplitUVRow_C; + assert(src_stride_uv >= 0); + assert(tile_height > 0); + assert(src_stride_uv > 0); + + if (width <= 0 || height == 0) { + return; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_u = dst_u + (height - 1) * dst_stride_u; + dst_stride_u = -dst_stride_u; + dst_v = dst_v + (height - 1) * dst_stride_v; + dst_stride_v = -dst_stride_v; + } + +#if defined(HAS_DETILESPLITUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + DetileSplitUVRow = DetileSplitUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + DetileSplitUVRow = DetileSplitUVRow_SSSE3; + } + } +#endif +#if defined(HAS_DETILESPLITUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + DetileSplitUVRow = DetileSplitUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + DetileSplitUVRow = DetileSplitUVRow_NEON; + } + } +#endif + + // Detile plane + for (y = 0; y < height; ++y) { + DetileSplitUVRow(src_uv, src_tile_stride, dst_u, dst_v, width); + dst_u += dst_stride_u; + dst_v += dst_stride_v; + src_uv += 16; + // Advance to next row of tiles. + if ((y & (tile_height - 1)) == (tile_height - 1)) { + src_uv = src_uv - src_tile_stride + src_stride_uv * tile_height; + } } - return 0; } // Support function for NV12 etc RGB channels. @@ -576,6 +1049,9 @@ void SplitRGBPlane(const uint8_t* src_rgb, int y; void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) = SplitRGBRow_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -609,14 +1085,6 @@ void SplitRGBPlane(const uint8_t* src_rgb, } } #endif -#if defined(HAS_SPLITRGBROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - SplitRGBRow = SplitRGBRow_Any_MMI; - if (IS_ALIGNED(width, 4)) { - SplitRGBRow = SplitRGBRow_MMI; - } - } -#endif for (y = 0; y < height; ++y) { // Copy a row of RGB. @@ -643,6 +1111,9 @@ void MergeRGBPlane(const uint8_t* src_r, void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width) = MergeRGBRow_C; + if (width <= 0 || height == 0) { + return; + } // Coalesce rows. // Negative height means invert the image. if (height < 0) { @@ -673,86 +1144,673 @@ void MergeRGBPlane(const uint8_t* src_r, } } #endif -#if defined(HAS_MERGERGBROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - MergeRGBRow = MergeRGBRow_Any_MMI; + + for (y = 0; y < height; ++y) { + // Merge a row of U and V into a row of RGB. + MergeRGBRow(src_r, src_g, src_b, dst_rgb, width); + src_r += src_stride_r; + src_g += src_stride_g; + src_b += src_stride_b; + dst_rgb += dst_stride_rgb; + } +} + +LIBYUV_NOINLINE +void SplitARGBPlaneAlpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + uint8_t* dst_a, + int dst_stride_a, + int width, + int height) { + int y; + void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, + uint8_t* dst_b, uint8_t* dst_a, int width) = + SplitARGBRow_C; + + assert(height > 0); + + if (src_stride_argb == width * 4 && dst_stride_r == width && + dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) { + width *= height; + height = 1; + src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b = + dst_stride_a = 0; + } + +#if defined(HAS_SPLITARGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + SplitARGBRow = SplitARGBRow_Any_SSE2; if (IS_ALIGNED(width, 8)) { - MergeRGBRow = MergeRGBRow_MMI; + SplitARGBRow = SplitARGBRow_SSE2; + } + } +#endif +#if defined(HAS_SPLITARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + SplitARGBRow = SplitARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + SplitARGBRow = SplitARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_SPLITARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + SplitARGBRow = SplitARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + SplitARGBRow = SplitARGBRow_AVX2; + } + } +#endif +#if defined(HAS_SPLITARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + SplitARGBRow = SplitARGBRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + SplitARGBRow = SplitARGBRow_NEON; } } #endif for (y = 0; y < height; ++y) { - // Merge a row of U and V into a row of RGB. - MergeRGBRow(src_r, src_g, src_b, dst_rgb, width); + SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width); + dst_r += dst_stride_r; + dst_g += dst_stride_g; + dst_b += dst_stride_b; + dst_a += dst_stride_a; + src_argb += src_stride_argb; + } +} + +LIBYUV_NOINLINE +void SplitARGBPlaneOpaque(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height) { + int y; + void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, + uint8_t* dst_b, int width) = SplitXRGBRow_C; + assert(height > 0); + + if (src_stride_argb == width * 4 && dst_stride_r == width && + dst_stride_g == width && dst_stride_b == width) { + width *= height; + height = 1; + src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b = 0; + } + +#if defined(HAS_SPLITXRGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + SplitXRGBRow = SplitXRGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + SplitXRGBRow = SplitXRGBRow_SSE2; + } + } +#endif +#if defined(HAS_SPLITXRGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + SplitXRGBRow = SplitXRGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + SplitXRGBRow = SplitXRGBRow_SSSE3; + } + } +#endif +#if defined(HAS_SPLITXRGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + SplitXRGBRow = SplitXRGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + SplitXRGBRow = SplitXRGBRow_AVX2; + } + } +#endif +#if defined(HAS_SPLITXRGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + SplitXRGBRow = SplitXRGBRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + SplitXRGBRow = SplitXRGBRow_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width); + dst_r += dst_stride_r; + dst_g += dst_stride_g; + dst_b += dst_stride_b; + src_argb += src_stride_argb; + } +} + +LIBYUV_API +void SplitARGBPlane(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + uint8_t* dst_a, + int dst_stride_a, + int width, + int height) { + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_r = dst_r + (height - 1) * dst_stride_r; + dst_g = dst_g + (height - 1) * dst_stride_g; + dst_b = dst_b + (height - 1) * dst_stride_b; + dst_a = dst_a + (height - 1) * dst_stride_a; + dst_stride_r = -dst_stride_r; + dst_stride_g = -dst_stride_g; + dst_stride_b = -dst_stride_b; + dst_stride_a = -dst_stride_a; + } + + if (dst_a == NULL) { + SplitARGBPlaneOpaque(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g, + dst_stride_g, dst_b, dst_stride_b, width, height); + } else { + SplitARGBPlaneAlpha(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g, + dst_stride_g, dst_b, dst_stride_b, dst_a, dst_stride_a, + width, height); + } +} + +LIBYUV_NOINLINE +void MergeARGBPlaneAlpha(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + int y; + void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g, + const uint8_t* src_b, const uint8_t* src_a, + uint8_t* dst_argb, int width) = MergeARGBRow_C; + + assert(height > 0); + + if (src_stride_r == width && src_stride_g == width && src_stride_b == width && + src_stride_a == width && dst_stride_argb == width * 4) { + width *= height; + height = 1; + src_stride_r = src_stride_g = src_stride_b = src_stride_a = + dst_stride_argb = 0; + } +#if defined(HAS_MERGEARGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + MergeARGBRow = MergeARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + MergeARGBRow = MergeARGBRow_SSE2; + } + } +#endif +#if defined(HAS_MERGEARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeARGBRow = MergeARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + MergeARGBRow = MergeARGBRow_AVX2; + } + } +#endif +#if defined(HAS_MERGEARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeARGBRow = MergeARGBRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + MergeARGBRow = MergeARGBRow_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width); src_r += src_stride_r; src_g += src_stride_g; src_b += src_stride_b; - dst_rgb += dst_stride_rgb; + src_a += src_stride_a; + dst_argb += dst_stride_argb; } } -// Mirror a plane of data. -void MirrorPlane(const uint8_t* src_y, - int src_stride_y, - uint8_t* dst_y, - int dst_stride_y, - int width, - int height) { +LIBYUV_NOINLINE +void MergeARGBPlaneOpaque(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C; + void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g, + const uint8_t* src_b, uint8_t* dst_argb, int width) = + MergeXRGBRow_C; + + assert(height > 0); + + if (src_stride_r == width && src_stride_g == width && src_stride_b == width && + dst_stride_argb == width * 4) { + width *= height; + height = 1; + src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0; + } +#if defined(HAS_MERGEXRGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + MergeXRGBRow = MergeXRGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + MergeXRGBRow = MergeXRGBRow_SSE2; + } + } +#endif +#if defined(HAS_MERGEXRGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeXRGBRow = MergeXRGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + MergeXRGBRow = MergeXRGBRow_AVX2; + } + } +#endif +#if defined(HAS_MERGEXRGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeXRGBRow = MergeXRGBRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + MergeXRGBRow = MergeXRGBRow_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + MergeXRGBRow(src_r, src_g, src_b, dst_argb, width); + src_r += src_stride_r; + src_g += src_stride_g; + src_b += src_stride_b; + dst_argb += dst_stride_argb; + } +} + +LIBYUV_API +void MergeARGBPlane(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { // Negative height means invert the image. if (height < 0) { height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_stride_y = -src_stride_y; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; } -#if defined(HAS_MIRRORROW_NEON) + + if (src_a == NULL) { + MergeARGBPlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b, + src_stride_b, dst_argb, dst_stride_argb, width, + height); + } else { + MergeARGBPlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b, + src_stride_b, src_a, src_stride_a, dst_argb, + dst_stride_argb, width, height); + } +} + +// TODO(yuan): Support 2 bit alpha channel. +LIBYUV_API +void MergeXR30Plane(const uint16_t* src_r, + int src_stride_r, + const uint16_t* src_g, + int src_stride_g, + const uint16_t* src_b, + int src_stride_b, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height, + int depth) { + int y; + void (*MergeXR30Row)(const uint16_t* src_r, const uint16_t* src_g, + const uint16_t* src_b, uint8_t* dst_ar30, int depth, + int width) = MergeXR30Row_C; + + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; + dst_stride_ar30 = -dst_stride_ar30; + } + // Coalesce rows. + if (src_stride_r == width && src_stride_g == width && src_stride_b == width && + dst_stride_ar30 == width * 4) { + width *= height; + height = 1; + src_stride_r = src_stride_g = src_stride_b = dst_stride_ar30 = 0; + } +#if defined(HAS_MERGEXR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeXR30Row = MergeXR30Row_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + MergeXR30Row = MergeXR30Row_AVX2; + } + } +#endif +#if defined(HAS_MERGEXR30ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - MirrorRow = MirrorRow_Any_NEON; + if (depth == 10) { + MergeXR30Row = MergeXR30Row_10_Any_NEON; + if (IS_ALIGNED(width, 8)) { + MergeXR30Row = MergeXR30Row_10_NEON; + } + } else { + MergeXR30Row = MergeXR30Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + MergeXR30Row = MergeXR30Row_NEON; + } + } + } +#endif + + for (y = 0; y < height; ++y) { + MergeXR30Row(src_r, src_g, src_b, dst_ar30, depth, width); + src_r += src_stride_r; + src_g += src_stride_g; + src_b += src_stride_b; + dst_ar30 += dst_stride_ar30; + } +} + +LIBYUV_NOINLINE +static void MergeAR64PlaneAlpha(const uint16_t* src_r, + int src_stride_r, + const uint16_t* src_g, + int src_stride_g, + const uint16_t* src_b, + int src_stride_b, + const uint16_t* src_a, + int src_stride_a, + uint16_t* dst_ar64, + int dst_stride_ar64, + int width, + int height, + int depth) { + int y; + void (*MergeAR64Row)(const uint16_t* src_r, const uint16_t* src_g, + const uint16_t* src_b, const uint16_t* src_a, + uint16_t* dst_argb, int depth, int width) = + MergeAR64Row_C; + + if (src_stride_r == width && src_stride_g == width && src_stride_b == width && + src_stride_a == width && dst_stride_ar64 == width * 4) { + width *= height; + height = 1; + src_stride_r = src_stride_g = src_stride_b = src_stride_a = + dst_stride_ar64 = 0; + } +#if defined(HAS_MERGEAR64ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeAR64Row = MergeAR64Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { - MirrorRow = MirrorRow_NEON; + MergeAR64Row = MergeAR64Row_AVX2; } } #endif -#if defined(HAS_MIRRORROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - MirrorRow = MirrorRow_Any_SSSE3; +#if defined(HAS_MERGEAR64ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeAR64Row = MergeAR64Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + MergeAR64Row = MergeAR64Row_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + MergeAR64Row(src_r, src_g, src_b, src_a, dst_ar64, depth, width); + src_r += src_stride_r; + src_g += src_stride_g; + src_b += src_stride_b; + src_a += src_stride_a; + dst_ar64 += dst_stride_ar64; + } +} + +LIBYUV_NOINLINE +static void MergeAR64PlaneOpaque(const uint16_t* src_r, + int src_stride_r, + const uint16_t* src_g, + int src_stride_g, + const uint16_t* src_b, + int src_stride_b, + uint16_t* dst_ar64, + int dst_stride_ar64, + int width, + int height, + int depth) { + int y; + void (*MergeXR64Row)(const uint16_t* src_r, const uint16_t* src_g, + const uint16_t* src_b, uint16_t* dst_argb, int depth, + int width) = MergeXR64Row_C; + + // Coalesce rows. + if (src_stride_r == width && src_stride_g == width && src_stride_b == width && + dst_stride_ar64 == width * 4) { + width *= height; + height = 1; + src_stride_r = src_stride_g = src_stride_b = dst_stride_ar64 = 0; + } +#if defined(HAS_MERGEXR64ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeXR64Row = MergeXR64Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { - MirrorRow = MirrorRow_SSSE3; + MergeXR64Row = MergeXR64Row_AVX2; } } #endif -#if defined(HAS_MIRRORROW_AVX2) +#if defined(HAS_MERGEXR64ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeXR64Row = MergeXR64Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + MergeXR64Row = MergeXR64Row_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + MergeXR64Row(src_r, src_g, src_b, dst_ar64, depth, width); + src_r += src_stride_r; + src_g += src_stride_g; + src_b += src_stride_b; + dst_ar64 += dst_stride_ar64; + } +} + +LIBYUV_API +void MergeAR64Plane(const uint16_t* src_r, + int src_stride_r, + const uint16_t* src_g, + int src_stride_g, + const uint16_t* src_b, + int src_stride_b, + const uint16_t* src_a, + int src_stride_a, + uint16_t* dst_ar64, + int dst_stride_ar64, + int width, + int height, + int depth) { + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_ar64 = dst_ar64 + (height - 1) * dst_stride_ar64; + dst_stride_ar64 = -dst_stride_ar64; + } + + if (src_a == NULL) { + MergeAR64PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b, + src_stride_b, dst_ar64, dst_stride_ar64, width, height, + depth); + } else { + MergeAR64PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b, + src_stride_b, src_a, src_stride_a, dst_ar64, + dst_stride_ar64, width, height, depth); + } +} + +LIBYUV_NOINLINE +static void MergeARGB16To8PlaneAlpha(const uint16_t* src_r, + int src_stride_r, + const uint16_t* src_g, + int src_stride_g, + const uint16_t* src_b, + int src_stride_b, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + int depth) { + int y; + void (*MergeARGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g, + const uint16_t* src_b, const uint16_t* src_a, + uint8_t* dst_argb, int depth, int width) = + MergeARGB16To8Row_C; + + if (src_stride_r == width && src_stride_g == width && src_stride_b == width && + src_stride_a == width && dst_stride_argb == width * 4) { + width *= height; + height = 1; + src_stride_r = src_stride_g = src_stride_b = src_stride_a = + dst_stride_argb = 0; + } +#if defined(HAS_MERGEARGB16TO8ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - MirrorRow = MirrorRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - MirrorRow = MirrorRow_AVX2; + MergeARGB16To8Row = MergeARGB16To8Row_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + MergeARGB16To8Row = MergeARGB16To8Row_AVX2; } } #endif -#if defined(HAS_MIRRORROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - MirrorRow = MirrorRow_Any_MSA; - if (IS_ALIGNED(width, 64)) { - MirrorRow = MirrorRow_MSA; +#if defined(HAS_MERGEARGB16TO8ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeARGB16To8Row = MergeARGB16To8Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + MergeARGB16To8Row = MergeARGB16To8Row_NEON; } } #endif -#if defined(HAS_MIRRORROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - MirrorRow = MirrorRow_Any_MMI; + + for (y = 0; y < height; ++y) { + MergeARGB16To8Row(src_r, src_g, src_b, src_a, dst_argb, depth, width); + src_r += src_stride_r; + src_g += src_stride_g; + src_b += src_stride_b; + src_a += src_stride_a; + dst_argb += dst_stride_argb; + } +} + +LIBYUV_NOINLINE +static void MergeARGB16To8PlaneOpaque(const uint16_t* src_r, + int src_stride_r, + const uint16_t* src_g, + int src_stride_g, + const uint16_t* src_b, + int src_stride_b, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + int depth) { + int y; + void (*MergeXRGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g, + const uint16_t* src_b, uint8_t* dst_argb, int depth, + int width) = MergeXRGB16To8Row_C; + + // Coalesce rows. + if (src_stride_r == width && src_stride_g == width && src_stride_b == width && + dst_stride_argb == width * 4) { + width *= height; + height = 1; + src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0; + } +#if defined(HAS_MERGEXRGB16TO8ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeXRGB16To8Row = MergeXRGB16To8Row_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + MergeXRGB16To8Row = MergeXRGB16To8Row_AVX2; + } + } +#endif +#if defined(HAS_MERGEXRGB16TO8ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeXRGB16To8Row = MergeXRGB16To8Row_Any_NEON; if (IS_ALIGNED(width, 8)) { - MirrorRow = MirrorRow_MMI; + MergeXRGB16To8Row = MergeXRGB16To8Row_NEON; } } #endif - // Mirror plane for (y = 0; y < height; ++y) { - MirrorRow(src_y, dst_y, width); - src_y += src_stride_y; - dst_y += dst_stride_y; + MergeXRGB16To8Row(src_r, src_g, src_b, dst_argb, depth, width); + src_r += src_stride_r; + src_g += src_stride_g; + src_b += src_stride_b; + dst_argb += dst_stride_argb; + } +} + +LIBYUV_API +void MergeARGB16To8Plane(const uint16_t* src_r, + int src_stride_r, + const uint16_t* src_g, + int src_stride_g, + const uint16_t* src_b, + int src_stride_b, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + int depth) { + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } + + if (src_a == NULL) { + MergeARGB16To8PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b, + src_stride_b, dst_argb, dst_stride_argb, width, + height, depth); + } else { + MergeARGB16To8PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b, + src_stride_b, src_a, src_stride_a, dst_argb, + dst_stride_argb, width, height, depth); } } @@ -820,7 +1878,7 @@ int YUY2ToI422(const uint8_t* src_yuy2, } } #endif -#if defined(HAS_YUY2TOYROW_MSA) +#if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { YUY2ToYRow = YUY2ToYRow_Any_MSA; YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA; @@ -830,13 +1888,13 @@ int YUY2ToI422(const uint8_t* src_yuy2, } } #endif -#if defined(HAS_YUY2TOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - YUY2ToYRow = YUY2ToYRow_Any_MMI; - YUY2ToUV422Row = YUY2ToUV422Row_Any_MMI; - if (IS_ALIGNED(width, 8)) { - YUY2ToYRow = YUY2ToYRow_MMI; - YUY2ToUV422Row = YUY2ToUV422Row_MMI; +#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + YUY2ToYRow = YUY2ToYRow_Any_LASX; + YUY2ToUV422Row = YUY2ToUV422Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + YUY2ToYRow = YUY2ToYRow_LASX; + YUY2ToUV422Row = YUY2ToUV422Row_LASX; } } #endif @@ -916,7 +1974,7 @@ int UYVYToI422(const uint8_t* src_uyvy, } } #endif -#if defined(HAS_UYVYTOYROW_MSA) +#if defined(HAS_UYVYTOYROW_MSA) && defined(HAS_UYVYTOUV422ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { UYVYToYRow = UYVYToYRow_Any_MSA; UYVYToUV422Row = UYVYToUV422Row_Any_MSA; @@ -926,13 +1984,13 @@ int UYVYToI422(const uint8_t* src_uyvy, } } #endif -#if defined(HAS_UYVYTOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - UYVYToYRow = UYVYToYRow_Any_MMI; - UYVYToUV422Row = UYVYToUV422Row_Any_MMI; - if (IS_ALIGNED(width, 16)) { - UYVYToYRow = UYVYToYRow_MMI; - UYVYToUV422Row = UYVYToUV422Row_MMI; +#if defined(HAS_UYVYTOYROW_LASX) && defined(HAS_UYVYTOUV422ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + UYVYToYRow = UYVYToYRow_Any_LASX; + UYVYToUV422Row = UYVYToUV422Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + UYVYToYRow = UYVYToYRow_LASX; + UYVYToUV422Row = UYVYToUV422Row_LASX; } } #endif @@ -1006,23 +2064,214 @@ int YUY2ToY(const uint8_t* src_yuy2, } } #endif -#if defined(HAS_YUY2TOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - YUY2ToYRow = YUY2ToYRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - YUY2ToYRow = YUY2ToYRow_MMI; + + for (y = 0; y < height; ++y) { + YUY2ToYRow(src_yuy2, dst_y, width); + src_yuy2 += src_stride_yuy2; + dst_y += dst_stride_y; + } + return 0; +} + +// Convert UYVY to Y. +LIBYUV_API +int UYVYToY(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height) { + int y; + void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) = + UYVYToYRow_C; + if (!src_uyvy || !dst_y || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; + src_stride_uyvy = -src_stride_uyvy; + } + // Coalesce rows. + if (src_stride_uyvy == width * 2 && dst_stride_y == width) { + width *= height; + height = 1; + src_stride_uyvy = dst_stride_y = 0; + } +#if defined(HAS_UYVYTOYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + UYVYToYRow = UYVYToYRow_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + UYVYToYRow = UYVYToYRow_SSE2; + } + } +#endif +#if defined(HAS_UYVYTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + UYVYToYRow = UYVYToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + UYVYToYRow = UYVYToYRow_AVX2; + } + } +#endif +#if defined(HAS_UYVYTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + UYVYToYRow = UYVYToYRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + UYVYToYRow = UYVYToYRow_NEON; + } + } +#endif +#if defined(HAS_UYVYTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + UYVYToYRow = UYVYToYRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + UYVYToYRow = UYVYToYRow_MSA; } } #endif for (y = 0; y < height; ++y) { - YUY2ToYRow(src_yuy2, dst_y, width); - src_yuy2 += src_stride_yuy2; + UYVYToYRow(src_uyvy, dst_y, width); + src_uyvy += src_stride_uyvy; dst_y += dst_stride_y; } return 0; } +// Mirror a plane of data. +// See Also I400Mirror +LIBYUV_API +void MirrorPlane(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height) { + int y; + void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C; + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_stride_y = -src_stride_y; + } +#if defined(HAS_MIRRORROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MirrorRow = MirrorRow_Any_NEON; + if (IS_ALIGNED(width, 32)) { + MirrorRow = MirrorRow_NEON; + } + } +#endif +#if defined(HAS_MIRRORROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + MirrorRow = MirrorRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + MirrorRow = MirrorRow_SSSE3; + } + } +#endif +#if defined(HAS_MIRRORROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MirrorRow = MirrorRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + MirrorRow = MirrorRow_AVX2; + } + } +#endif +#if defined(HAS_MIRRORROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + MirrorRow = MirrorRow_Any_MSA; + if (IS_ALIGNED(width, 64)) { + MirrorRow = MirrorRow_MSA; + } + } +#endif +#if defined(HAS_MIRRORROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + MirrorRow = MirrorRow_Any_LASX; + if (IS_ALIGNED(width, 64)) { + MirrorRow = MirrorRow_LASX; + } + } +#endif + + // Mirror plane + for (y = 0; y < height; ++y) { + MirrorRow(src_y, dst_y, width); + src_y += src_stride_y; + dst_y += dst_stride_y; + } +} + +// Mirror a plane of UV data. +LIBYUV_API +void MirrorUVPlane(const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + int y; + void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst, int width) = + MirrorUVRow_C; + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_uv = src_uv + (height - 1) * src_stride_uv; + src_stride_uv = -src_stride_uv; + } +#if defined(HAS_MIRRORUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MirrorUVRow = MirrorUVRow_Any_NEON; + if (IS_ALIGNED(width, 32)) { + MirrorUVRow = MirrorUVRow_NEON; + } + } +#endif +#if defined(HAS_MIRRORUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + MirrorUVRow = MirrorUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + MirrorUVRow = MirrorUVRow_SSSE3; + } + } +#endif +#if defined(HAS_MIRRORUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MirrorUVRow = MirrorUVRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + MirrorUVRow = MirrorUVRow_AVX2; + } + } +#endif +#if defined(HAS_MIRRORUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + MirrorUVRow = MirrorUVRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + MirrorUVRow = MirrorUVRow_MSA; + } + } +#endif +#if defined(HAS_MIRRORUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + MirrorUVRow = MirrorUVRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + MirrorUVRow = MirrorUVRow_LASX; + } + } +#endif + + // MirrorUV plane + for (y = 0; y < height; ++y) { + MirrorUVRow(src_uv, dst_uv, width); + src_uv += src_stride_uv; + dst_uv += dst_stride_uv; + } +} + // Mirror I400 with optional flipping LIBYUV_API int I400Mirror(const uint8_t* src_y, @@ -1063,10 +2312,12 @@ int I420Mirror(const uint8_t* src_y, int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || width <= 0 || + + if (!src_y || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; @@ -1087,6 +2338,43 @@ int I420Mirror(const uint8_t* src_y, return 0; } +// NV12 mirror. +LIBYUV_API +int NV12Mirror(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + + if (!src_y || !src_uv || !dst_uv || width <= 0 || height == 0) { + return -1; + } + + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_uv = src_uv + (halfheight - 1) * src_stride_uv; + src_stride_y = -src_stride_y; + src_stride_uv = -src_stride_uv; + } + + if (dst_y) { + MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + MirrorUVPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, halfwidth, + halfheight); + return 0; +} + // ARGB mirror. LIBYUV_API int ARGBMirror(const uint8_t* src_argb, @@ -1110,7 +2398,7 @@ int ARGBMirror(const uint8_t* src_argb, #if defined(HAS_ARGBMIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBMirrorRow = ARGBMirrorRow_Any_NEON; - if (IS_ALIGNED(width, 4)) { + if (IS_ALIGNED(width, 8)) { ARGBMirrorRow = ARGBMirrorRow_NEON; } } @@ -1139,11 +2427,11 @@ int ARGBMirror(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBMIRRORROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBMirrorRow = ARGBMirrorRow_Any_MMI; - if (IS_ALIGNED(width, 2)) { - ARGBMirrorRow = ARGBMirrorRow_MMI; +#if defined(HAS_ARGBMIRRORROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBMirrorRow = ARGBMirrorRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + ARGBMirrorRow = ARGBMirrorRow_LASX; } } #endif @@ -1157,6 +2445,52 @@ int ARGBMirror(const uint8_t* src_argb, return 0; } +// RGB24 mirror. +LIBYUV_API +int RGB24Mirror(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + int y; + void (*RGB24MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = + RGB24MirrorRow_C; + if (!src_rgb24 || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; + src_stride_rgb24 = -src_stride_rgb24; + } +#if defined(HAS_RGB24MIRRORROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGB24MirrorRow = RGB24MirrorRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RGB24MirrorRow = RGB24MirrorRow_NEON; + } + } +#endif +#if defined(HAS_RGB24MIRRORROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGB24MirrorRow = RGB24MirrorRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24MirrorRow = RGB24MirrorRow_SSSE3; + } + } +#endif + + // Mirror plane + for (y = 0; y < height; ++y) { + RGB24MirrorRow(src_rgb24, dst_rgb24, width); + src_rgb24 += src_stride_rgb24; + dst_rgb24 += dst_stride_rgb24; + } + return 0; +} + // Get a blender that optimized for the CPU and pixel count. // As there are 6 blenders to choose from, the caller should try to use // the same blend function for all pixels if possible. @@ -1180,9 +2514,9 @@ ARGBBlendRow GetARGBBlend() { ARGBBlendRow = ARGBBlendRow_MSA; } #endif -#if defined(HAS_ARGBBLENDROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBBlendRow = ARGBBlendRow_MMI; +#if defined(HAS_ARGBBLENDROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBBlendRow = ARGBBlendRow_LSX; } #endif return ARGBBlendRow; @@ -1277,14 +2611,6 @@ int BlendPlane(const uint8_t* src_y0, } } #endif -#if defined(HAS_BLENDPLANEROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - BlendPlaneRow = BlendPlaneRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - BlendPlaneRow = BlendPlaneRow_MMI; - } - } -#endif for (y = 0; y < height; ++y) { BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width); @@ -1329,6 +2655,7 @@ int I420Blend(const uint8_t* src_y0, BlendPlaneRow_C; void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C; + if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 || !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; @@ -1361,14 +2688,6 @@ int I420Blend(const uint8_t* src_y0, } } #endif -#if defined(HAS_BLENDPLANEROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - BlendPlaneRow = BlendPlaneRow_Any_MMI; - if (IS_ALIGNED(halfwidth, 8)) { - BlendPlaneRow = BlendPlaneRow_MMI; - } - } -#endif if (!IS_ALIGNED(width, 2)) { ScaleRowDown2 = ScaleRowDown2Box_Odd_C; } @@ -1405,17 +2724,6 @@ int I420Blend(const uint8_t* src_y0, } } #endif -#if defined(HAS_SCALEROWDOWN2_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ScaleRowDown2 = ScaleRowDown2Box_Odd_MMI; - if (IS_ALIGNED(width, 2)) { - ScaleRowDown2 = ScaleRowDown2Box_Any_MMI; - if (IS_ALIGNED(halfwidth, 8)) { - ScaleRowDown2 = ScaleRowDown2Box_MMI; - } - } - } -#endif // Row buffer for intermediate alpha pixels. align_buffer_64(halfalpha, halfwidth); @@ -1501,11 +2809,11 @@ int ARGBMultiply(const uint8_t* src_argb0, } } #endif -#if defined(HAS_ARGBMULTIPLYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBMultiplyRow = ARGBMultiplyRow_Any_MMI; - if (IS_ALIGNED(width, 2)) { - ARGBMultiplyRow = ARGBMultiplyRow_MMI; +#if defined(HAS_ARGBMULTIPLYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBMultiplyRow = ARGBMultiplyRow_Any_LASX; + if (IS_ALIGNED(width, 8)) { + ARGBMultiplyRow = ARGBMultiplyRow_LASX; } } #endif @@ -1549,12 +2857,12 @@ int ARGBAdd(const uint8_t* src_argb0, height = 1; src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; } -#if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__)) +#if defined(HAS_ARGBADDROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBAddRow = ARGBAddRow_SSE2; } #endif -#if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__)) +#if defined(HAS_ARGBADDROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBAddRow = ARGBAddRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { @@ -1586,11 +2894,11 @@ int ARGBAdd(const uint8_t* src_argb0, } } #endif -#if defined(HAS_ARGBADDROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBAddRow = ARGBAddRow_Any_MMI; - if (IS_ALIGNED(width, 2)) { - ARGBAddRow = ARGBAddRow_MMI; +#if defined(HAS_ARGBADDROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBAddRow = ARGBAddRow_Any_LASX; + if (IS_ALIGNED(width, 8)) { + ARGBAddRow = ARGBAddRow_LASX; } } #endif @@ -1666,11 +2974,11 @@ int ARGBSubtract(const uint8_t* src_argb0, } } #endif -#if defined(HAS_ARGBSUBTRACTROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBSubtractRow = ARGBSubtractRow_Any_MMI; - if (IS_ALIGNED(width, 2)) { - ARGBSubtractRow = ARGBSubtractRow_MMI; +#if defined(HAS_ARGBSUBTRACTROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBSubtractRow = ARGBSubtractRow_Any_LASX; + if (IS_ALIGNED(width, 8)) { + ARGBSubtractRow = ARGBSubtractRow_LASX; } } #endif @@ -1684,177 +2992,6 @@ int ARGBSubtract(const uint8_t* src_argb0, } return 0; } -// Convert I422 to RGBA with matrix -static int I422ToRGBAMatrix(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_rgba, - int dst_stride_rgba, - const struct YuvConstants* yuvconstants, - int width, - int height) { - int y; - void (*I422ToRGBARow)(const uint8_t* y_buf, const uint8_t* u_buf, - const uint8_t* v_buf, uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, int width) = - I422ToRGBARow_C; - if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; - dst_stride_rgba = -dst_stride_rgba; - } -#if defined(HAS_I422TORGBAROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I422ToRGBARow = I422ToRGBARow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToRGBARow = I422ToRGBARow_SSSE3; - } - } -#endif -#if defined(HAS_I422TORGBAROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - I422ToRGBARow = I422ToRGBARow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I422ToRGBARow = I422ToRGBARow_AVX2; - } - } -#endif -#if defined(HAS_I422TORGBAROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I422ToRGBARow = I422ToRGBARow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToRGBARow = I422ToRGBARow_NEON; - } - } -#endif -#if defined(HAS_I422TORGBAROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToRGBARow = I422ToRGBARow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422ToRGBARow = I422ToRGBARow_MSA; - } - } -#endif - - for (y = 0; y < height; ++y) { - I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width); - dst_rgba += dst_stride_rgba; - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - } - return 0; -} - -// Convert I422 to RGBA. -LIBYUV_API -int I422ToRGBA(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_rgba, - int dst_stride_rgba, - int width, - int height) { - return I422ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, - src_stride_v, dst_rgba, dst_stride_rgba, - &kYuvI601Constants, width, height); -} - -// Convert I422 to BGRA. -LIBYUV_API -int I422ToBGRA(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_bgra, - int dst_stride_bgra, - int width, - int height) { - return I422ToRGBAMatrix(src_y, src_stride_y, src_v, - src_stride_v, // Swap U and V - src_u, src_stride_u, dst_bgra, dst_stride_bgra, - &kYvuI601Constants, // Use Yvu matrix - width, height); -} - -// Convert NV12 to RGB565. -LIBYUV_API -int NV12ToRGB565(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_uv, - int src_stride_uv, - uint8_t* dst_rgb565, - int dst_stride_rgb565, - int width, - int height) { - int y; - void (*NV12ToRGB565Row)( - const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C; - if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; - dst_stride_rgb565 = -dst_stride_rgb565; - } -#if defined(HAS_NV12TORGB565ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - NV12ToRGB565Row = NV12ToRGB565Row_SSSE3; - } - } -#endif -#if defined(HAS_NV12TORGB565ROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - NV12ToRGB565Row = NV12ToRGB565Row_AVX2; - } - } -#endif -#if defined(HAS_NV12TORGB565ROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - NV12ToRGB565Row = NV12ToRGB565Row_NEON; - } - } -#endif -#if defined(HAS_NV12TORGB565ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - NV12ToRGB565Row = NV12ToRGB565Row_Any_MSA; - if (IS_ALIGNED(width, 8)) { - NV12ToRGB565Row = NV12ToRGB565Row_MSA; - } - } -#endif - - for (y = 0; y < height; ++y) { - NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvI601Constants, width); - dst_rgb565 += dst_stride_rgb565; - src_y += src_stride_y; - if (y & 1) { - src_uv += src_stride_uv; - } - } - return 0; -} // Convert RAW to RGB24. LIBYUV_API @@ -1906,11 +3043,11 @@ int RAWToRGB24(const uint8_t* src_raw, } } #endif -#if defined(HAS_RAWTORGB24ROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - RAWToRGB24Row = RAWToRGB24Row_Any_MMI; - if (IS_ALIGNED(width, 4)) { - RAWToRGB24Row = RAWToRGB24Row_MMI; +#if defined(HAS_RAWTORGB24ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RAWToRGB24Row = RAWToRGB24Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RAWToRGB24Row = RAWToRGB24Row_LSX; } } #endif @@ -1931,6 +3068,10 @@ void SetPlane(uint8_t* dst_y, uint32_t value) { int y; void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C; + + if (width <= 0 || height == 0) { + return; + } if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; @@ -1968,6 +3109,14 @@ void SetPlane(uint8_t* dst_y, SetRow = SetRow_MSA; } #endif +#if defined(HAS_SETROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SetRow = SetRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + SetRow = SetRow_LSX; + } + } +#endif // Set plane for (y = 0; y < height; ++y) { @@ -1996,6 +3145,7 @@ int I420Rect(uint8_t* dst_y, uint8_t* start_y = dst_y + y * dst_stride_y + x; uint8_t* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2); uint8_t* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2); + if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 || y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 || value_v < 0 || value_v > 255) { @@ -2057,6 +3207,14 @@ int ARGBRect(uint8_t* dst_argb, } } #endif +#if defined(HAS_ARGBSETROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBSetRow = ARGBSetRow_Any_LSX; + if (IS_ALIGNED(width, 4)) { + ARGBSetRow = ARGBSetRow_LSX; + } + } +#endif // Set plane for (y = 0; y < height; ++y) { @@ -2135,11 +3293,11 @@ int ARGBAttenuate(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBATTENUATEROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI; - if (IS_ALIGNED(width, 2)) { - ARGBAttenuateRow = ARGBAttenuateRow_MMI; +#if defined(HAS_ARGBATTENUATEROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + ARGBAttenuateRow = ARGBAttenuateRow_LASX; } } #endif @@ -2243,9 +3401,9 @@ int ARGBGrayTo(const uint8_t* src_argb, ARGBGrayRow = ARGBGrayRow_MSA; } #endif -#if defined(HAS_ARGBGRAYROW_MMI) - if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) { - ARGBGrayRow = ARGBGrayRow_MMI; +#if defined(HAS_ARGBGRAYROW_LASX) + if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) { + ARGBGrayRow = ARGBGrayRow_LASX; } #endif @@ -2293,9 +3451,9 @@ int ARGBGray(uint8_t* dst_argb, ARGBGrayRow = ARGBGrayRow_MSA; } #endif -#if defined(HAS_ARGBGRAYROW_MMI) - if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) { - ARGBGrayRow = ARGBGrayRow_MMI; +#if defined(HAS_ARGBGRAYROW_LASX) + if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) { + ARGBGrayRow = ARGBGrayRow_LASX; } #endif @@ -2341,9 +3499,9 @@ int ARGBSepia(uint8_t* dst_argb, ARGBSepiaRow = ARGBSepiaRow_MSA; } #endif -#if defined(HAS_ARGBSEPIAROW_MMI) - if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) { - ARGBSepiaRow = ARGBSepiaRow_MMI; +#if defined(HAS_ARGBSEPIAROW_LASX) + if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) { + ARGBSepiaRow = ARGBSepiaRow_LASX; } #endif @@ -2397,9 +3555,9 @@ int ARGBColorMatrix(const uint8_t* src_argb, ARGBColorMatrixRow = ARGBColorMatrixRow_MSA; } #endif -#if defined(HAS_ARGBCOLORMATRIXROW_MMI) - if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) { - ARGBColorMatrixRow = ARGBColorMatrixRow_MMI; +#if defined(HAS_ARGBCOLORMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { + ARGBColorMatrixRow = ARGBColorMatrixRow_LSX; } #endif for (y = 0; y < height; ++y) { @@ -2567,6 +3725,11 @@ int ARGBQuantize(uint8_t* dst_argb, ARGBQuantizeRow = ARGBQuantizeRow_MSA; } #endif +#if defined(HAS_ARGBQUANTIZEROW_LSX) + if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { + ARGBQuantizeRow = ARGBQuantizeRow_LSX; + } +#endif for (y = 0; y < height; ++y) { ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width); dst += dst_stride_argb; @@ -2596,11 +3759,6 @@ int ARGBComputeCumulativeSum(const uint8_t* src_argb, ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; } #endif -#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ComputeCumulativeSumRow = ComputeCumulativeSumRow_MMI; - } -#endif memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel. for (y = 0; y < height; ++y) { @@ -2651,7 +3809,7 @@ int ARGBBlur(const uint8_t* src_argb, if (radius > (width / 2 - 1)) { radius = width / 2 - 1; } - if (radius <= 0) { + if (radius <= 0 || height <= 1) { return -1; } #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) @@ -2660,11 +3818,6 @@ int ARGBBlur(const uint8_t* src_argb, CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2; } #endif -#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ComputeCumulativeSumRow = ComputeCumulativeSumRow_MMI; - } -#endif // Compute enough CumulativeSum for first row to be blurred. After this // one row of CumulativeSum is updated at a time. ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum, @@ -2771,9 +3924,9 @@ int ARGBShade(const uint8_t* src_argb, ARGBShadeRow = ARGBShadeRow_MSA; } #endif -#if defined(HAS_ARGBSHADEROW_MMI) - if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) { - ARGBShadeRow = ARGBShadeRow_MMI; +#if defined(HAS_ARGBSHADEROW_LASX) + if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 8)) { + ARGBShadeRow = ARGBShadeRow_LASX; } #endif @@ -2847,11 +4000,11 @@ int InterpolatePlane(const uint8_t* src0, } } #endif -#if defined(HAS_INTERPOLATEROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - InterpolateRow = InterpolateRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - InterpolateRow = InterpolateRow_MMI; +#if defined(HAS_INTERPOLATEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + InterpolateRow = InterpolateRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + InterpolateRow = InterpolateRow_LSX; } } #endif @@ -2865,6 +4018,86 @@ int InterpolatePlane(const uint8_t* src0, return 0; } +// Interpolate 2 planes by specified amount (0 to 255). +LIBYUV_API +int InterpolatePlane_16(const uint16_t* src0, + int src_stride0, + const uint16_t* src1, + int src_stride1, + uint16_t* dst, + int dst_stride, + int width, + int height, + int interpolation) { + int y; + void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_16_C; + if (!src0 || !src1 || !dst || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst = dst + (height - 1) * dst_stride; + dst_stride = -dst_stride; + } + // Coalesce rows. + if (src_stride0 == width && src_stride1 == width && dst_stride == width) { + width *= height; + height = 1; + src_stride0 = src_stride1 = dst_stride = 0; + } +#if defined(HAS_INTERPOLATEROW_16_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow_16 = InterpolateRow_16_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + InterpolateRow_16 = InterpolateRow_16_SSSE3; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + InterpolateRow_16 = InterpolateRow_16_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + InterpolateRow_16 = InterpolateRow_16_AVX2; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + InterpolateRow_16 = InterpolateRow_16_Any_NEON; + if (IS_ALIGNED(width, 8)) { + InterpolateRow_16 = InterpolateRow_16_NEON; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + InterpolateRow_16 = InterpolateRow_16_Any_MSA; + if (IS_ALIGNED(width, 32)) { + InterpolateRow_16 = InterpolateRow_16_MSA; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + InterpolateRow_16 = InterpolateRow_16_Any_LSX; + if (IS_ALIGNED(width, 32)) { + InterpolateRow_16 = InterpolateRow_16_LSX; + } + } +#endif + + for (y = 0; y < height; ++y) { + InterpolateRow_16(dst, src0, src1 - src0, width, interpolation); + src0 += src_stride0; + src1 += src_stride1; + dst += dst_stride; + } + return 0; +} + // Interpolate 2 ARGB images by specified amount (0 to 255). LIBYUV_API int ARGBInterpolate(const uint8_t* src_argb0, @@ -2906,10 +4139,12 @@ int I420Interpolate(const uint8_t* src0_y, int interpolation) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; + if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } + InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y, dst_stride_y, width, height, interpolation); InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u, @@ -2978,11 +4213,11 @@ int ARGBShuffle(const uint8_t* src_bgra, } } #endif -#if defined(HAS_ARGBSHUFFLEROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBShuffleRow = ARGBShuffleRow_Any_MMI; - if (IS_ALIGNED(width, 2)) { - ARGBShuffleRow = ARGBShuffleRow_MMI; +#if defined(HAS_ARGBSHUFFLEROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBShuffleRow = ARGBShuffleRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + ARGBShuffleRow = ARGBShuffleRow_LASX; } } #endif @@ -2995,6 +4230,142 @@ int ARGBShuffle(const uint8_t* src_bgra, return 0; } +// Shuffle AR64 channel order. e.g. AR64 to AB64. +LIBYUV_API +int AR64Shuffle(const uint16_t* src_ar64, + int src_stride_ar64, + uint16_t* dst_ar64, + int dst_stride_ar64, + const uint8_t* shuffler, + int width, + int height) { + int y; + void (*AR64ShuffleRow)(const uint8_t* src_ar64, uint8_t* dst_ar64, + const uint8_t* shuffler, int width) = AR64ShuffleRow_C; + if (!src_ar64 || !dst_ar64 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_ar64 = src_ar64 + (height - 1) * src_stride_ar64; + src_stride_ar64 = -src_stride_ar64; + } + // Coalesce rows. + if (src_stride_ar64 == width * 4 && dst_stride_ar64 == width * 4) { + width *= height; + height = 1; + src_stride_ar64 = dst_stride_ar64 = 0; + } + // Assembly versions can be reused if it's implemented with shuffle. +#if defined(HAS_ARGBSHUFFLEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + AR64ShuffleRow = ARGBShuffleRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + AR64ShuffleRow = ARGBShuffleRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBSHUFFLEROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + AR64ShuffleRow = ARGBShuffleRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + AR64ShuffleRow = ARGBShuffleRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBSHUFFLEROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + AR64ShuffleRow = ARGBShuffleRow_Any_NEON; + if (IS_ALIGNED(width, 4)) { + AR64ShuffleRow = ARGBShuffleRow_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + AR64ShuffleRow((uint8_t*)(src_ar64), (uint8_t*)(dst_ar64), shuffler, + width * 2); + src_ar64 += src_stride_ar64; + dst_ar64 += dst_stride_ar64; + } + return 0; +} + +// Gauss blur a float plane using Gaussian 5x5 filter with +// coefficients of 1, 4, 6, 4, 1. +// Each destination pixel is a blur of the 5x5 +// pixels from the source. +// Source edges are clamped. +// Edge is 2 pixels on each side, and interior is multiple of 4. +LIBYUV_API +int GaussPlane_F32(const float* src, + int src_stride, + float* dst, + int dst_stride, + int width, + int height) { + int y; + void (*GaussCol_F32)(const float* src0, const float* src1, const float* src2, + const float* src3, const float* src4, float* dst, + int width) = GaussCol_F32_C; + void (*GaussRow_F32)(const float* src, float* dst, int width) = + GaussRow_F32_C; + if (!src || !dst || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src = src + (height - 1) * src_stride; + src_stride = -src_stride; + } + +#if defined(HAS_GAUSSCOL_F32_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { + GaussCol_F32 = GaussCol_F32_NEON; + } +#endif +#if defined(HAS_GAUSSROW_F32_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { + GaussRow_F32 = GaussRow_F32_NEON; + } +#endif + { + // 2 pixels on each side, but aligned out to 16 bytes. + align_buffer_64(rowbuf, (4 + width + 4) * 4); + memset(rowbuf, 0, 16); + memset(rowbuf + (4 + width) * 4, 0, 16); + float* row = (float*)(rowbuf + 16); + const float* src0 = src; + const float* src1 = src; + const float* src2 = src; + const float* src3 = src2 + ((height > 1) ? src_stride : 0); + const float* src4 = src3 + ((height > 2) ? src_stride : 0); + + for (y = 0; y < height; ++y) { + GaussCol_F32(src0, src1, src2, src3, src4, row, width); + + // Extrude edge by 2 floats + row[-2] = row[-1] = row[0]; + row[width + 1] = row[width] = row[width - 1]; + + GaussRow_F32(row - 2, dst, width); + + src0 = src1; + src1 = src2; + src2 = src3; + src3 = src4; + if ((y + 2) < (height - 1)) { + src4 += src_stride; + } + dst += dst_stride; + } + free_aligned_buffer_64(rowbuf); + } + return 0; +} + // Sobel ARGB effect. static int ARGBSobelize(const uint8_t* src_argb, int src_stride_argb, @@ -3044,7 +4415,7 @@ static int ARGBSobelize(const uint8_t* src_argb, #if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYJRow = ARGBToYJRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { ARGBToYJRow = ARGBToYJRow_NEON; } } @@ -3057,11 +4428,19 @@ static int ARGBSobelize(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYJROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBToYJRow = ARGBToYJRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - ARGBToYJRow = ARGBToYJRow_MMI; +#if defined(HAS_ARGBTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYJRow = ARGBToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYJRow = ARGBToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_LASX; } } #endif @@ -3081,11 +4460,6 @@ static int ARGBSobelize(const uint8_t* src_argb, SobelYRow = SobelYRow_MSA; } #endif -#if defined(HAS_SOBELYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - SobelYRow = SobelYRow_MMI; - } -#endif #if defined(HAS_SOBELXROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelXRow = SobelXRow_SSE2; @@ -3101,11 +4475,6 @@ static int ARGBSobelize(const uint8_t* src_argb, SobelXRow = SobelXRow_MSA; } #endif -#if defined(HAS_SOBELXROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - SobelXRow = SobelXRow_MMI; - } -#endif { // 3 rows with edges before/after. const int kRowSize = (width + kEdge + 31) & ~31; @@ -3188,11 +4557,11 @@ int ARGBSobel(const uint8_t* src_argb, } } #endif -#if defined(HAS_SOBELROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - SobelRow = SobelRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - SobelRow = SobelRow_MMI; +#if defined(HAS_SOBELROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SobelRow = SobelRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + SobelRow = SobelRow_LSX; } } #endif @@ -3234,11 +4603,11 @@ int ARGBSobelToPlane(const uint8_t* src_argb, } } #endif -#if defined(HAS_SOBELTOPLANEROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - SobelToPlaneRow = SobelToPlaneRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - SobelToPlaneRow = SobelToPlaneRow_MMI; +#if defined(HAS_SOBELTOPLANEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SobelToPlaneRow = SobelToPlaneRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + SobelToPlaneRow = SobelToPlaneRow_LSX; } } #endif @@ -3281,11 +4650,11 @@ int ARGBSobelXY(const uint8_t* src_argb, } } #endif -#if defined(HAS_SOBELXYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - SobelXYRow = SobelXYRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - SobelXYRow = SobelXYRow_MMI; +#if defined(HAS_SOBELXYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SobelXYRow = SobelXYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + SobelXYRow = SobelXYRow_LSX; } } #endif @@ -3412,6 +4781,14 @@ int HalfFloatPlane(const uint16_t* src_y, } } #endif +#if defined(HAS_HALFFLOATROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + HalfFloatRow = HalfFloatRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + HalfFloatRow = HalfFloatRow_LSX; + } + } +#endif for (y = 0; y < height; ++y) { HalfFloatRow(src_y, dst_y, scale, width); @@ -3526,14 +4903,6 @@ int ARGBCopyAlpha(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBCOPYALPHAROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_MMI; - if (IS_ALIGNED(width, 2)) { - ARGBCopyAlphaRow = ARGBCopyAlphaRow_MMI; - } - } -#endif for (y = 0; y < height; ++y) { ARGBCopyAlphaRow(src_argb, dst_argb, width); @@ -3592,10 +4961,10 @@ int ARGBExtractAlpha(const uint8_t* src_argb, : ARGBExtractAlphaRow_Any_MSA; } #endif -#if defined(HAS_ARGBEXTRACTALPHAROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_MMI - : ARGBExtractAlphaRow_Any_MMI; +#if defined(HAS_ARGBEXTRACTALPHAROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_LSX + : ARGBExtractAlphaRow_Any_LSX; } #endif @@ -3649,14 +5018,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y, } } #endif -#if defined(HAS_ARGBCOPYYTOALPHAROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_MMI; - } - } -#endif for (y = 0; y < height; ++y) { ARGBCopyYToAlphaRow(src_y, dst_argb, width); @@ -3685,9 +5046,11 @@ int YUY2ToNV12(const uint8_t* src_yuy2, void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; + if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; @@ -3726,11 +5089,11 @@ int YUY2ToNV12(const uint8_t* src_yuy2, } } #endif -#if defined(HAS_SPLITUVROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - SplitUVRow = SplitUVRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - SplitUVRow = SplitUVRow_MMI; +#if defined(HAS_SPLITUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SplitUVRow = SplitUVRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + SplitUVRow = SplitUVRow_LSX; } } #endif @@ -3766,11 +5129,11 @@ int YUY2ToNV12(const uint8_t* src_yuy2, } } #endif -#if defined(HAS_INTERPOLATEROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - InterpolateRow = InterpolateRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - InterpolateRow = InterpolateRow_MMI; +#if defined(HAS_INTERPOLATEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + InterpolateRow = InterpolateRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + InterpolateRow = InterpolateRow_LSX; } } #endif @@ -3817,9 +5180,11 @@ int UYVYToNV12(const uint8_t* src_uyvy, void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; + if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; @@ -3858,11 +5223,11 @@ int UYVYToNV12(const uint8_t* src_uyvy, } } #endif -#if defined(HAS_SPLITUVROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - SplitUVRow = SplitUVRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - SplitUVRow = SplitUVRow_MMI; +#if defined(HAS_SPLITUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + SplitUVRow = SplitUVRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + SplitUVRow = SplitUVRow_LSX; } } #endif @@ -3898,11 +5263,11 @@ int UYVYToNV12(const uint8_t* src_uyvy, } } #endif -#if defined(HAS_INTERPOLATEROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - InterpolateRow = InterpolateRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - InterpolateRow = InterpolateRow_MMI; +#if defined(HAS_INTERPOLATEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + InterpolateRow = InterpolateRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + InterpolateRow = InterpolateRow_LSX; } } #endif @@ -3933,6 +5298,56 @@ int UYVYToNV12(const uint8_t* src_uyvy, return 0; } +// width and height are src size allowing odd size handling. +LIBYUV_API +void HalfMergeUVPlane(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + int y; + void (*HalfMergeUVRow)(const uint8_t* src_u, int src_stride_u, + const uint8_t* src_v, int src_stride_v, + uint8_t* dst_uv, int width) = HalfMergeUVRow_C; + + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } +#if defined(HAS_HALFMERGEUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { + HalfMergeUVRow = HalfMergeUVRow_NEON; + } +#endif +#if defined(HAS_HALFMERGEUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) { + HalfMergeUVRow = HalfMergeUVRow_SSSE3; + } +#endif +#if defined(HAS_HALFMERGEUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) { + HalfMergeUVRow = HalfMergeUVRow_AVX2; + } +#endif + for (y = 0; y < height - 1; y += 2) { + // Merge a row of U and V into a row of UV. + HalfMergeUVRow(src_u, src_stride_u, src_v, src_stride_v, dst_uv, width); + src_u += src_stride_u * 2; + src_v += src_stride_v * 2; + dst_uv += dst_stride_uv; + } + if (height & 1) { + HalfMergeUVRow(src_u, 0, src_v, 0, dst_uv, width); + } +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv |