diff options
author | Justin Green <greenjustin@google.com> | 2022-02-03 11:46:44 -0500 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-02-03 17:01:49 +0000 |
commit | b4ddbaf549a1bf5572bf703fd2862d1eb7380c6a (patch) | |
tree | 9e0a90646de7b1c50f40e3aeb452f749d297561b /source/planar_functions.cc | |
parent | 804980bbab748fd0e180cd6e7d9292ff49baf704 (diff) | |
download | libyuv-b4ddbaf549a1bf5572bf703fd2862d1eb7380c6a.tar.gz |
Add support for MM21.
Add support for MM21 to NV12 and I420 conversion, and add SIMD
optimizations for arm, aarch64, SSE2, and SSSE3 machines.
Bug: libyuv:915, b/215425056
Change-Id: Iecb0c33287f35766a6169d4adf3b7397f1ba8b5d
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3433269
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Justin Green <greenjustin@google.com>
Diffstat (limited to 'source/planar_functions.cc')
-rw-r--r-- | source/planar_functions.cc | 73 |
1 files changed, 71 insertions, 2 deletions
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index d7cb8dc7..f2f2d695 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -882,9 +882,20 @@ void DetilePlane(const uint8_t* src_y,
     dst_stride_y = -dst_stride_y;
   }
 
+#if defined(HAS_DETILEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    DetileRow = DetileRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      DetileRow = DetileRow_SSE2;
+    }
+  }
+#endif
 #if defined(HAS_DETILEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
-    DetileRow = DetileRow_NEON;
+  if (TestCpuFlag(kCpuHasNEON)) {
+    DetileRow = DetileRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      DetileRow = DetileRow_NEON;
+    }
   }
 #endif
 
@@ -900,6 +911,64 @@ void DetilePlane(const uint8_t* src_y,
   }
 }
 
+LIBYUV_API
+void DetileSplitUVPlane(const uint8_t* src_uv,
+                        int src_stride_uv,
+                        uint8_t* dst_u,
+                        int dst_stride_u,
+                        uint8_t* dst_v,
+                        int dst_stride_v,
+                        int width,
+                        int height,
+                        int tile_height) {
+  const ptrdiff_t src_tile_stride = 16 * tile_height;
+  int y;
+  void (*DetileSplitUVRow)(const uint8_t* src, ptrdiff_t src_tile_stride,
+                           uint8_t* dst_u, uint8_t* dst_v, int width) =
+      DetileSplitUVRow_C;
+  assert(src_stride_uv >= 0);
+  assert(tile_height > 0);
+  assert(src_stride_uv > 0);
+
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_u = dst_u + (height - 1) * dst_stride_u;
+    dst_stride_u = -dst_stride_u;
+    dst_v = dst_v + (height - 1) * dst_stride_v;
+    dst_stride_v = -dst_stride_v;
+  }
+
+#if defined(HAS_DETILESPLITUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    DetileSplitUVRow = DetileSplitUVRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      DetileSplitUVRow = DetileSplitUVRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_DETILESPLITROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    DetileSplitUVRow = DetileSplitUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      DetileSplitUVRow = DetileSplitUVRow_NEON;
+    }
+  }
+#endif
+
+  // Detile plane
+  for (y = 0; y < height; ++y) {
+    DetileSplitUVRow(src_uv, src_tile_stride, dst_u, dst_v, width);
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+    src_uv += 16;
+    // Advance to next row of tiles.
+    if ((y & (tile_height - 1)) == (tile_height - 1)) {
+      src_uv = src_uv - src_tile_stride + src_stride_uv * tile_height;
+    }
+  }
+}
+
 // Support function for NV12 etc RGB channels.
 // Width and height are plane sizes (typically half pixel width).
 LIBYUV_API