diff options
author | Frank Barchard <fbarchard@google.com> | 2023-02-27 01:23:59 -0800 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2023-03-03 17:46:23 +0000 |
commit | f9b23b9cc0ca3bd27b9acc07ea0450cd5097175d (patch) | |
tree | a671b95e4d159f91e85d9e4053a0af6db8c1d7eb /include | |
parent | e66f436560fa8a4773fbd079837bc602cf97e35a (diff) | |
download | libyuv-f9b23b9cc0ca3bd27b9acc07ea0450cd5097175d.tar.gz |
Transpose 4x4 for SSE2 and AVX2
Skylake Xeon
AVX2 Transpose4x4_Opt (290 ms)
SSE2 Transpose4x4_Opt (302 ms)
C Transpose4x4_Opt (522 ms)
AMD Zen2
AVX2 Transpose4x4_Opt (136 ms)
SSE2 Transpose4x4_Opt (137 ms)
C Transpose4x4_Opt (431 ms)
Bug: None
Change-Id: I4997dbd5c5387c22bfd6c5960b421504e4bc8a2a
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4292946
Reviewed-by: Justin Green <greenjustin@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/libyuv/rotate_row.h | 26 |
-rw-r--r-- | include/libyuv/version.h | 2 |
2 files changed, 20 insertions, 8 deletions
```diff
diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h
index 64d0b59f..d047b703 100644
--- a/include/libyuv/rotate_row.h
+++ b/include/libyuv/rotate_row.h
@@ -42,6 +42,8 @@ extern "C" {
 // The following are available for GCC 32 or 64 bit:
 #if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
 #define HAS_TRANSPOSEWX8_SSSE3
+#define HAS_TRANSPOSE4X4_32_SSE2
+#define HAS_TRANSPOSE4X4_32_AVX2
 #endif
 
 // The following are available for 64 bit GCC:
@@ -56,6 +58,11 @@ extern "C" {
 #define HAS_TRANSPOSEUVWX8_NEON
 #endif
 
+#if !defined(LIBYUV_DISABLE_NEON) && \
+    (defined(LIBYUV_NEON) || defined(__aarch64__))
+#define HAS_TRANSPOSE4X4_32_NEON
+#endif
+
 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
 #define HAS_TRANSPOSEWX16_MSA
 #define HAS_TRANSPOSEUVWX16_MSA
@@ -240,19 +247,24 @@ void Transpose4x4_32_NEON(const uint8_t* src,
                           int dst_stride,
                           int width);
 
-void Transpose4x4_32_C(const uint8_t* src,
-                       int src_stride,
-                       uint8_t* dst,
-                       int dst_stride,
-                       int width);
+void Transpose4x4_32_SSE2(const uint8_t* src,
+                          int src_stride,
+                          uint8_t* dst,
+                          int dst_stride,
+                          int width);
 
-// Transpose 32 bit values (ARGB)
-void Transpose8x8_32_NEON(const uint8_t* src,
+void Transpose4x4_32_AVX2(const uint8_t* src,
                           int src_stride,
                           uint8_t* dst,
                           int dst_stride,
                           int width);
 
+void Transpose4x4_32_C(const uint8_t* src,
+                       int src_stride,
+                       uint8_t* dst,
+                       int dst_stride,
+                       int width);
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 42f81662..cc1e66e7 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1861
+#define LIBYUV_VERSION 1862
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
```