about | summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author: Frank Barchard <fbarchard@google.com> 2023-02-27 01:23:59 -0800
committer: libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2023-03-03 17:46:23 +0000
commit: f9b23b9cc0ca3bd27b9acc07ea0450cd5097175d (patch)
tree: a671b95e4d159f91e85d9e4053a0af6db8c1d7eb /include
parent: e66f436560fa8a4773fbd079837bc602cf97e35a (diff)
download: libyuv-f9b23b9cc0ca3bd27b9acc07ea0450cd5097175d.tar.gz
Transpose 4x4 for SSE2 and AVX2
Skylake Xeon
AVX2 Transpose4x4_Opt (290 ms)
SSE2 Transpose4x4_Opt (302 ms)
C Transpose4x4_Opt (522 ms)

AMD Zen2
AVX2 Transpose4x4_Opt (136 ms)
SSE2 Transpose4x4_Opt (137 ms)
C Transpose4x4_Opt (431 ms)

Bug: None
Change-Id: I4997dbd5c5387c22bfd6c5960b421504e4bc8a2a
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4292946
Reviewed-by: Justin Green <greenjustin@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'include')
-rw-r--r-- include/libyuv/rotate_row.h | 26
-rw-r--r-- include/libyuv/version.h | 2
2 files changed, 20 insertions, 8 deletions
diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h
index 64d0b59f..d047b703 100644
--- a/include/libyuv/rotate_row.h
+++ b/include/libyuv/rotate_row.h
@@ -42,6 +42,8 @@ extern "C" {
// The following are available for GCC 32 or 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
#define HAS_TRANSPOSEWX8_SSSE3
+#define HAS_TRANSPOSE4X4_32_SSE2
+#define HAS_TRANSPOSE4X4_32_AVX2
#endif
// The following are available for 64 bit GCC:
@@ -56,6 +58,11 @@ extern "C" {
#define HAS_TRANSPOSEUVWX8_NEON
#endif
+#if !defined(LIBYUV_DISABLE_NEON) && \
+ (defined(LIBYUV_NEON) || defined(__aarch64__))
+#define HAS_TRANSPOSE4X4_32_NEON
+#endif
+
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_TRANSPOSEWX16_MSA
#define HAS_TRANSPOSEUVWX16_MSA
@@ -240,19 +247,24 @@ void Transpose4x4_32_NEON(const uint8_t* src,
int dst_stride,
int width);
-void Transpose4x4_32_C(const uint8_t* src,
- int src_stride,
- uint8_t* dst,
- int dst_stride,
- int width);
+void Transpose4x4_32_SSE2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
-// Transpose 32 bit values (ARGB)
-void Transpose8x8_32_NEON(const uint8_t* src,
+void Transpose4x4_32_AVX2(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
+void Transpose4x4_32_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 42f81662..cc1e66e7 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1861
+#define LIBYUV_VERSION 1862
#endif // INCLUDE_LIBYUV_VERSION_H_