aboutsummaryrefslogtreecommitdiff
path: root/source/convert_from_argb.cc
diff options
context:
space:
mode:
author: Frank Barchard <fbarchard@google.com> 2023-02-20 02:21:22 -0800
committer: libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2023-02-22 21:19:08 +0000
commit: 88b050f337cc0ca2a51800fe7bf4737222c87344 (patch)
tree: a4ffa708c5e32fb6b0baffa42823098784bee677 /source/convert_from_argb.cc
parent: 2bdc210be9eb11ded16bf3ef1f6cadb0d4dcb0c2 (diff)
download: libyuv-88b050f337cc0ca2a51800fe7bf4737222c87344.tar.gz
MergeUV AVX512BW use assembly
- Convert MergeUVRow_AVX512BW to assembly
- Enable MergeUVRow_AVX512BW for Windows with clangcl
- MergeUVRow_AVX2 use vpmovzxbw and vpsllw
- MergeUVRow_16_AVX2 use vpmovzxbw and vpsllw with different shift for U and V

AMD Zen 4 640x360 100000 iterations
Was
AVX512 MergeUVPlane_Opt (884 ms)
AVX2 MergeUVPlane_Opt (945 ms)
AVX2 MergeUVPlane_16_Opt (2167 ms)
Now
AVX512 MergeUVPlane_Opt (865 ms)
AVX2 MergeUVPlane_Opt (943 ms)
SSE2 MergeUVPlane_Opt (973 ms)
AVX2 MergeUVPlane_16_Opt (2102 ms)

Bug: None
Change-Id: I658ada2a75d44c3f93be8bd3ed96f83d5fa2ab8d
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4271230
Reviewed-by: Fritz Koenig <frkoenig@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Diffstat (limited to 'source/convert_from_argb.cc')
-rw-r--r-- source/convert_from_argb.cc 10
1 files changed, 5 insertions, 5 deletions
diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc
index 1b8572a0..55516cbd 100644
--- a/source/convert_from_argb.cc
+++ b/source/convert_from_argb.cc
@@ -384,7 +384,7 @@ int ARGBToNV12(const uint8_t* src_argb,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
@@ -562,7 +562,7 @@ int ARGBToNV21(const uint8_t* src_argb,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
@@ -737,7 +737,7 @@ int ABGRToNV12(const uint8_t* src_abgr,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
@@ -913,7 +913,7 @@ int ABGRToNV21(const uint8_t* src_abgr,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
@@ -2948,7 +2948,7 @@ int RAWToJNV21(const uint8_t* src_raw,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}