aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrank Barchard <fbarchard@google.com>2022-04-15 11:21:25 -0700
committerlibyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com>2022-04-15 18:46:09 +0000
commiteec8dd37e827a78c3bdbb66da6caad89f4b8c4dd (patch)
tree0351a558844b5c62f4065df29f2180a83b307d26
parent18f91105162a6ebe7a46ee1c81e9ab67ca97a02b (diff)
downloadlibyuv-eec8dd37e827a78c3bdbb66da6caad89f4b8c4dd.tar.gz
Change ScaleUVRowUp2_Bilinear_16_SSE2 to SSE41
Bug: libyuv:928 xed -i scale_gcc.o: SYM ScaleUVRowUp2_Linear_16_SSE2: XDIS 0: LOGICAL SSE2 660FEFED pxor xmm5, xmm5 XDIS 4: SSE SSE2 660F76E4 pcmpeqd xmm4, xmm4 XDIS 8: SSE SSE2 660F72D41F psrld xmm4, 0x1f XDIS d: SSE SSE2 660F72F401 pslld xmm4, 0x1 XDIS 12: DATAXFER SSE2 F30F7E07 movq xmm0, qword ptr [rdi] XDIS 16: DATAXFER SSE2 F30F7E4F04 movq xmm1, qword ptr [rdi+0x4] XDIS 1b: SSE SSE2 660F61C5 punpcklwd xmm0, xmm5 XDIS 1f: SSE SSE2 660F61CD punpcklwd xmm1, xmm5 XDIS 23: DATAXFER SSE2 660F6FD0 movdqa xmm2, xmm0 XDIS 27: DATAXFER SSE2 660F6FD9 movdqa xmm3, xmm1 XDIS 2b: SSE SSE2 660F70D24E pshufd xmm2, xmm2, 0x4e XDIS 30: SSE SSE2 660F70DB4E pshufd xmm3, xmm3, 0x4e XDIS 35: SSE SSE2 660FFED4 paddd xmm2, xmm4 XDIS 39: SSE SSE2 660FFEDC paddd xmm3, xmm4 XDIS 3d: SSE SSE2 660FFED0 paddd xmm2, xmm0 XDIS 41: SSE SSE2 660FFED9 paddd xmm3, xmm1 XDIS 45: SSE SSE2 660FFEC0 paddd xmm0, xmm0 XDIS 49: SSE SSE2 660FFEC9 paddd xmm1, xmm1 XDIS 4d: SSE SSE2 660FFEC2 paddd xmm0, xmm2 XDIS 51: SSE SSE2 660FFECB paddd xmm1, xmm3 XDIS 55: SSE SSE2 660F72D002 psrld xmm0, 0x2 XDIS 5a: SSE SSE2 660F72D102 psrld xmm1, 0x2 XDIS 5f: SSE SSE4 660F382BC1 packusdw xmm0, xmm1 XDIS 64: DATAXFER SSE2 F30F7F06 movdqu xmmword ptr [rsi], xmm0 XDIS 68: MISC BASE 488D7F08 lea rdi, ptr [rdi+0x8] XDIS 6c: MISC BASE 488D7610 lea rsi, ptr [rsi+0x10] XDIS 70: BINARY BASE 83EA04 sub edx, 0x4 XDIS 73: COND_BR BASE 7F9D jnle 0x12 <ScaleUVRowUp2_Linear_16_SSE2+0x12> XDIS 75: RET BASE C3 ret SYM ScaleUVRowUp2_Bilinear_16_SSE2: XDIS 0: LOGICAL SSE2 660FEFFF pxor xmm7, xmm7 XDIS 4: SSE SSE2 660F76F6 pcmpeqd xmm6, xmm6 XDIS 8: SSE SSE2 660F72D61F psrld xmm6, 0x1f XDIS d: SSE SSE2 660F72F603 pslld xmm6, 0x3 XDIS 12: DATAXFER SSE2 F30F7E07 movq xmm0, qword ptr [rdi] XDIS 16: DATAXFER SSE2 F30F7E4F04 movq xmm1, qword ptr [rdi+0x4] XDIS 1b: SSE SSE2 660F61C7 punpcklwd xmm0, xmm7 XDIS 1f: SSE SSE2 660F61CF punpcklwd xmm1, xmm7 XDIS 23: DATAXFER SSE2 660F6FD0 movdqa xmm2, xmm0 XDIS 27: DATAXFER SSE2 660F6FD9 movdqa 
xmm3, xmm1 XDIS 2b: SSE SSE2 660F70D24E pshufd xmm2, xmm2, 0x4e XDIS 30: SSE SSE2 660F70DB4E pshufd xmm3, xmm3, 0x4e XDIS 35: SSE SSE2 660FFED0 paddd xmm2, xmm0 XDIS 39: SSE SSE2 660FFED9 paddd xmm3, xmm1 XDIS 3d: SSE SSE2 660FFEC0 paddd xmm0, xmm0 XDIS 41: SSE SSE2 660FFEC9 paddd xmm1, xmm1 XDIS 45: SSE SSE2 660FFEC2 paddd xmm0, xmm2 XDIS 49: SSE SSE2 660FFECB paddd xmm1, xmm3 XDIS 4d: DATAXFER SSE2 F30F7E1477 movq xmm2, qword ptr [rdi+rsi*2] XDIS 52: DATAXFER SSE2 F30F7E5C7704 movq xmm3, qword ptr [rdi+rsi*2+0x4] XDIS 58: SSE SSE2 660F61D7 punpcklwd xmm2, xmm7 XDIS 5c: SSE SSE2 660F61DF punpcklwd xmm3, xmm7 XDIS 60: DATAXFER SSE2 660F6FE2 movdqa xmm4, xmm2 XDIS 64: DATAXFER SSE2 660F6FEB movdqa xmm5, xmm3 XDIS 68: SSE SSE2 660F70E44E pshufd xmm4, xmm4, 0x4e XDIS 6d: SSE SSE2 660F70ED4E pshufd xmm5, xmm5, 0x4e XDIS 72: SSE SSE2 660FFEE2 paddd xmm4, xmm2 XDIS 76: SSE SSE2 660FFEEB paddd xmm5, xmm3 XDIS 7a: SSE SSE2 660FFED2 paddd xmm2, xmm2 XDIS 7e: SSE SSE2 660FFEDB paddd xmm3, xmm3 XDIS 82: SSE SSE2 660FFED4 paddd xmm2, xmm4 XDIS 86: SSE SSE2 660FFEDD paddd xmm3, xmm5 XDIS 8a: DATAXFER SSE2 660F6FE0 movdqa xmm4, xmm0 XDIS 8e: DATAXFER SSE2 660F6FEA movdqa xmm5, xmm2 XDIS 92: SSE SSE2 660FFEE0 paddd xmm4, xmm0 XDIS 96: SSE SSE2 660FFEEE paddd xmm5, xmm6 XDIS 9a: SSE SSE2 660FFEE0 paddd xmm4, xmm0 XDIS 9e: SSE SSE2 660FFEE5 paddd xmm4, xmm5 XDIS a2: SSE SSE2 660F72D404 psrld xmm4, 0x4 XDIS a7: DATAXFER SSE2 660F6FEA movdqa xmm5, xmm2 XDIS ab: SSE SSE2 660FFEEA paddd xmm5, xmm2 XDIS af: SSE SSE2 660FFEC6 paddd xmm0, xmm6 XDIS b3: SSE SSE2 660FFEEA paddd xmm5, xmm2 XDIS b7: SSE SSE2 660FFEE8 paddd xmm5, xmm0 XDIS bb: SSE SSE2 660F72D504 psrld xmm5, 0x4 XDIS c0: DATAXFER SSE2 660F6FC1 movdqa xmm0, xmm1 XDIS c4: DATAXFER SSE2 660F6FD3 movdqa xmm2, xmm3 XDIS c8: SSE SSE2 660FFEC1 paddd xmm0, xmm1 XDIS cc: SSE SSE2 660FFED6 paddd xmm2, xmm6 XDIS d0: SSE SSE2 660FFEC1 paddd xmm0, xmm1 XDIS d4: SSE SSE2 660FFEC2 paddd xmm0, xmm2 XDIS d8: SSE SSE2 660F72D004 psrld xmm0, 0x4 
XDIS dd: DATAXFER SSE2 660F6FD3 movdqa xmm2, xmm3 XDIS e1: SSE SSE2 660FFED3 paddd xmm2, xmm3 XDIS e5: SSE SSE2 660FFECE paddd xmm1, xmm6 XDIS e9: SSE SSE2 660FFED3 paddd xmm2, xmm3 XDIS ed: SSE SSE2 660FFED1 paddd xmm2, xmm1 XDIS f1: SSE SSE2 660F72D204 psrld xmm2, 0x4 XDIS f6: SSE SSE4 660F382BE0 packusdw xmm4, xmm0 XDIS fb: DATAXFER SSE2 F30F7F22 movdqu xmmword ptr [rdx], xmm4 XDIS ff: SSE SSE4 660F382BEA packusdw xmm5, xmm2 XDIS 104: DATAXFER SSE2 F30F7F2C4A movdqu xmmword ptr [rdx+rcx*2], xmm5 XDIS 109: MISC BASE 488D7F08 lea rdi, ptr [rdi+0x8] XDIS 10d: MISC BASE 488D5210 lea rdx, ptr [rdx+0x10] XDIS 111: BINARY BASE 4183E804 sub r8d, 0x4 XDIS 115: COND_BR BASE 0F8FF7FEFFFF jnle 0x12 <ScaleUVRowUp2_Bilinear_16_SSE2+0x12> XDIS 11b: RET BASE C3 ret Change-Id: Ia20860e9c3c45368822cfd8877167ff0bf973dcc Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3587602 Reviewed-by: richard winterton <rrwinterton@gmail.com> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
-rw-r--r--README.chromium2
-rw-r--r--include/libyuv/scale_row.h36
-rw-r--r--include/libyuv/version.h2
-rw-r--r--source/convert.cc12
-rw-r--r--source/convert_argb.cc24
-rw-r--r--source/convert_from.cc6
-rw-r--r--source/planar_functions.cc9
-rw-r--r--source/scale_any.cc12
-rw-r--r--source/scale_gcc.cc21
-rw-r--r--source/scale_uv.cc12
10 files changed, 72 insertions, 64 deletions
diff --git a/README.chromium b/README.chromium
index a96f0529..13fa8747 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1819
+Version: 1820
License: BSD
License File: LICENSE
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index 682b3342..cc1c9061 100644
--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -86,8 +86,8 @@ extern "C" {
#define HAS_SCALEROWUP2BILINEAR_16_SSE2
#define HAS_SCALEUVROWUP2LINEAR_SSSE3
#define HAS_SCALEUVROWUP2BILINEAR_SSSE3
-#define HAS_SCALEUVROWUP2LINEAR_16_SSE2
-#define HAS_SCALEUVROWUP2BILINEAR_16_SSE2
+#define HAS_SCALEUVROWUP2LINEAR_16_SSE41
+#define HAS_SCALEUVROWUP2BILINEAR_16_SSE41
#endif
// The following are available for gcc/clang x86 platforms, but
@@ -1235,22 +1235,22 @@ void ScaleUVRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
-void ScaleUVRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
- uint16_t* dst_ptr,
- int dst_width);
-void ScaleUVRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst_ptr,
- ptrdiff_t dst_stride,
- int dst_width);
-void ScaleUVRowUp2_Linear_16_Any_SSE2(const uint16_t* src_ptr,
- uint16_t* dst_ptr,
- int dst_width);
-void ScaleUVRowUp2_Bilinear_16_Any_SSE2(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst_ptr,
- ptrdiff_t dst_stride,
- int dst_width);
+void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ int dst_width);
+void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst_ptr,
+ ptrdiff_t dst_stride,
+ int dst_width);
+void ScaleUVRowUp2_Linear_16_Any_SSE41(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ int dst_width);
+void ScaleUVRowUp2_Bilinear_16_Any_SSE41(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst_ptr,
+ ptrdiff_t dst_stride,
+ int dst_width);
void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 47580436..f42a46b9 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1819
+#define LIBYUV_VERSION 1820
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/source/convert.cc b/source/convert.cc
index 38f0a0a5..502f002d 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -83,7 +83,8 @@ int I420Copy(const uint8_t* src_y,
int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
- if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
return -1;
}
// Negative height means invert the image.
@@ -125,7 +126,8 @@ int I010Copy(const uint16_t* src_y,
int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
- if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
return -1;
}
// Negative height means invert the image.
@@ -169,7 +171,8 @@ static int Planar16bitTo8bit(const uint16_t* src_y,
int uv_width = SUBSAMPLE(width, subsample_x, subsample_x);
int uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
int scale = 1 << (24 - depth);
- if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
return -1;
}
// Negative height means invert the image.
@@ -539,7 +542,8 @@ int I422ToI210(const uint8_t* src_y,
int width,
int height) {
int halfwidth = (width + 1) >> 1;
- if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
return -1;
}
// Negative height means invert the image.
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index 11cda078..942df30a 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -6647,9 +6647,9 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
}
#endif
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2
- if (TestCpuFlag(kCpuHasSSE2)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE2;
+#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
+ if (TestCpuFlag(kCpuHasSSE41)) {
+ Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
}
#endif
@@ -6737,9 +6737,9 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y,
}
#endif
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2
- if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE2;
+#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
+ if (TestCpuFlag(kCpuHasSSE41)) {
+ ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
}
#endif
@@ -6813,9 +6813,9 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
}
#endif
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2
- if (TestCpuFlag(kCpuHasSSE2)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE2;
+#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
+ if (TestCpuFlag(kCpuHasSSE41)) {
+ Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
}
#endif
@@ -6903,9 +6903,9 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
}
#endif
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2
- if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE2;
+#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
+ if (TestCpuFlag(kCpuHasSSE41)) {
+ ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
}
#endif
diff --git a/source/convert_from.cc b/source/convert_from.cc
index 932a32b8..8bd07e4c 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -85,7 +85,8 @@ int I420ToI010(const uint8_t* src_y,
int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
- if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
return -1;
}
// Negative height means invert the image.
@@ -129,7 +130,8 @@ int I420ToI012(const uint8_t* src_y,
int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
- if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
return -1;
}
// Negative height means invert the image.
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index a6979264..42fd9c51 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -240,7 +240,8 @@ int I422Copy(const uint8_t* src_y,
int height) {
int halfwidth = (width + 1) >> 1;
- if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
return -1;
}
@@ -279,7 +280,8 @@ int I444Copy(const uint8_t* src_y,
int dst_stride_v,
int width,
int height) {
- if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
return -1;
}
// Negative height means invert the image.
@@ -319,7 +321,8 @@ int I210Copy(const uint16_t* src_y,
int height) {
int halfwidth = (width + 1) >> 1;
- if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
return -1;
}
diff --git a/source/scale_any.cc b/source/scale_any.cc
index 0f6c345d..e820584b 100644
--- a/source/scale_any.cc
+++ b/source/scale_any.cc
@@ -924,9 +924,9 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2,
uint8_t)
#endif
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2
-SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE2,
- ScaleUVRowUp2_Linear_16_SSE2,
+#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
+SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41,
+ ScaleUVRowUp2_Linear_16_SSE41,
ScaleUVRowUp2_Linear_16_C,
3,
uint16_t)
@@ -1022,9 +1022,9 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2,
uint8_t)
#endif
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2
-SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE2,
- ScaleUVRowUp2_Bilinear_16_SSE2,
+#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
+SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41,
+ ScaleUVRowUp2_Bilinear_16_SSE41,
ScaleUVRowUp2_Bilinear_16_C,
7,
uint16_t)
diff --git a/source/scale_gcc.cc b/source/scale_gcc.cc
index 0ac65f35..d827c0e7 100644
--- a/source/scale_gcc.cc
+++ b/source/scale_gcc.cc
@@ -1285,7 +1285,6 @@ void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
"psrlw $2,%%xmm2 \n" // 3/4*near+1/4*far (hi)
"packuswb %%xmm2,%%xmm0 \n"
"movdqu %%xmm0,(%1) \n"
-
"lea 0x8(%0),%0 \n"
"lea 0x10(%1),%1 \n" // 8 sample to 16 sample
"sub $0x10,%2 \n"
@@ -2666,10 +2665,10 @@ void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
}
#endif
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2
-void ScaleUVRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
- uint16_t* dst_ptr,
- int dst_width) {
+#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
+void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ int dst_width) {
asm volatile(
"pxor %%xmm5,%%xmm5 \n"
"pcmpeqd %%xmm4,%%xmm4 \n"
@@ -2716,12 +2715,12 @@ void ScaleUVRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
}
#endif
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2
-void ScaleUVRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst_ptr,
- ptrdiff_t dst_stride,
- int dst_width) {
+#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
+void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst_ptr,
+ ptrdiff_t dst_stride,
+ int dst_width) {
asm volatile(
"pxor %%xmm7,%%xmm7 \n"
"pcmpeqd %%xmm6,%%xmm6 \n"
diff --git a/source/scale_uv.cc b/source/scale_uv.cc
index 67cc26b8..f4b56427 100644
--- a/source/scale_uv.cc
+++ b/source/scale_uv.cc
@@ -747,9 +747,9 @@ void ScaleUVLinearUp2_16(int src_width,
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2
- if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE2;
+#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
+ if (TestCpuFlag(kCpuHasSSE41)) {
+ ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
}
#endif
@@ -800,9 +800,9 @@ void ScaleUVBilinearUp2_16(int src_width,
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2
- if (TestCpuFlag(kCpuHasSSE2)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE2;
+#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
+ if (TestCpuFlag(kCpuHasSSE41)) {
+ Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
}
#endif