diff options
author | Frank Barchard <fbarchard@google.com> | 2022-04-15 11:21:25 -0700 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-04-15 18:46:09 +0000 |
commit | eec8dd37e827a78c3bdbb66da6caad89f4b8c4dd (patch) | |
tree | 0351a558844b5c62f4065df29f2180a83b307d26 /source/convert_argb.cc | |
parent | 18f91105162a6ebe7a46ee1c81e9ab67ca97a02b (diff) | |
download | libyuv-eec8dd37e827a78c3bdbb66da6caad89f4b8c4dd.tar.gz |
Change ScaleUVRowUp2_Bilinear_16_SSE2 to SSE41
Bug: libyuv:928
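xed -i scale_gcc.o shows that both ScaleUVRowUp2_Linear_16_SSE2 and ScaleUVRowUp2_Bilinear_16_SSE2 use packusdw, which xed classifies as SSE4 rather than SSE2: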
SYM ScaleUVRowUp2_Linear_16_SSE2:
XDIS 0: LOGICAL SSE2 660FEFED pxor xmm5, xmm5
XDIS 4: SSE SSE2 660F76E4 pcmpeqd xmm4, xmm4
XDIS 8: SSE SSE2 660F72D41F psrld xmm4, 0x1f
XDIS d: SSE SSE2 660F72F401 pslld xmm4, 0x1
XDIS 12: DATAXFER SSE2 F30F7E07 movq xmm0, qword ptr [rdi]
XDIS 16: DATAXFER SSE2 F30F7E4F04 movq xmm1, qword ptr [rdi+0x4]
XDIS 1b: SSE SSE2 660F61C5 punpcklwd xmm0, xmm5
XDIS 1f: SSE SSE2 660F61CD punpcklwd xmm1, xmm5
XDIS 23: DATAXFER SSE2 660F6FD0 movdqa xmm2, xmm0
XDIS 27: DATAXFER SSE2 660F6FD9 movdqa xmm3, xmm1
XDIS 2b: SSE SSE2 660F70D24E pshufd xmm2, xmm2, 0x4e
XDIS 30: SSE SSE2 660F70DB4E pshufd xmm3, xmm3, 0x4e
XDIS 35: SSE SSE2 660FFED4 paddd xmm2, xmm4
XDIS 39: SSE SSE2 660FFEDC paddd xmm3, xmm4
XDIS 3d: SSE SSE2 660FFED0 paddd xmm2, xmm0
XDIS 41: SSE SSE2 660FFED9 paddd xmm3, xmm1
XDIS 45: SSE SSE2 660FFEC0 paddd xmm0, xmm0
XDIS 49: SSE SSE2 660FFEC9 paddd xmm1, xmm1
XDIS 4d: SSE SSE2 660FFEC2 paddd xmm0, xmm2
XDIS 51: SSE SSE2 660FFECB paddd xmm1, xmm3
XDIS 55: SSE SSE2 660F72D002 psrld xmm0, 0x2
XDIS 5a: SSE SSE2 660F72D102 psrld xmm1, 0x2
XDIS 5f: SSE SSE4 660F382BC1 packusdw xmm0, xmm1
XDIS 64: DATAXFER SSE2 F30F7F06 movdqu xmmword ptr [rsi], xmm0
XDIS 68: MISC BASE 488D7F08 lea rdi, ptr [rdi+0x8]
XDIS 6c: MISC BASE 488D7610 lea rsi, ptr [rsi+0x10]
XDIS 70: BINARY BASE 83EA04 sub edx, 0x4
XDIS 73: COND_BR BASE 7F9D jnle 0x12 <ScaleUVRowUp2_Linear_16_SSE2+0x12>
XDIS 75: RET BASE C3 ret
SYM ScaleUVRowUp2_Bilinear_16_SSE2:
XDIS 0: LOGICAL SSE2 660FEFFF pxor xmm7, xmm7
XDIS 4: SSE SSE2 660F76F6 pcmpeqd xmm6, xmm6
XDIS 8: SSE SSE2 660F72D61F psrld xmm6, 0x1f
XDIS d: SSE SSE2 660F72F603 pslld xmm6, 0x3
XDIS 12: DATAXFER SSE2 F30F7E07 movq xmm0, qword ptr [rdi]
XDIS 16: DATAXFER SSE2 F30F7E4F04 movq xmm1, qword ptr [rdi+0x4]
XDIS 1b: SSE SSE2 660F61C7 punpcklwd xmm0, xmm7
XDIS 1f: SSE SSE2 660F61CF punpcklwd xmm1, xmm7
XDIS 23: DATAXFER SSE2 660F6FD0 movdqa xmm2, xmm0
XDIS 27: DATAXFER SSE2 660F6FD9 movdqa xmm3, xmm1
XDIS 2b: SSE SSE2 660F70D24E pshufd xmm2, xmm2, 0x4e
XDIS 30: SSE SSE2 660F70DB4E pshufd xmm3, xmm3, 0x4e
XDIS 35: SSE SSE2 660FFED0 paddd xmm2, xmm0
XDIS 39: SSE SSE2 660FFED9 paddd xmm3, xmm1
XDIS 3d: SSE SSE2 660FFEC0 paddd xmm0, xmm0
XDIS 41: SSE SSE2 660FFEC9 paddd xmm1, xmm1
XDIS 45: SSE SSE2 660FFEC2 paddd xmm0, xmm2
XDIS 49: SSE SSE2 660FFECB paddd xmm1, xmm3
XDIS 4d: DATAXFER SSE2 F30F7E1477 movq xmm2, qword ptr [rdi+rsi*2]
XDIS 52: DATAXFER SSE2 F30F7E5C7704 movq xmm3, qword ptr [rdi+rsi*2+0x4]
XDIS 58: SSE SSE2 660F61D7 punpcklwd xmm2, xmm7
XDIS 5c: SSE SSE2 660F61DF punpcklwd xmm3, xmm7
XDIS 60: DATAXFER SSE2 660F6FE2 movdqa xmm4, xmm2
XDIS 64: DATAXFER SSE2 660F6FEB movdqa xmm5, xmm3
XDIS 68: SSE SSE2 660F70E44E pshufd xmm4, xmm4, 0x4e
XDIS 6d: SSE SSE2 660F70ED4E pshufd xmm5, xmm5, 0x4e
XDIS 72: SSE SSE2 660FFEE2 paddd xmm4, xmm2
XDIS 76: SSE SSE2 660FFEEB paddd xmm5, xmm3
XDIS 7a: SSE SSE2 660FFED2 paddd xmm2, xmm2
XDIS 7e: SSE SSE2 660FFEDB paddd xmm3, xmm3
XDIS 82: SSE SSE2 660FFED4 paddd xmm2, xmm4
XDIS 86: SSE SSE2 660FFEDD paddd xmm3, xmm5
XDIS 8a: DATAXFER SSE2 660F6FE0 movdqa xmm4, xmm0
XDIS 8e: DATAXFER SSE2 660F6FEA movdqa xmm5, xmm2
XDIS 92: SSE SSE2 660FFEE0 paddd xmm4, xmm0
XDIS 96: SSE SSE2 660FFEEE paddd xmm5, xmm6
XDIS 9a: SSE SSE2 660FFEE0 paddd xmm4, xmm0
XDIS 9e: SSE SSE2 660FFEE5 paddd xmm4, xmm5
XDIS a2: SSE SSE2 660F72D404 psrld xmm4, 0x4
XDIS a7: DATAXFER SSE2 660F6FEA movdqa xmm5, xmm2
XDIS ab: SSE SSE2 660FFEEA paddd xmm5, xmm2
XDIS af: SSE SSE2 660FFEC6 paddd xmm0, xmm6
XDIS b3: SSE SSE2 660FFEEA paddd xmm5, xmm2
XDIS b7: SSE SSE2 660FFEE8 paddd xmm5, xmm0
XDIS bb: SSE SSE2 660F72D504 psrld xmm5, 0x4
XDIS c0: DATAXFER SSE2 660F6FC1 movdqa xmm0, xmm1
XDIS c4: DATAXFER SSE2 660F6FD3 movdqa xmm2, xmm3
XDIS c8: SSE SSE2 660FFEC1 paddd xmm0, xmm1
XDIS cc: SSE SSE2 660FFED6 paddd xmm2, xmm6
XDIS d0: SSE SSE2 660FFEC1 paddd xmm0, xmm1
XDIS d4: SSE SSE2 660FFEC2 paddd xmm0, xmm2
XDIS d8: SSE SSE2 660F72D004 psrld xmm0, 0x4
XDIS dd: DATAXFER SSE2 660F6FD3 movdqa xmm2, xmm3
XDIS e1: SSE SSE2 660FFED3 paddd xmm2, xmm3
XDIS e5: SSE SSE2 660FFECE paddd xmm1, xmm6
XDIS e9: SSE SSE2 660FFED3 paddd xmm2, xmm3
XDIS ed: SSE SSE2 660FFED1 paddd xmm2, xmm1
XDIS f1: SSE SSE2 660F72D204 psrld xmm2, 0x4
XDIS f6: SSE SSE4 660F382BE0 packusdw xmm4, xmm0
XDIS fb: DATAXFER SSE2 F30F7F22 movdqu xmmword ptr [rdx], xmm4
XDIS ff: SSE SSE4 660F382BEA packusdw xmm5, xmm2
XDIS 104: DATAXFER SSE2 F30F7F2C4A movdqu xmmword ptr [rdx+rcx*2], xmm5
XDIS 109: MISC BASE 488D7F08 lea rdi, ptr [rdi+0x8]
XDIS 10d: MISC BASE 488D5210 lea rdx, ptr [rdx+0x10]
XDIS 111: BINARY BASE 4183E804 sub r8d, 0x4
XDIS 115: COND_BR BASE 0F8FF7FEFFFF jnle 0x12 <ScaleUVRowUp2_Bilinear_16_SSE2+0x12>
XDIS 11b: RET BASE C3 ret
Change-Id: Ia20860e9c3c45368822cfd8877167ff0bf973dcc
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3587602
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/convert_argb.cc')
-rw-r--r-- | source/convert_argb.cc | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 11cda078..942df30a 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -6647,9 +6647,9 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, } #endif -#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2 - if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE2; +#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41 + if (TestCpuFlag(kCpuHasSSE41)) { + Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; } #endif @@ -6737,9 +6737,9 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, } #endif -#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2 - if (TestCpuFlag(kCpuHasSSE2)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE2; +#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41 + if (TestCpuFlag(kCpuHasSSE41)) { + ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; } #endif @@ -6813,9 +6813,9 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, } #endif -#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2 - if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE2; +#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41 + if (TestCpuFlag(kCpuHasSSE41)) { + Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; } #endif @@ -6903,9 +6903,9 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, } #endif -#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2 - if (TestCpuFlag(kCpuHasSSE2)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE2; +#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41 + if (TestCpuFlag(kCpuHasSSE41)) { + ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; } #endif |