 include/libyuv/scale_row.h |  36
 source/scale.cc            |  46
 source/scale_any.cc        |  22
 source/scale_common.cc     |  44
 source/scale_win.cc        | 107
 unit_test/convert_test.cc  |  30
 6 files changed, 118 insertions(+), 167 deletions(-)
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index b78a56bc..23b2471f 100644
--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -30,13 +30,11 @@ extern "C" {
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
-
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_FIXEDDIV1_X86
#define HAS_FIXEDDIV_X86
-#define HAS_SCALEADDROWS_SSE2
#define HAS_SCALEARGBCOLS_SSE2
#define HAS_SCALEARGBCOLSUP2_SSE2
#define HAS_SCALEARGBFILTERCOLS_SSSE3
@@ -50,17 +48,21 @@ extern "C" {
#define HAS_SCALEROWDOWN4_SSE2
#endif
-// The following are available on VS2012.
+// The following are available on VS2012:
#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
-#define HAS_SCALEADDROWS_AVX2
+#define HAS_SCALEADDROW_AVX2
#define HAS_SCALEROWDOWN2_AVX2
#define HAS_SCALEROWDOWN4_AVX2
#endif
+// The following are available on Visual C:
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && !defined(__clang__)
+#define HAS_SCALEADDROW_SSE2
+#endif
+
// The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
-#define HAS_SCALEADDROWS_NEON
#define HAS_SCALEARGBCOLS_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON
@@ -183,10 +185,8 @@ void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width);
-void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height);
-void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint32* dst_ptr, int src_width, int src_height);
+void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width);
void ScaleARGBRowDown2_C(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
@@ -289,14 +289,10 @@ void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height);
-void ScaleAddRows_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height);
-void ScaleAddRows_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height);
-void ScaleAddRows_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height);
+void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
@@ -442,10 +438,8 @@ void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
-void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height);
-void ScaleAddRows_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height);
+void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
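The signature change above is the heart of the patch: ScaleAddRows summed src_height rows in one call, so it needed src_stride and src_height; ScaleAddRow accumulates a single row into dst_ptr and leaves the vertical loop to the caller. A minimal sketch (plain C, using stdint types rather than libyuv's uint8/uint16 typedefs) of how a caller drives the new API:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Sketch only: sums 'boxheight' source rows into a zeroed uint16
     * accumulator row -- the pattern ScalePlaneBox adopts below. */
    static void SumRows(const uint8_t* src, ptrdiff_t src_stride,
                        int src_width, int boxheight, uint16_t* row16) {
      int k;
      memset(row16, 0, src_width * sizeof(uint16_t));  /* clear accumulator */
      for (k = 0; k < boxheight; ++k) {
        ScaleAddRow_C(src, row16, src_width);          /* row16[i] += src[i] */
        src += src_stride;
      }
    }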
diff --git a/source/scale.cc b/source/scale.cc
index 5460cc7e..0a01304c 100644
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -733,7 +733,7 @@ static void ScalePlaneBox(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr) {
- int j;
+ int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -750,29 +750,29 @@ static void ScalePlaneBox(int src_width, int src_height,
const uint16* src_ptr, uint8* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_C:
((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
- void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
-#if defined(HAS_SCALEADDROWS_SSE2)
+ void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =
+ ScaleAddRow_C;
+#if defined(HAS_SCALEADDROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleAddRows = ScaleAddRows_Any_SSE2;
+ ScaleAddRow = ScaleAddRow_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
- ScaleAddRows = ScaleAddRows_SSE2;
+ ScaleAddRow = ScaleAddRow_SSE2;
}
}
#endif
-#if defined(HAS_SCALEADDROWS_AVX2)
+#if defined(HAS_SCALEADDROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleAddRows = ScaleAddRows_Any_AVX2;
+ ScaleAddRow = ScaleAddRow_Any_AVX2;
if (IS_ALIGNED(src_width, 32)) {
- ScaleAddRows = ScaleAddRows_AVX2;
+ ScaleAddRow = ScaleAddRow_AVX2;
}
}
#endif
-#if defined(HAS_SCALEADDROWS_NEON)
+#if defined(HAS_SCALEADDROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleAddRows = ScaleAddRows_Any_NEON;
+ ScaleAddRow = ScaleAddRow_Any_NEON;
if (IS_ALIGNED(src_width, 16)) {
- ScaleAddRows = ScaleAddRows_NEON;
+ ScaleAddRow = ScaleAddRow_NEON;
}
}
#endif
@@ -786,7 +786,11 @@ static void ScalePlaneBox(int src_width, int src_height,
y = max_y;
}
boxheight = MIN1((y >> 16) - iy);
- ScaleAddRows(src, src_stride, (uint16*)(row16), src_width, boxheight);
+ memset(row16, 0, src_width * 2);
+ for (k = 0; k < boxheight; ++k) {
+ ScaleAddRow(src, (uint16*)(row16), src_width);
+ src += src_stride;
+ }
ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
dst_ptr += dst_stride;
}
@@ -798,7 +802,7 @@ static void ScalePlaneBox_16(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_ptr, uint16* dst_ptr) {
- int j;
+ int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -814,12 +818,12 @@ static void ScalePlaneBox_16(int src_width, int src_height,
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
- void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;
+ void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =
+ ScaleAddRow_16_C;
-#if defined(HAS_SCALEADDROWS_16_SSE2)
+#if defined(HAS_SCALEADDROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
- ScaleAddRows = ScaleAddRows_16_SSE2;
+ ScaleAddRow = ScaleAddRow_16_SSE2;
}
#endif
@@ -832,7 +836,11 @@ static void ScalePlaneBox_16(int src_width, int src_height,
y = max_y;
}
boxheight = MIN1((y >> 16) - iy);
- ScaleAddRows(src, src_stride, (uint32*)(row32), src_width, boxheight);
+ memset(row32, 0, src_width * 4);
+ for (k = 0; k < boxheight; ++k) {
+ ScaleAddRow(src, (uint32*)(row32), src_width);
+ src += src_stride;
+ }
ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);
dst_ptr += dst_stride;
}
diff --git a/source/scale_any.cc b/source/scale_any.cc
index b3f2ecf2..2f6a2c8b 100644
--- a/source/scale_any.cc
+++ b/source/scale_any.cc
@@ -169,25 +169,23 @@ SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON,
#endif
// Add rows box filter scale down.
-#define SAANY(NAMEANY, SCALEADDROWS_SIMD, SCALEADDROWS_C, MASK) \
- void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
- uint16* dst_ptr, int src_width, int src_height) { \
+#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
+ void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) { \
int n = src_width & ~MASK; \
if (n > 0) { \
- SCALEADDROWS_SIMD(src_ptr, src_stride, dst_ptr, n, src_height); \
+ SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
} \
- SCALEADDROWS_C(src_ptr + n, src_stride, \
- dst_ptr + n, src_width & MASK, src_height); \
+ SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
}
-#ifdef HAS_SCALEADDROWS_SSE2
-SAANY(ScaleAddRows_Any_SSE2, ScaleAddRows_SSE2, ScaleAddRows_C, 15)
+#ifdef HAS_SCALEADDROW_SSE2
+SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
#endif
-#ifdef HAS_SCALEADDROWS_AVX2
-SAANY(ScaleAddRows_Any_AVX2, ScaleAddRows_AVX2, ScaleAddRows_C, 31)
+#ifdef HAS_SCALEADDROW_AVX2
+SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
#endif
-#ifdef HAS_SCALEADDROWS_NEON
-SAANY(ScaleAddRows_Any_NEON, ScaleAddRows_NEON, ScaleAddRows_C, 15)
+#ifdef HAS_SCALEADDROW_NEON
+SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#endif
#undef SAANY
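For reference, the SSE2 instantiation of SAANY above expands (modulo formatting) to the following: the SIMD kernel handles the largest multiple-of-16 prefix and the C kernel finishes the remainder.

    void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr,
                              int src_width) {
      int n = src_width & ~15;                  /* multiple-of-16 prefix */
      if (n > 0) {
        ScaleAddRow_SSE2(src_ptr, dst_ptr, n);  /* SIMD body */
      }
      /* C tail for the last src_width & 15 elements. */
      ScaleAddRow_C(src_ptr + n, dst_ptr + n, src_width & 15);
    }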
diff --git a/source/scale_common.cc b/source/scale_common.cc
index 014d9566..1711f3d5 100644
--- a/source/scale_common.cc
+++ b/source/scale_common.cc
@@ -621,39 +621,31 @@ void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
}
}
-void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) {
+void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
int x;
assert(src_width > 0);
- assert(src_height > 0);
- for (x = 0; x < src_width; ++x) {
- const uint8* s = src_ptr + x;
- unsigned int sum = 0u;
- int y;
- for (y = 0; y < src_height; ++y) {
- sum += s[0];
- s += src_stride;
- }
- // TODO(fbarchard): Consider limiting height to 256 to avoid overflow.
- dst_ptr[x] = sum < 65535u ? sum : 65535u;
+ for (x = 0; x < src_width - 1; x += 2) {
+ dst_ptr[0] += src_ptr[0];
+ dst_ptr[1] += src_ptr[1];
+ src_ptr += 2;
+ dst_ptr += 2;
+ }
+ if (src_width & 1) {
+ dst_ptr[0] += src_ptr[0];
}
}
-void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint32* dst_ptr, int src_width, int src_height) {
+void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
int x;
assert(src_width > 0);
- assert(src_height > 0);
- for (x = 0; x < src_width; ++x) {
- const uint16* s = src_ptr + x;
- unsigned int sum = 0u;
- int y;
- for (y = 0; y < src_height; ++y) {
- sum += s[0];
- s += src_stride;
- }
- // No risk of overflow here now
- dst_ptr[x] = sum;
+ for (x = 0; x < src_width - 1; x += 2) {
+ dst_ptr[0] += src_ptr[0];
+ dst_ptr[1] += src_ptr[1];
+ src_ptr += 2;
+ dst_ptr += 2;
+ }
+ if (src_width & 1) {
+ dst_ptr[0] += src_ptr[0];
}
}
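One behavioral nuance: the removed ScaleAddRows_C clamped each sum to 65535 (note the removed TODO about limiting height to 256), while the new ScaleAddRow_C wraps on uint16 overflow; the SSE2/AVX2 kernels below still saturate via paddusw/vpaddusw. A hypothetical saturating C variant that would match the SIMD semantics:

    /* Hypothetical, not part of the patch: per-row accumulate with the
     * saturating semantics of paddusw. */
    void ScaleAddRowSat_C(const uint8* src_ptr, uint16* dst_ptr,
                          int src_width) {
      int x;
      for (x = 0; x < src_width; ++x) {
        unsigned int sum = dst_ptr[x] + src_ptr[x];
        dst_ptr[x] = sum < 65535u ? (uint16)(sum) : 65535u;
      }
    }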
diff --git a/source/scale_win.cc b/source/scale_win.cc
index 4246f717..01a81635 100644
--- a/source/scale_win.cc
+++ b/source/scale_win.cc
@@ -800,104 +800,61 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
}
}
-// Reads 16xN bytes and produces 16 shorts at a time.
+// Reads 16 bytes and accumulates to 16 shorts at a time.
__declspec(naked)
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) {
+void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
__asm {
- push esi
- push edi
- push ebx
- push ebp
- mov esi, [esp + 16 + 4] // src_ptr
- mov edx, [esp + 16 + 8] // src_stride
- mov edi, [esp + 16 + 12] // dst_ptr
- mov ecx, [esp + 16 + 16] // dst_width
- mov ebx, [esp + 16 + 20] // height
- mov eax, esi // row pointer
- mov ebp, ebx // height
- pxor xmm0, xmm0 // clear accumulators
- pxor xmm1, xmm1
- pxor xmm4, xmm4
+ mov eax, [esp + 4] // src_ptr
+ mov edx, [esp + 8] // dst_ptr
+ mov ecx, [esp + 12] // src_width
+ pxor xmm5, xmm5
// sum rows
xloop:
- movdqu xmm2, [eax] // read 16 pixels
- lea eax, [eax + edx] // advance to next row
- movdqa xmm3, xmm2
- punpcklbw xmm2, xmm4
- punpckhbw xmm3, xmm4
+ movdqu xmm3, [eax] // read 16 bytes
+ lea eax, [eax + 16]
+ movdqu xmm0, [edx] // read 16 words from destination
+ movdqu xmm1, [edx + 16]
+ movdqa xmm2, xmm3
+ punpcklbw xmm2, xmm5
+ punpckhbw xmm3, xmm5
paddusw xmm0, xmm2 // sum 16 words
paddusw xmm1, xmm3
- sub ebp, 1
- jg xloop
-
- movdqu [edi], xmm0
- movdqu [edi + 16], xmm1
- lea edi, [edi + 32] // dst_ptr += 16
- lea esi, [esi + 16] // src_ptr += 16
- mov eax, esi // row pointer
- mov ebp, ebx // height
- pxor xmm0, xmm0 // clear accumulators
- pxor xmm1, xmm1
+ movdqu [edx], xmm0 // write 16 words to destination
+ movdqu [edx + 16], xmm1
+ lea edx, [edx + 32]
sub ecx, 16
jg xloop
-
- pop ebp
- pop ebx
- pop edi
- pop esi
ret
}
}
-// Reads 32xN bytes and produces 32 shorts at a time.
+// Reads 32 bytes and accumulates to 32 shorts at a time.
__declspec(naked)
-void ScaleAddRows_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) {
+void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
__asm {
- push esi
- push edi
- push ebx
- push ebp
- mov esi, [esp + 16 + 4] // src_ptr
- mov edx, [esp + 16 + 8] // src_stride
- mov edi, [esp + 16 + 12] // dst_ptr
- mov ecx, [esp + 16 + 16] // dst_width
- mov ebx, [esp + 16 + 20] // height
- mov eax, esi // row pointer
- mov ebp, ebx // height
- vpxor ymm0, ymm0, ymm0 // clear accumulators
- vpxor ymm1, ymm1, ymm1
- vpxor ymm4, ymm4, ymm4
+ mov eax, [esp + 4] // src_ptr
+ mov edx, [esp + 8] // dst_ptr
+ mov ecx, [esp + 12] // src_width
+ vpxor ymm5, ymm5, ymm5
// sum rows
xloop:
- vmovdqu ymm2, [eax] // read 16 pixels
- vpermq ymm2, ymm2, 0xd8 // unmutate for vpunpck
- lea eax, [eax + edx] // advance to next row
- vpunpckhbw ymm3, ymm2, ymm4
- vpunpcklbw ymm2, ymm2, ymm4
+ vmovdqu ymm3, [eax] // read 32 bytes
+ vpermq ymm3, ymm3, 0xd8 // unmutate for vpunpck
+ lea eax, [eax + 32]
+ vmovdqu ymm0, [edx] // read 32 words from destination
+ vmovdqu ymm1, [edx + 32]
+ vpunpcklbw ymm2, ymm3, ymm5
+ vpunpckhbw ymm3, ymm3, ymm5
vpaddusw ymm0, ymm0, ymm2 // sum 16 words
vpaddusw ymm1, ymm1, ymm3
- sub ebp, 1
- jg xloop
-
- vmovdqu [edi], ymm0
- vmovdqu [edi + 32], ymm1
- lea edi, [edi + 64] // dst_ptr
- lea esi, [esi + 32] // src_ptr
- mov eax, esi // row pointer
- mov ebp, ebx // height
- vpxor ymm0, ymm0, ymm0 // clear accumulators
- vpxor ymm1, ymm1, ymm1
+ vmovdqu [edx], ymm0 // write 32 words to destination
+ vmovdqu [edx + 32], ymm1
+ lea edx, [edx + 64]
sub ecx, 32
jg xloop
- pop ebp
- pop ebx
- pop edi
- pop esi
vzeroupper
ret
}
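For readers who prefer intrinsics to naked asm, an illustrative SSE2 equivalent of ScaleAddRow_SSE2 above (not part of the patch; like the asm, it assumes src_width is a positive multiple of 16):

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>

    void ScaleAddRow_SSE2_sketch(const uint8_t* src_ptr, uint16_t* dst_ptr,
                                 int src_width) {
      const __m128i zero = _mm_setzero_si128();
      int x;
      for (x = 0; x < src_width; x += 16) {
        __m128i s = _mm_loadu_si128((const __m128i*)(src_ptr + x)); /* 16 bytes */
        __m128i lo = _mm_unpacklo_epi8(s, zero);  /* low 8 bytes -> 8 words */
        __m128i hi = _mm_unpackhi_epi8(s, zero);  /* high 8 bytes -> 8 words */
        __m128i d0 = _mm_loadu_si128((const __m128i*)(dst_ptr + x));
        __m128i d1 = _mm_loadu_si128((const __m128i*)(dst_ptr + x + 8));
        d0 = _mm_adds_epu16(d0, lo);  /* saturating add, like paddusw */
        d1 = _mm_adds_epu16(d1, hi);
        _mm_storeu_si128((__m128i*)(dst_ptr + x), d0);
        _mm_storeu_si128((__m128i*)(dst_ptr + x + 8), d1);
      }
    }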
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index cfffcf8b..54822f06 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -78,7 +78,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_v_opt, 103, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
src_u + OFF, \
SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
@@ -211,7 +211,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
memset(dst_y_opt, 101, kWidth * kHeight); \
memset(dst_uv_opt, 102, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
src_u + OFF, \
SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
@@ -326,7 +326,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_v_opt, 103, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
src_uv + OFF, \
2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
@@ -435,7 +435,7 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
} \
memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
@@ -538,7 +538,7 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
} \
memset(dst_argb_c, 1, kStrideB * kHeight); \
memset(dst_argb_opt, 101, kStrideB * kHeight); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
dst_argb_c, kWidth * BPP_B, \
@@ -632,7 +632,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kStride; ++j) \
src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
dst_y_c, kWidth, \
dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
@@ -690,6 +690,8 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4)
#if defined(__arm__) || defined (__aarch64__)
+// The ARM version subsamples by summing 4 pixels, then multiplying by a
+// matrix with 4x smaller coefficients rounded to the nearest integer.
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 4)
#else
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 0)
@@ -738,7 +740,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
memset(dst_y_opt, 101, kWidth * kHeight); \
memset(dst_uv_opt, 102, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
dst_y_c, kWidth, \
dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
@@ -814,7 +816,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \
} \
memset(dst_argb_c, 1, kStrideB * kHeightB); \
memset(dst_argb_opt, 101, kStrideB * kHeightB); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \
dst_argb_c, kStrideB, \
kWidth, NEG kHeight); \
@@ -858,7 +860,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
} \
memset(dst_argb_c, 123, kStrideB * kHeightB); \
memset(dst_argb_opt, 123, kStrideB * kHeightB); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B(src_argb, kStrideA, \
dst_argb_c, kStrideB, \
kWidth, kHeight); \
@@ -948,7 +950,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##Dither##N) { \
} \
memset(dst_argb_c, 1, kStrideB * kHeightB); \
memset(dst_argb_opt, 101, kStrideB * kHeightB); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, \
dst_argb_c, kStrideB, \
NULL, kWidth, NEG kHeight); \
@@ -992,7 +994,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##Dither_Random) { \
} \
memset(dst_argb_c, 123, kStrideB * kHeightB); \
memset(dst_argb_opt, 123, kStrideB * kHeightB); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B##Dither(src_argb, kStrideA, \
dst_argb_c, kStrideB, \
NULL, kWidth, kHeight); \
@@ -1051,7 +1053,7 @@ TEST_F(libyuvTest, FMT_ATOB##_Symetric##N) { \
} \
memset(dst_argb_c, 1, kStrideA * kHeightA); \
memset(dst_argb_opt, 101, kStrideA * kHeightA); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
FMT_ATOB(src_argb + OFF, kStrideA, \
dst_argb_c, kStrideA, \
kWidth, NEG kHeight); \
@@ -1061,7 +1063,7 @@ TEST_F(libyuvTest, FMT_ATOB##_Symetric##N) { \
dst_argb_opt, kStrideA, \
kWidth, NEG kHeight); \
} \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
FMT_ATOB(dst_argb_c, kStrideA, \
dst_argb_c, kStrideA, \
kWidth, NEG kHeight); \
@@ -1470,7 +1472,7 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##Dither##N) { \
} \
memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
- MaskCpuFlags(disable_cpu_flags_); \
+ MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, \
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \