aboutsummaryrefslogtreecommitdiff
path: root/files/source/scale_common.cc
diff options
context:
space:
mode:
Diffstat (limited to 'files/source/scale_common.cc')
-rw-r--r--files/source/scale_common.cc555
1 files changed, 528 insertions, 27 deletions
diff --git a/files/source/scale_common.cc b/files/source/scale_common.cc
index 63690271..b02bdafd 100644
--- a/files/source/scale_common.cc
+++ b/files/source/scale_common.cc
@@ -400,6 +400,95 @@ void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
}
}
+// Sample position: (O is src sample position, X is dst sample position)
+//
+// v dst_ptr at here v stop at here
+// X O X X O X X O X X O X X O X
+// ^ src_ptr at here
+void ScaleRowUp2_Linear_C(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ int src_width = dst_width >> 1;
+ int x;
+ assert((dst_width % 2 == 0) && (dst_width >= 0));
+ for (x = 0; x < src_width; ++x) {
+ dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
+ dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
+ }
+}
+
+// Sample position: (O is src sample position, X is dst sample position)
+//
+// src_ptr at here
+// X v X X X X X X X X X
+// O O O O O
+// X X X X X X X X X X
+// ^ dst_ptr at here ^ stop at here
+// X X X X X X X X X X
+// O O O O O
+// X X X X X X X X X X
+void ScaleRowUp2_Bilinear_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ ptrdiff_t dst_stride,
+ int dst_width) {
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ uint8_t* d = dst_ptr;
+ uint8_t* e = dst_ptr + dst_stride;
+ int src_width = dst_width >> 1;
+ int x;
+ assert((dst_width % 2 == 0) && (dst_width >= 0));
+ for (x = 0; x < src_width; ++x) {
+ d[2 * x + 0] =
+ (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
+ d[2 * x + 1] =
+ (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
+ e[2 * x + 0] =
+ (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
+ e[2 * x + 1] =
+ (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
+ }
+}
+
+// Only suitable for at most 14 bit range.
+void ScaleRowUp2_Linear_16_C(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ int dst_width) {
+ int src_width = dst_width >> 1;
+ int x;
+ assert((dst_width % 2 == 0) && (dst_width >= 0));
+ for (x = 0; x < src_width; ++x) {
+ dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
+ dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
+ }
+}
+
+// Only suitable for at most 12bit range.
+void ScaleRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst_ptr,
+ ptrdiff_t dst_stride,
+ int dst_width) {
+ const uint16_t* s = src_ptr;
+ const uint16_t* t = src_ptr + src_stride;
+ uint16_t* d = dst_ptr;
+ uint16_t* e = dst_ptr + dst_stride;
+ int src_width = dst_width >> 1;
+ int x;
+ assert((dst_width % 2 == 0) && (dst_width >= 0));
+ for (x = 0; x < src_width; ++x) {
+ d[2 * x + 0] =
+ (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
+ d[2 * x + 1] =
+ (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
+ e[2 * x + 0] =
+ (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
+ e[2 * x + 1] =
+ (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
+ }
+}
+
// Scales a single row of pixels using point sampling.
void ScaleCols_C(uint8_t* dst_ptr,
const uint8_t* src_ptr,
@@ -677,18 +766,18 @@ void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
(src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
- (65536 / 9) >>
+ (65536u / 9u) >>
16;
dst_ptr[1] =
(src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
- (65536 / 9) >>
+ (65536u / 9u) >>
16;
dst_ptr[2] =
(src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
- (65536 / 6) >>
+ (65536u / 6u) >>
16;
src_ptr += 8;
dst_ptr += 3;
@@ -731,15 +820,15 @@ void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
for (i = 0; i < dst_width; i += 3) {
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
src_ptr[stride + 1] + src_ptr[stride + 2]) *
- (65536 / 6) >>
+ (65536u / 6u) >>
16;
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
src_ptr[stride + 4] + src_ptr[stride + 5]) *
- (65536 / 6) >>
+ (65536u / 6u) >>
16;
dst_ptr[2] =
(src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
- (65536 / 4) >>
+ (65536u / 4u) >>
16;
src_ptr += 8;
dst_ptr += 3;
@@ -776,6 +865,8 @@ void ScaleAddRow_16_C(const uint16_t* src_ptr,
}
}
+// ARGB scale row functions
+
void ScaleARGBRowDown2_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
@@ -1018,6 +1109,351 @@ void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
#undef BLENDERC
#undef BLENDER
+// UV scale row functions
+// same as ARGB but 2 channels
+
+void ScaleUVRowDown2_C(const uint8_t* src_uv,
+ ptrdiff_t src_stride,
+ uint8_t* dst_uv,
+ int dst_width) {
+ const uint16_t* src = (const uint16_t*)(src_uv);
+ uint16_t* dst = (uint16_t*)(dst_uv);
+ int x;
+ (void)src_stride;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = src[1];
+ dst[1] = src[3];
+ src += 2;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ dst[0] = src[1];
+ }
+}
+
+void ScaleUVRowDown2Linear_C(const uint8_t* src_uv,
+ ptrdiff_t src_stride,
+ uint8_t* dst_uv,
+ int dst_width) {
+ int x;
+ (void)src_stride;
+ for (x = 0; x < dst_width; ++x) {
+ dst_uv[0] = (src_uv[0] + src_uv[2] + 1) >> 1;
+ dst_uv[1] = (src_uv[1] + src_uv[3] + 1) >> 1;
+ src_uv += 4;
+ dst_uv += 2;
+ }
+}
+
+void ScaleUVRowDown2Box_C(const uint8_t* src_uv,
+ ptrdiff_t src_stride,
+ uint8_t* dst_uv,
+ int dst_width) {
+ int x;
+ for (x = 0; x < dst_width; ++x) {
+ dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
+ src_uv[src_stride + 2] + 2) >>
+ 2;
+ dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
+ src_uv[src_stride + 3] + 2) >>
+ 2;
+ src_uv += 4;
+ dst_uv += 2;
+ }
+}
+
+void ScaleUVRowDownEven_C(const uint8_t* src_uv,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_uv,
+ int dst_width) {
+ const uint16_t* src = (const uint16_t*)(src_uv);
+ uint16_t* dst = (uint16_t*)(dst_uv);
+ (void)src_stride;
+ int x;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = src[0];
+ dst[1] = src[src_stepx];
+ src += src_stepx * 2;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ dst[0] = src[0];
+ }
+}
+
+void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_uv,
+ int dst_width) {
+ int x;
+ for (x = 0; x < dst_width; ++x) {
+ dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
+ src_uv[src_stride + 2] + 2) >>
+ 2;
+ dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
+ src_uv[src_stride + 3] + 2) >>
+ 2;
+ src_uv += src_stepx * 2;
+ dst_uv += 2;
+ }
+}
+
+void ScaleUVRowUp2_Linear_C(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ int src_width = dst_width >> 1;
+ int x;
+ assert((dst_width % 2 == 0) && (dst_width >= 0));
+ for (x = 0; x < src_width; ++x) {
+ dst_ptr[4 * x + 0] =
+ (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
+ dst_ptr[4 * x + 1] =
+ (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
+ dst_ptr[4 * x + 2] =
+ (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
+ dst_ptr[4 * x + 3] =
+ (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
+ }
+}
+
+void ScaleUVRowUp2_Bilinear_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ ptrdiff_t dst_stride,
+ int dst_width) {
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ uint8_t* d = dst_ptr;
+ uint8_t* e = dst_ptr + dst_stride;
+ int src_width = dst_width >> 1;
+ int x;
+ assert((dst_width % 2 == 0) && (dst_width >= 0));
+ for (x = 0; x < src_width; ++x) {
+ d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
+ t[2 * x + 2] * 1 + 8) >>
+ 4;
+ d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
+ t[2 * x + 3] * 1 + 8) >>
+ 4;
+ d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
+ t[2 * x + 2] * 3 + 8) >>
+ 4;
+ d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
+ t[2 * x + 3] * 3 + 8) >>
+ 4;
+ e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
+ t[2 * x + 2] * 3 + 8) >>
+ 4;
+ e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
+ t[2 * x + 3] * 3 + 8) >>
+ 4;
+ e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
+ t[2 * x + 2] * 9 + 8) >>
+ 4;
+ e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
+ t[2 * x + 3] * 9 + 8) >>
+ 4;
+ }
+}
+
+void ScaleUVRowUp2_Linear_16_C(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ int dst_width) {
+ int src_width = dst_width >> 1;
+ int x;
+ assert((dst_width % 2 == 0) && (dst_width >= 0));
+ for (x = 0; x < src_width; ++x) {
+ dst_ptr[4 * x + 0] =
+ (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
+ dst_ptr[4 * x + 1] =
+ (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
+ dst_ptr[4 * x + 2] =
+ (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
+ dst_ptr[4 * x + 3] =
+ (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
+ }
+}
+
+void ScaleUVRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst_ptr,
+ ptrdiff_t dst_stride,
+ int dst_width) {
+ const uint16_t* s = src_ptr;
+ const uint16_t* t = src_ptr + src_stride;
+ uint16_t* d = dst_ptr;
+ uint16_t* e = dst_ptr + dst_stride;
+ int src_width = dst_width >> 1;
+ int x;
+ assert((dst_width % 2 == 0) && (dst_width >= 0));
+ for (x = 0; x < src_width; ++x) {
+ d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
+ t[2 * x + 2] * 1 + 8) >>
+ 4;
+ d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
+ t[2 * x + 3] * 1 + 8) >>
+ 4;
+ d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
+ t[2 * x + 2] * 3 + 8) >>
+ 4;
+ d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
+ t[2 * x + 3] * 3 + 8) >>
+ 4;
+ e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
+ t[2 * x + 2] * 3 + 8) >>
+ 4;
+ e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
+ t[2 * x + 3] * 3 + 8) >>
+ 4;
+ e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
+ t[2 * x + 2] * 9 + 8) >>
+ 4;
+ e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
+ t[2 * x + 3] * 9 + 8) >>
+ 4;
+ }
+}
+
+// Scales a single row of pixels using point sampling.
+void ScaleUVCols_C(uint8_t* dst_uv,
+ const uint8_t* src_uv,
+ int dst_width,
+ int x,
+ int dx) {
+ const uint16_t* src = (const uint16_t*)(src_uv);
+ uint16_t* dst = (uint16_t*)(dst_uv);
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ dst[0] = src[x >> 16];
+ x += dx;
+ dst[1] = src[x >> 16];
+ x += dx;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ dst[0] = src[x >> 16];
+ }
+}
+
+void ScaleUVCols64_C(uint8_t* dst_uv,
+ const uint8_t* src_uv,
+ int dst_width,
+ int x32,
+ int dx) {
+ int64_t x = (int64_t)(x32);
+ const uint16_t* src = (const uint16_t*)(src_uv);
+ uint16_t* dst = (uint16_t*)(dst_uv);
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ dst[0] = src[x >> 16];
+ x += dx;
+ dst[1] = src[x >> 16];
+ x += dx;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ dst[0] = src[x >> 16];
+ }
+}
+
+// Scales a single row of pixels up by 2x using point sampling.
+void ScaleUVColsUp2_C(uint8_t* dst_uv,
+ const uint8_t* src_uv,
+ int dst_width,
+ int x,
+ int dx) {
+ const uint16_t* src = (const uint16_t*)(src_uv);
+ uint16_t* dst = (uint16_t*)(dst_uv);
+ int j;
+ (void)x;
+ (void)dx;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ dst[1] = dst[0] = src[0];
+ src += 1;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ dst[0] = src[0];
+ }
+}
+
+// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
+// Mimics SSSE3 blender
+#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
+#define BLENDERC(a, b, f, s) \
+ (uint16_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
+#define BLENDER(a, b, f) BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
+
+void ScaleUVFilterCols_C(uint8_t* dst_uv,
+ const uint8_t* src_uv,
+ int dst_width,
+ int x,
+ int dx) {
+ const uint16_t* src = (const uint16_t*)(src_uv);
+ uint16_t* dst = (uint16_t*)(dst_uv);
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ int xi = x >> 16;
+ int xf = (x >> 9) & 0x7f;
+ uint16_t a = src[xi];
+ uint16_t b = src[xi + 1];
+ dst[0] = BLENDER(a, b, xf);
+ x += dx;
+ xi = x >> 16;
+ xf = (x >> 9) & 0x7f;
+ a = src[xi];
+ b = src[xi + 1];
+ dst[1] = BLENDER(a, b, xf);
+ x += dx;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ int xi = x >> 16;
+ int xf = (x >> 9) & 0x7f;
+ uint16_t a = src[xi];
+ uint16_t b = src[xi + 1];
+ dst[0] = BLENDER(a, b, xf);
+ }
+}
+
+void ScaleUVFilterCols64_C(uint8_t* dst_uv,
+ const uint8_t* src_uv,
+ int dst_width,
+ int x32,
+ int dx) {
+ int64_t x = (int64_t)(x32);
+ const uint16_t* src = (const uint16_t*)(src_uv);
+ uint16_t* dst = (uint16_t*)(dst_uv);
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ int64_t xi = x >> 16;
+ int xf = (x >> 9) & 0x7f;
+ uint16_t a = src[xi];
+ uint16_t b = src[xi + 1];
+ dst[0] = BLENDER(a, b, xf);
+ x += dx;
+ xi = x >> 16;
+ xf = (x >> 9) & 0x7f;
+ a = src[xi];
+ b = src[xi + 1];
+ dst[1] = BLENDER(a, b, xf);
+ x += dx;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ int64_t xi = x >> 16;
+ int xf = (x >> 9) & 0x7f;
+ uint16_t a = src[xi];
+ uint16_t b = src[xi + 1];
+ dst[0] = BLENDER(a, b, xf);
+ }
+}
+#undef BLENDER1
+#undef BLENDERC
+#undef BLENDER
+
// Scale plane vertically with bilinear interpolation.
void ScalePlaneVertical(int src_height,
int dst_width,
@@ -1029,7 +1465,7 @@ void ScalePlaneVertical(int src_height,
int x,
int y,
int dy,
- int bpp,
+ int bpp, // bytes per pixel. 4 for ARGB.
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher bpp.
int dst_width_bytes = dst_width * bpp;
@@ -1075,11 +1511,11 @@ void ScalePlaneVertical(int src_height,
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_MMI)
- if (TestCpuFlag(kCpuHasMMI)) {
- InterpolateRow = InterpolateRow_Any_MMI;
- if (IS_ALIGNED(dst_width_bytes, 8)) {
- InterpolateRow = InterpolateRow_MMI;
+#if defined(HAS_INTERPOLATEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ InterpolateRow = InterpolateRow_Any_LSX;
+ if (IS_ALIGNED(dst_width_bytes, 32)) {
+ InterpolateRow = InterpolateRow_LSX;
}
}
#endif
@@ -1097,6 +1533,7 @@ void ScalePlaneVertical(int src_height,
y += dy;
}
}
+
void ScalePlaneVertical_16(int src_height,
int dst_width,
int dst_height,
@@ -1107,7 +1544,7 @@ void ScalePlaneVertical_16(int src_height,
int x,
int y,
int dy,
- int wpp,
+ int wpp, /* words per pixel. normally 1 */
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher wpp.
int dst_width_words = dst_width * wpp;
@@ -1123,32 +1560,32 @@ void ScalePlaneVertical_16(int src_height,
src_argb += (x >> 16) * wpp;
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- InterpolateRow = InterpolateRow_Any_16_SSE2;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
+ InterpolateRow = InterpolateRow_16_Any_SSE2;
+ if (IS_ALIGNED(dst_width_words, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_16_SSSE3;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
+ InterpolateRow = InterpolateRow_16_Any_SSSE3;
+ if (IS_ALIGNED(dst_width_words, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_16_AVX2;
- if (IS_ALIGNED(dst_width_bytes, 32)) {
+ InterpolateRow = InterpolateRow_16_Any_AVX2;
+ if (IS_ALIGNED(dst_width_words, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_16_NEON;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
+ InterpolateRow = InterpolateRow_16_Any_NEON;
+ if (IS_ALIGNED(dst_width_words, 8)) {
InterpolateRow = InterpolateRow_16_NEON;
}
}
@@ -1168,6 +1605,70 @@ void ScalePlaneVertical_16(int src_height,
}
}
+// Use scale to convert lsb formats to msb, depending how many bits there are:
+// 32768 = 9 bits
+// 16384 = 10 bits
+// 4096 = 12 bits
+// 256 = 16 bits
+// TODO(fbarchard): change scale to bits
+void ScalePlaneVertical_16To8(int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_argb,
+ uint8_t* dst_argb,
+ int x,
+ int y,
+ int dy,
+ int wpp, /* words per pixel. normally 1 */
+ int scale,
+ enum FilterMode filtering) {
+ // TODO(fbarchard): Allow higher wpp.
+ int dst_width_words = dst_width * wpp;
+ // TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions.
+ void (*InterpolateRow_16To8)(uint8_t * dst_argb, const uint16_t* src_argb,
+ ptrdiff_t src_stride, int scale, int dst_width,
+ int source_y_fraction) = InterpolateRow_16To8_C;
+ const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
+ int j;
+ assert(wpp >= 1 && wpp <= 2);
+ assert(src_height != 0);
+ assert(dst_width > 0);
+ assert(dst_height > 0);
+ src_argb += (x >> 16) * wpp;
+
+#if defined(HAS_INTERPOLATEROW_16TO8_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow_16To8 = InterpolateRow_16To8_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ InterpolateRow_16To8 = InterpolateRow_16To8_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_16TO8_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow_16To8 = InterpolateRow_16To8_Any_AVX2;
+ if (IS_ALIGNED(dst_width, 32)) {
+ InterpolateRow_16To8 = InterpolateRow_16To8_AVX2;
+ }
+ }
+#endif
+ for (j = 0; j < dst_height; ++j) {
+ int yi;
+ int yf;
+ if (y > max_y) {
+ y = max_y;
+ }
+ yi = y >> 16;
+ yf = filtering ? ((y >> 8) & 255) : 0;
+ InterpolateRow_16To8(dst_argb, src_argb + yi * src_stride, src_stride,
+ scale, dst_width_words, yf);
+ dst_argb += dst_stride;
+ y += dy;
+ }
+}
+
// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width,
int src_height,
@@ -1181,8 +1682,8 @@ enum FilterMode ScaleFilterReduce(int src_width,
src_height = -src_height;
}
if (filtering == kFilterBox) {
- // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
- if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
+ // If scaling either axis to 0.5 or larger, switch from Box to Bilinear.
+ if (dst_width * 2 >= src_width || dst_height * 2 >= src_height) {
filtering = kFilterBilinear;
}
}
@@ -1217,7 +1718,7 @@ int FixedDiv_C(int num, int div) {
return (int)(((int64_t)(num) << 16) / div);
}
-// Divide num by div and return as 16.16 fixed point result.
+// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
int FixedDiv1_C(int num, int div) {
return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1));
}
@@ -1260,14 +1761,14 @@ void ScaleSlope(int src_width,
if (dst_width <= Abs(src_width)) {
*dx = FixedDiv(Abs(src_width), dst_width);
*x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
- } else if (dst_width > 1) {
+ } else if (src_width > 1 && dst_width > 1) {
*dx = FixedDiv1(Abs(src_width), dst_width);
*x = 0;
}
if (dst_height <= src_height) {
*dy = FixedDiv(src_height, dst_height);
*y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
- } else if (dst_height > 1) {
+ } else if (src_height > 1 && dst_height > 1) {
*dy = FixedDiv1(src_height, dst_height);
*y = 0;
}
@@ -1276,7 +1777,7 @@ void ScaleSlope(int src_width,
if (dst_width <= Abs(src_width)) {
*dx = FixedDiv(Abs(src_width), dst_width);
*x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
- } else if (dst_width > 1) {
+ } else if (src_width > 1 && dst_width > 1) {
*dx = FixedDiv1(Abs(src_width), dst_width);
*x = 0;
}