aboutsummaryrefslogtreecommitdiff
path: root/source/scale_common.cc
diff options
context:
space:
mode:
author: Frank Barchard <fbarchard@google.com> 2022-06-28 16:31:22 -0700
committer: libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2022-06-29 00:00:46 +0000
commit: 6900494d90ae095d44405cd4cc3f346971fa69c9 (patch)
tree: 4a77f6084ee5c3f6b6a2d1d2de7587111175f8fe /source/scale_common.cc
parent: fe4a50df8e2a787e2919a8321dbe1412b94b20c6 (diff)
download: libyuv-6900494d90ae095d44405cd4cc3f346971fa69c9.tar.gz
Merge/SplitRGB fix -mcmodel=large x86 and InterpolateRow_16To8_NEON
MergeRGB and SplitRGB use a register to point to 9 shuffle tables.
- fixes an out of registers error with -mcmodel=large

InterpolateRow_16To8_NEON improves performance for I210ToI420:
On Pixel 4 for 720p x1000 images
Was I210ToI420_Opt (608 ms)
Now I210ToI420_Opt (336 ms)

On Skylake Xeon
Was I210ToI420_Opt (259 ms)
Now I210ToI420_Opt (209 ms)

Bug: libyuv:931, libyuv:930
Change-Id: I20f8244803f06da511299bf1a2ffc7945eb35221
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3717054
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Justin Green <greenjustin@google.com>
Diffstat (limited to 'source/scale_common.cc')
-rw-r--r-- source/scale_common.cc 24
1 files changed, 23 insertions, 1 deletions
diff --git a/source/scale_common.cc b/source/scale_common.cc
index 812d57ec..b02bdafd 100644
--- a/source/scale_common.cc
+++ b/source/scale_common.cc
@@ -1605,6 +1605,12 @@ void ScalePlaneVertical_16(int src_height,
}
}
+// Use scale to convert lsb formats to msb, depending on how many bits there are:
+// 32768 = 9 bits
+// 16384 = 10 bits
+// 4096 = 12 bits
+// 256 = 16 bits
+// TODO(fbarchard): change scale to bits
void ScalePlaneVertical_16To8(int src_height,
int dst_width,
int dst_height,
@@ -1620,7 +1626,7 @@ void ScalePlaneVertical_16To8(int src_height,
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher wpp.
int dst_width_words = dst_width * wpp;
- // TODO(https://crbug.com/libyuv/931): Add NEON and AVX2 versions.
+ // TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions.
void (*InterpolateRow_16To8)(uint8_t * dst_argb, const uint16_t* src_argb,
ptrdiff_t src_stride, int scale, int dst_width,
int source_y_fraction) = InterpolateRow_16To8_C;
@@ -1632,6 +1638,22 @@ void ScalePlaneVertical_16To8(int src_height,
assert(dst_height > 0);
src_argb += (x >> 16) * wpp;
+#if defined(HAS_INTERPOLATEROW_16TO8_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow_16To8 = InterpolateRow_16To8_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ InterpolateRow_16To8 = InterpolateRow_16To8_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_16TO8_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow_16To8 = InterpolateRow_16To8_Any_AVX2;
+ if (IS_ALIGNED(dst_width, 32)) {
+ InterpolateRow_16To8 = InterpolateRow_16To8_AVX2;
+ }
+ }
+#endif
for (j = 0; j < dst_height; ++j) {
int yi;
int yf;