diff options
author | Frank Barchard <fbarchard@google.com> | 2022-06-28 16:31:22 -0700 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-06-29 00:00:46 +0000 |
commit | 6900494d90ae095d44405cd4cc3f346971fa69c9 (patch) | |
tree | 4a77f6084ee5c3f6b6a2d1d2de7587111175f8fe /source/scale_common.cc | |
parent | fe4a50df8e2a787e2919a8321dbe1412b94b20c6 (diff) | |
download | libyuv-6900494d90ae095d44405cd4cc3f346971fa69c9.tar.gz |
Merge/SplitRGB fix -mcmodel=large x86 and InterpolateRow_16To8_NEON
MergeRGB and SplitRGB use a register to point to 9 shuffle tables.
- fixes an out of registers error with -mcmodel=large
InterpolateRow_16To8_NEON improves performance for I210ToI420:
On Pixel 4, for 1000 images at 720p:
Was I210ToI420_Opt (608 ms)
Now I210ToI420_Opt (336 ms)
On Skylake Xeon
Was I210ToI420_Opt (259 ms)
Now I210ToI420_Opt (209 ms)
Bug: libyuv:931, libyuv:930
Change-Id: I20f8244803f06da511299bf1a2ffc7945eb35221
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3717054
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Justin Green <greenjustin@google.com>
Diffstat (limited to 'source/scale_common.cc')
-rw-r--r-- | source/scale_common.cc | 24 |
1 file changed, 23 insertions, 1 deletion
diff --git a/source/scale_common.cc b/source/scale_common.cc
index 812d57ec..b02bdafd 100644
--- a/source/scale_common.cc
+++ b/source/scale_common.cc
@@ -1605,6 +1605,12 @@ void ScalePlaneVertical_16(int src_height,
   }
 }
 
+// Use scale to convert lsb formats to msb, depending how many bits there are:
+// 32768 = 9 bits
+// 16384 = 10 bits
+// 4096 = 12 bits
+// 256 = 16 bits
+// TODO(fbarchard): change scale to bits
 void ScalePlaneVertical_16To8(int src_height,
                               int dst_width,
                               int dst_height,
@@ -1620,7 +1626,7 @@ void ScalePlaneVertical_16To8(int src_height,
                               enum FilterMode filtering) {
   // TODO(fbarchard): Allow higher wpp.
   int dst_width_words = dst_width * wpp;
-  // TODO(https://crbug.com/libyuv/931): Add NEON and AVX2 versions.
+  // TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions.
   void (*InterpolateRow_16To8)(uint8_t * dst_argb, const uint16_t* src_argb,
                                ptrdiff_t src_stride, int scale, int dst_width,
                                int source_y_fraction) = InterpolateRow_16To8_C;
@@ -1632,6 +1638,22 @@ void ScalePlaneVertical_16To8(int src_height,
   assert(dst_height > 0);
   src_argb += (x >> 16) * wpp;
 
+#if defined(HAS_INTERPOLATEROW_16TO8_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    InterpolateRow_16To8 = InterpolateRow_16To8_Any_NEON;
+    if (IS_ALIGNED(dst_width, 8)) {
+      InterpolateRow_16To8 = InterpolateRow_16To8_NEON;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_16TO8_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    InterpolateRow_16To8 = InterpolateRow_16To8_Any_AVX2;
+    if (IS_ALIGNED(dst_width, 32)) {
+      InterpolateRow_16To8 = InterpolateRow_16To8_AVX2;
+    }
+  }
+#endif
   for (j = 0; j < dst_height; ++j) {
     int yi;
     int yf;