aboutsummaryrefslogtreecommitdiff
path: root/source/scale_argb.cc
diff options
context:
space:
mode:
author Frank Barchard <fbarchard@google.com> 2016-10-24 15:37:08 -0700
committer Frank Barchard <fbarchard@google.com> 2016-10-24 15:37:08 -0700
commit f5d5bd88d660232038fe06ed735fe95d2b9f9b61 (patch)
tree 261e96410fe8dc820135178029b0e488b3f2bda6 /source/scale_argb.cc
parent 451af5e922e026c266d25abc92e7519acfc9a4c5 (diff)
download libyuv-f5d5bd88d660232038fe06ed735fe95d2b9f9b61.tar.gz
Add MSA optimized I422ToARGBRow_MSA and I422ToRGBARow_MSA functions
R=fbarchard@google.com
BUG=libyuv:634

Performance gains (vs C vectorized):
  I422ToARGBRow_MSA     : ~1.6x
  I422ToRGBARow_MSA     : ~1.6x
  I422ToARGBRow_Any_MSA : ~1.58x
  I422ToRGBARow_Any_MSA : ~1.6x

Performance gains (vs C non-vectorized):
  I422ToARGBRow_MSA     : ~7x
  I422ToRGBARow_MSA     : ~7x
  I422ToARGBRow_Any_MSA : ~6.9x
  I422ToRGBARow_Any_MSA : ~6.8x

Regarding performance measurement: we created standalone tests that pass row data from a filled 1920x1080 buffer to both the C and MSA functions. Each test is executed for N iterations to obtain more accurate timings of C vs MSA.

Review URL: https://codereview.chromium.org/2430313005 .
Diffstat (limited to 'source/scale_argb.cc')
-rw-r--r-- source/scale_argb.cc 8
1 files changed, 8 insertions, 0 deletions
diff --git a/source/scale_argb.cc b/source/scale_argb.cc
index 17f51ae9..b2a13967 100644
--- a/source/scale_argb.cc
+++ b/source/scale_argb.cc
@@ -474,6 +474,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
I422ToARGBRow = I422ToARGBRow_DSPR2;
}
#endif
+#if defined(HAS_I422TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToARGBRow = I422ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(src_width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_MSA;
+ }
+ }
+#endif
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =