aboutsummaryrefslogtreecommitdiff
path: root/source/scale_common.cc
diff options
context:
space:
mode:
authorfbarchard@google.com <fbarchard@google.com>2015-06-09 01:05:18 +0000
committerfbarchard@google.com <fbarchard@google.com>2015-06-09 01:05:18 +0000
commit05416e2d9a35669884bef0af7190525156bcfa79 (patch)
treedbb89f6c071f4f5386b1a679387e8a8439329cf1 /source/scale_common.cc
parentb07de879b6553968cccd9d51afe4d2b3d4886a4d (diff)
downloadlibyuv-05416e2d9a35669884bef0af7190525156bcfa79.tar.gz
Box filter for YUV use rows with accumulation buffer for better memory behavior. The old code would do columns accumulated into registers, and then store the result once. This was slow from a memory point of view. The new code does a row of source at a time, updating an accumulation buffer every row. The accumulation buffer is small, and should fit cache. Before each accumulation of N rows, the buffer needs to be reset to zero. If the memset is a bottleneck, it would be faster to do the first row without an add, storing to the accumulation buffer, and then add for the remaining rows.
BUG=425 TESTED=out\release\libyuv_unittest --gtest_filter=*ScaleTo1x1* R=harryjin@google.com Review URL: https://webrtc-codereview.appspot.com/52659004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1428 16f28f9a-4ce2-e073-06de-1de4eb20be90
Diffstat (limited to 'source/scale_common.cc')
-rw-r--r--source/scale_common.cc44
1 files changed, 18 insertions, 26 deletions
diff --git a/source/scale_common.cc b/source/scale_common.cc
index 014d9566..1711f3d5 100644
--- a/source/scale_common.cc
+++ b/source/scale_common.cc
@@ -621,39 +621,31 @@ void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
}
}
-void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) {
+void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
int x;
assert(src_width > 0);
- assert(src_height > 0);
- for (x = 0; x < src_width; ++x) {
- const uint8* s = src_ptr + x;
- unsigned int sum = 0u;
- int y;
- for (y = 0; y < src_height; ++y) {
- sum += s[0];
- s += src_stride;
- }
- // TODO(fbarchard): Consider limiting height to 256 to avoid overflow.
- dst_ptr[x] = sum < 65535u ? sum : 65535u;
+ for (x = 0; x < src_width - 1; x += 2) {
+ dst_ptr[0] += src_ptr[0];
+ dst_ptr[1] += src_ptr[1];
+ src_ptr += 2;
+ dst_ptr += 2;
+ }
+ if (src_width & 1) {
+ dst_ptr[0] += src_ptr[0];
}
}
-void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint32* dst_ptr, int src_width, int src_height) {
+void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
int x;
assert(src_width > 0);
- assert(src_height > 0);
- for (x = 0; x < src_width; ++x) {
- const uint16* s = src_ptr + x;
- unsigned int sum = 0u;
- int y;
- for (y = 0; y < src_height; ++y) {
- sum += s[0];
- s += src_stride;
- }
- // No risk of overflow here now
- dst_ptr[x] = sum;
+ for (x = 0; x < src_width - 1; x += 2) {
+ dst_ptr[0] += src_ptr[0];
+ dst_ptr[1] += src_ptr[1];
+ src_ptr += 2;
+ dst_ptr += 2;
+ }
+ if (src_width & 1) {
+ dst_ptr[0] += src_ptr[0];
}
}