Box filter for YUV: use rows with an accumulation buffer for better memory behavior. The old code accumulated each column into registers and then stored the result once, which was slow from a memory point of view. The new code processes one row of source at a time, updating an accumulation buffer on every row. The accumulation buffer is small and should fit in cache. Before each accumulation of N rows, the buffer needs to be reset to zero. If the memset is a bottleneck, it would be faster to process the first row without an add, storing directly to the accumulation buffer, and then add for the remaining rows.

BUG=425 TESTED=out\release\libyuv_unittest --gtest_filter=*ScaleTo1x1* R=harryjin@google.com Review URL: https://webrtc-codereview.appspot.com/52659004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1428 16f28f9a-4ce2-e073-06de-1de4eb20be90
author: fbarchard@google.com <fbarchard@google.com> 2015-06-09 01:05:18 +0000
committer: fbarchard@google.com <fbarchard@google.com> 2015-06-09 01:05:18 +0000
commit: 05416e2d9a35669884bef0af7190525156bcfa79 (patch)
tree: dbb89f6c071f4f5386b1a679387e8a8439329cf1 /source/scale_common.cc
parent: b07de879b6553968cccd9d51afe4d2b3d4886a4d (diff)
download: libyuv-05416e2d9a35669884bef0af7190525156bcfa79.tar.gz
1 files changed, 18 insertions, 26 deletions
diff --git a/source/scale_common.cc b/source/scale_common.cc
index 014d9566..1711f3d5 100644
--- a/source/scale_common.cc
+++ b/source/scale_common.cc
@@ -621,39 +621,31 @@ void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
   }
 }
 
-void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                    uint16* dst_ptr, int src_width, int src_height) {
+void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
   int x;
   assert(src_width > 0);
-  assert(src_height > 0);
-  for (x = 0; x < src_width; ++x) {
-    const uint8* s = src_ptr + x;
-    unsigned int sum = 0u;
-    int y;
-    for (y = 0; y < src_height; ++y) {
-      sum += s[0];
-      s += src_stride;
-    }
-    // TODO(fbarchard): Consider limiting height to 256 to avoid overflow.
-    dst_ptr[x] = sum < 65535u ? sum : 65535u;
+  for (x = 0; x < src_width - 1; x += 2) {
+    dst_ptr[0] += src_ptr[0];
+    dst_ptr[1] += src_ptr[1];
+    src_ptr += 2;
+    dst_ptr += 2;
+  }
+  if (src_width & 1) {
+    dst_ptr[0] += src_ptr[0];
   }
 }
 
-void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
-                       uint32* dst_ptr, int src_width, int src_height) {
+void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
   int x;
   assert(src_width > 0);
-  assert(src_height > 0);
-  for (x = 0; x < src_width; ++x) {
-    const uint16* s = src_ptr + x;
-    unsigned int sum = 0u;
-    int y;
-    for (y = 0; y < src_height; ++y) {
-      sum += s[0];
-      s += src_stride;
-    }
-    // No risk of overflow here now
-    dst_ptr[x] = sum;
+  for (x = 0; x < src_width - 1; x += 2) {
+    dst_ptr[0] += src_ptr[0];
+    dst_ptr[1] += src_ptr[1];
+    src_ptr += 2;
+    dst_ptr += 2;
+  }
+  if (src_width & 1) {
+    dst_ptr[0] += src_ptr[0];
   }
 }
author	fbarchard@google.com <fbarchard@google.com>	2015-06-09 01:05:18 +0000
committer	fbarchard@google.com <fbarchard@google.com>	2015-06-09 01:05:18 +0000
commit	05416e2d9a35669884bef0af7190525156bcfa79 (patch)
tree	dbb89f6c071f4f5386b1a679387e8a8439329cf1 /source/scale_common.cc
parent	b07de879b6553968cccd9d51afe4d2b3d4886a4d (diff)
download	libyuv-05416e2d9a35669884bef0af7190525156bcfa79.tar.gz