Use ScalePlaneDown2_16To8 for avoiding the 2 step process

Bug: libyuv:950 Change-Id: I5a77bca9a0230fe00abd810939e217833a14683f Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4134524 Reviewed-by: Frank Barchard <fbarchard@chromium.org> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
author: Sergio Garcia Murillo <sergio.garcia.murillo@gmail.com> 2023-01-03 22:09:09 +0100
committer: libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2023-01-03 21:41:21 +0000
commit: 22a579c438410710f627e67b38b3df9968efafb9 (patch)
tree: 6ebcb28dc9e25236ec27eefe15557867740d646c
parent: f583b1b4b82ef71eba776c3591c82227db615c75 (diff)
download: libyuv-22a579c438410710f627e67b38b3df9968efafb9.tar.gz
4 files changed, 147 insertions, 13 deletions
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index 6cb5e128..356da19d 100644
--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -214,6 +214,17 @@ void ScalePlaneVertical_16To8(int src_height,
                               int scale,
                               enum FilterMode filtering);
 
+void ScalePlaneDown2_16To8(int src_width,
+                           int src_height,
+                           int dst_width,
+                           int dst_height,
+                           int src_stride,
+                           int dst_stride,
+                           const uint16_t* src_ptr,
+                           uint8_t* dst_ptr,
+                           int scale,
+                           enum FilterMode filtering);
+
 // Simplify the filtering based on scale factors.
 enum FilterMode ScaleFilterReduce(int src_width,
                                   int src_height,
@@ -259,6 +270,11 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width);
+void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
+                           ptrdiff_t src_stride,
+                           uint8_t* dst,
+                           int dst_width,
+                           int scale);
 void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst,
@@ -267,6 +283,11 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst,
                               int dst_width);
+void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
+                                 ptrdiff_t src_stride,
+                                 uint8_t* dst,
+                                 int dst_width,
+                                 int scale);
 void ScaleRowDown2Box_C(const uint8_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint8_t* dst,
@@ -279,6 +300,11 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint16_t* dst,
                            int dst_width);
+void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
+                              ptrdiff_t src_stride,
+                              uint8_t* dst,
+                              int dst_width,
+                              int scale);
 void ScaleRowDown4_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
diff --git a/source/convert.cc b/source/convert.cc
index 8b2373ec..5ee3dab5 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -325,23 +325,14 @@ int I410ToI420(const uint16_t* src_y,
 
   {
     const int uv_width = SUBSAMPLE(width, 1, 1);
-    const int uv_stride = uv_width;
     const int uv_height = SUBSAMPLE(height, 1, 1);
 
-    // Scale uv planes using Y plane as temp buffer and then convert to 8 bits.
-    ScalePlane_12(src_u, src_stride_u, width, height, (uint16_t*)dst_y,
-                  uv_stride, uv_width, uv_height, kFilterBilinear);
-    Convert16To8Plane((uint16_t*)dst_y, uv_stride, dst_u, dst_stride_u, scale,
-                      uv_width, uv_height);
-
-    ScalePlane_12(src_v, src_stride_v, width, height, (uint16_t*)dst_y,
-                  uv_stride, uv_width, uv_height, kFilterBilinear);
-    Convert16To8Plane((uint16_t*)dst_y, uv_stride, dst_v, dst_stride_v, scale,
-                      uv_width, uv_height);
-
-    // Convert Y plane last.
     Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
                       height);
+    ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_u,
+                          dst_stride_u, src_u, dst_u, scale, kFilterBilinear);
+    ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_v,
+                          dst_stride_v, src_v, dst_v, scale, kFilterBilinear);
   }
   return 0;
 }
diff --git a/source/scale.cc b/source/scale.cc
index 2a7e308d..7cdebc23 100644
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -198,6 +198,42 @@ static void ScalePlaneDown2_16(int src_width,
   }
 }
 
+void ScalePlaneDown2_16To8(int src_width,
+                           int src_height,
+                           int dst_width,
+                           int dst_height,
+                           int src_stride,
+                           int dst_stride,
+                           const uint16_t* src_ptr,
+                           uint8_t* dst_ptr,
+                           int scale,
+                           enum FilterMode filtering) {
+  int y;
+  void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+                        uint8_t* dst_ptr, int dst_width, int scale) =
+      filtering == kFilterNone
+          ? ScaleRowDown2_16To8_C
+          : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C
+                                        : ScaleRowDown2Box_16To8_C);
+  int row_stride = src_stride * 2;
+  (void)src_width;
+  (void)src_height;
+  if (!filtering) {
+    src_ptr += src_stride;  // Point to odd rows.
+    src_stride = 0;
+  }
+
+  if (filtering == kFilterLinear) {
+    src_stride = 0;
+  }
+  // TODO(fbarchard): Loop through source height to allow odd height.
+  for (y = 0; y < dst_height; ++y) {
+    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width, scale);
+    src_ptr += row_stride;
+    dst_ptr += dst_stride;
+  }
+}
+
 // Scale plane, 1/4
 // This is an optimized version for scaling down a plane to 1/4 of
 // its original size.
diff --git a/source/scale_common.cc b/source/scale_common.cc
index b02bdafd..398f3f65 100644
--- a/source/scale_common.cc
+++ b/source/scale_common.cc
@@ -23,6 +23,25 @@ namespace libyuv {
 extern "C" {
 #endif
 
+#ifdef __cplusplus
+#define STATIC_CAST(type, expr) static_cast<type>(expr)
+#else
+#define STATIC_CAST(type, expr) (type)(expr)
+#endif
+
+// TODO(fbarchard): make clamp255 preserve negative values.
+static __inline int32_t clamp255(int32_t v) {
+  return (-(v >= 255) | v) & 255;
+}
+
+// Use scale to convert lsb formats to msb, depending how many bits there are:
+// 32768 = 9 bits
+// 16384 = 10 bits
+// 4096 = 12 bits
+// 256 = 16 bits
+// TODO(fbarchard): change scale to bits
+#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
+
 static __inline int Abs(int v) {
   return v >= 0 ? v : -v;
 }
@@ -62,6 +81,26 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr,
   }
 }
 
+void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
+                           ptrdiff_t src_stride,
+                           uint8_t* dst,
+                           int dst_width,
+                           int scale) {
+  int x;
+  (void)src_stride;
+  assert(scale >= 256);
+  assert(scale <= 32768);
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
+    dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
+    dst += 2;
+    src_ptr += 4;
+  }
+  if (dst_width & 1) {
+    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
+  }
+}
+
 void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst,
@@ -98,6 +137,25 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
   }
 }
 
+void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
+                                 ptrdiff_t src_stride,
+                                 uint8_t* dst,
+                                 int dst_width,
+                                 int scale) {
+  const uint16_t* s = src_ptr;
+  int x;
+  (void)src_stride;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
+    dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
+    dst += 2;
+    s += 4;
+  }
+  if (dst_width & 1) {
+    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
+  }
+}
+
 void ScaleRowDown2Box_C(const uint8_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint8_t* dst,
@@ -160,6 +218,29 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
   }
 }
 
+void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
+                              ptrdiff_t src_stride,
+                              uint8_t* dst,
+                              int dst_width,
+                              int scale) {
+  const uint16_t* s = src_ptr;
+  const uint16_t* t = src_ptr + src_stride;
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = STATIC_CAST(uint8_t,
+                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
+    dst[1] = STATIC_CAST(uint8_t,
+                         C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
+    dst += 2;
+    s += 4;
+    t += 4;
+  }
+  if (dst_width & 1) {
+    dst[0] = STATIC_CAST(uint8_t,
+                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
+  }
+}
+
 void ScaleRowDown4_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
author	Sergio Garcia Murillo <sergio.garcia.murillo@gmail.com>	2023-01-03 22:09:09 +0100
committer	libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com>	2023-01-03 21:41:21 +0000
commit	22a579c438410710f627e67b38b3df9968efafb9 (patch)
tree	6ebcb28dc9e25236ec27eefe15557867740d646c
parent	f583b1b4b82ef71eba776c3591c82227db615c75 (diff)
download	libyuv-22a579c438410710f627e67b38b3df9968efafb9.tar.gz