Casting for scale functions

- MT2T support for source strides added, but only works for positive values. - Reduced casting in row_common - one cast per assignment. - scaling functions use intptr_t for intermediate calculations, then cast strides to ptrdiff_t Bug: libyuv:948, b/257266635, b/262468594 Change-Id: I0409a0ce916b777da2a01c0ab0b56dccefed3b33 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4102203 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Wan-Teh Chang <wtc@google.com> Reviewed-by: Justin Green <greenjustin@google.com> Reviewed-by: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Ernest Hua <ernesthua@google.com>
author: Frank Barchard <fbarchard@google.com> 2022-12-15 14:11:52 -0800
committer: libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2022-12-15 22:34:22 +0000
commit: 3abd6f36b6e4f5a2e0ce236580a8bc1da3c7cf7e (patch)
tree: 3fc173f8288f4457858919c806bac14593dc3d1b /source/convert.cc
parent: 610e0cdead3fcd3288693d18eab8c7323805ad9e (diff)
download: libyuv-3abd6f36b6e4f5a2e0ce236580a8bc1da3c7cf7e.tar.gz
1 files changed, 58 insertions, 33 deletions
diff --git a/source/convert.cc b/source/convert.cc
index 65d4ba16..b62e513a 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -735,12 +735,10 @@ int MM21ToYUY2(const uint8_t* src_y,
 // Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format
 // documentation.
 // TODO(greenjustin): Add an MT2T to I420 conversion.
-// TODO(greenjustin): Investigate if there are valid stride parameters other
-// than width.
 LIBYUV_API
-int MT2TToP010(const uint16_t* src_y,
+int MT2TToP010(const uint8_t* src_y,
                int src_stride_y,
-               const uint16_t* src_uv,
+               const uint8_t* src_uv,
                int src_stride_uv,
                uint16_t* dst_y,
                int dst_stride_y,
@@ -748,48 +746,75 @@ int MT2TToP010(const uint16_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
-  if (width <= 0 || height <= 0 || !src_y || !src_uv || !dst_y || !dst_uv) {
+  if (width <= 0 || !height || !src_uv || !dst_uv) {
     return -1;
   }
 
-  // TODO(greenjustin): Investigate if we can allow arbitrary sizes. This may
-  // not be semantically meaningful in this format, but we do not have samples
-  // of unaligned data to conclude that yet. This format is 16x32 tiled, so we
-  // must pad the width and height to reflect that.
-  int aligned_width = (width + 15) & ~15;
-  int aligned_height = (height + 31) & ~31;
-
   {
-    size_t y_size = aligned_width * aligned_height * 10 / 8;
-    size_t uv_size = aligned_width * ((aligned_height + 1) / 2) * 10 / 8;
-    size_t tmp_y_size = aligned_width * aligned_height * sizeof(uint16_t);
-    size_t tmp_uv_size =
-        aligned_width * ((aligned_height + 1) / 2) * sizeof(uint16_t);
-    void (*UnpackMT2T)(const uint16_t* src, uint16_t* dst, size_t size) =
+    int u_width = (width + 1) / 2;
+    int uv_width = 2 * u_width;
+    int y = 0;
+    int uv_height = uv_height = (height + 1) / 2;
+    const int tile_width = 16;
+    const int y_tile_height = 32;
+    const int uv_tile_height = 16;
+    int padded_width = (width + tile_width - 1) & ~(tile_width - 1);
+    int y_tile_row_size = padded_width * y_tile_height * 10 / 8;
+    int uv_tile_row_size = padded_width * uv_tile_height * 10 / 8;
+    size_t row_buf_size = padded_width * y_tile_height * sizeof(uint16_t);
+    void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) =
         UnpackMT2T_C;
-    align_buffer_64(tmp_y, tmp_y_size);
-    align_buffer_64(tmp_uv, tmp_uv_size);
+    align_buffer_64(row_buf, row_buf_size);
 
 #if defined(HAS_UNPACKMT2T_NEON)
     if (TestCpuFlag(kCpuHasNEON)) {
       UnpackMT2T = UnpackMT2T_NEON;
     }
 #endif
+    // Negative height means invert the image.
+    if (height < 0) {
+      height = -height;
+      uv_height = (height + 1) / 2;
+      if (dst_y) {
+        dst_y = dst_y + (height - 1) * dst_stride_y;
+        dst_stride_y = -dst_stride_y;
+      }
+      dst_uv = dst_uv + (uv_height - 1) * dst_stride_uv;
+      dst_stride_uv = -dst_stride_uv;
+    }
+
+    // Unpack and detile Y in rows of tiles
+    if (src_y && dst_y) {
+      for (y = 0; y < (height & ~(y_tile_height - 1)); y += y_tile_height) {
+        UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
+        DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
+                       width, y_tile_height, y_tile_height);
+        src_y += src_stride_y * y_tile_height;
+        dst_y += dst_stride_y * y_tile_height;
+      }
+      if (height & (y_tile_height - 1)) {
+        UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
+        DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
+                       width, height & (y_tile_height - 1), y_tile_height);
+      }
+    }
 
-    // TODO(greenjustin): Unpack and detile in rows rather than planes to keep
-    // the caches hot.
-    UnpackMT2T(src_y, (uint16_t*)tmp_y, y_size);
-    UnpackMT2T(src_uv, (uint16_t*)tmp_uv, uv_size);
-
-    DetilePlane_16((uint16_t*)tmp_y, src_stride_y, dst_y, dst_stride_y, width,
-                   height, 32);
-    DetilePlane_16((uint16_t*)tmp_uv, src_stride_uv, dst_uv, dst_stride_uv,
-                   width, (height + 1) / 2, 16);
-
-    free_aligned_buffer_64(tmp_y);
-    free_aligned_buffer_64(tmp_uv);
+    // Unpack and detile UV plane
+    for (y = 0; y < (uv_height & ~(uv_tile_height - 1)); y += uv_tile_height) {
+      UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
+      DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
+                     uv_width, uv_tile_height, uv_tile_height);
+      src_uv += src_stride_uv * uv_tile_height;
+      dst_uv += dst_stride_uv * uv_tile_height;
+    }
+    if (uv_height & (uv_tile_height - 1)) {
+      UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
+      DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
+                     uv_width, uv_height & (uv_tile_height - 1),
+                     uv_tile_height);
+    }
+    free_aligned_buffer_64(row_buf);
   }
-
   return 0;
 }
author	Frank Barchard <fbarchard@google.com>	2022-12-15 14:11:52 -0800
committer	libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com>	2022-12-15 22:34:22 +0000
commit	3abd6f36b6e4f5a2e0ce236580a8bc1da3c7cf7e (patch)
tree	3fc173f8288f4457858919c806bac14593dc3d1b /source/convert.cc
parent	610e0cdead3fcd3288693d18eab8c7323805ad9e (diff)
download	libyuv-3abd6f36b6e4f5a2e0ce236580a8bc1da3c7cf7e.tar.gz