diff options
author | Frank Barchard <fbarchard@google.com> | 2022-12-15 14:11:52 -0800 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-12-15 22:34:22 +0000 |
commit | 3abd6f36b6e4f5a2e0ce236580a8bc1da3c7cf7e (patch) | |
tree | 3fc173f8288f4457858919c806bac14593dc3d1b /source/convert.cc | |
parent | 610e0cdead3fcd3288693d18eab8c7323805ad9e (diff) | |
download | libyuv-3abd6f36b6e4f5a2e0ce236580a8bc1da3c7cf7e.tar.gz |
Casting for scale functions
- Added MT2T support for source strides; it currently works only for positive stride values.
- Reduced casting in row_common - one cast per assignment.
- Scaling functions use intptr_t for intermediate calculations, then cast strides to ptrdiff_t.
Bug: libyuv:948, b/257266635, b/262468594
Change-Id: I0409a0ce916b777da2a01c0ab0b56dccefed3b33
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4102203
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Reviewed-by: Justin Green <greenjustin@google.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Ernest Hua <ernesthua@google.com>
Diffstat (limited to 'source/convert.cc')
-rw-r--r-- | source/convert.cc | 91 |
1 file changed, 58 insertions, 33 deletions
diff --git a/source/convert.cc b/source/convert.cc index 65d4ba16..b62e513a 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -735,12 +735,10 @@ int MM21ToYUY2(const uint8_t* src_y, // Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format // documentation. // TODO(greenjustin): Add an MT2T to I420 conversion. -// TODO(greenjustin): Investigate if there are valid stride parameters other -// than width. LIBYUV_API -int MT2TToP010(const uint16_t* src_y, +int MT2TToP010(const uint8_t* src_y, int src_stride_y, - const uint16_t* src_uv, + const uint8_t* src_uv, int src_stride_uv, uint16_t* dst_y, int dst_stride_y, @@ -748,48 +746,75 @@ int MT2TToP010(const uint16_t* src_y, int dst_stride_uv, int width, int height) { - if (width <= 0 || height <= 0 || !src_y || !src_uv || !dst_y || !dst_uv) { + if (width <= 0 || !height || !src_uv || !dst_uv) { return -1; } - // TODO(greenjustin): Investigate if we can allow arbitrary sizes. This may - // not be semantically meaningful in this format, but we do not have samples - // of unaligned data to conclude that yet. This format is 16x32 tiled, so we - // must pad the width and height to reflect that. 
- int aligned_width = (width + 15) & ~15; - int aligned_height = (height + 31) & ~31; - { - size_t y_size = aligned_width * aligned_height * 10 / 8; - size_t uv_size = aligned_width * ((aligned_height + 1) / 2) * 10 / 8; - size_t tmp_y_size = aligned_width * aligned_height * sizeof(uint16_t); - size_t tmp_uv_size = - aligned_width * ((aligned_height + 1) / 2) * sizeof(uint16_t); - void (*UnpackMT2T)(const uint16_t* src, uint16_t* dst, size_t size) = + int u_width = (width + 1) / 2; + int uv_width = 2 * u_width; + int y = 0; + int uv_height = uv_height = (height + 1) / 2; + const int tile_width = 16; + const int y_tile_height = 32; + const int uv_tile_height = 16; + int padded_width = (width + tile_width - 1) & ~(tile_width - 1); + int y_tile_row_size = padded_width * y_tile_height * 10 / 8; + int uv_tile_row_size = padded_width * uv_tile_height * 10 / 8; + size_t row_buf_size = padded_width * y_tile_height * sizeof(uint16_t); + void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) = UnpackMT2T_C; - align_buffer_64(tmp_y, tmp_y_size); - align_buffer_64(tmp_uv, tmp_uv_size); + align_buffer_64(row_buf, row_buf_size); #if defined(HAS_UNPACKMT2T_NEON) if (TestCpuFlag(kCpuHasNEON)) { UnpackMT2T = UnpackMT2T_NEON; } #endif + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + uv_height = (height + 1) / 2; + if (dst_y) { + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + dst_uv = dst_uv + (uv_height - 1) * dst_stride_uv; + dst_stride_uv = -dst_stride_uv; + } + + // Unpack and detile Y in rows of tiles + if (src_y && dst_y) { + for (y = 0; y < (height & ~(y_tile_height - 1)); y += y_tile_height) { + UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size); + DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y, + width, y_tile_height, y_tile_height); + src_y += src_stride_y * y_tile_height; + dst_y += dst_stride_y * y_tile_height; + } + if (height & (y_tile_height - 1)) { + UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size); + DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y, + width, height & (y_tile_height - 1), y_tile_height); + } + } - // TODO(greenjustin): Unpack and detile in rows rather than planes to keep - // the caches hot. - UnpackMT2T(src_y, (uint16_t*)tmp_y, y_size); - UnpackMT2T(src_uv, (uint16_t*)tmp_uv, uv_size); - - DetilePlane_16((uint16_t*)tmp_y, src_stride_y, dst_y, dst_stride_y, width, - height, 32); - DetilePlane_16((uint16_t*)tmp_uv, src_stride_uv, dst_uv, dst_stride_uv, - width, (height + 1) / 2, 16); - - free_aligned_buffer_64(tmp_y); - free_aligned_buffer_64(tmp_uv); + // Unpack and detile UV plane + for (y = 0; y < (uv_height & ~(uv_tile_height - 1)); y += uv_tile_height) { + UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size); + DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv, + uv_width, uv_tile_height, uv_tile_height); + src_uv += src_stride_uv * uv_tile_height; + dst_uv += dst_stride_uv * uv_tile_height; + } + if (uv_height & (uv_tile_height - 1)) { + UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size); + DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv, + uv_width, uv_height & (uv_tile_height - 1), + uv_tile_height); + } + 
free_aligned_buffer_64(row_buf); } - return 0; } |