diff options
author | Sergio Garcia Murillo <sergio.garcia.murillo@gmail.com> | 2023-01-04 08:53:51 +0100 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2023-01-04 21:10:01 +0000 |
commit | f8626a72248f7063c9bf3bbe96333a4af6e8b36f (patch) | |
tree | e66d03495dd5c81beba3d509a0b7aa90e2f069eb | |
parent | 22a579c438410710f627e67b38b3df9968efafb9 (diff) | |
download | libyuv-f8626a72248f7063c9bf3bbe96333a4af6e8b36f.tar.gz |
Add 10 bit rotate methods.
This initial implementation is based on current unoptimized code in webrtc using just plain for loops.
Bug: libyuv:949
Change-Id: Ic87ee49c3a0b62edbaaa4255c263c1f7be4ea02b
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4110782
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
-rw-r--r-- | include/libyuv/rotate.h | 64 | ||||
-rw-r--r-- | include/libyuv/rotate_row.h | 16 | ||||
-rw-r--r-- | include/libyuv/row.h | 10 | ||||
-rw-r--r-- | include/libyuv/version.h | 2 | ||||
-rw-r--r-- | source/rotate.cc | 373 | ||||
-rw-r--r-- | source/rotate_common.cc | 66 | ||||
-rw-r--r-- | source/row_common.cc | 13 | ||||
-rw-r--r-- | unit_test/rotate_argb_test.cc | 106 | ||||
-rw-r--r-- | unit_test/rotate_test.cc | 262 | ||||
-rw-r--r-- | unit_test/unit_test.h | 12 |
10 files changed, 911 insertions, 13 deletions
diff --git a/include/libyuv/rotate.h b/include/libyuv/rotate.h index 684ed5e6..37460c4a 100644 --- a/include/libyuv/rotate.h +++ b/include/libyuv/rotate.h @@ -85,6 +85,60 @@ int I444Rotate(const uint8_t* src_y, int height, enum RotationMode mode); +// Rotate I010 frame. +LIBYUV_API +int I010Rotate(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); + +// Rotate I210 frame. +LIBYUV_API +int I210Rotate(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); + +// Rotate I410 frame. +LIBYUV_API +int I410Rotate(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); + // Rotate NV12 input and store in I420. LIBYUV_API int NV12ToI420Rotate(const uint8_t* src_y, @@ -156,6 +210,16 @@ void RotatePlane270(const uint8_t* src, int width, int height); +// Rotate a plane by 0, 90, 180, or 270. +LIBYUV_API +int RotatePlane_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height, + enum RotationMode mode); + // Rotations for when U and V are interleaved. // These functions take one UV input pointer and // split the data into two buffers while diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h index aa8528a9..b773f886 100644 --- a/include/libyuv/rotate_row.h +++ b/include/libyuv/rotate_row.h @@ -215,7 +215,23 @@ void TransposeUVWx16_Any_LSX(const uint8_t* src, uint8_t* dst_b, int dst_stride_b, int width); +void TransposeWxH_16_C(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height); +void TransposeWx8_16_C(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width); +void TransposeWx1_16_C(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width); #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 45b172b2..ff93406b 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -837,6 +837,14 @@ struct YuvConstants { free(var##_mem); \ var = 0 +#define align_buffer_64_16(var, size) \ + uint8_t* var##_mem = (uint8_t*)(malloc((size * 2) + 63)); /* NOLINT */ \ + uint16_t* var = (uint16_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ + +#define free_aligned_buffer_64_16(var) \ + free(var##_mem); \ + var = 0 + #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP #else @@ -1934,6 +1942,8 @@ void MirrorSplitUVRow_C(const uint8_t* src_uv, uint8_t* dst_v, int width); +void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width); + void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index efad56b3..dfcef8d8 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1855 +#define LIBYUV_VERSION 1856 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/rotate.cc b/source/rotate.cc index f1e83cbd..82621cd8 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -477,6 +477,120 @@ int RotatePlane(const uint8_t* src, } LIBYUV_API +void TransposePlane_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height) { + int i = height; + // Work across the source in 8x8 tiles + while (i >= 8) { + TransposeWx8_16_C(src, src_stride, dst, dst_stride, width); + src += 8 * src_stride; // Go down 8 rows. + dst += 8; // Move over 8 columns. + i -= 8; + } + + if (i > 0) { + TransposeWxH_16_C(src, src_stride, dst, dst_stride, width, i); + } +} + +static void RotatePlane90_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height) { + // Rotate by 90 is a transpose with the source read + // from bottom to top. So set the source pointer to the end + // of the buffer and flip the sign of the source stride. + src += src_stride * (height - 1); + src_stride = -src_stride; + TransposePlane_16(src, src_stride, dst, dst_stride, width, height); +} + +static void RotatePlane270_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height) { + // Rotate by 270 is a transpose with the destination written + // from bottom to top. So set the destination pointer to the end + // of the buffer and flip the sign of the destination stride. + dst += dst_stride * (width - 1); + dst_stride = -dst_stride; + TransposePlane_16(src, src_stride, dst, dst_stride, width, height); +} + +static void RotatePlane180_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height) { + // Swap first and last row and mirror the content. Uses a temporary row. + align_buffer_64_16(row, width); + const uint16_t* src_bot = src + src_stride * (height - 1); + uint16_t* dst_bot = dst + dst_stride * (height - 1); + int half_height = (height + 1) >> 1; + int y; + + // Odd height will harmlessly mirror the middle row twice. + for (y = 0; y < half_height; ++y) { + CopyRow_16_C(src, row, width); // Copy first row into buffer + MirrorRow_16_C(src_bot, dst, width); // Mirror last row into first row + MirrorRow_16_C(row, dst_bot, width); // Mirror buffer into last row + src += src_stride; + dst += dst_stride; + src_bot -= src_stride; + dst_bot -= dst_stride; + } + free_aligned_buffer_64_16(row); +} + +LIBYUV_API +int RotatePlane_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height, + enum RotationMode mode) { + if (!src || width <= 0 || height == 0 || !dst) { + return -1; + } + + // Negative height means invert the image. + if (height < 0) { + height = -height; + src = src + (height - 1) * src_stride; + src_stride = -src_stride; + } + + switch (mode) { + case kRotate0: + // copy frame + CopyPlane_16(src, src_stride, dst, dst_stride, width, height); + return 0; + case kRotate90: + RotatePlane90_16(src, src_stride, dst, dst_stride, width, height); + return 0; + case kRotate270: + RotatePlane270_16(src, src_stride, dst, dst_stride, width, height); + return 0; + case kRotate180: + RotatePlane180_16(src, src_stride, dst, dst_stride, width, height); + return 0; + default: + break; + } + return -1; +} + +LIBYUV_API int I420Rotate(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, @@ -544,6 +658,8 @@ int I420Rotate(const uint8_t* src_y, return -1; } +// I422 has half width x full height UV planes, so rotate by 90 and 270 +// require scaling to maintain 422 subsampling. LIBYUV_API int I422Rotate(const uint8_t* src_y, int src_stride_y, @@ -579,31 +695,42 @@ int I422Rotate(const uint8_t* src_y, switch (mode) { case kRotate0: - // copy frame + // Copy frame. CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height); CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height); return 0; + + // Note on temporary Y plane for UV. + // Rotation of UV first fits within the Y destination plane rows. + // Y plane is width x height + // Y plane rotated is height x width + // UV plane is (width / 2) x height + // UV plane rotated is height x (width / 2) + // UV plane rotated+scaled is (height / 2) x width. + // UV plane rotated is a temporary that fits within the Y plane rotated. + case kRotate90: - // We need to rotate and rescale, we use plane Y as temporal storage. - RotatePlane90(src_u, src_stride_u, dst_y, height, halfwidth, height); - ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight, + RotatePlane90(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, + height); + ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u, halfheight, width, kFilterBilinear); - RotatePlane90(src_v, src_stride_v, dst_y, height, halfwidth, height); - ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight, + RotatePlane90(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, + height); + ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v, halfheight, width, kFilterLinear); RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; case kRotate270: - // We need to rotate and rescale, we use plane Y as temporal storage. - RotatePlane270(src_u, src_stride_u, dst_y, height, halfwidth, height); - ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight, + RotatePlane270(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, + height); + ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u, halfheight, width, kFilterBilinear); - RotatePlane270(src_v, src_stride_v, dst_y, height, halfwidth, height); - ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight, + RotatePlane270(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, + height); + ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v, halfheight, width, kFilterLinear); RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - return 0; case kRotate180: RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height); @@ -828,6 +955,228 @@ int Android420ToI420Rotate(const uint8_t* src_y, return -1; } +LIBYUV_API +int I010Rotate(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v || dst_stride_y < 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + switch (mode) { + case kRotate0: + // copy frame + return I010Copy(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height); + case kRotate90: + RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, + halfheight); + RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, + halfheight); + return 0; + case kRotate270: + RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width, + height); + RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, + halfheight); + RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, + halfheight); + return 0; + case kRotate180: + RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width, + height); + RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, + halfheight); + RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, + halfheight); + return 0; + default: + break; + } + return -1; +} + +// I210 has half width x full height UV planes, so rotate by 90 and 270 +// require scaling to maintain 422 subsampling. +LIBYUV_API +int I210Rotate(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v || dst_stride_y < 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + switch (mode) { + case kRotate0: + // Copy frame. + CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height); + CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height); + return 0; + + // Note on temporary Y plane for UV. + // Rotation of UV first fits within the Y destination plane rows. + // Y plane is width x height + // Y plane rotated is height x width + // UV plane is (width / 2) x height + // UV plane rotated is height x (width / 2) + // UV plane rotated+scaled is (height / 2) x width. + // UV plane rotated is a temporary that fits within the Y plane rotated. + + case kRotate90: + RotatePlane90_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, + height); + ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u, + halfheight, width, kFilterBilinear); + RotatePlane90_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, + height); + ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v, + halfheight, width, kFilterLinear); + RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + return 0; + case kRotate270: + RotatePlane270_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, + height); + ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u, + halfheight, width, kFilterBilinear); + RotatePlane270_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, + height); + ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v, + halfheight, width, kFilterLinear); + RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width, + height); + return 0; + case kRotate180: + RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width, + height); + RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, + height); + RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, + height); + return 0; + default: + break; + } + return -1; +} + +LIBYUV_API +int I410Rotate(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode) { + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v || dst_stride_y < 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + switch (mode) { + case kRotate0: + // copy frame + CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height); + CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height); + return 0; + case kRotate90: + RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height); + RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height); + return 0; + case kRotate270: + RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width, + height); + RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, width, + height); + RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, width, + height); + return 0; + case kRotate180: + RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width, + height); + RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, width, + height); + RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, width, + height); + return 0; + default: + break; + } + return -1; +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/rotate_common.cc b/source/rotate_common.cc index ff212ade..f24accfb 100644 --- a/source/rotate_common.cc +++ b/source/rotate_common.cc @@ -100,6 +100,72 @@ void TransposeUVWxH_C(const uint8_t* src, } } +void TransposeWx8_16_C(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width) { + int i; + for (i = 0; i < width; ++i) { + dst[0] = src[0 * src_stride]; + dst[1] = src[1 * src_stride]; + dst[2] = src[2 * src_stride]; + dst[3] = src[3 * src_stride]; + dst[4] = src[4 * src_stride]; + dst[5] = src[5 * src_stride]; + dst[6] = src[6 * src_stride]; + dst[7] = src[7 * src_stride]; + ++src; + dst += dst_stride; + } +} + +void TransposeUVWx8_16_C(const uint16_t* src, + int src_stride, + uint16_t* dst_a, + int dst_stride_a, + uint16_t* dst_b, + int dst_stride_b, + int width) { + int i; + for (i = 0; i < width; ++i) { + dst_a[0] = src[0 * src_stride + 0]; + dst_b[0] = src[0 * src_stride + 1]; + dst_a[1] = src[1 * src_stride + 0]; + dst_b[1] = src[1 * src_stride + 1]; + dst_a[2] = src[2 * src_stride + 0]; + dst_b[2] = src[2 * src_stride + 1]; + dst_a[3] = src[3 * src_stride + 0]; + dst_b[3] = src[3 * src_stride + 1]; + dst_a[4] = src[4 * src_stride + 0]; + dst_b[4] = src[4 * src_stride + 1]; + dst_a[5] = src[5 * src_stride + 0]; + dst_b[5] = src[5 * src_stride + 1]; + dst_a[6] = src[6 * src_stride + 0]; + dst_b[6] = src[6 * src_stride + 1]; + dst_a[7] = src[7 * src_stride + 0]; + dst_b[7] = src[7 * src_stride + 1]; + src += 2; + dst_a += dst_stride_a; + dst_b += dst_stride_b; + } +} + +void TransposeWxH_16_C(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height) { + int i; + for (i = 0; i < width; ++i) { + int j; + for (j = 0; j < height; ++j) { + dst[i * dst_stride + j] = src[j * src_stride + i]; + } + } +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/row_common.cc b/source/row_common.cc index 3d1e705e..84afd35b 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -2688,6 +2688,19 @@ void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) { } } +void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width) { + int x; + src += width - 1; + for (x = 0; x < width - 1; x += 2) { + dst[x] = src[0]; + dst[x + 1] = src[-1]; + src -= 2; + } + if (width & 1) { + dst[width - 1] = src[0]; + } +} + void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) { int x; src_uv += (width - 1) << 1; diff --git a/unit_test/rotate_argb_test.cc b/unit_test/rotate_argb_test.cc index 01ed69ca..74952c4e 100644 --- a/unit_test/rotate_argb_test.cc +++ b/unit_test/rotate_argb_test.cc @@ -225,4 +225,110 @@ TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) { free_aligned_buffer_page_end(src_argb); } +static void TestRotatePlane_16(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height < 1) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_stride = src_width; + int src_plane_size = src_stride * abs(src_height); + align_buffer_page_end_16(src, src_plane_size); + for (int i = 0; i < src_plane_size; ++i) { + src[i] = fastrand() & 0xff; + } + + int dst_stride = dst_width; + int dst_plane_size = dst_stride * dst_height; + align_buffer_page_end_16(dst_c, dst_plane_size); + align_buffer_page_end_16(dst_opt, dst_plane_size); + memset(dst_c, 2, dst_plane_size); + memset(dst_opt, 3, dst_plane_size); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + RotatePlane_16(src, src_stride, dst_c, dst_stride, src_width, src_height, + mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + RotatePlane_16(src, src_stride, dst_opt, dst_stride, src_width, src_height, + mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_c); + free_aligned_buffer_page_end_16(dst_opt); + free_aligned_buffer_page_end_16(src); +} + +TEST_F(LibYUVRotateTest, RotatePlane0_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane90_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane180_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane270_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane0_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate0, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane90_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate90, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane180_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate180, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane270_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate270, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + } // namespace libyuv diff --git a/unit_test/rotate_test.cc b/unit_test/rotate_test.cc index d3887414..9dec7811 100644 --- a/unit_test/rotate_test.cc +++ b/unit_test/rotate_test.cc @@ -596,4 +596,266 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) #undef TESTAPLANARTOP #undef TESTAPLANARTOPI +static void I010TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i010_y_size = src_width * Abs(src_height); + int src_i010_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2); + int src_i010_size = src_i010_y_size + src_i010_uv_size * 2; + align_buffer_page_end_16(src_i010, src_i010_size); + for (int i = 0; i < src_i010_size; ++i) { + src_i010[i] = fastrand() & 0x3ff; + } + + int dst_i010_y_size = dst_width * dst_height; + int dst_i010_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2); + int dst_i010_size = dst_i010_y_size + dst_i010_uv_size * 2; + align_buffer_page_end_16(dst_i010_c, dst_i010_size); + align_buffer_page_end_16(dst_i010_opt, dst_i010_size); + memset(dst_i010_c, 2, dst_i010_size * 2); + memset(dst_i010_opt, 3, dst_i010_size * 2); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I010Rotate(src_i010, src_width, src_i010 + src_i010_y_size, + (src_width + 1) / 2, src_i010 + src_i010_y_size + src_i010_uv_size, + (src_width + 1) / 2, dst_i010_c, dst_width, + dst_i010_c + dst_i010_y_size, (dst_width + 1) / 2, + dst_i010_c + dst_i010_y_size + dst_i010_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I010Rotate( + src_i010, src_width, src_i010 + src_i010_y_size, (src_width + 1) / 2, + src_i010 + src_i010_y_size + src_i010_uv_size, (src_width + 1) / 2, + dst_i010_opt, dst_width, dst_i010_opt + dst_i010_y_size, + (dst_width + 1) / 2, dst_i010_opt + dst_i010_y_size + dst_i010_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i010_size; ++i) { + EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_i010_c); + free_aligned_buffer_page_end_16(dst_i010_opt); + free_aligned_buffer_page_end_16(src_i010); +} + +TEST_F(LibYUVRotateTest, I010Rotate0_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I010Rotate90_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I010Rotate180_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I010Rotate270_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +static void I210TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i210_y_size = src_width * Abs(src_height); + int src_i210_uv_size = ((src_width + 1) / 2) * Abs(src_height); + int src_i210_size = src_i210_y_size + src_i210_uv_size * 2; + align_buffer_page_end_16(src_i210, src_i210_size); + for (int i = 0; i < src_i210_size; ++i) { + src_i210[i] = fastrand() & 0x3ff; + } + + int dst_i210_y_size = dst_width * dst_height; + int dst_i210_uv_size = ((dst_width + 1) / 2) * dst_height; + int dst_i210_size = dst_i210_y_size + dst_i210_uv_size * 2; + align_buffer_page_end_16(dst_i210_c, dst_i210_size); + align_buffer_page_end_16(dst_i210_opt, dst_i210_size); + memset(dst_i210_c, 2, dst_i210_size * 2); + memset(dst_i210_opt, 3, dst_i210_size * 2); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I210Rotate(src_i210, src_width, src_i210 + src_i210_y_size, + (src_width + 1) / 2, src_i210 + src_i210_y_size + src_i210_uv_size, + (src_width + 1) / 2, dst_i210_c, dst_width, + dst_i210_c + dst_i210_y_size, (dst_width + 1) / 2, + dst_i210_c + dst_i210_y_size + dst_i210_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I210Rotate( + src_i210, src_width, src_i210 + src_i210_y_size, (src_width + 1) / 2, + src_i210 + src_i210_y_size + src_i210_uv_size, (src_width + 1) / 2, + dst_i210_opt, dst_width, dst_i210_opt + dst_i210_y_size, + (dst_width + 1) / 2, dst_i210_opt + dst_i210_y_size + dst_i210_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i210_size; ++i) { + EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_i210_c); + free_aligned_buffer_page_end_16(dst_i210_opt); + free_aligned_buffer_page_end_16(src_i210); +} + +TEST_F(LibYUVRotateTest, I210Rotate0_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I210Rotate90_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I210Rotate180_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I210Rotate270_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +static void I410TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i410_y_size = src_width * Abs(src_height); + int src_i410_uv_size = src_width * Abs(src_height); + int src_i410_size = src_i410_y_size + src_i410_uv_size * 2; + align_buffer_page_end_16(src_i410, src_i410_size); + for (int i = 0; i < src_i410_size; ++i) { + src_i410[i] = fastrand() & 0x3ff; + } + + int dst_i410_y_size = dst_width * dst_height; + int dst_i410_uv_size = dst_width * dst_height; + int dst_i410_size = dst_i410_y_size + dst_i410_uv_size * 2; + align_buffer_page_end_16(dst_i410_c, dst_i410_size); + align_buffer_page_end_16(dst_i410_opt, dst_i410_size); + memset(dst_i410_c, 2, dst_i410_size * 2); + memset(dst_i410_opt, 3, dst_i410_size * 2); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width, + src_i410 + src_i410_y_size + src_i410_uv_size, src_width, + dst_i410_c, dst_width, dst_i410_c + dst_i410_y_size, dst_width, + dst_i410_c + dst_i410_y_size + dst_i410_uv_size, dst_width, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width, + src_i410 + src_i410_y_size + src_i410_uv_size, src_width, + dst_i410_opt, dst_width, dst_i410_opt + dst_i410_y_size, + dst_width, dst_i410_opt + dst_i410_y_size + dst_i410_uv_size, + dst_width, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i410_size; ++i) { + EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_i410_c); + free_aligned_buffer_page_end_16(dst_i410_opt); + free_aligned_buffer_page_end_16(src_i410); +} + +TEST_F(LibYUVRotateTest, I410Rotate0_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I410Rotate90_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I410Rotate180_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I410Rotate270_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + } // namespace libyuv diff --git a/unit_test/unit_test.h b/unit_test/unit_test.h index 6cc99a65..42e40d12 100644 --- a/unit_test/unit_test.h +++ b/unit_test/unit_test.h @@ -78,6 +78,18 @@ static inline bool SizeValid(int src_width, free(var##_mem); \ var = 0 +#define align_buffer_page_end_16(var, size) \ + uint8_t* var##_mem = \ + reinterpret_cast<uint8_t*>(malloc(((size * 2) + 4095 + 63) & ~4095)); \ + uint16_t* var = reinterpret_cast<uint16_t*>( \ + (intptr_t)(var##_mem + (((size * 2) + 4095 + 63) & ~4095) - \ + (size * 2)) & \ + ~63) + +#define free_aligned_buffer_page_end_16(var) \ + free(var##_mem); \ + var = 0 + #ifdef WIN32 static inline double get_time() { LARGE_INTEGER t, f; |