author     Yuan Tong <tongyuan200097@gmail.com>      2021-02-25 15:21:28 +0800
committer  Frank Barchard <fbarchard@chromium.org>   2021-02-25 23:16:54 +0000
commit     a8c181050c202854ae32433164e6bd5d1e7c4368 (patch)
tree       c300dbf9bfa59d0dc2772c311b0dfd154e08d54a
parent     08815a29766a78398a8e2b9ed095280e9d0a73c2 (diff)
download   libyuv-a8c181050c202854ae32433164e6bd5d1e7c4368.tar.gz
Add 10/12 bit YUV To YUV functions
The following functions (and their 12 bit variant) are added:

planar, 10->10:
  I410ToI010, I210ToI010
planar, 10->8:
  I410ToI444, I210ToI422
planar<->biplanar, 10->10:
  I010ToP010, I210ToP210, I410ToP410
  P010ToI010, P210ToI210, P410ToI410

R=fbarchard@chromium.org

Change-Id: I9aa2bafa0d6a6e1e38ce4e20cbb437e10f9b0158
Bug: libyuv:834, libyuv:873
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2709822
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
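Usage sketch (illustrative only, not part of this CL): calling the new planar-to-biplanar conversion I010ToP010. Frame dimensions, buffers and strides here are assumptions, with strides counted in uint16_t elements as in the tests added below.

    #include <vector>
    #include "libyuv/convert.h"

    void ExampleI010ToP010(int width, int height) {
      const int half_w = (width + 1) / 2;   // 420: chroma is half width
      const int half_h = (height + 1) / 2;  //      and half height
      std::vector<uint16_t> src_y(width * height);
      std::vector<uint16_t> src_u(half_w * half_h);
      std::vector<uint16_t> src_v(half_w * half_h);
      std::vector<uint16_t> dst_y(width * height);
      std::vector<uint16_t> dst_uv(half_w * 2 * half_h);  // interleaved UV
      // I010 keeps 10-bit samples in the low bits of each 16-bit word;
      // P010 keeps them in the high bits, with U and V interleaved.
      libyuv::I010ToP010(src_y.data(), width, src_u.data(), half_w,
                         src_v.data(), half_w, dst_y.data(), width,
                         dst_uv.data(), half_w * 2, width, height);
    }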
-rw-r--r--  README.chromium                       2
-rw-r--r--  include/libyuv/convert.h            183
-rw-r--r--  include/libyuv/convert_from.h        18
-rw-r--r--  include/libyuv/planar_functions.h    44
-rw-r--r--  include/libyuv/row.h                 92
-rw-r--r--  include/libyuv/version.h              2
-rw-r--r--  include/libyuv/video_common.h        11
-rw-r--r--  source/convert.cc                   349
-rw-r--r--  source/convert_from.cc               44
-rw-r--r--  source/planar_functions.cc          210
-rw-r--r--  source/row_any.cc                    76
-rw-r--r--  source/row_common.cc                 46
-rw-r--r--  source/row_gcc.cc                   108
-rw-r--r--  source/row_neon.cc                  115
-rw-r--r--  source/row_neon64.cc                120
-rw-r--r--  unit_test/convert_test.cc           387
-rw-r--r--  unit_test/video_common_test.cc        5
17 files changed, 1577 insertions(+), 235 deletions(-)
diff --git a/README.chromium b/README.chromium
index 01b05888..51b4a11e 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1778
+Version: 1779
License: BSD
License File: LICENSE
diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h
index 4e58ad6e..40869ef2 100644
--- a/include/libyuv/convert.h
+++ b/include/libyuv/convert.h
@@ -193,6 +193,129 @@ int I010ToI420(const uint16_t* src_y,
int width,
int height);
+#define H210ToH422 I210ToI422
+LIBYUV_API
+int I210ToI422(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+#define H410ToH444 I410ToI444
+LIBYUV_API
+int I410ToI444(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+#define H012ToH420 I012ToI420
+LIBYUV_API
+int I012ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+#define H212ToH422 I212ToI422
+LIBYUV_API
+int I212ToI422(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+#define H412ToH444 I412ToI444
+LIBYUV_API
+int I412ToI444(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+#define I412ToI012 I410ToI010
+#define H410ToH010 I410ToI010
+#define H412ToH012 I410ToI010
+LIBYUV_API
+int I410ToI010(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+#define I212ToI012 I210ToI010
+#define H210ToH010 I210ToI010
+#define H212ToH012 I210ToI010
+LIBYUV_API
+int I210ToI010(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
// Convert I010 to I410
LIBYUV_API
int I010ToI410(const uint16_t* src_y,
@@ -233,6 +356,66 @@ int I210ToI410(const uint16_t* src_y,
// Convert I212 to I412
#define I212ToI412 I210ToI410
+// Convert I010 to P010
+LIBYUV_API
+int I010ToP010(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
+// Convert I210 to P210
+LIBYUV_API
+int I210ToP210(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
+// Convert I012 to P012
+LIBYUV_API
+int I012ToP012(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
+// Convert I212 to P212
+LIBYUV_API
+int I212ToP212(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
// Convert I400 (grey) to I420.
LIBYUV_API
int I400ToI420(const uint8_t* src_y,
diff --git a/include/libyuv/convert_from.h b/include/libyuv/convert_from.h
index 5140ed4f..32f42a63 100644
--- a/include/libyuv/convert_from.h
+++ b/include/libyuv/convert_from.h
@@ -39,6 +39,24 @@ int I420ToI010(const uint8_t* src_y,
int width,
int height);
+// Convert 8 bit YUV to 12 bit.
+#define H420ToH012 I420ToI012
+LIBYUV_API
+int I420ToI012(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
LIBYUV_API
int I420ToI422(const uint8_t* src_y,
int src_stride_y,
diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index ce94e162..ebefb568 100644
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -105,6 +105,50 @@ void MergeUVPlane(const uint8_t* src_u,
int width,
int height);
+// Split interleaved msb UV plane into separate lsb U and V planes.
+LIBYUV_API
+void SplitUVPlane_16(const uint16_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ int depth);
+
+// Merge separate lsb U and V planes into one interleaved msb UV plane.
+LIBYUV_API
+void MergeUVPlane_16(const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height,
+ int depth);
+
+// Convert lsb plane to msb plane
+LIBYUV_API
+void ConvertToMSBPlane_16(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int depth);
+
+// Convert msb plane to lsb plane
+LIBYUV_API
+void ConvertToLSBPlane_16(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int depth);
+
// Scale U and V to half width and height and merge into interleaved UV plane.
// width and height are source size, allowing odd sizes.
// Use for converting I444 or I422 to NV12.
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 12233856..68fb88b3 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -305,6 +305,7 @@ extern "C" {
#define HAS_ARGBTORGB24ROW_AVX2
#define HAS_CONVERT16TO8ROW_AVX2
#define HAS_CONVERT8TO16ROW_AVX2
+#define HAS_DIVIDEROW_16_AVX2
#define HAS_HALFMERGEUVROW_AVX2
#define HAS_MERGEARGBROW_AVX2
#define HAS_I210TOAR30ROW_AVX2
@@ -318,6 +319,7 @@ extern "C" {
#define HAS_MULTIPLYROW_16_AVX2
#define HAS_RGBATOYJROW_AVX2
#define HAS_SPLITARGBROW_AVX2
+#define HAS_SPLITUVROW_16_AVX2
#define HAS_SWAPUVROW_AVX2
// TODO(fbarchard): Fix AVX2 version of YUV24
// #define HAS_NV21TOYUV24ROW_AVX2
@@ -363,6 +365,7 @@ extern "C" {
#define HAS_BGRATOYROW_NEON
#define HAS_BYTETOFLOATROW_NEON
#define HAS_COPYROW_NEON
+#define HAS_DIVIDEROW_16_NEON
#define HAS_HALFFLOATROW_NEON
#define HAS_HALFMERGEUVROW_NEON
#define HAS_I400TOARGBROW_NEON
@@ -380,9 +383,11 @@ extern "C" {
#define HAS_J400TOARGBROW_NEON
#define HAS_MERGEARGBROW_NEON
#define HAS_MERGEUVROW_NEON
+#define HAS_MERGEUVROW_16_NEON
#define HAS_MIRRORROW_NEON
#define HAS_MIRRORUVROW_NEON
#define HAS_MIRRORSPLITUVROW_NEON
+#define HAS_MULTIPLYROW_16_NEON
#define HAS_NV12TOARGBROW_NEON
#define HAS_NV12TORGB24ROW_NEON
#define HAS_NV12TORGB565ROW_NEON
@@ -409,6 +414,7 @@ extern "C" {
#define HAS_SPLITARGBROW_NEON
#define HAS_SPLITRGBROW_NEON
#define HAS_SPLITUVROW_NEON
+#define HAS_SPLITUVROW_16_NEON
#define HAS_SWAPUVROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
@@ -2010,22 +2016,96 @@ void SplitXRGBRow_Any_NEON(const uint8_t* src_argb,
void MergeUVRow_16_C(const uint16_t* src_u,
const uint16_t* src_v,
uint16_t* dst_uv,
- int scale, /* 64 for 10 bit */
+ int depth,
int width);
void MergeUVRow_16_AVX2(const uint16_t* src_u,
const uint16_t* src_v,
uint16_t* dst_uv,
- int scale,
+ int depth,
int width);
+void MergeUVRow_16_Any_AVX2(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
+ int depth,
+ int width);
+void MergeUVRow_16_NEON(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
+ int depth,
+ int width);
+void MergeUVRow_16_Any_NEON(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
+ int depth,
+ int width);
+
+void SplitUVRow_16_C(const uint16_t* src_uv,
+ uint16_t* dst_u,
+ uint16_t* dst_v,
+ int depth,
+ int width);
+void SplitUVRow_16_AVX2(const uint16_t* src_uv,
+ uint16_t* dst_u,
+ uint16_t* dst_v,
+ int depth,
+ int width);
+void SplitUVRow_16_Any_AVX2(const uint16_t* src_uv,
+ uint16_t* dst_u,
+ uint16_t* dst_v,
+ int depth,
+ int width);
+void SplitUVRow_16_NEON(const uint16_t* src_uv,
+ uint16_t* dst_u,
+ uint16_t* dst_v,
+ int depth,
+ int width);
+void SplitUVRow_16_Any_NEON(const uint16_t* src_uv,
+ uint16_t* dst_u,
+ uint16_t* dst_v,
+ int depth,
+ int width);
-void MultiplyRow_16_AVX2(const uint16_t* src_y,
- uint16_t* dst_y,
- int scale,
- int width);
void MultiplyRow_16_C(const uint16_t* src_y,
uint16_t* dst_y,
int scale,
int width);
+void MultiplyRow_16_AVX2(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+void MultiplyRow_16_Any_AVX2(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+void MultiplyRow_16_NEON(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+void MultiplyRow_16_Any_NEON(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+
+void DivideRow_16_C(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+void DivideRow_16_AVX2(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+void DivideRow_16_Any_AVX2(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+void DivideRow_16_NEON(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+void DivideRow_16_Any_NEON(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
void Convert8To16Row_C(const uint8_t* src_y,
uint16_t* dst_y,
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index ff6531bd..e59b316a 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1778
+#define LIBYUV_VERSION 1779
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/include/libyuv/video_common.h b/include/libyuv/video_common.h
index 6e408eda..0da3fb55 100644
--- a/include/libyuv/video_common.h
+++ b/include/libyuv/video_common.h
@@ -60,7 +60,7 @@ enum FourCC {
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
FOURCC_I010 = FOURCC('I', '0', '1', '0'), // bt.601 10 bit 420
- FOURCC_I210 = FOURCC('I', '0', '1', '0'), // bt.601 10 bit 422
+ FOURCC_I210 = FOURCC('I', '2', '1', '0'), // bt.601 10 bit 422
// 1 Secondary YUV format: row biplanar. deprecated.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
@@ -109,6 +109,8 @@ enum FourCC {
FOURCC_F210 = FOURCC('F', '2', '1', '0'), // bt.709 full range 10 bit 422
FOURCC_H210 = FOURCC('H', '2', '1', '0'), // bt.709 10 bit 422
FOURCC_U210 = FOURCC('U', '2', '1', '0'), // bt.2020 10 bit 422
+ FOURCC_P010 = FOURCC('P', '0', '1', '0'),
+ FOURCC_P210 = FOURCC('P', '2', '1', '0'),
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
@@ -178,7 +180,12 @@ enum FourCCBpp {
FOURCC_BPP_J400 = 8,
FOURCC_BPP_H420 = 12,
FOURCC_BPP_H422 = 16,
- FOURCC_BPP_H010 = 24,
+ FOURCC_BPP_I010 = 15,
+ FOURCC_BPP_I210 = 20,
+ FOURCC_BPP_H010 = 15,
+ FOURCC_BPP_H210 = 20,
+ FOURCC_BPP_P010 = 15,
+ FOURCC_BPP_P210 = 20,
FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0,
FOURCC_BPP_IYUV = 12,
diff --git a/source/convert.cc b/source/convert.cc
index b0314df4..1bd59659 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -149,6 +149,52 @@ int I010Copy(const uint16_t* src_y,
return 0;
}
+static int Planar16bitTo8bit(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ int subsample_x,
+ int subsample_y,
+ int depth) {
+ int uv_width = SUBSAMPLE(width, subsample_x, subsample_x);
+ int uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
+ int scale = 1 << (24 - depth);
+ if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ uv_height = -uv_height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (uv_height - 1) * src_stride_u;
+ src_v = src_v + (uv_height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ // Convert Y plane.
+ Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
+ height);
+ // Convert UV planes.
+ Convert16To8Plane(src_u, src_stride_u, dst_u, dst_stride_u, scale, uv_width,
+ uv_height);
+ Convert16To8Plane(src_v, src_stride_v, dst_v, dst_stride_v, scale, uv_width,
+ uv_height);
+ return 0;
+}
+
// Convert 10 bit YUV to 8 bit.
LIBYUV_API
int I010ToI420(const uint16_t* src_y,
@@ -165,34 +211,295 @@ int I010ToI420(const uint16_t* src_y,
int dst_stride_v,
int width,
int height) {
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, width, height, 1,
+ 1, 10);
+}
+
+LIBYUV_API
+int I210ToI422(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, width, height, 1,
+ 0, 10);
+}
+
+LIBYUV_API
+int I410ToI444(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, width, height, 0,
+ 0, 10);
+}
+
+LIBYUV_API
+int I012ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, width, height, 1,
+ 1, 12);
+}
+
+LIBYUV_API
+int I212ToI422(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, width, height, 1,
+ 0, 12);
+}
+
+LIBYUV_API
+int I412ToI444(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, width, height, 0,
+ 0, 12);
+}
+
+// Any Ix10 To I010 format with mirroring.
+static int Ix10ToI010(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ int subsample_x,
+ int subsample_y) {
+ const int dst_y_width = Abs(width);
+ const int dst_y_height = Abs(height);
+ const int src_uv_width = SUBSAMPLE(width, subsample_x, subsample_x);
+ const int src_uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
+ const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
+ const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
+ if (width <= 0 || height == 0) {
return -1;
}
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- halfheight = (height + 1) >> 1;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (halfheight - 1) * src_stride_u;
- src_v = src_v + (halfheight - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
+ if (dst_y) {
+ ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+ dst_y_width, dst_y_height, kFilterBilinear);
}
+ ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
+ dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
+ ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
+ dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
+ return 0;
+}
- // Convert Y plane.
- Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, 16384, width,
- height);
- // Convert UV planes.
- Convert16To8Plane(src_u, src_stride_u, dst_u, dst_stride_u, 16384, halfwidth,
- halfheight);
- Convert16To8Plane(src_v, src_stride_v, dst_v, dst_stride_v, 16384, halfwidth,
- halfheight);
+LIBYUV_API
+int I410ToI010(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return Ix10ToI010(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, 0, 0);
+}
+
+LIBYUV_API
+int I210ToI010(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return Ix10ToI010(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, 1, 0);
+}
+
+// Any I[420]1[02] to P[420]1[02] format with mirroring.
+static int Ix1xToPx1x(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height,
+ int subsample_x,
+ int subsample_y,
+ int depth) {
+ const int uv_width = SUBSAMPLE(width, subsample_x, subsample_x);
+ const int uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
+ if (width <= 0 || height == 0) {
+ return -1;
+ }
+
+ ConvertToMSBPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height,
+ depth);
+ MergeUVPlane_16(src_u, src_stride_u, src_v, src_stride_v, dst_uv,
+ dst_stride_uv, uv_width, uv_height, depth);
return 0;
}
+LIBYUV_API
+int I010ToP010(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height) {
+ return Ix1xToPx1x(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
+ width, height, 1, 1, 10);
+}
+
+LIBYUV_API
+int I210ToP210(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height) {
+ return Ix1xToPx1x(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
+ width, height, 1, 0, 10);
+}
+
+LIBYUV_API
+int I012ToP012(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height) {
+ return Ix1xToPx1x(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
+ width, height, 1, 1, 12);
+}
+
+LIBYUV_API
+int I212ToP212(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height) {
+ return Ix1xToPx1x(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
+ width, height, 1, 0, 12);
+}
+
// 422 chroma is 1/2 width, 1x height
// 420 chroma is 1/2 width, 1/2 height
LIBYUV_API
diff --git a/source/convert_from.cc b/source/convert_from.cc
index 591e2782..687f0a72 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -111,6 +111,50 @@ int I420ToI010(const uint8_t* src_y,
return 0;
}
+// Convert 8 bit YUV to 12 bit.
+LIBYUV_API
+int I420ToI012(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ halfheight = (height + 1) >> 1;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (halfheight - 1) * src_stride_u;
+ src_v = src_v + (halfheight - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ // Convert Y plane.
+ Convert8To16Plane(src_y, src_stride_y, dst_y, dst_stride_y, 4096, width,
+ height);
+ // Convert UV planes.
+ Convert8To16Plane(src_u, src_stride_u, dst_u, dst_stride_u, 4096, halfwidth,
+ halfheight);
+ Convert8To16Plane(src_v, src_stride_v, dst_v, dst_stride_v, 4096, halfwidth,
+ halfheight);
+ return 0;
+}
+
// 420 chroma is 1/2 width, 1/2 height
// 422 chroma is 1/2 width, 1x height
LIBYUV_API
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 069be7fd..219c2165 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -550,6 +550,216 @@ void MergeUVPlane(const uint8_t* src_u,
}
}
+// Support function for P010 etc UV channels.
+// Width and height are plane sizes (typically half pixel width).
+LIBYUV_API
+void SplitUVPlane_16(const uint16_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ int depth) {
+ int y;
+ int scale = 1 << depth;
+ void (*SplitUVRow)(const uint16_t* src_uv, uint16_t* dst_u, uint16_t* dst_v,
+ int scale, int width) = SplitUVRow_16_C;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_u = dst_u + (height - 1) * dst_stride_u;
+ dst_v = dst_v + (height - 1) * dst_stride_v;
+ dst_stride_u = -dst_stride_u;
+ dst_stride_v = -dst_stride_v;
+ }
+ // Coalesce rows.
+ if (src_stride_uv == width * 2 && dst_stride_u == width &&
+ dst_stride_v == width) {
+ width *= height;
+ height = 1;
+ src_stride_uv = dst_stride_u = dst_stride_v = 0;
+ }
+#if defined(HAS_SPLITUVROW_16_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ SplitUVRow = SplitUVRow_16_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ SplitUVRow = SplitUVRow_16_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SplitUVRow = SplitUVRow_16_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ SplitUVRow = SplitUVRow_16_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ // Copy a row of UV.
+ SplitUVRow(src_uv, dst_u, dst_v, scale, width);
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ src_uv += src_stride_uv;
+ }
+}
+
+LIBYUV_API
+void MergeUVPlane_16(const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height,
+ int depth) {
+ int y;
+ int scale = 1 << (16 - depth);
+ void (*MergeUVRow)(const uint16_t* src_u, const uint16_t* src_v,
+ uint16_t* dst_uv, int scale, int width) = MergeUVRow_16_C;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_uv = dst_uv + (height - 1) * dst_stride_uv;
+ dst_stride_uv = -dst_stride_uv;
+ }
+ // Coalesce rows.
+ if (src_stride_u == width && src_stride_v == width &&
+ dst_stride_uv == width * 2) {
+ width *= height;
+ height = 1;
+ src_stride_u = src_stride_v = dst_stride_uv = 0;
+ }
+#if defined(HAS_MERGEUVROW_16_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MergeUVRow = MergeUVRow_16_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ MergeUVRow = MergeUVRow_16_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_MERGEUVROW_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MergeUVRow = MergeUVRow_16_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ MergeUVRow = MergeUVRow_16_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ // Merge a row of U and V into a row of UV.
+ MergeUVRow(src_u, src_v, dst_uv, scale, width);
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ dst_uv += dst_stride_uv;
+ }
+}
+
+// Convert plane from lsb to msb
+LIBYUV_API
+void ConvertToMSBPlane_16(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int depth) {
+ int y;
+ int scale = 1 << (16 - depth);
+ void (*MultiplyRow)(const uint16_t* src_y, uint16_t* dst_y, int scale,
+ int width) = MultiplyRow_16_C;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_y = dst_stride_y = 0;
+ }
+
+#if defined(HAS_MULTIPLYROW_16_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MultiplyRow = MultiplyRow_16_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ MultiplyRow = MultiplyRow_16_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_MULTIPLYROW_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MultiplyRow = MultiplyRow_16_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ MultiplyRow = MultiplyRow_16_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ MultiplyRow(src_y, dst_y, scale, width);
+ src_y += src_stride_y;
+ dst_y += dst_stride_y;
+ }
+}
+
+// Convert plane from msb to lsb
+LIBYUV_API
+void ConvertToLSBPlane_16(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int depth) {
+ int y;
+ int scale = 1 << depth;
+ void (*DivideRow)(const uint16_t* src_y, uint16_t* dst_y, int scale,
+ int width) = DivideRow_16_C;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_y = dst_stride_y = 0;
+ }
+
+#if defined(HAS_DIVIDEROW_16_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ DivideRow = DivideRow_16_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ DivideRow = DivideRow_16_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_DIVIDEROW_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ DivideRow = DivideRow_16_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ DivideRow = DivideRow_16_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ DivideRow(src_y, dst_y, scale, width);
+ src_y += src_stride_y;
+ dst_y += dst_stride_y;
+ }
+}
+
// Swap U and V channels in interleaved UV plane.
LIBYUV_API
void SwapUVPlane(const uint8_t* src_uv,
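The plane-level helpers added above are the building blocks the convert.cc wrappers compose. A minimal sketch (illustrative only; dimensions and strides are assumptions, strides in uint16_t elements) of merging 10-bit lsb U and V planes into a P210-style msb interleaved UV plane:

    #include <vector>
    #include "libyuv/planar_functions.h"

    void ExampleMergeUVPlane16(int width, int height) {
      const int half_w = (width + 1) / 2;  // 422: chroma is half width, full height
      std::vector<uint16_t> src_u(half_w * height);    // 10-bit values in low bits
      std::vector<uint16_t> src_v(half_w * height);
      std::vector<uint16_t> dst_uv(half_w * 2 * height);
      // depth = 10: each sample is shifted into the msb position and U/V are
      // interleaved, as P210 expects. Width/height here are the chroma plane size.
      libyuv::MergeUVPlane_16(src_u.data(), half_w, src_v.data(), half_w,
                              dst_uv.data(), half_w * 2, half_w, height, 10);
    }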
diff --git a/source/row_any.cc b/source/row_any.cc
index 57c39d5d..08ae1d2a 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -546,6 +546,32 @@ ANY21C(NV12ToRGB565Row_Any_MMI, NV12ToRGB565Row_MMI, 1, 1, 2, 2, 7)
#endif
#undef ANY21C
+// Any 2 16 bit planes with parameter to 1
+#define ANY21PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
+ void NAMEANY(const T* src_u, const T* src_v, T* dst_uv, int depth, \
+ int width) { \
+ SIMD_ALIGNED(T temp[16 * 4]); \
+ memset(temp, 0, 16 * 4); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_u, src_v, dst_uv, depth, n); \
+ } \
+ memcpy(temp, src_u + n, r * BPP); \
+ memcpy(temp + 16, src_v + n, r * BPP); \
+ ANY_SIMD(temp, temp + 16, temp + 32, depth, MASK + 1); \
+ memcpy(dst_uv + n * 2, temp + 32, r * BPP * 2); \
+ }
+
+#ifdef HAS_MERGEUVROW_16_AVX2
+ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 15)
+#endif
+#ifdef HAS_MERGEUVROW_16_NEON
+ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
+#endif
+
+#undef ANY21CT
+
// Any 1 to 1.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
@@ -1126,6 +1152,30 @@ ANY11C(Convert8To16Row_Any_AVX2,
uint16_t,
31)
#endif
+#ifdef HAS_MULTIPLYROW_16_AVX2
+ANY11C(MultiplyRow_16_Any_AVX2,
+ MultiplyRow_16_AVX2,
+ 2,
+ 2,
+ uint16_t,
+ uint16_t,
+ 31)
+#endif
+#ifdef HAS_MULTIPLYROW_16_NEON
+ANY11C(MultiplyRow_16_Any_NEON,
+ MultiplyRow_16_NEON,
+ 2,
+ 2,
+ uint16_t,
+ uint16_t,
+ 15)
+#endif
+#ifdef HAS_DIVIDEROW_16_AVX2
+ANY11C(DivideRow_16_Any_AVX2, DivideRow_16_AVX2, 2, 2, uint16_t, uint16_t, 31)
+#endif
+#ifdef HAS_DIVIDEROW_16_NEON
+ANY11C(DivideRow_16_Any_NEON, DivideRow_16_NEON, 2, 2, uint16_t, uint16_t, 15)
+#endif
#undef ANY11C
// Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts.
@@ -1405,6 +1455,32 @@ ANY12(YUY2ToUV422Row_Any_MMI, YUY2ToUV422Row_MMI, 1, 4, 1, 15)
#endif
#undef ANY12
+// Any 2 16 bit planes with parameter to 1
+#define ANY12PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
+ void NAMEANY(const T* src_uv, T* dst_u, T* dst_v, int depth, int width) { \
+ SIMD_ALIGNED(T temp[16 * 4]); \
+ memset(temp, 0, 16 * 4 * BPP); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_uv, dst_u, dst_v, depth, n); \
+ } \
+ memcpy(temp, src_uv + n * 2, r * BPP * 2); \
+ ANY_SIMD(temp, temp + 32, temp + 48, depth, MASK + 1); \
+ memcpy(dst_u + n, temp + 32, r * BPP); \
+ memcpy(dst_v + n, temp + 48, r * BPP); \
+ }
+
+#ifdef HAS_SPLITUVROW_16_AVX2
+ANY12PT(SplitUVRow_16_Any_AVX2, SplitUVRow_16_AVX2, uint16_t, 2, 15)
+#endif
+
+#ifdef HAS_SPLITUVROW_16_NEON
+ANY12PT(SplitUVRow_16_Any_NEON, SplitUVRow_16_NEON, uint16_t, 2, 7)
+#endif
+
+#undef ANY21CT
+
// Any 1 to 3. Outputs RGB planes.
#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
diff --git a/source/row_common.cc b/source/row_common.cc
index eb889c83..a941c3f5 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -2521,27 +2521,33 @@ void MergeXRGBRow_C(const uint8_t* src_r,
}
}
-// Use scale to convert lsb formats to msb, depending how many bits there are:
-// 128 = 9 bits
-// 64 = 10 bits
-// 16 = 12 bits
-// 1 = 16 bits
+// Convert lsb formats to msb, depending on sample depth.
void MergeUVRow_16_C(const uint16_t* src_u,
const uint16_t* src_v,
uint16_t* dst_uv,
- int scale,
+ int depth,
int width) {
+ int shift = 16 - depth;
int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_uv[0] = src_u[x] * scale;
- dst_uv[1] = src_v[x] * scale;
- dst_uv[2] = src_u[x + 1] * scale;
- dst_uv[3] = src_v[x + 1] * scale;
- dst_uv += 4;
+ for (x = 0; x < width; ++x) {
+ dst_uv[0] = src_u[x] << shift;
+ dst_uv[1] = src_v[x] << shift;
+ dst_uv += 2;
}
- if (width & 1) {
- dst_uv[0] = src_u[width - 1] * scale;
- dst_uv[1] = src_v[width - 1] * scale;
+}
+
+// Convert msb formats to lsb, depending on sample depth.
+void SplitUVRow_16_C(const uint16_t* src_uv,
+ uint16_t* dst_u,
+ uint16_t* dst_v,
+ int depth,
+ int width) {
+ int shift = 16 - depth;
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_u[x] = src_uv[0] >> shift;
+ dst_v[x] = src_uv[1] >> shift;
+ src_uv += 2;
}
}
@@ -2555,6 +2561,16 @@ void MultiplyRow_16_C(const uint16_t* src_y,
}
}
+void DivideRow_16_C(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_y[x] = (src_y[x] * scale) >> 16;
+ }
+}
+
// Use scale to convert lsb formats to msb, depending how many bits there are:
// 32768 = 9 bits
// 16384 = 10 bits
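For reference, the depth handling in these row functions reduces to two scale factors; a small standalone check (not from the patch) of the arithmetic that MultiplyRow_16_C and DivideRow_16_C implement:

    #include <cassert>
    #include <cstdint>

    int main() {
      // lsb -> msb: MultiplyRow_16 style, scale = 1 << (16 - depth).
      // msb -> lsb: DivideRow_16 style, scale = 1 << depth with a 16.16
      //             fixed-point multiply, equivalent to >> (16 - depth).
      const int depth = 10;
      const uint16_t lsb = 933;  // any sample with at most 'depth' significant bits
      const uint16_t msb = (uint16_t)(lsb * (1 << (16 - depth)));
      const uint16_t back = (uint16_t)(((uint32_t)msb * (1u << depth)) >> 16);
      assert(msb == (uint16_t)(lsb << 6) && back == lsb);  // round-trips exactly
      return 0;
    }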
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index cf87d46e..faf0fc91 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -3653,22 +3653,18 @@ void MergeUVRow_SSE2(const uint8_t* src_u,
}
#endif // HAS_MERGEUVROW_SSE2
-// Use scale to convert lsb formats to msb, depending how many bits there are:
-// 128 = 9 bits
-// 64 = 10 bits
-// 16 = 12 bits
-// 1 = 16 bits
#ifdef HAS_MERGEUVROW_16_AVX2
void MergeUVRow_16_AVX2(const uint16_t* src_u,
const uint16_t* src_v,
uint16_t* dst_uv,
- int scale,
+ int depth,
int width) {
+ depth = 16 - depth;
// clang-format off
asm volatile (
"vmovd %4,%%xmm3 \n"
"vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n"
- "vbroadcastss %%xmm3,%%ymm3 \n"
+ "vbroadcastss %%xmm3,%%xmm3 \n"
"sub %0,%1 \n"
// 16 pixels per loop.
@@ -3678,8 +3674,8 @@ void MergeUVRow_16_AVX2(const uint16_t* src_u,
"vmovdqu (%0,%1,1),%%ymm1 \n"
"add $0x20,%0 \n"
- "vpmullw %%ymm3,%%ymm0,%%ymm0 \n"
- "vpmullw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpsllw %%xmm3,%%ymm0,%%ymm0 \n"
+ "vpsllw %%xmm3,%%ymm1,%%ymm1 \n"
"vpunpcklwd %%ymm1,%%ymm0,%%ymm2 \n" // mutates
"vpunpckhwd %%ymm1,%%ymm0,%%ymm0 \n"
"vextractf128 $0x0,%%ymm2,(%2) \n"
@@ -3694,12 +3690,62 @@ void MergeUVRow_16_AVX2(const uint16_t* src_u,
"+r"(src_v), // %1
"+r"(dst_uv), // %2
"+r"(width) // %3
- : "r"(scale) // %4
+ : "r"(depth) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3");
// clang-format on
}
#endif // HAS_MERGEUVROW_AVX2
+#ifdef HAS_MERGEUVROW_16_AVX2
+const uvec8 kSplitUVShuffle16 = {0, 1, 4, 5, 8, 9, 12, 13,
+ 2, 3, 6, 7, 10, 11, 14, 15};
+void SplitUVRow_16_AVX2(const uint16_t* src_uv,
+ uint16_t* dst_u,
+ uint16_t* dst_v,
+ int depth,
+ int width) {
+ depth = 16 - depth;
+ // clang-format off
+ asm volatile (
+ "vmovd %4,%%xmm3 \n"
+ "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n"
+ "vbroadcastss %%xmm3,%%xmm3 \n"
+ "vbroadcastf128 %5,%%ymm4 \n"
+ "sub %1,%2 \n"
+
+ // 16 pixels per loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "add $0x40,%0 \n"
+
+ "vpsrlw %%xmm3,%%ymm0,%%ymm0 \n"
+ "vpsrlw %%xmm3,%%ymm1,%%ymm1 \n"
+ "vpshufb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpshufb %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vextractf128 $0x0,%%ymm0,(%1) \n"
+ "vextractf128 $0x0,%%ymm1,0x10(%1) \n"
+ "vextractf128 $0x1,%%ymm0,(%1,%2) \n"
+ "vextractf128 $0x1,%%ymm1,0x10(%1,%2) \n"
+ "add $0x20,%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width), // %3
+ "+r"(depth) // %4
+ :
+ "m"(kSplitUVShuffle16) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
+ // clang-format on
+}
+#endif // HAS_MERGEUVROW_AVX2
+
// Use scale to convert lsb formats to msb, depending how many bits there are:
// 128 = 9 bits
// 64 = 10 bits
@@ -3717,7 +3763,7 @@ void MultiplyRow_16_AVX2(const uint16_t* src_y,
"vbroadcastss %%xmm3,%%ymm3 \n"
"sub %0,%1 \n"
- // 16 pixels per loop.
+ // 32 pixels per loop.
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
@@ -3739,6 +3785,46 @@ void MultiplyRow_16_AVX2(const uint16_t* src_y,
}
#endif // HAS_MULTIPLYROW_16_AVX2
+// Use scale to convert msb formats to lsb, depending how many bits there are:
+// 512 = 9 bits
+// 1024 = 10 bits
+// 4096 = 12 bits
+// 65536 = 16 bits
+#ifdef HAS_DIVIDEROW_16_AVX2
+void DivideRow_16_AVX2(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width) {
+ // clang-format off
+ asm volatile (
+ "vmovd %3,%%xmm3 \n"
+ "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n"
+ "vbroadcastss %%xmm3,%%ymm3 \n"
+ "sub %0,%1 \n"
+
+ // 32 pixels per loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vpmulhuw %%ymm3,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vmovdqu %%ymm0,(%0,%1) \n"
+ "vmovdqu %%ymm1,0x20(%0,%1) \n"
+ "add $0x40,%0 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_y), // %1
+ "+r"(width), // %2
+ "+r"(scale) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm3");
+ // clang-format on
+}
+#endif // HAS_MULTIPLYROW_16_AVX2
+
// Use scale to convert lsb formats to msb, depending how many bits there are:
// 32768 = 9 bits
// 16384 = 10 bits
diff --git a/source/row_neon.cc b/source/row_neon.cc
index e54cb12b..43a2cac7 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -3166,6 +3166,121 @@ void HalfMergeUVRow_NEON(const uint8_t* src_u,
: "cc", "memory", "q0", "q1", "q2", "q3");
}
+void SplitUVRow_16_NEON(const uint16_t* src_uv,
+ uint16_t* dst_u,
+ uint16_t* dst_v,
+ int depth,
+ int width) {
+ asm volatile(
+ "vdup.32 q0, %3 \n"
+ "1: \n"
+ "vld2.16 {q1, q2}, [%0]! \n" // load 8 UV
+ "vmovl.u16 q3, d2 \n"
+ "vmovl.u16 q4, d3 \n"
+ "vshl.u32 q3, q3, q0 \n"
+ "vshl.u32 q4, q4, q0 \n"
+ "vmovn.u32 d2, q3 \n"
+ "vmovn.u32 d3, q4 \n"
+ "vmovl.u16 q3, d4 \n"
+ "vmovl.u16 q4, d5 \n"
+ "vshl.u32 q3, q3, q0 \n"
+ "vshl.u32 q4, q4, q0 \n"
+ "vmovn.u32 d4, q3 \n"
+ "vmovn.u32 d5, q4 \n"
+ "subs %4, %4, #8 \n" // 8 src pixels per loop
+ "vst1.16 {q1}, [%1]! \n" // store 8 U pixels
+ "vst1.16 {q2}, [%2]! \n" // store 8 V pixels
+ "bgt 1b \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(depth), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4");
+}
+
+void MergeUVRow_16_NEON(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
+ int depth,
+ int width) {
+ int shift = 16 - depth;
+ asm volatile(
+ "vdup.16 q2, %3 \n"
+ "1: \n"
+ "vld1.16 {q0}, [%0]! \n" // load 8 U
+ "vld1.16 {q1}, [%1]! \n" // load 8 V
+ "vshl.u16 q0, q0, q2 \n"
+ "vshl.u16 q1, q1, q2 \n"
+ "subs %4, %4, #8 \n" // 8 src pixels per loop
+ "vst2.16 {q0, q1}, [%2]! \n" // store 8 UV pixels
+ "bgt 1b \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(shift), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2");
+}
+
+void MultiplyRow_16_NEON(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width) {
+ asm volatile(
+ "vdup.16 q2, %2 \n"
+ "1: \n"
+ "vld1.16 {q0}, [%0]! \n"
+ "vld1.16 {q1}, [%0]! \n"
+ "vmul.u16 q0, q0, q2 \n"
+ "vmul.u16 q1, q1, q2 \n"
+ "vst1.16 {q0}, [%1]! \n"
+ "vst1.16 {q1}, [%1]! \n"
+ "subs %3, %3, #16 \n" // 16 src pixels per loop
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_y), // %1
+ "+r"(scale), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1", "q2");
+}
+
+void DivideRow_16_NEON(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width) {
+ asm volatile(
+ "vdup.16 q0, %2 \n"
+ "1: \n"
+ "vld1.16 {q1}, [%0]! \n"
+ "vld1.16 {q2}, [%0]! \n"
+ "vmovl.u16 q3, d2 \n"
+ "vmovl.u16 q1, d3 \n"
+ "vmovl.u16 q4, d4 \n"
+ "vmovl.u16 q2, d5 \n"
+ "vshl.u32 q3, q3, q0 \n"
+ "vshl.u32 q4, q4, q0 \n"
+ "vshl.u32 q1, q1, q0 \n"
+ "vshl.u32 q2, q2, q0 \n"
+ "vmovn.u32 d2, q3 \n"
+ "vmovn.u32 d3, q1 \n"
+ "vmovn.u32 d4, q4 \n"
+ "vmovn.u32 d5, q2 \n"
+ "vst1.16 {q1}, [%1]! \n"
+ "vst1.16 {q2}, [%1]! \n"
+ "subs %3, %3, #16 \n" // 16 src pixels per loop
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_y), // %1
+ "+r"(scale), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4");
+}
+
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)..
#ifdef __cplusplus
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index acefd96d..941c9b98 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -3526,6 +3526,126 @@ void HalfMergeUVRow_NEON(const uint8_t* src_u,
: "cc", "memory", "v0", "v1", "v2", "v3");
}
+void SplitUVRow_16_NEON(const uint16_t* src_uv,
+ uint16_t* dst_u,
+ uint16_t* dst_v,
+ int depth,
+ int width) {
+ asm volatile(
+ "dup v0.4s, %w3 \n"
+ "1: \n"
+ "ld2 {v1.8h, v2.8h}, [%0], #32 \n" // load 8 UV
+ "prfm pldl1keep, [%0, 448] \n"
+ "ushll v3.4s, v1.4h, #0 \n"
+ "ushll2 v4.4s, v1.8h, #0 \n"
+ "ushl v3.4s, v3.4s, v0.4s \n"
+ "ushl v4.4s, v4.4s, v0.4s \n"
+ "xtn v1.4h, v3.4s \n"
+ "xtn2 v1.8h, v4.4s \n"
+ "ushll v3.4s, v2.4h, #0 \n"
+ "ushll2 v4.4s, v2.8h, #0 \n"
+ "ushl v3.4s, v3.4s, v0.4s \n"
+ "ushl v4.4s, v4.4s, v0.4s \n"
+ "xtn v2.4h, v3.4s \n"
+ "xtn2 v2.8h, v4.4s \n"
+ "subs %w4, %w4, #8 \n" // 8 src pixels per loop
+ "st1 {v1.8h}, [%1], #16 \n" // store 8 U pixels
+ "st1 {v2.8h}, [%2], #16 \n" // store 8 V pixels
+ "b.gt 1b \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(depth), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4");
+}
+
+void MergeUVRow_16_NEON(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
+ int depth,
+ int width) {
+ int shift = 16 - depth;
+ asm volatile(
+ "dup v2.8h, %w3 \n"
+ "1: \n"
+ "ld1 {v0.8h}, [%0], #16 \n" // load 8 U
+ "prfm pldl1keep, [%0, 448] \n"
+ "ld1 {v1.8h}, [%1], #16 \n" // load 8 V
+ "prfm pldl1keep, [%1, 448] \n"
+ "ushl v0.8h, v0.8h, v2.8h \n"
+ "ushl v1.8h, v1.8h, v2.8h \n"
+ "subs %w4, %w4, #8 \n" // 8 src pixels per loop
+ "st2 {v0.8h, v1.8h}, [%2], #32 \n" // store 8 UV pixels
+ "b.gt 1b \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(shift), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2");
+}
+
+void MultiplyRow_16_NEON(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width) {
+ asm volatile(
+ "dup v2.8h, %w2 \n"
+ "1: \n"
+ "ldp q0, q1, [%0] \n"
+ "add %0, %0, #32 \n"
+ "prfm pldl1keep, [%0, 448] \n"
+ "mul v0.8h, v0.8h, v2.8h \n"
+ "mul v1.8h, v1.8h, v2.8h \n"
+ "stp q0, q1, [%1] \n" // store 16 pixels
+ "add %1, %1, #32 \n"
+ "subs %w3, %w3, #16 \n" // 16 src pixels per loop
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_y), // %1
+ "+r"(scale), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2");
+}
+
+void DivideRow_16_NEON(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width) {
+ asm volatile(
+ "dup v0.8h, %w2 \n"
+ "1: \n"
+ "ldp q1, q2, [%0] \n"
+ "add %0, %0, #32 \n"
+ "prfm pldl1keep, [%0, 448] \n"
+ "ushll v3.4s, v1.4h, #0 \n"
+ "ushll v4.4s, v2.4h, #0 \n"
+ "ushll2 v1.4s, v1.8h, #0 \n"
+ "ushll2 v2.4s, v2.8h, #0 \n"
+ "mul v3.4s, v0.4s, v3.4s \n"
+ "mul v4.4s, v0.4s, v4.4s \n"
+ "mul v1.4s, v0.4s, v1.4s \n"
+ "mul v2.4s, v0.4s, v2.4s \n"
+ "shrn v3.4h, v3.4s, #16 \n"
+ "shrn v4.4h, v4.4s, #16 \n"
+ "shrn2 v3.8h, v1.4s, #16 \n"
+ "shrn2 v4.8h, v2.4s, #16 \n"
+ "stp q3, q3, [%1] \n" // store 16 pixels
+ "add %1, %1, #32 \n"
+ "subs %w3, %w3, #16 \n" // 16 src pixels per loop
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_y), // %1
+ "+r"(scale), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4");
+}
+
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index 50593160..8638a84c 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -158,15 +158,26 @@ TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1, 8)
TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I444, uint8_t, 1, 1, 1, 8)
TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1, 8)
TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10)
-TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I012, uint16_t, 2, 2, 2, 8)
TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2, 10)
TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H012, uint16_t, 2, 2, 2, 8)
TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I410, uint16_t, 2, 1, 1, 10)
TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I410, uint16_t, 2, 1, 1, 10)
TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I412, uint16_t, 2, 1, 1, 12)
TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I412, uint16_t, 2, 1, 1, 12)
+TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12)
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10)
+TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10)
+TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12)
+TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12)
// Test Android 420 to I420
#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \
@@ -292,63 +303,74 @@ int I400ToNV21(const uint8_t* src_y,
dst_stride_vu, width, height);
}
-#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
+#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
+ SRC_DEPTH) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+ static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
+ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
+ static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
+ "SRC_SUBSAMP_X unsupported"); \
+ static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
+ "SRC_SUBSAMP_Y unsupported"); \
+ static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
+ "DST_SUBSAMP_X unsupported"); \
+ static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
+ "DST_SUBSAMP_Y unsupported"); \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
- align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
- align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
- OFF); \
- align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
- OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- for (int i = 0; i < kHeight; ++i) \
- for (int j = 0; j < kWidth; ++j) \
- src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
- for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
- src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
- (fastrand() & 0xff); \
- src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
- (fastrand() & 0xff); \
- } \
+ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
+ const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
+ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
+ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(src_u, \
+ kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(src_v, \
+ kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_c, \
+ kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_opt, \
+ kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
+ MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
+ MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
+ MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
+ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
+ SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \
+ SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \
} \
- memset(dst_y_c, 1, kWidth* kHeight); \
- memset(dst_uv_c, 2, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_y_opt, 101, kWidth* kHeight); \
- memset(dst_uv_opt, 102, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \
+ src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \
+ src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \
+ } \
+ memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
+ memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
MaskCpuFlags(disable_cpu_flags_); \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
- src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \
- dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
+ SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth, \
+ src_v_p, kSrcHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_c), \
+ kDstHalfWidth * 2, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
- src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \
- dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
+ src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_opt), kDstHalfWidth * 2, kWidth, \
+ NEG kHeight); \
} \
- for (int i = 0; i < kHeight; ++i) { \
- for (int j = 0; j < kWidth; ++j) { \
- EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
- } \
+ for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \
+ EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
} \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
- EXPECT_EQ(dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j], \
- dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]); \
- } \
+ for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC * 2; ++i) { \
+ EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]); \
} \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_uv_c); \
@@ -359,23 +381,33 @@ int I400ToNV21(const uint8_t* src_y,
free_aligned_buffer_page_end(src_v); \
}
-#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
- TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \
- TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1) \
- TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \
- TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0)
-
-TESTPLANARTOBP(I420, 2, 2, NV12, 2, 2)
-TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2)
-TESTPLANARTOBP(I422, 2, 1, NV21, 2, 2)
-TESTPLANARTOBP(I444, 1, 1, NV12, 2, 2)
-TESTPLANARTOBP(I444, 1, 1, NV21, 2, 2)
-TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)
+#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0, SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1, \
+ SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+
+TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I422, uint8_t, 1, 2, 1, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV12, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I400, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I010, uint16_t, 2, 2, 2, P010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
+TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
+TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
@@ -385,13 +417,13 @@ TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
- "SRC_SUBSAMP_X unsupported"); \
+ "SRC_SUBSAMP_X unsupported"); \
static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
- "SRC_SUBSAMP_Y unsupported"); \
+ "SRC_SUBSAMP_Y unsupported"); \
static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
- "DST_SUBSAMP_X unsupported"); \
+ "DST_SUBSAMP_X unsupported"); \
static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
- "DST_SUBSAMP_Y unsupported"); \
+ "DST_SUBSAMP_Y unsupported"); \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
@@ -407,15 +439,15 @@ TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_uv_opt, \
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
- MemRandomize(src_uv + OFF, 2 * kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
- src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \
+ src_y_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
- for (int i = 0; i < 2 * kSrcHalfWidth * kSrcHalfHeight; ++i) { \
- src_uv_p[i] = src_uv_p[i] & ((1 << SRC_DEPTH) - 1); \
+ for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight * 2; ++i) { \
+ src_uv_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
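The rewritten fill above replaces MemRandomize plus a low-bit mask with a single expression that keeps only the top SRC_DEPTH bits of each sample, matching the MSB-aligned layout of the P0xx/P2xx formats instantiated below. A worked instance of that mask as a standalone check (the constant names here are mine, not from the test):

    // ((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH) for uint16_t at 10 bits:
    // 0xFFFF << 6 == 0xFFC0, so bits 15..6 may be set and the low 6 bits are
    // always zero -- the placement P010 uses for its samples.
    #include <cstdint>
    #include <cassert>

    int main() {
      const int kSrcBpc = 2;     // bytes per sample (uint16_t)
      const int kSrcDepth = 10;  // significant bits
      const uint16_t kMask10 =
          (uint16_t)(((uint16_t)(-1)) << ((8 * kSrcBpc) - kSrcDepth));
      const uint16_t kMask12 = (uint16_t)(((uint16_t)(-1)) << (16 - 12));
      const uint16_t kMask16 = (uint16_t)(((uint16_t)(-1)) << (16 - 16));
      assert(kMask10 == 0xFFC0);
      assert(kMask12 == 0xFFF0);
      assert(kMask16 == 0xFFFF);
      return 0;
    }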
@@ -483,112 +515,111 @@ TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8)
TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8)
TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8)
TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8)
-// These formats put data in high bits, so test on full 16bit range.
-TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 16)
-TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 16)
-TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 16)
-TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 16)
-TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 16)
-TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 16)
-
-#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF, \
- DOY) \
- TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
- const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
- const int kHeight = benchmark_height_; \
- align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
- align_buffer_page_end(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
- OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- for (int i = 0; i < kHeight; ++i) \
- for (int j = 0; j < kWidth; ++j) \
- src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
- for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
- for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
- src_uv[(i * 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
- (fastrand() & 0xff); \
- } \
- } \
- memset(dst_y_c, 1, kWidth* kHeight); \
- memset(dst_u_c, 2, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_v_c, 3, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_y_opt, 101, kWidth* kHeight); \
- memset(dst_u_opt, 102, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_v_opt, 103, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- MaskCpuFlags(disable_cpu_flags_); \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y + OFF, kWidth, src_uv + OFF, \
- 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), DOY ? dst_y_c : NULL, kWidth, \
- dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \
- SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y + OFF, kWidth, src_uv + OFF, \
- 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), DOY ? dst_y_opt : NULL, \
- kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_opt, \
- SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
- } \
- if (DOY) { \
- for (int i = 0; i < kHeight; ++i) { \
- for (int j = 0; j < kWidth; ++j) { \
- EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
- } \
- } \
- } \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
- EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
- dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
- } \
- } \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
- EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
- dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
- } \
- } \
- free_aligned_buffer_page_end(dst_y_c); \
- free_aligned_buffer_page_end(dst_u_c); \
- free_aligned_buffer_page_end(dst_v_c); \
- free_aligned_buffer_page_end(dst_y_opt); \
- free_aligned_buffer_page_end(dst_u_opt); \
- free_aligned_buffer_page_end(dst_v_opt); \
- free_aligned_buffer_page_end(src_y); \
- free_aligned_buffer_page_end(src_uv); \
+TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10)
+TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10)
+TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 12)
+TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12)
+TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 16)
+TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 16)
+
+#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
+ SRC_DEPTH) \
+ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+ static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
+ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
+ static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
+ "SRC_SUBSAMP_X unsupported"); \
+ static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
+ "SRC_SUBSAMP_Y unsupported"); \
+ static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
+ "DST_SUBSAMP_X unsupported"); \
+ static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
+ "DST_SUBSAMP_Y unsupported"); \
+ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+ const int kHeight = benchmark_height_; \
+ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
+ const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
+ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
+ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(src_uv, \
+ kSrcHalfWidth* kSrcHalfHeight* SRC_BPC * 2 + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
+ SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ src_y_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
+ } \
+ for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight * 2; ++i) { \
+ src_uv_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
+ } \
+ memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
+ memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
+ memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \
+ reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
+ reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \
+ NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \
+ reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \
+ reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \
+ NEG kHeight); \
+ } \
+ for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \
+ EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
+ } \
+ for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \
+ EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \
+ EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_u_c); \
+ free_aligned_buffer_page_end(dst_v_c); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_u_opt); \
+ free_aligned_buffer_page_end(dst_v_opt); \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_uv); \
}
-#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0, 1) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1, \
- 1) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
- SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0)
-
-TESTBIPLANARTOP(NV12, 2, 2, I420, 2, 2)
-TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2)
+#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \
+ TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0, SRC_DEPTH) \
+ TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1, \
+ SRC_DEPTH) \
+ TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH) \
+ TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+
+TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8)
+TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8)
// Provide matrix wrappers for full range bt.709
#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
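One note on the rewritten biplanar-to-planar tests before the video_common_test.cc hunk: the _Invert variant passes a negated height, which libyuv treats as a bottom-up (vertically flipped) source. A minimal sketch of the NV12-to-I420 call those instantiations drive (NV12ToI420 itself predates this change; buffer sizes are illustrative, not taken from the patch):

    #include <cstdint>
    #include "libyuv/convert.h"

    int main() {
      const int kW = 4, kH = 4;
      uint8_t src_y[kW * kH] = {0};
      uint8_t src_uv[(kW / 2) * (kH / 2) * 2] = {0};  // interleaved U,V rows
      uint8_t dst_y[kW * kH];
      uint8_t dst_u[(kW / 2) * (kH / 2)];
      uint8_t dst_v[(kW / 2) * (kH / 2)];
      // Passing kH converts top-down; passing -kH (as the _Invert cases do)
      // reads the source bottom-up.
      return libyuv::NV12ToI420(src_y, kW, src_uv, kW,
                                dst_y, kW, dst_u, kW / 2, dst_v, kW / 2,
                                kW, kH);
    }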
diff --git a/unit_test/video_common_test.cc b/unit_test/video_common_test.cc
index eb183aaa..6c6a384d 100644
--- a/unit_test/video_common_test.cc
+++ b/unit_test/video_common_test.cc
@@ -81,6 +81,11 @@ TEST_F(LibYUVBaseTest, TestFourCC) {
EXPECT_TRUE(TestValidFourCC(FOURCC_H420, FOURCC_BPP_H420));
EXPECT_TRUE(TestValidFourCC(FOURCC_H422, FOURCC_BPP_H422));
EXPECT_TRUE(TestValidFourCC(FOURCC_H010, FOURCC_BPP_H010));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_H210, FOURCC_BPP_H210));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_I010, FOURCC_BPP_I010));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_I210, FOURCC_BPP_I210));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_P010, FOURCC_BPP_P010));
+ EXPECT_TRUE(TestValidFourCC(FOURCC_P210, FOURCC_BPP_P210));
EXPECT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG));
EXPECT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12));
EXPECT_TRUE(TestValidFourCC(FOURCC_YV16, FOURCC_BPP_YV16));