aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrank Barchard <fbarchard@google.com>2022-11-16 18:02:34 -0800
committerlibyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com>2022-11-17 02:47:57 +0000
commit2d2cee418a18b9f1bfa6b8037d4f8da095720695 (patch)
tree049f9ca258653794581a45af7a11a764e7e1ad44
parent6f21862f1b741088b0c2c3ff894af6b82634015c (diff)
downloadlibyuv-2d2cee418a18b9f1bfa6b8037d4f8da095720695.tar.gz
Add Detile_16 planar function for 10 bit MT2T format
- Neon and SSE2
- Any for odd widths

Pixel 2 little core AArch32 build
C
  TestDetilePlane_16 (1275 ms)
  TestDetilePlane (1203 ms)
Neon
  TestDetilePlane_16 (693 ms)
  TestDetilePlane (660 ms)

Bug: b/258474032
Change-Id: Idbd09c5e9324e4deef5f1d54090d4b63cc7db812
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4031848
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
-rw-r--r--README.chromium2
-rw-r--r--include/libyuv/planar_functions.h27
-rw-r--r--include/libyuv/row.h26
-rw-r--r--include/libyuv/version.h2
-rw-r--r--source/planar_functions.cc83
-rw-r--r--source/row_any.cc35
-rw-r--r--source/row_common.cc15
-rw-r--r--source/row_gcc.cc23
-rw-r--r--source/row_neon.cc20
-rw-r--r--source/row_neon64.cc20
-rw-r--r--unit_test/planar_test.cc93
11 files changed, 279 insertions, 67 deletions
diff --git a/README.chromium b/README.chromium
index 3c221afa..98d8ad7d 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1848
+Version: 1849
License: BSD
License File: LICENSE
diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index f7c6db80..ffe63705 100644
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -85,13 +85,23 @@ void SetPlane(uint8_t* dst_y,
// Convert a plane of tiles of 16 x H to linear.
LIBYUV_API
-void DetilePlane(const uint8_t* src_y,
- int src_stride_y,
- uint8_t* dst_y,
- int dst_stride_y,
- int width,
- int height,
- int tile_height);
+int DetilePlane(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height);
+
+// Convert a plane of 16 bit tiles of 16 x H to linear.
+// src_stride_y and dst_stride_y are counted in uint16_t elements.
+// tile_height must be a power of 2; a negative height inverts the image.
+// Returns 0 on success, or -1 if an argument is rejected.
+LIBYUV_API
+int DetilePlane_16(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height);
// Convert a UV plane of tiles of 16 x H into linear U and V planes.
LIBYUV_API
@@ -106,6 +116,7 @@ void DetileSplitUVPlane(const uint8_t* src_uv,
int tile_height);
// Convert a Y and UV plane of tiles into interlaced YUY2.
+LIBYUV_API
void DetileToYUY2(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@@ -382,6 +393,7 @@ int I210Copy(const uint16_t* src_y,
int height);
// Copy NV12. Supports inverting.
+LIBYUV_API
int NV12Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@@ -394,6 +406,7 @@ int NV12Copy(const uint8_t* src_y,
int height);
// Copy NV21. Supports inverting.
+LIBYUV_API
int NV21Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 7f69e2b4..aa196c86 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -290,6 +290,7 @@ extern "C" {
#define HAS_CONVERT16TO8ROW_SSSE3
#define HAS_CONVERT8TO16ROW_SSE2
#define HAS_DETILEROW_SSE2
+#define HAS_DETILEROW_16_SSE2
#define HAS_DETILESPLITUVROW_SSSE3
#define HAS_DETILETOYUY2_SSE2
#define HAS_HALFMERGEUVROW_SSSE3
@@ -449,6 +450,7 @@ extern "C" {
#define HAS_BYTETOFLOATROW_NEON
#define HAS_CONVERT16TO8ROW_NEON
#define HAS_COPYROW_NEON
+#define HAS_DETILEROW_16_NEON
#define HAS_DETILEROW_NEON
#define HAS_DETILESPLITUVROW_NEON
#define HAS_DETILETOYUY2_NEON
@@ -823,7 +825,8 @@ struct YuvConstants {
#endif
-#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
+// True when x is a power of 2.  Note that 0 also passes this test.
+#define IS_POWEROFTWO(x) (!((x) & ((x) - 1)))
+// True when p is a multiple of a; the mask arithmetic needs a power of 2 for a.
+#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
#define align_buffer_64(var, size) \
uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \
@@ -2012,7 +2015,6 @@ void DetileRow_C(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
int width);
-
void DetileRow_NEON(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
@@ -2029,6 +2031,26 @@ void DetileRow_Any_SSE2(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
int width);
+// 16 bit versions of DetileRow.  width is in pixels and src_tile_stride is in
+// uint16_t elements.
+void DetileRow_16_C(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_Any_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_SSE2(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_Any_SSE2(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
void DetileSplitUVRow_C(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
uint8_t* dst_u,
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 62a7257e..cc91e04d 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1848
+#define LIBYUV_VERSION 1849
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 67229ee7..1e3dfaed 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -385,6 +385,7 @@ int I420ToI400(const uint8_t* src_y,
}
// Copy NV12. Supports inverting.
+LIBYUV_API
int NV12Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@@ -418,6 +419,7 @@ int NV12Copy(const uint8_t* src_y,
}
// Copy NV21. Supports inverting.
+LIBYUV_API
int NV21Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
@@ -916,26 +918,22 @@ int NV21ToNV12(const uint8_t* src_y,
// tile_height is 16 or 32 for MM21.
// src_stride_y is bytes per row of source ignoring tiling. e.g. 640
// TODO: More detile row functions.
-
LIBYUV_API
-void DetilePlane(const uint8_t* src_y,
- int src_stride_y,
- uint8_t* dst_y,
- int dst_stride_y,
- int width,
- int height,
- int tile_height) {
+int DetilePlane(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height) {
const ptrdiff_t src_tile_stride = 16 * tile_height;
int y;
void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst,
int width) = DetileRow_C;
- assert(src_stride_y >= 0);
- assert(tile_height > 0);
- assert(src_stride_y > 0);
-
- if (width <= 0 || height == 0) {
- return;
+ if (!src_y || !dst_y || width <= 0 || height == 0 || !IS_POWEROFTWO(tile_height)) {
+ return -1;
}
+
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -970,6 +968,63 @@ void DetilePlane(const uint8_t* src_y,
src_y = src_y - src_tile_stride + src_stride_y * tile_height;
}
}
+ return 0;
+}
+
+// Convert a plane of 16 bit tiles of 16 x H to linear.
+// tile width is 16 and assumed.
+// tile_height is 16 or 32 for MT2T.
+// src_stride_y and dst_stride_y are in uint16_t elements, not bytes.
+// Returns 0 on success, -1 if an argument is invalid.
+LIBYUV_API
+int DetilePlane_16(const uint16_t* src_y,
+                   int src_stride_y,
+                   uint16_t* dst_y,
+                   int dst_stride_y,
+                   int width,
+                   int height,
+                   int tile_height) {
+  const ptrdiff_t src_tile_stride = 16 * tile_height;
+  int y;
+  void (*DetileRow_16)(const uint16_t* src, ptrdiff_t src_tile_stride,
+                       uint16_t* dst, int width) = DetileRow_16_C;
+  // IS_POWEROFTWO(0) is true, so non-positive tile heights are rejected
+  // explicitly.
+  if (!src_y || !dst_y || width <= 0 || height == 0 || tile_height <= 0 ||
+      !IS_POWEROFTWO(tile_height)) {
+    return -1;
+  }
+
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_y = dst_y + (height - 1) * dst_stride_y;
+    dst_stride_y = -dst_stride_y;
+  }
+
+  // Guard on the 16 bit row macros: the DetileRow_16_* symbols only exist when
+  // HAS_DETILEROW_16_* is defined, independent of the 8 bit HAS_DETILEROW_*.
+#if defined(HAS_DETILEROW_16_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    DetileRow_16 = DetileRow_16_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      DetileRow_16 = DetileRow_16_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_DETILEROW_16_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    DetileRow_16 = DetileRow_16_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      DetileRow_16 = DetileRow_16_NEON;
+    }
+  }
+#endif
+
+  // Detile plane
+  for (y = 0; y < height; ++y) {
+    DetileRow_16(src_y, src_tile_stride, dst_y, width);
+    dst_y += dst_stride_y;
+    // Next row inside the same tile is 16 pixels further on.
+    src_y += 16;
+    // Advance to next row of tiles.
+    if ((y & (tile_height - 1)) == (tile_height - 1)) {
+      src_y = src_y - src_tile_stride + src_stride_y * tile_height;
+    }
+  }
+  return 0;
}
LIBYUV_API
diff --git a/source/row_any.cc b/source/row_any.cc
index 413080fd..4b60fa0f 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -2242,26 +2242,31 @@ ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
#endif
#undef ANY11S
-#define ANYDETILE(NAMEANY, ANY_SIMD, MASK) \
- void NAMEANY(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, \
- int width) { \
- SIMD_ALIGNED(uint8_t temp[16 * 2]); \
- memset(temp, 0, 16); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src, src_tile_stride, dst, n); \
- } \
- memcpy(temp, src + (n / 16) * src_tile_stride, r); \
- ANY_SIMD(temp, src_tile_stride, temp + 16, MASK + 1); \
- memcpy(dst + n, temp + 16, r); \
+// Any-width wrapper for detile row functions.
+// T is the pixel type and BPP its size in bytes.  The first n pixels (width
+// rounded down to a multiple of MASK + 1) go straight through ANY_SIMD; the
+// remaining r pixels are copied into temp, detiled as one full group into the
+// second half of temp, and only r pixels are copied out to dst.
+#define ANYDETILE(NAMEANY, ANY_SIMD, T, BPP, MASK) \
+ void NAMEANY(const T* src, ptrdiff_t src_tile_stride, T* dst, int width) { \
+ SIMD_ALIGNED(T temp[16 * 2]); \
+ memset(temp, 0, 16 * BPP); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src, src_tile_stride, dst, n); \
+ } \
+ memcpy(temp, src + (n / 16) * src_tile_stride, r * BPP); \
+ ANY_SIMD(temp, src_tile_stride, temp + 16, MASK + 1); \
+ memcpy(dst + n, temp + 16, r * BPP); \
}
#ifdef HAS_DETILEROW_NEON
-ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, 15)
+ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, uint8_t, 1, 15)
#endif
#ifdef HAS_DETILEROW_SSE2
-ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, 15)
+ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, uint8_t, 1, 15)
+#endif
+#ifdef HAS_DETILEROW_16_NEON
+ANYDETILE(DetileRow_16_Any_NEON, DetileRow_16_NEON, uint16_t, 2, 15)
+#endif
+#ifdef HAS_DETILEROW_16_SSE2
+ANYDETILE(DetileRow_16_Any_SSE2, DetileRow_16_SSE2, uint16_t, 2, 15)
#endif
#define ANYDETILESPLITUV(NAMEANY, ANY_SIMD, MASK) \
diff --git a/source/row_common.cc b/source/row_common.cc
index 8bfa4b8c..5ee5b17f 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -2748,6 +2748,21 @@ void DetileRow_C(const uint8_t* src,
}
}
+// Copies one row out of 16 pixel wide tiles of 16 bit samples.
+// Each full group of 16 pixels comes from the next tile, src_tile_stride
+// elements further on; a final partial group copies only width & 15 pixels.
+void DetileRow_16_C(const uint16_t* src,
+                    ptrdiff_t src_tile_stride,
+                    uint16_t* dst,
+                    int width) {
+  const int tail = width & 15;
+  int remaining = width;
+  while (remaining >= 16) {
+    memcpy(dst, src, 16 * sizeof(*dst));
+    src += src_tile_stride;
+    dst += 16;
+    remaining -= 16;
+  }
+  if (tail != 0) {
+    memcpy(dst, src, tail * sizeof(*dst));
+  }
+}
+
void DetileSplitUVRow_C(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
uint8_t* dst_u,
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index ad1c052e..4dc56a5f 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -5030,6 +5030,29 @@ void DetileRow_SSE2(const uint8_t* src,
}
#endif // HAS_DETILEROW_SSE2
+#ifdef HAS_DETILEROW_16_SSE2
+// Reads 16 Y's of 16 bits each from a tile and writes out 16 Y's per loop.
+void DetileRow_16_SSE2(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n" // load first 8 pixels
+ "movdqu 0x10(%0),%%xmm1 \n" // load next 8 pixels
+ "lea (%0,%3,2),%0 \n" // src += src_tile_stride * 2 bytes
+ "movdqu %%xmm0,(%1) \n" // store first 8 pixels
+ "movdqu %%xmm1,0x10(%1) \n" // store next 8 pixels
+ "lea 0x20(%1),%1 \n" // dst += 32 bytes
+ "sub $0x10,%2 \n" // 16 processed per loop
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"(src_tile_stride) // %3
+ : "cc", "memory", "xmm0", "xmm1");
+}
+#endif // HAS_DETILEROW_16_SSE2
+
#ifdef HAS_DETILETOYUY2_SSE2
// Read 16 Y, 8 UV, and write 8 YUYV.
void DetileToYUY2_SSE2(const uint8_t* src_y,
diff --git a/source/row_neon.cc b/source/row_neon.cc
index b777a0e1..d2815d17 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -622,6 +622,26 @@ void DetileRow_NEON(const uint8_t* src,
);
}
+// Reads 16 Y's of 16 bits each from a tile and writes out 16 Y's per loop.
+void DetileRow_16_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.16 {q0, q1}, [%0], %3 \n" // load 16 pixels
+ "subs %2, %2, #16 \n" // 16 processed per loop
+ "pld [%0, #3584] \n" // prefetch 3584 bytes ahead
+ "vst1.16 {q0, q1}, [%1]! \n" // store 16 pixels
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"(src_tile_stride * 2) // %3 tile stride in bytes
+ : "cc", "memory", "q0", "q1" // Clobber List
+ );
+}
+
// Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
void DetileSplitUVRow_NEON(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 880a5f06..85d1c1b9 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -650,6 +650,26 @@ void DetileRow_NEON(const uint8_t* src,
);
}
+// Reads 16 Y's of 16 bits each from a tile and writes out 16 Y's per loop.
+void DetileRow_16_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.8h,v1.8h}, [%0], %3 \n" // load 16 pixels
+ "subs %w2, %w2, #16 \n" // 16 processed per loop
+ "prfm pldl1keep, [%0, 3584] \n" // 7 tiles of 512b ahead
+ "st1 {v0.8h,v1.8h}, [%1], #32 \n" // store 16 pixels
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"(src_tile_stride * 2) // %3 tile stride in bytes
+ : "cc", "memory", "v0", "v1" // Clobber List
+ );
+}
+
// Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
void DetileSplitUVRow_NEON(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index 3a8c470b..4f462d0a 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -1638,29 +1638,29 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
- int orig_width = (benchmark_width_ + 15) & ~15;
- int orig_height = (benchmark_height_ + 15) & ~15;
- int orig_plane_size = orig_width * orig_height;
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height;
int y_plane_size = benchmark_width_ * benchmark_height_;
- align_buffer_page_end(orig_y, orig_plane_size);
+ align_buffer_page_end(tile_y, tile_plane_size);
align_buffer_page_end(dst_c, y_plane_size);
align_buffer_page_end(dst_opt, y_plane_size);
- MemRandomize(orig_y, orig_plane_size);
+ MemRandomize(tile_y, tile_plane_size);
memset(dst_c, 0, y_plane_size);
memset(dst_opt, 0, y_plane_size);
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
for (j = 0; j < benchmark_iterations_; j++) {
- DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_,
+ DetilePlane(tile_y, tile_width, dst_c, benchmark_width_, benchmark_width_,
benchmark_height_, 16);
}
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (j = 0; j < benchmark_iterations_; j++) {
- DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_,
+ DetilePlane(tile_y, tile_width, dst_opt, benchmark_width_, benchmark_width_,
benchmark_height_, 16);
}
@@ -1668,7 +1668,46 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
EXPECT_EQ(dst_c[i], dst_opt[i]);
}
- free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(tile_y);
+ free_aligned_buffer_page_end(dst_c);
+ free_aligned_buffer_page_end(dst_opt);
+}
+
+// Checks that the C and optimized DetilePlane_16 paths write identical output.
+TEST_F(LibYUVPlanarTest, TestDetilePlane_16) {
+ int i, j;
+
+ // tile_y is tiled. Allocate enough memory for whole tiles: dimensions are
+ // rounded up to a multiple of 16 and each pixel takes 2 bytes.
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height * 2;
+ int y_plane_size = benchmark_width_ * benchmark_height_ * 2;
+ align_buffer_page_end(tile_y, tile_plane_size);
+ align_buffer_page_end(dst_c, y_plane_size);
+ align_buffer_page_end(dst_opt, y_plane_size);
+
+ MemRandomize(tile_y, tile_plane_size);
+ memset(dst_c, 0, y_plane_size);
+ memset(dst_opt, 0, y_plane_size);
+
+ // Disable all optimizations.
+ MaskCpuFlags(disable_cpu_flags_);
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_c,
+ benchmark_width_, benchmark_width_, benchmark_height_, 16);
+ }
+
+ // Enable optimizations.
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_opt,
+ benchmark_width_, benchmark_width_, benchmark_height_, 16);
+ }
+
+ // Compare the two outputs byte by byte; y_plane_size is in bytes.
+ for (i = 0; i < y_plane_size; ++i) {
+ EXPECT_EQ(dst_c[i], dst_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(tile_y);
free_aligned_buffer_page_end(dst_c);
free_aligned_buffer_page_end(dst_opt);
}
@@ -1678,33 +1717,33 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
- int orig_width = (benchmark_width_ + 15) & ~15;
- int orig_height = (benchmark_height_ + 15) & ~15;
- int orig_plane_size = orig_width * orig_height;
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height;
int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
- align_buffer_page_end(orig_uv, orig_plane_size);
- align_buffer_page_end(detiled_uv, orig_plane_size);
+ align_buffer_page_end(tile_uv, tile_plane_size);
+ align_buffer_page_end(detiled_uv, tile_plane_size);
align_buffer_page_end(dst_u_two_stage, uv_plane_size);
align_buffer_page_end(dst_u_opt, uv_plane_size);
align_buffer_page_end(dst_v_two_stage, uv_plane_size);
align_buffer_page_end(dst_v_opt, uv_plane_size);
- MemRandomize(orig_uv, orig_plane_size);
- memset(detiled_uv, 0, orig_plane_size);
+ MemRandomize(tile_uv, tile_plane_size);
+ memset(detiled_uv, 0, tile_plane_size);
memset(dst_u_two_stage, 0, uv_plane_size);
memset(dst_u_opt, 0, uv_plane_size);
memset(dst_v_two_stage, 0, uv_plane_size);
memset(dst_v_opt, 0, uv_plane_size);
- DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2,
+ DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2,
dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
// Benchmark 2 step conversion for comparison.
for (j = 0; j < benchmark_iterations_; j++) {
- DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
+ DetilePlane(tile_uv, tile_width, detiled_uv, benchmark_width_,
benchmark_width_, benchmark_height_, 16);
- SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
+ SplitUVPlane(detiled_uv, tile_width, dst_u_two_stage,
(benchmark_width_ + 1) / 2, dst_v_two_stage,
(benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
benchmark_height_);
@@ -1715,7 +1754,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
}
- free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(tile_uv);
free_aligned_buffer_page_end(detiled_uv);
free_aligned_buffer_page_end(dst_u_two_stage);
free_aligned_buffer_page_end(dst_u_opt);
@@ -1727,17 +1766,17 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
- int orig_width = (benchmark_width_ + 15) & ~15;
- int orig_height = (benchmark_height_ + 15) & ~15;
- int orig_plane_size = orig_width * orig_height;
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height;
int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
- align_buffer_page_end(orig_uv, orig_plane_size);
+ align_buffer_page_end(tile_uv, tile_plane_size);
align_buffer_page_end(dst_u_c, uv_plane_size);
align_buffer_page_end(dst_u_opt, uv_plane_size);
align_buffer_page_end(dst_v_c, uv_plane_size);
align_buffer_page_end(dst_v_opt, uv_plane_size);
- MemRandomize(orig_uv, orig_plane_size);
+ MemRandomize(tile_uv, tile_plane_size);
memset(dst_u_c, 0, uv_plane_size);
memset(dst_u_opt, 0, uv_plane_size);
memset(dst_v_c, 0, uv_plane_size);
@@ -1746,7 +1785,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
- DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2,
+ DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, (benchmark_width_ + 1) / 2,
dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
@@ -1755,7 +1794,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
for (j = 0; j < benchmark_iterations_; j++) {
DetileSplitUVPlane(
- orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
+ tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
(benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
}
@@ -1764,7 +1803,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
}
- free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(tile_uv);
free_aligned_buffer_page_end(dst_u_c);
free_aligned_buffer_page_end(dst_u_opt);
free_aligned_buffer_page_end(dst_v_c);