diff options
author | Frank Barchard <fbarchard@google.com> | 2022-11-16 18:02:34 -0800 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-11-17 02:47:57 +0000 |
commit | 2d2cee418a18b9f1bfa6b8037d4f8da095720695 (patch) | |
tree | 049f9ca258653794581a45af7a11a764e7e1ad44 | |
parent | 6f21862f1b741088b0c2c3ff894af6b82634015c (diff) | |
download | libyuv-2d2cee418a18b9f1bfa6b8037d4f8da095720695.tar.gz |
Add DetilePlane_16 planar function for 10-bit MT2T format
- Neon and SSE2
- Any for odd widths
Benchmark results on Pixel 2 little core, AArch32 build:
C
TestDetilePlane_16 (1275 ms)
TestDetilePlane (1203 ms)
Neon
TestDetilePlane_16 (693 ms)
TestDetilePlane (660 ms)
Bug: b/258474032
Change-Id: Idbd09c5e9324e4deef5f1d54090d4b63cc7db812
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4031848
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
-rw-r--r-- | README.chromium | 2 | ||||
-rw-r--r-- | include/libyuv/planar_functions.h | 27 | ||||
-rw-r--r-- | include/libyuv/row.h | 26 | ||||
-rw-r--r-- | include/libyuv/version.h | 2 | ||||
-rw-r--r-- | source/planar_functions.cc | 83 | ||||
-rw-r--r-- | source/row_any.cc | 35 | ||||
-rw-r--r-- | source/row_common.cc | 15 | ||||
-rw-r--r-- | source/row_gcc.cc | 23 | ||||
-rw-r--r-- | source/row_neon.cc | 20 | ||||
-rw-r--r-- | source/row_neon64.cc | 20 | ||||
-rw-r--r-- | unit_test/planar_test.cc | 93 |
11 files changed, 279 insertions, 67 deletions
diff --git a/README.chromium b/README.chromium index 3c221afa..98d8ad7d 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1848 +Version: 1849 License: BSD License File: LICENSE diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index f7c6db80..ffe63705 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -85,13 +85,23 @@ void SetPlane(uint8_t* dst_y, // Convert a plane of tiles of 16 x H to linear. LIBYUV_API -void DetilePlane(const uint8_t* src_y, - int src_stride_y, - uint8_t* dst_y, - int dst_stride_y, - int width, - int height, - int tile_height); +int DetilePlane(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height, + int tile_height); + +// Convert a plane of 16 bit tiles of 16 x H to linear. +LIBYUV_API +int DetilePlane_16(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int width, + int height, + int tile_height); // Convert a UV plane of tiles of 16 x H into linear U and V planes. LIBYUV_API @@ -106,6 +116,7 @@ void DetileSplitUVPlane(const uint8_t* src_uv, int tile_height); // Convert a Y and UV plane of tiles into interlaced YUY2. +LIBYUV_API void DetileToYUY2(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, @@ -382,6 +393,7 @@ int I210Copy(const uint16_t* src_y, int height); // Copy NV12. Supports inverting. +LIBYUV_API int NV12Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, @@ -394,6 +406,7 @@ int NV12Copy(const uint8_t* src_y, int height); // Copy NV21. Supports inverting. 
+LIBYUV_API int NV21Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 7f69e2b4..aa196c86 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -290,6 +290,7 @@ extern "C" { #define HAS_CONVERT16TO8ROW_SSSE3 #define HAS_CONVERT8TO16ROW_SSE2 #define HAS_DETILEROW_SSE2 +#define HAS_DETILEROW_16_SSE2 #define HAS_DETILESPLITUVROW_SSSE3 #define HAS_DETILETOYUY2_SSE2 #define HAS_HALFMERGEUVROW_SSSE3 @@ -449,6 +450,7 @@ extern "C" { #define HAS_BYTETOFLOATROW_NEON #define HAS_CONVERT16TO8ROW_NEON #define HAS_COPYROW_NEON +#define HAS_DETILEROW_16_NEON #define HAS_DETILEROW_NEON #define HAS_DETILESPLITUVROW_NEON #define HAS_DETILETOYUY2_NEON @@ -823,7 +825,8 @@ struct YuvConstants { #endif -#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1))) +#define IS_POWEROFTWO(x) (!((x) & ((x) - 1))) +#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1))) #define align_buffer_64(var, size) \ uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \ @@ -2012,7 +2015,6 @@ void DetileRow_C(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, int width); - void DetileRow_NEON(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, @@ -2029,6 +2031,26 @@ void DetileRow_Any_SSE2(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, int width); +void DetileRow_16_C(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width); +void DetileRow_16_NEON(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width); +void DetileRow_16_Any_NEON(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width); +void DetileRow_16_SSE2(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width); +void DetileRow_16_Any_SSE2(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width); void DetileSplitUVRow_C(const uint8_t* src_uv, ptrdiff_t src_tile_stride, uint8_t* dst_u, diff --git 
a/include/libyuv/version.h b/include/libyuv/version.h index 62a7257e..cc91e04d 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1848 +#define LIBYUV_VERSION 1849 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 67229ee7..1e3dfaed 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -385,6 +385,7 @@ int I420ToI400(const uint8_t* src_y, } // Copy NV12. Supports inverting. +LIBYUV_API int NV12Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, @@ -418,6 +419,7 @@ int NV12Copy(const uint8_t* src_y, } // Copy NV21. Supports inverting. +LIBYUV_API int NV21Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, @@ -916,26 +918,22 @@ int NV21ToNV12(const uint8_t* src_y, // tile_height is 16 or 32 for MM21. // src_stride_y is bytes per row of source ignoring tiling. e.g. 640 // TODO: More detile row functions. - LIBYUV_API -void DetilePlane(const uint8_t* src_y, - int src_stride_y, - uint8_t* dst_y, - int dst_stride_y, - int width, - int height, - int tile_height) { +int DetilePlane(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height, + int tile_height) { const ptrdiff_t src_tile_stride = 16 * tile_height; int y; void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, int width) = DetileRow_C; - assert(src_stride_y >= 0); - assert(tile_height > 0); - assert(src_stride_y > 0); - - if (width <= 0 || height == 0) { - return; + if (!src_y || !dst_y || width <= 0 || height == 0 || !IS_POWEROFTWO(tile_height)) { + return -1; } + // Negative height means invert the image. 
if (height < 0) { height = -height; @@ -970,6 +968,63 @@ void DetilePlane(const uint8_t* src_y, src_y = src_y - src_tile_stride + src_stride_y * tile_height; } } + return 0; +} + +// Convert a plane of 16 bit tiles of 16 x H to linear. +// tile width is 16 and assumed. +// tile_height is 16 or 32 for MT2T. +LIBYUV_API +int DetilePlane_16(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int width, + int height, + int tile_height) { + const ptrdiff_t src_tile_stride = 16 * tile_height; + int y; + void (*DetileRow_16)(const uint16_t* src, ptrdiff_t src_tile_stride, + uint16_t* dst, int width) = DetileRow_16_C; + if (!src_y || !dst_y || width <= 0 || height == 0 || !IS_POWEROFTWO(tile_height)) { + return -1; + } + + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + +#if defined(HAS_DETILEROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + DetileRow_16 = DetileRow_16_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + DetileRow_16 = DetileRow_16_SSE2; + } + } +#endif +#if defined(HAS_DETILEROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + DetileRow_16 = DetileRow_16_Any_NEON; + if (IS_ALIGNED(width, 16)) { + DetileRow_16 = DetileRow_16_NEON; + } + } +#endif + + // Detile plane + for (y = 0; y < height; ++y) { + DetileRow_16(src_y, src_tile_stride, dst_y, width); + dst_y += dst_stride_y; + src_y += 16; + // Advance to next row of tiles. 
+ if ((y & (tile_height - 1)) == (tile_height - 1)) { + src_y = src_y - src_tile_stride + src_stride_y * tile_height; + } + } + return 0; } LIBYUV_API diff --git a/source/row_any.cc b/source/row_any.cc index 413080fd..4b60fa0f 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -2242,26 +2242,31 @@ ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15) #endif #undef ANY11S -#define ANYDETILE(NAMEANY, ANY_SIMD, MASK) \ - void NAMEANY(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, \ - int width) { \ - SIMD_ALIGNED(uint8_t temp[16 * 2]); \ - memset(temp, 0, 16); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src, src_tile_stride, dst, n); \ - } \ - memcpy(temp, src + (n / 16) * src_tile_stride, r); \ - ANY_SIMD(temp, src_tile_stride, temp + 16, MASK + 1); \ - memcpy(dst + n, temp + 16, r); \ +#define ANYDETILE(NAMEANY, ANY_SIMD, T, BPP, MASK) \ + void NAMEANY(const T* src, ptrdiff_t src_tile_stride, T* dst, int width) { \ + SIMD_ALIGNED(T temp[16 * 2]); \ + memset(temp, 0, 16 * BPP); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src, src_tile_stride, dst, n); \ + } \ + memcpy(temp, src + (n / 16) * src_tile_stride, r * BPP); \ + ANY_SIMD(temp, src_tile_stride, temp + 16, MASK + 1); \ + memcpy(dst + n, temp + 16, r * BPP); \ } #ifdef HAS_DETILEROW_NEON -ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, 15) +ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, uint8_t, 1, 15) #endif #ifdef HAS_DETILEROW_SSE2 -ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, 15) +ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, uint8_t, 1, 15) +#endif +#ifdef HAS_DETILEROW_16_NEON +ANYDETILE(DetileRow_16_Any_NEON, DetileRow_16_NEON, uint16_t, 2, 15) +#endif +#ifdef HAS_DETILEROW_16_SSE2 +ANYDETILE(DetileRow_16_Any_SSE2, DetileRow_16_SSE2, uint16_t, 2, 15) #endif #define ANYDETILESPLITUV(NAMEANY, ANY_SIMD, MASK) \ diff --git a/source/row_common.cc b/source/row_common.cc index 
8bfa4b8c..5ee5b17f 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -2748,6 +2748,21 @@ void DetileRow_C(const uint8_t* src, } } +void DetileRow_16_C(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width) { + int x; + for (x = 0; x < width - 15; x += 16) { + memcpy(dst, src, 16 * sizeof(uint16_t)); + dst += 16; + src += src_tile_stride; + } + if (width & 15) { + memcpy(dst, src, (width & 15) * sizeof(uint16_t)); + } +} + void DetileSplitUVRow_C(const uint8_t* src_uv, ptrdiff_t src_tile_stride, uint8_t* dst_u, diff --git a/source/row_gcc.cc b/source/row_gcc.cc index ad1c052e..4dc56a5f 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -5030,6 +5030,29 @@ void DetileRow_SSE2(const uint8_t* src, } #endif // HAS_DETILEROW_SSE2 +#ifdef HAS_DETILEROW_16_SSE2 +void DetileRow_16_SSE2(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width) { + asm volatile( + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea (%0,%3,2),%0 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"(src_tile_stride) // %3 + : "cc", "memory", "xmm0", "xmm1"); +} +#endif // HAS_DETILEROW_SSE2 + #ifdef HAS_DETILETOYUY2_SSE2 // Read 16 Y, 8 UV, and write 8 YUYV. void DetileToYUY2_SSE2(const uint8_t* src_y, diff --git a/source/row_neon.cc b/source/row_neon.cc index b777a0e1..d2815d17 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -622,6 +622,26 @@ void DetileRow_NEON(const uint8_t* src, ); } +// Reads 16 byte Y's of 16 bits from tile and writes out 16 Y's. +void DetileRow_16_NEON(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width) { + asm volatile( + "1: \n" + "vld1.16 {q0, q1}, [%0], %3 \n" // load 16 pixels + "subs %2, %2, #16 \n" // 16 processed per loop + "pld [%0, #3584] \n" + "vst1.16 {q0, q1}, [%1]! 
\n" // store 16 pixels + "bgt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"(src_tile_stride * 2) // %3 + : "cc", "memory", "q0", "q1" // Clobber List + ); +} + // Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V. void DetileSplitUVRow_NEON(const uint8_t* src_uv, ptrdiff_t src_tile_stride, diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 880a5f06..85d1c1b9 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -650,6 +650,26 @@ void DetileRow_NEON(const uint8_t* src, ); } +// Reads 16 byte Y's of 16 bits from tile and writes out 16 Y's. +void DetileRow_16_NEON(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width) { + asm volatile( + "1: \n" + "ld1 {v0.8h,v1.8h}, [%0], %3 \n" // load 16 pixels + "subs %w2, %w2, #16 \n" // 16 processed per loop + "prfm pldl1keep, [%0, 3584] \n" // 7 tiles of 512b ahead + "st1 {v0.8h,v1.8h}, [%1], #32 \n" // store 16 pixels + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"(src_tile_stride * 2) // %3 + : "cc", "memory", "v0", "v1" // Clobber List + ); +} + // Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V. void DetileSplitUVRow_NEON(const uint8_t* src_uv, ptrdiff_t src_tile_stride, diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 3a8c470b..4f462d0a 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -1638,29 +1638,29 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) { int i, j; // orig is tiled. Allocate enough memory for tiles. 
- int orig_width = (benchmark_width_ + 15) & ~15; - int orig_height = (benchmark_height_ + 15) & ~15; - int orig_plane_size = orig_width * orig_height; + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height; int y_plane_size = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_y, orig_plane_size); + align_buffer_page_end(tile_y, tile_plane_size); align_buffer_page_end(dst_c, y_plane_size); align_buffer_page_end(dst_opt, y_plane_size); - MemRandomize(orig_y, orig_plane_size); + MemRandomize(tile_y, tile_plane_size); memset(dst_c, 0, y_plane_size); memset(dst_opt, 0, y_plane_size); // Disable all optimizations. MaskCpuFlags(disable_cpu_flags_); for (j = 0; j < benchmark_iterations_; j++) { - DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_, + DetilePlane(tile_y, tile_width, dst_c, benchmark_width_, benchmark_width_, benchmark_height_, 16); } // Enable optimizations. MaskCpuFlags(benchmark_cpu_info_); for (j = 0; j < benchmark_iterations_; j++) { - DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_, + DetilePlane(tile_y, tile_width, dst_opt, benchmark_width_, benchmark_width_, benchmark_height_, 16); } @@ -1668,7 +1668,46 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) { EXPECT_EQ(dst_c[i], dst_opt[i]); } - free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(tile_y); + free_aligned_buffer_page_end(dst_c); + free_aligned_buffer_page_end(dst_opt); +} + +TEST_F(LibYUVPlanarTest, TestDetilePlane_16) { + int i, j; + + // orig is tiled. Allocate enough memory for tiles. 
+ int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height * 2; + int y_plane_size = benchmark_width_ * benchmark_height_ * 2; + align_buffer_page_end(tile_y, tile_plane_size); + align_buffer_page_end(dst_c, y_plane_size); + align_buffer_page_end(dst_opt, y_plane_size); + + MemRandomize(tile_y, tile_plane_size); + memset(dst_c, 0, y_plane_size); + memset(dst_opt, 0, y_plane_size); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_c, + benchmark_width_, benchmark_width_, benchmark_height_, 16); + } + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info_); + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_opt, + benchmark_width_, benchmark_width_, benchmark_height_, 16); + } + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end(tile_y); free_aligned_buffer_page_end(dst_c); free_aligned_buffer_page_end(dst_opt); } @@ -1678,33 +1717,33 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) { int i, j; // orig is tiled. Allocate enough memory for tiles. 
- int orig_width = (benchmark_width_ + 15) & ~15; - int orig_height = (benchmark_height_ + 15) & ~15; - int orig_plane_size = orig_width * orig_height; + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height; int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_; - align_buffer_page_end(orig_uv, orig_plane_size); - align_buffer_page_end(detiled_uv, orig_plane_size); + align_buffer_page_end(tile_uv, tile_plane_size); + align_buffer_page_end(detiled_uv, tile_plane_size); align_buffer_page_end(dst_u_two_stage, uv_plane_size); align_buffer_page_end(dst_u_opt, uv_plane_size); align_buffer_page_end(dst_v_two_stage, uv_plane_size); align_buffer_page_end(dst_v_opt, uv_plane_size); - MemRandomize(orig_uv, orig_plane_size); - memset(detiled_uv, 0, orig_plane_size); + MemRandomize(tile_uv, tile_plane_size); + memset(detiled_uv, 0, tile_plane_size); memset(dst_u_two_stage, 0, uv_plane_size); memset(dst_u_opt, 0, uv_plane_size); memset(dst_v_two_stage, 0, uv_plane_size); memset(dst_v_opt, 0, uv_plane_size); - DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, + DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); // Benchmark 2 step conversion for comparison. 
for (j = 0; j < benchmark_iterations_; j++) { - DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_, + DetilePlane(tile_uv, tile_width, detiled_uv, benchmark_width_, benchmark_width_, benchmark_height_, 16); - SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage, + SplitUVPlane(detiled_uv, tile_width, dst_u_two_stage, (benchmark_width_ + 1) / 2, dst_v_two_stage, (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2, benchmark_height_); @@ -1715,7 +1754,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) { EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]); } - free_aligned_buffer_page_end(orig_uv); + free_aligned_buffer_page_end(tile_uv); free_aligned_buffer_page_end(detiled_uv); free_aligned_buffer_page_end(dst_u_two_stage); free_aligned_buffer_page_end(dst_u_opt); @@ -1727,17 +1766,17 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { int i, j; // orig is tiled. Allocate enough memory for tiles. - int orig_width = (benchmark_width_ + 15) & ~15; - int orig_height = (benchmark_height_ + 15) & ~15; - int orig_plane_size = orig_width * orig_height; + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height; int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_; - align_buffer_page_end(orig_uv, orig_plane_size); + align_buffer_page_end(tile_uv, tile_plane_size); align_buffer_page_end(dst_u_c, uv_plane_size); align_buffer_page_end(dst_u_opt, uv_plane_size); align_buffer_page_end(dst_v_c, uv_plane_size); align_buffer_page_end(dst_v_opt, uv_plane_size); - MemRandomize(orig_uv, orig_plane_size); + MemRandomize(tile_uv, tile_plane_size); memset(dst_u_c, 0, uv_plane_size); memset(dst_u_opt, 0, uv_plane_size); memset(dst_v_c, 0, uv_plane_size); @@ -1746,7 +1785,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { // Disable all optimizations. 
MaskCpuFlags(disable_cpu_flags_); - DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2, + DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, (benchmark_width_ + 1) / 2, dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); @@ -1755,7 +1794,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { for (j = 0; j < benchmark_iterations_; j++) { DetileSplitUVPlane( - orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, + tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); } @@ -1764,7 +1803,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); } - free_aligned_buffer_page_end(orig_uv); + free_aligned_buffer_page_end(tile_uv); free_aligned_buffer_page_end(dst_u_c); free_aligned_buffer_page_end(dst_u_opt); free_aligned_buffer_page_end(dst_v_c); |