diff options
-rw-r--r-- | README.chromium | 2 | ||||
-rw-r--r-- | include/libyuv/version.h | 2 | ||||
-rw-r--r-- | source/convert.cc | 5 | ||||
-rw-r--r-- | source/rotate_neon.cc | 1 | ||||
-rw-r--r-- | source/row_common.cc | 2 | ||||
-rw-r--r-- | source/row_neon.cc | 48 | ||||
-rw-r--r-- | source/row_neon64.cc | 52 |
7 files changed, 55 insertions, 57 deletions
diff --git a/README.chromium b/README.chromium index 674775cd..f4a2fc6f 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1862 +Version: 1863 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index cc1e66e7..e3905b46 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1862 +#define LIBYUV_VERSION 1863 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index 37b7091b..0bcfbf20 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -801,10 +801,9 @@ int MT2TToP010(const uint8_t* src_y, } { - int u_width = (width + 1) / 2; - int uv_width = 2 * u_width; + int uv_width = (width + 1) & ~1; + int uv_height = (height + 1) / 2; int y = 0; - int uv_height = uv_height = (height + 1) / 2; const int tile_width = 16; const int y_tile_height = 32; const int uv_tile_height = 16; diff --git a/source/rotate_neon.cc b/source/rotate_neon.cc index 1acee22f..569a7318 100644 --- a/source/rotate_neon.cc +++ b/source/rotate_neon.cc @@ -411,7 +411,6 @@ void TransposeUVWx8_NEON(const uint8_t* src, : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); } - // Transpose 32 bit values (ARGB) void Transpose4x4_32_NEON(const uint8_t* src, int src_stride, diff --git a/source/row_common.cc b/source/row_common.cc index 478d8ac6..afccdb8d 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -2881,7 +2881,7 @@ void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size) { *dst++ = ((src_lower_bits[k] >> (j * 2)) & 0x3) << 6 | (uint16_t)*src_upper_bits << 8 | (uint16_t)*src_upper_bits >> 2; - src_upper_bits++; + src_upper_bits++; } } diff --git a/source/row_neon.cc b/source/row_neon.cc index 59b3e05a..b3e800d9 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -722,37 +722,37 @@ void DetileToYUY2_NEON(const uint8_t* src_y, void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) { asm volatile( - "1: \n" - "vld1.8 q14, [%0]! \n" // Load lower bits. - "vld1.8 q9, [%0]! \n" // Load upper bits row + "1: \n" + "vld1.8 q14, [%0]! \n" // Load lower bits. + "vld1.8 q9, [%0]! \n" // Load upper bits row // by row. - "vld1.8 q11, [%0]! \n" - "vld1.8 q13, [%0]! \n" - "vld1.8 q15, [%0]! \n" - "vshl.u8 q8, q14, #6 \n" // Shift lower bit data + "vld1.8 q11, [%0]! \n" + "vld1.8 q13, [%0]! \n" + "vld1.8 q15, [%0]! \n" + "vshl.u8 q8, q14, #6 \n" // Shift lower bit data // appropriately. - "vshl.u8 q10, q14, #4 \n" - "vshl.u8 q12, q14, #2 \n" - "vzip.u8 q8, q9 \n" // Interleave upper and + "vshl.u8 q10, q14, #4 \n" + "vshl.u8 q12, q14, #2 \n" + "vzip.u8 q8, q9 \n" // Interleave upper and // lower bits. - "vzip.u8 q10, q11 \n" - "vzip.u8 q12, q13 \n" - "vzip.u8 q14, q15 \n" - "vsri.u16 q8, q8, #10 \n" // Copy upper 6 bits + "vzip.u8 q10, q11 \n" + "vzip.u8 q12, q13 \n" + "vzip.u8 q14, q15 \n" + "vsri.u16 q8, q8, #10 \n" // Copy upper 6 bits // into lower 6 bits for // better accuracy in // conversions. - "vsri.u16 q9, q9, #10 \n" - "vsri.u16 q10, q10, #10 \n" - "vsri.u16 q11, q11, #10 \n" - "vsri.u16 q12, q12, #10 \n" - "vsri.u16 q13, q13, #10 \n" - "vsri.u16 q14, q14, #10 \n" - "vsri.u16 q15, q15, #10 \n" - "vstmia %1!, {q8-q15} \n" // Store pixel block (64 + "vsri.u16 q9, q9, #10 \n" + "vsri.u16 q10, q10, #10 \n" + "vsri.u16 q11, q11, #10 \n" + "vsri.u16 q12, q12, #10 \n" + "vsri.u16 q13, q13, #10 \n" + "vsri.u16 q14, q14, #10 \n" + "vsri.u16 q15, q15, #10 \n" + "vstmia %1!, {q8-q15} \n" // Store pixel block (64 // pixels). - "subs %2, %2, #80 \n" - "bgt 1b \n" + "subs %2, %2, #80 \n" + "bgt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(size) // %2 diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 3afb5a20..2b5522f0 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -753,32 +753,32 @@ void DetileToYUY2_NEON(const uint8_t* src_y, // tinyurl.com/mtk-10bit-video-format for format documentation. void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) { asm volatile( - "1: \n" - "ld1 {v7.16b}, [%0], #16 \n" - "ld1 {v0.16b-v3.16b}, [%0], #64 \n" - "shl v4.16b, v7.16b, #6 \n" - "shl v5.16b, v7.16b, #4 \n" - "shl v6.16b, v7.16b, #2 \n" - "subs %2, %2, #80 \n" - "zip1 v16.16b, v4.16b, v0.16b \n" - "zip1 v18.16b, v5.16b, v1.16b \n" - "zip1 v20.16b, v6.16b, v2.16b \n" - "zip1 v22.16b, v7.16b, v3.16b \n" - "zip2 v17.16b, v4.16b, v0.16b \n" - "zip2 v19.16b, v5.16b, v1.16b \n" - "zip2 v21.16b, v6.16b, v2.16b \n" - "zip2 v23.16b, v7.16b, v3.16b \n" - "sri v16.8h, v16.8h, #10 \n" - "sri v17.8h, v17.8h, #10 \n" - "sri v18.8h, v18.8h, #10 \n" - "sri v19.8h, v19.8h, #10 \n" - "st1 {v16.8h-v19.8h}, [%1], #64 \n" - "sri v20.8h, v20.8h, #10 \n" - "sri v21.8h, v21.8h, #10 \n" - "sri v22.8h, v22.8h, #10 \n" - "sri v23.8h, v23.8h, #10 \n" - "st1 {v20.8h-v23.8h}, [%1], #64 \n" - "b.gt 1b \n" + "1: \n" + "ld1 {v7.16b}, [%0], #16 \n" + "ld1 {v0.16b-v3.16b}, [%0], #64 \n" + "shl v4.16b, v7.16b, #6 \n" + "shl v5.16b, v7.16b, #4 \n" + "shl v6.16b, v7.16b, #2 \n" + "subs %2, %2, #80 \n" + "zip1 v16.16b, v4.16b, v0.16b \n" + "zip1 v18.16b, v5.16b, v1.16b \n" + "zip1 v20.16b, v6.16b, v2.16b \n" + "zip1 v22.16b, v7.16b, v3.16b \n" + "zip2 v17.16b, v4.16b, v0.16b \n" + "zip2 v19.16b, v5.16b, v1.16b \n" + "zip2 v21.16b, v6.16b, v2.16b \n" + "zip2 v23.16b, v7.16b, v3.16b \n" + "sri v16.8h, v16.8h, #10 \n" + "sri v17.8h, v17.8h, #10 \n" + "sri v18.8h, v18.8h, #10 \n" + "sri v19.8h, v19.8h, #10 \n" + "st1 {v16.8h-v19.8h}, [%1], #64 \n" + "sri v20.8h, v20.8h, #10 \n" + "sri v21.8h, v21.8h, #10 \n" + "sri v22.8h, v22.8h, #10 \n" + "sri v23.8h, v23.8h, #10 \n" + "st1 {v20.8h-v23.8h}, [%1], #64 \n" + "b.gt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(size) // %2 |