about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- README.chromium 2
-rw-r--r-- include/libyuv/version.h 2
-rw-r--r-- source/convert.cc 5
-rw-r--r-- source/rotate_neon.cc 1
-rw-r--r-- source/row_common.cc 2
-rw-r--r-- source/row_neon.cc 48
-rw-r--r-- source/row_neon64.cc 52
7 files changed, 55 insertions, 57 deletions
diff --git a/README.chromium b/README.chromium
index 674775cd..f4a2fc6f 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1862
+Version: 1863
License: BSD
License File: LICENSE
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index cc1e66e7..e3905b46 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1862
+#define LIBYUV_VERSION 1863
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/source/convert.cc b/source/convert.cc
index 37b7091b..0bcfbf20 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -801,10 +801,9 @@ int MT2TToP010(const uint8_t* src_y,
}
{
- int u_width = (width + 1) / 2;
- int uv_width = 2 * u_width;
+ int uv_width = (width + 1) & ~1;
+ int uv_height = (height + 1) / 2;
int y = 0;
- int uv_height = uv_height = (height + 1) / 2;
const int tile_width = 16;
const int y_tile_height = 32;
const int uv_tile_height = 16;
diff --git a/source/rotate_neon.cc b/source/rotate_neon.cc
index 1acee22f..569a7318 100644
--- a/source/rotate_neon.cc
+++ b/source/rotate_neon.cc
@@ -411,7 +411,6 @@ void TransposeUVWx8_NEON(const uint8_t* src,
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
}
-
// Transpose 32 bit values (ARGB)
void Transpose4x4_32_NEON(const uint8_t* src,
int src_stride,
diff --git a/source/row_common.cc b/source/row_common.cc
index 478d8ac6..afccdb8d 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -2881,7 +2881,7 @@ void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size) {
*dst++ = ((src_lower_bits[k] >> (j * 2)) & 0x3) << 6 |
(uint16_t)*src_upper_bits << 8 |
(uint16_t)*src_upper_bits >> 2;
- src_upper_bits++;
+ src_upper_bits++;
}
}
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 59b3e05a..b3e800d9 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -722,37 +722,37 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
asm volatile(
- "1: \n"
- "vld1.8 q14, [%0]! \n" // Load lower bits.
- "vld1.8 q9, [%0]! \n" // Load upper bits row
+ "1: \n"
+ "vld1.8 q14, [%0]! \n" // Load lower bits.
+ "vld1.8 q9, [%0]! \n" // Load upper bits row
// by row.
- "vld1.8 q11, [%0]! \n"
- "vld1.8 q13, [%0]! \n"
- "vld1.8 q15, [%0]! \n"
- "vshl.u8 q8, q14, #6 \n" // Shift lower bit data
+ "vld1.8 q11, [%0]! \n"
+ "vld1.8 q13, [%0]! \n"
+ "vld1.8 q15, [%0]! \n"
+ "vshl.u8 q8, q14, #6 \n" // Shift lower bit data
// appropriately.
- "vshl.u8 q10, q14, #4 \n"
- "vshl.u8 q12, q14, #2 \n"
- "vzip.u8 q8, q9 \n" // Interleave upper and
+ "vshl.u8 q10, q14, #4 \n"
+ "vshl.u8 q12, q14, #2 \n"
+ "vzip.u8 q8, q9 \n" // Interleave upper and
// lower bits.
- "vzip.u8 q10, q11 \n"
- "vzip.u8 q12, q13 \n"
- "vzip.u8 q14, q15 \n"
- "vsri.u16 q8, q8, #10 \n" // Copy upper 6 bits
+ "vzip.u8 q10, q11 \n"
+ "vzip.u8 q12, q13 \n"
+ "vzip.u8 q14, q15 \n"
+ "vsri.u16 q8, q8, #10 \n" // Copy upper 6 bits
// into lower 6 bits for
// better accuracy in
// conversions.
- "vsri.u16 q9, q9, #10 \n"
- "vsri.u16 q10, q10, #10 \n"
- "vsri.u16 q11, q11, #10 \n"
- "vsri.u16 q12, q12, #10 \n"
- "vsri.u16 q13, q13, #10 \n"
- "vsri.u16 q14, q14, #10 \n"
- "vsri.u16 q15, q15, #10 \n"
- "vstmia %1!, {q8-q15} \n" // Store pixel block (64
+ "vsri.u16 q9, q9, #10 \n"
+ "vsri.u16 q10, q10, #10 \n"
+ "vsri.u16 q11, q11, #10 \n"
+ "vsri.u16 q12, q12, #10 \n"
+ "vsri.u16 q13, q13, #10 \n"
+ "vsri.u16 q14, q14, #10 \n"
+ "vsri.u16 q15, q15, #10 \n"
+ "vstmia %1!, {q8-q15} \n" // Store pixel block (64
// pixels).
- "subs %2, %2, #80 \n"
- "bgt 1b \n"
+ "subs %2, %2, #80 \n"
+ "bgt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(size) // %2
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 3afb5a20..2b5522f0 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -753,32 +753,32 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
// tinyurl.com/mtk-10bit-video-format for format documentation.
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
asm volatile(
- "1: \n"
- "ld1 {v7.16b}, [%0], #16 \n"
- "ld1 {v0.16b-v3.16b}, [%0], #64 \n"
- "shl v4.16b, v7.16b, #6 \n"
- "shl v5.16b, v7.16b, #4 \n"
- "shl v6.16b, v7.16b, #2 \n"
- "subs %2, %2, #80 \n"
- "zip1 v16.16b, v4.16b, v0.16b \n"
- "zip1 v18.16b, v5.16b, v1.16b \n"
- "zip1 v20.16b, v6.16b, v2.16b \n"
- "zip1 v22.16b, v7.16b, v3.16b \n"
- "zip2 v17.16b, v4.16b, v0.16b \n"
- "zip2 v19.16b, v5.16b, v1.16b \n"
- "zip2 v21.16b, v6.16b, v2.16b \n"
- "zip2 v23.16b, v7.16b, v3.16b \n"
- "sri v16.8h, v16.8h, #10 \n"
- "sri v17.8h, v17.8h, #10 \n"
- "sri v18.8h, v18.8h, #10 \n"
- "sri v19.8h, v19.8h, #10 \n"
- "st1 {v16.8h-v19.8h}, [%1], #64 \n"
- "sri v20.8h, v20.8h, #10 \n"
- "sri v21.8h, v21.8h, #10 \n"
- "sri v22.8h, v22.8h, #10 \n"
- "sri v23.8h, v23.8h, #10 \n"
- "st1 {v20.8h-v23.8h}, [%1], #64 \n"
- "b.gt 1b \n"
+ "1: \n"
+ "ld1 {v7.16b}, [%0], #16 \n"
+ "ld1 {v0.16b-v3.16b}, [%0], #64 \n"
+ "shl v4.16b, v7.16b, #6 \n"
+ "shl v5.16b, v7.16b, #4 \n"
+ "shl v6.16b, v7.16b, #2 \n"
+ "subs %2, %2, #80 \n"
+ "zip1 v16.16b, v4.16b, v0.16b \n"
+ "zip1 v18.16b, v5.16b, v1.16b \n"
+ "zip1 v20.16b, v6.16b, v2.16b \n"
+ "zip1 v22.16b, v7.16b, v3.16b \n"
+ "zip2 v17.16b, v4.16b, v0.16b \n"
+ "zip2 v19.16b, v5.16b, v1.16b \n"
+ "zip2 v21.16b, v6.16b, v2.16b \n"
+ "zip2 v23.16b, v7.16b, v3.16b \n"
+ "sri v16.8h, v16.8h, #10 \n"
+ "sri v17.8h, v17.8h, #10 \n"
+ "sri v18.8h, v18.8h, #10 \n"
+ "sri v19.8h, v19.8h, #10 \n"
+ "st1 {v16.8h-v19.8h}, [%1], #64 \n"
+ "sri v20.8h, v20.8h, #10 \n"
+ "sri v21.8h, v21.8h, #10 \n"
+ "sri v22.8h, v22.8h, #10 \n"
+ "sri v23.8h, v23.8h, #10 \n"
+ "st1 {v20.8h-v23.8h}, [%1], #64 \n"
+ "b.gt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(size) // %2