aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Magnus Jedvert <magjed@google.com> 2016-05-26 10:30:57 +0200
committer Magnus Jedvert <magjed@google.com> 2016-05-26 10:30:57 +0200
commit 942db3016a1653e66eb7935966449e06bdceb7b3 (patch)
tree d22c7a525fad18ce6d6106da217bd09179be747e
parent 6020d2aa641fe3e395e8ee186ee97fa9a817250c (diff)
download libyuv-942db3016a1653e66eb7935966449e06bdceb7b3.tar.gz
Add ARGBExtractAlpha function
BUG=libyuv:572
R=fbarchard@google.com
Review URL: https://codereview.chromium.org/1995293002 .
-rw-r--r-- include/libyuv/planar_functions.h 6
-rw-r--r-- include/libyuv/row.h 10
-rw-r--r-- source/planar_functions.cc 43
-rw-r--r-- source/row_any.cc 6
-rw-r--r-- source/row_common.cc 13
-rw-r--r-- source/row_gcc.cc 27
-rw-r--r-- source/row_neon.cc 17
-rw-r--r-- source/row_neon64.cc 19
-rw-r--r-- source/row_win.cc 27
-rw-r--r-- unit_test/planar_test.cc 30
10 files changed, 198 insertions, 0 deletions
diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index 9c19a59d..881b0c5c 100644
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -288,6 +288,12 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
+// Extract the alpha channel from ARGB into a separate plane of one byte per pixel.
+LIBYUV_API
+int ARGBExtractAlpha(const uint8* src_argb, int src_stride_argb,  // source ARGB image, 4 bytes per pixel
+ uint8* dst_a, int dst_stride_a,  // destination alpha plane, 1 byte per pixel
+ int width, int height);  // negative height reads the source bottom-up; returns 0, or -1 on invalid arguments
+
// Copy Y channel to Alpha of ARGB.
LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index b5d9aaa1..3028513e 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -104,6 +104,7 @@ extern "C" {
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBTOYJROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
+#define HAS_ARGBEXTRACTALPHAROW_SSE2  // gates ARGBExtractAlphaRow_SSE2, its Any wrapper and the SSE2 dispatch in ARGBExtractAlpha
#define HAS_BGRATOUVROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
#define HAS_COPYROW_ERMS
@@ -291,6 +292,7 @@ extern "C" {
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON
+#define HAS_ARGBEXTRACTALPHAROW_NEON  // gates the NEON Any wrapper and the NEON dispatch in ARGBExtractAlpha
#define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_COPYROW_NEON
@@ -877,6 +879,14 @@ void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
int width);
+void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width);  // portable C row function; handles any width
+void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width);  // processes 8 pixels per loop iteration
+void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width);  // processes 8 pixels per loop iteration
+void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_a,
+ int width);  // chosen by ARGBExtractAlpha when IS_ALIGNED(width, 8) is false
+void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb, uint8* dst_a,
+ int width);  // chosen by ARGBExtractAlpha when IS_ALIGNED(width, 8) is false
+
void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 73fa7d28..b1b1f2c8 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -2374,6 +2374,49 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
return 0;
}
+// Extract just the alpha channel from ARGB into dst_a, one byte per pixel. Returns 0 on success, -1 on invalid arguments.
+LIBYUV_API
+int ARGBExtractAlpha(const uint8* src_argb, int src_stride,
+ uint8* dst_a, int dst_stride,
+ int width, int height) {
+ if (!src_argb || !dst_a || width <= 0 || height == 0) {  // reject NULL buffers, non-positive width and zero height
+ return -1;
+ }
+ // Negative height means invert the image: read the source from its last row upward.
+ if (height < 0) {
+ height = -height;
+ src_argb += (height - 1) * src_stride;  // point at the last source row
+ src_stride = -src_stride;  // step backwards through the source
+ }
+ // Coalesce rows: when both planes are tightly packed, treat the image as one long row.
+ if (src_stride == width * 4 && dst_stride == width) {
+ width *= height;
+ height = 1;
+ src_stride = dst_stride = 0;  // only one row is processed, so the strides no longer matter
+ }
+ void (*ARGBExtractAlphaRow)(const uint8 *src_argb, uint8 *dst_a, int width) =
+ ARGBExtractAlphaRow_C;  // default to the portable C row function
+#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {  // override with SSE2 when the CPU flag is reported
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2  // width here is the (possibly coalesced) row width
+ : ARGBExtractAlphaRow_Any_SSE2;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {  // override with NEON when the CPU flag is reported
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_NEON  // width here is the (possibly coalesced) row width
+ : ARGBExtractAlphaRow_Any_NEON;
+ }
+#endif
+
+ for (int y = 0; y < height; ++y) {  // one call of the selected row function per row
+ ARGBExtractAlphaRow(src_argb, dst_a, width);
+ src_argb += src_stride;
+ dst_a += dst_stride;
+ }
+ return 0;
+}
+
// Copy a planar Y channel to the alpha channel of a destination ARGB image.
LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
diff --git a/source/row_any.cc b/source/row_any.cc
index e1404eff..94ae0edd 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -466,6 +466,12 @@ ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
#ifdef HAS_ARGBATTENUATEROW_NEON
ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#endif
+#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
+ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)  // NOTE(review): presumably 4 source bytes and 1 destination byte per pixel, width mask 7 - confirm against the ANY11 definition
+#endif
+#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
+ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 7)  // same ANY11 arguments as the SSE2 wrapper
+#endif
#undef ANY11
// Any 1 to 1 blended. Destination is read, modify, write.
diff --git a/source/row_common.cc b/source/row_common.cc
index 0c47e101..32d2f686 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -2381,6 +2381,19 @@ void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
}
}
+void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) {  // copies byte 3 of every 4-byte pixel to dst_a
+ int i;
+ for (i = 0; i < width - 1; i += 2) {  // two pixels per iteration
+ dst_a[0] = src_argb[3];  // alpha of the first pixel
+ dst_a[1] = src_argb[7];  // alpha of the second pixel
+ dst_a += 2;
+ src_argb += 8;
+ }
+ if (width & 1) {  // odd width: one pixel is left over
+ dst_a[0] = src_argb[3];
+ }
+}
+
void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
int i;
for (i = 0; i < width - 1; i += 2) {
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index 866bded7..7e060664 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -2936,6 +2936,33 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
}
#endif // HAS_ARGBCOPYALPHAROW_AVX2
+#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
+// Writes the alpha byte of each ARGB pixel to dst_a, 8 pixels per loop iteration. width in pixels.
+void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) {
+ asm volatile (
+ LABELALIGN
+ "1: \n"
+ "movdqu " MEMACCESS(0) ", %%xmm0 \n"  // unaligned load of 16 bytes (4 pixels)
+ "movdqu " MEMACCESS2(0x10, 0) ", %%xmm1 \n"  // next 16 bytes (4 more pixels)
+ "lea " MEMLEA(0x20, 0) ", %0 \n"  // advance src_argb by 32 bytes
+ "psrld $0x18, %%xmm0 \n"  // shift each 32-bit lane right by 24 bits: alpha moves to the low byte
+ "psrld $0x18, %%xmm1 \n"
+ "packssdw %%xmm1, %%xmm0 \n"  // narrow the 8 dwords to 8 words
+ "packuswb %%xmm0, %%xmm0 \n"  // narrow the words to bytes: 8 alpha values in the low 8 bytes
+ "movq %%xmm0," MEMACCESS(1) " \n"  // store 8 alpha bytes
+ "lea " MEMLEA(0x8, 1) ", %1 \n"  // advance dst_a by 8 bytes
+ "sub $0x8, %2 \n"  // 8 pixels consumed
+ "jg 1b \n"  // repeat while the remaining count is positive
+ : "+r"(src_argb), // %0
+ "+r"(dst_a), // %1
+ "+rm"(width) // %2
+ :
+ : "memory", "cc"
+ , "xmm0", "xmm1"
+ );
+}
+#endif // HAS_ARGBEXTRACTALPHAROW_SSE2
+
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
// width in pixels
void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 91d6aa85..7574cee8 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -1298,6 +1298,23 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
);
}
+void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {  // 32-bit ARM NEON: writes 8 alpha bytes per loop iteration
+ asm volatile (
+ "1: \n"
+ MEMACCESS(0)
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels de-interleaved; d3 receives the 8 alpha bytes
+ "subs %2, %2, #8 \n" // 8 processed per loop
+ MEMACCESS(1)
+ "vst1.8 {d3}, [%1]! \n" // store 8 A's.
+ "bgt 1b \n"  // repeat while the remaining count is positive
+ : "+r"(src_argb), // %0
+ "+r"(dst_a), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
+ );
+}
+
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
asm volatile (
"vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index ee42af12..e5f2dc8f 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -1399,6 +1399,25 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
}
#endif // HAS_ARGBTOYROW_NEON
+#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
+void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {  // AArch64 NEON: writes 8 alpha bytes per loop iteration
+ asm volatile (
+ "1: \n"
+ MEMACCESS(0)
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels de-interleaved; v3 receives the 8 alpha bytes
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
+ MEMACCESS(1)
+ "st1 {v3.8b}, [%1], #8 \n" // store 8 A's.
+ "b.gt 1b \n"  // repeat while the remaining count is positive
+ : "+r"(src_argb), // %0
+ "+r"(dst_a), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+#endif // HAS_ARGBEXTRACTALPHAROW_NEON
+
#ifdef HAS_ARGBTOYJROW_NEON
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
asm volatile (
diff --git a/source/row_win.cc b/source/row_win.cc
index a8c16c3c..dc325fb9 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -3532,6 +3532,33 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
}
#endif // HAS_ARGBCOPYALPHAROW_AVX2
+#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
+// Writes the alpha byte of each ARGB pixel to dst_a, 8 pixels per loop iteration. width in pixels.
+__declspec(naked)
+void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_a
+ mov ecx, [esp + 12] // width
+
+ extractloop:
+ movdqu xmm0, [eax]  // unaligned load of 16 bytes (4 pixels)
+ movdqu xmm1, [eax + 16]  // next 16 bytes (4 more pixels)
+ lea eax, [eax + 32]  // advance src_argb by 32 bytes
+ psrld xmm0, 24  // shift each 32-bit lane right by 24 bits: alpha moves to the low byte
+ psrld xmm1, 24
+ packssdw xmm0, xmm1  // narrow the 8 dwords to 8 words
+ packuswb xmm0, xmm0  // narrow the words to bytes: 8 alpha values in the low 8 bytes
+ movq qword ptr [edx], xmm0  // store 8 alpha bytes
+ lea edx, [edx + 8]  // advance dst_a by 8 bytes
+ sub ecx, 8  // 8 pixels consumed
+ jg extractloop  // repeat while the remaining count is positive
+
+ ret
+ }
+}
+#endif // HAS_ARGBEXTRACTALPHAROW_SSE2
+
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
// width in pixels
__declspec(naked)
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index 9146c9a4..1974a033 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -2390,6 +2390,36 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
free_aligned_buffer_64(orig_pixels);
}
+TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {  // compares ARGBExtractAlpha output under two CPU-flag masks
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ align_buffer_64(src_pixels, kPixels * 4);  // ARGB source: 4 bytes per pixel
+ align_buffer_64(dst_pixels_opt, kPixels);  // alpha planes: 1 byte per pixel
+ align_buffer_64(dst_pixels_c, kPixels);
+
+ MemRandomize(src_pixels, kPixels * 4);
+ MemRandomize(dst_pixels_opt, kPixels);
+ memcpy(dst_pixels_c, dst_pixels_opt, kPixels);  // both destinations start with identical contents
+
+ MaskCpuFlags(disable_cpu_flags_);  // NOTE(review): presumably restricts dispatch to the reference path - confirm
+ ARGBExtractAlpha(src_pixels, benchmark_width_ * 4,
+ dst_pixels_c, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ MaskCpuFlags(benchmark_cpu_info_);  // switch to the CPU flags used for the benchmark run
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {  // repeated so the call can be timed
+ ARGBExtractAlpha(src_pixels, benchmark_width_ * 4,
+ dst_pixels_opt, benchmark_width_,
+ benchmark_width_, benchmark_height_);
+ }
+ for (int i = 0; i < kPixels; ++i) {  // every output byte must match between the two runs
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ free_aligned_buffer_64(dst_pixels_c);
+ free_aligned_buffer_64(dst_pixels_opt);
+ free_aligned_buffer_64(src_pixels);
+}
+
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_64(orig_pixels, kPixels);