diff options
author | Magnus Jedvert <magjed@google.com> | 2016-05-26 10:30:57 +0200 |
---|---|---|
committer | Magnus Jedvert <magjed@google.com> | 2016-05-26 10:30:57 +0200 |
commit | 942db3016a1653e66eb7935966449e06bdceb7b3 (patch) | |
tree | d22c7a525fad18ce6d6106da217bd09179be747e | |
parent | 6020d2aa641fe3e395e8ee186ee97fa9a817250c (diff) | |
download | libyuv-942db3016a1653e66eb7935966449e06bdceb7b3.tar.gz |
Add ARGBExtractAlpha function
BUG=libyuv:572
R=fbarchard@google.com
Review URL: https://codereview.chromium.org/1995293002 .
-rw-r--r-- | include/libyuv/planar_functions.h | 6 | ||||
-rw-r--r-- | include/libyuv/row.h | 10 | ||||
-rw-r--r-- | source/planar_functions.cc | 43 | ||||
-rw-r--r-- | source/row_any.cc | 6 | ||||
-rw-r--r-- | source/row_common.cc | 13 | ||||
-rw-r--r-- | source/row_gcc.cc | 27 | ||||
-rw-r--r-- | source/row_neon.cc | 17 | ||||
-rw-r--r-- | source/row_neon64.cc | 19 | ||||
-rw-r--r-- | source/row_win.cc | 27 | ||||
-rw-r--r-- | unit_test/planar_test.cc | 30 |
10 files changed, 198 insertions, 0 deletions
diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 9c19a59d..881b0c5c 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -288,6 +288,12 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); +// Extract the alpha channel from ARGB. +LIBYUV_API +int ARGBExtractAlpha(const uint8* src_argb, int src_stride_argb, + uint8* dst_a, int dst_stride_a, + int width, int height); + // Copy Y channel to Alpha of ARGB. LIBYUV_API int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index b5d9aaa1..3028513e 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -104,6 +104,7 @@ extern "C" { #define HAS_ARGBTOUVROW_SSSE3 #define HAS_ARGBTOYJROW_SSSE3 #define HAS_ARGBTOYROW_SSSE3 +#define HAS_ARGBEXTRACTALPHAROW_SSE2 #define HAS_BGRATOUVROW_SSSE3 #define HAS_BGRATOYROW_SSSE3 #define HAS_COPYROW_ERMS @@ -291,6 +292,7 @@ extern "C" { #define HAS_ARGBTOUVROW_NEON #define HAS_ARGBTOYJROW_NEON #define HAS_ARGBTOYROW_NEON +#define HAS_ARGBEXTRACTALPHAROW_NEON #define HAS_BGRATOUVROW_NEON #define HAS_BGRATOYROW_NEON #define HAS_COPYROW_NEON @@ -877,6 +879,14 @@ void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, int width); +void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width); +void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width); +void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width); +void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_a, + int width); +void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb, uint8* dst_a, + int width); + void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width); void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 73fa7d28..b1b1f2c8 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -2374,6 +2374,49 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, return 0; } +// Extract just the alpha channel from ARGB. +LIBYUV_API +int ARGBExtractAlpha(const uint8* src_argb, int src_stride, + uint8* dst_a, int dst_stride, + int width, int height) { + if (!src_argb || !dst_a || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb += (height - 1) * src_stride; + src_stride = -src_stride; + } + // Coalesce rows. + if (src_stride == width * 4 && dst_stride == width) { + width *= height; + height = 1; + src_stride = dst_stride = 0; + } + void (*ARGBExtractAlphaRow)(const uint8 *src_argb, uint8 *dst_a, int width) = + ARGBExtractAlphaRow_C; +#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2 + : ARGBExtractAlphaRow_Any_SSE2; + } +#endif +#if defined(HAS_ARGBEXTRACTALPHAROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_NEON + : ARGBExtractAlphaRow_Any_NEON; + } +#endif + + for (int y = 0; y < height; ++y) { + ARGBExtractAlphaRow(src_argb, dst_a, width); + src_argb += src_stride; + dst_a += dst_stride; + } + return 0; +} + // Copy a planar Y channel to the alpha channel of a destination ARGB image. LIBYUV_API int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, diff --git a/source/row_any.cc b/source/row_any.cc index e1404eff..94ae0edd 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -466,6 +466,12 @@ ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7) #ifdef HAS_ARGBATTENUATEROW_NEON ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) #endif +#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 +ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7) +#endif +#ifdef HAS_ARGBEXTRACTALPHAROW_NEON +ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 7) +#endif #undef ANY11 // Any 1 to 1 blended. Destination is read, modify, write. diff --git a/source/row_common.cc b/source/row_common.cc index 0c47e101..32d2f686 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -2381,6 +2381,19 @@ void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { } } +void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) { + int i; + for (i = 0; i < width - 1; i += 2) { + dst_a[0] = src_argb[3]; + dst_a[1] = src_argb[7]; + dst_a += 2; + src_argb += 8; + } + if (width & 1) { + dst_a[0] = src_argb[3]; + } +} + void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { int i; for (i = 0; i < width - 1; i += 2) { diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 866bded7..7e060664 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -2936,6 +2936,33 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { } #endif // HAS_ARGBCOPYALPHAROW_AVX2 +#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 +// width in pixels +void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) { + asm volatile ( + LABELALIGN + "1: \n" + "movdqu " MEMACCESS(0) ", %%xmm0 \n" + "movdqu " MEMACCESS2(0x10, 0) ", %%xmm1 \n" + "lea " MEMLEA(0x20, 0) ", %0 \n" + "psrld $0x18, %%xmm0 \n" + "psrld $0x18, %%xmm1 \n" + "packssdw %%xmm1, %%xmm0 \n" + "packuswb %%xmm0, %%xmm0 \n" + "movq %%xmm0," MEMACCESS(1) " \n" + "lea " MEMLEA(0x8, 1) ", %1 \n" + "sub $0x8, %2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_a), // %1 + "+rm"(width) // %2 + : + : "memory", "cc" + , "xmm0", "xmm1" + ); +} +#endif // HAS_ARGBEXTRACTALPHAROW_SSE2 + #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 // width in pixels void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { diff --git a/source/row_neon.cc b/source/row_neon.cc index 91d6aa85..7574cee8 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -1298,6 +1298,23 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { ); } +void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { + asm volatile ( + "1: \n" + MEMACCESS(0) + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels + "subs %2, %2, #8 \n" // 8 processed per loop + MEMACCESS(1) + "vst1.8 {d3}, [%1]! \n" // store 8 A's. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_a), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List + ); +} + void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { asm volatile ( "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient diff --git a/source/row_neon64.cc b/source/row_neon64.cc index ee42af12..e5f2dc8f 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -1399,6 +1399,25 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { } #endif // HAS_ARGBTOYROW_NEON +#ifdef HAS_ARGBEXTRACTALPHAROW_NEON +void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { + asm volatile ( + "1: \n" + MEMACCESS(0) + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load row 8 pixels + "subs %w2, %w2, #8 \n" // 8 processed per loop + MEMACCESS(1) + "st1 {v3.8b}, [%1], #8 \n" // store 8 A's. + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_a), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List + ); +} +#endif // HAS_ARGBEXTRACTALPHAROW_NEON + #ifdef HAS_ARGBTOYJROW_NEON void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { asm volatile ( diff --git a/source/row_win.cc b/source/row_win.cc index a8c16c3c..dc325fb9 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -3532,6 +3532,33 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { } #endif // HAS_ARGBCOPYALPHAROW_AVX2 +#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 +// width in pixels +__declspec(naked) +void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) { + __asm { + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_a + mov ecx, [esp + 12] // width + + extractloop: + movdqu xmm0, [eax] + movdqu xmm1, [eax + 16] + lea eax, [eax + 32] + psrld xmm0, 24 + psrld xmm1, 24 + packssdw xmm0, xmm1 + packuswb xmm0, xmm0 + movq qword ptr [edx], xmm0 + lea edx, [edx + 8] + sub ecx, 8 + jg extractloop + + ret + } +} +#endif // HAS_ARGBEXTRACTALPHAROW_SSE2 + #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 // width in pixels __declspec(naked) diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 9146c9a4..1974a033 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -2390,6 +2390,36 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) { free_aligned_buffer_64(orig_pixels); } +TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_64(src_pixels, kPixels * 4); + align_buffer_64(dst_pixels_opt, kPixels); + align_buffer_64(dst_pixels_c, kPixels); + + MemRandomize(src_pixels, kPixels * 4); + MemRandomize(dst_pixels_opt, kPixels); + memcpy(dst_pixels_c, dst_pixels_opt, kPixels); + + MaskCpuFlags(disable_cpu_flags_); + ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, + dst_pixels_c, benchmark_width_, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, + dst_pixels_opt, benchmark_width_, + benchmark_width_, benchmark_height_); + } + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_64(dst_pixels_c); + free_aligned_buffer_64(dst_pixels_opt); + free_aligned_buffer_64(src_pixels); +} + TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) { const int kPixels = benchmark_width_ * benchmark_height_; align_buffer_64(orig_pixels, kPixels); |