diff options
author | Frank Barchard <fbarchard@google.com> | 2016-09-22 16:12:22 -0700 |
---|---|---|
committer | Frank Barchard <fbarchard@google.com> | 2016-09-22 16:12:22 -0700 |
commit | c5323b0fdc3428b9341043e8adc2c2715a227330 (patch) | |
tree | 66ef37541c0632264ae45e3db484befcc6901286 | |
parent | 5da918b48dd42281da74ca0c84a962c89d4d1430 (diff) | |
download | libyuv-c5323b0fdc3428b9341043e8adc2c2715a227330.tar.gz |
Add MIPS SIMD Arch (MSA) optimized MirrorRow function
Following the preparation patch added to the Chromium sources,
2150943003: Add MIPS SIMD Arch (MSA) build flags for GYP/GN builds,
this patch adds the first MSA-optimized function to the libYUV project.
BUG=libyuv:634
R=fbarchard@google.com
Review URL: https://codereview.chromium.org/2285683002 .
-rw-r--r-- | Android.mk | 6 | ||||
-rw-r--r-- | BUILD.gn | 15 | ||||
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | docs/getting_started.md | 10 | ||||
-rw-r--r-- | include/libyuv/cpu_id.h | 1 | ||||
-rw-r--r-- | include/libyuv/macros_msa.h | 78 | ||||
-rw-r--r-- | include/libyuv/row.h | 6 | ||||
-rw-r--r-- | libyuv.gni | 3 | ||||
-rw-r--r-- | libyuv.gyp | 11 | ||||
-rw-r--r-- | libyuv.gypi | 2 | ||||
-rw-r--r-- | libyuv_test.gyp | 6 | ||||
-rw-r--r-- | source/cpu_id.cc | 38 | ||||
-rw-r--r-- | source/planar_functions.cc | 8 | ||||
-rw-r--r-- | source/rotate.cc | 8 | ||||
-rw-r--r-- | source/row_any.cc | 3 | ||||
-rw-r--r-- | source/row_msa.cc | 45 |
16 files changed, 242 insertions, 0 deletions
@@ -53,6 +53,12 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) source/scale_neon.cc.neon endif +ifeq ($(TARGET_ARCH_ABI),mips) + LOCAL_CFLAGS += -DLIBYUV_MSA + LOCAL_SRC_FILES += \ + source/row_msa.cc +endif + LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/include LOCAL_C_INCLUDES += $(LOCAL_PATH)/include @@ -94,6 +94,10 @@ static_library("libyuv") { deps += [ ":libyuv_neon" ] } + if (libyuv_use_msa) { + deps += [ ":libyuv_msa" ] + } + if (is_nacl) { # Always enable optimization under NaCl to workaround crbug.com/538243 . configs -= [ "//build/config/compiler:default_optimization" ] @@ -124,6 +128,17 @@ if (libyuv_use_neon) { } } +if (libyuv_use_msa) { + static_library("libyuv_msa") { + sources = [ + # MSA Source Files + "source/row_msa.cc", + ] + + public_configs = [ ":libyuv_config" ] + } +} + if (libyuv_include_tests) { config("libyuv_unittest_warnings_config") { if (!is_win) { diff --git a/CMakeLists.txt b/CMakeLists.txt index 718b47ad..6b7d2ab1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,7 @@ set(ly_source_files ${ly_src_dir}/row_any.cc ${ly_src_dir}/row_common.cc ${ly_src_dir}/row_mips.cc + ${ly_src_dir}/row_msa.cc ${ly_src_dir}/row_neon.cc ${ly_src_dir}/row_neon64.cc ${ly_src_dir}/row_gcc.cc @@ -80,6 +81,7 @@ set(ly_header_files ${ly_inc_dir}/libyuv/convert_from.h ${ly_inc_dir}/libyuv/convert_from_argb.h ${ly_inc_dir}/libyuv/cpu_id.h + ${ly_inc_dir}/libyuv/macros_msa.h ${ly_inc_dir}/libyuv/planar_functions.h ${ly_inc_dir}/libyuv/rotate.h ${ly_inc_dir}/libyuv/rotate_argb.h diff --git a/docs/getting_started.md b/docs/getting_started.md index c119a82f..4a0948e6 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -195,6 +195,16 @@ Running test with C code: gn gen out/Official "--args=is_debug=false is_official_build=true is_chrome_branded=true" ninja -C out/Official +#### Building mips with GN + +mipsel + gn gen out/Default "--args=is_debug=false target_cpu=\"mipsel\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true 
is_component_build = true is_clang = false" + ninja -C out/Default + +mips64el + gn gen out/Default "--args=is_debug=false target_cpu=\"mips64el\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false" + ninja -C out/Default + ### Linux GYP_DEFINES="target_arch=x64" ./gyp_libyuv diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index dfb7445e..0924cb3e 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -42,6 +42,7 @@ static const int kCpuHasAVX3 = 0x2000; // These flags are only valid on MIPS processors. static const int kCpuHasMIPS = 0x10000; static const int kCpuHasDSPR2 = 0x20000; +static const int kCpuHasMSA = 0x40000; // Internal function used to auto-init. LIBYUV_API diff --git a/include/libyuv/macros_msa.h b/include/libyuv/macros_msa.h new file mode 100644 index 00000000..641fbb26 --- /dev/null +++ b/include/libyuv/macros_msa.h @@ -0,0 +1,78 @@ +/* + * Copyright 2016 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef __MACROS_MSA_H__ +#define __MACROS_MSA_H__ + +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#include <stdint.h> +#include <msa.h> + +#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) +#define LD_UB(...) LD_B(v16u8, __VA_ARGS__) + +#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) +#define ST_UB(...) 
ST_B(v16u8, __VA_ARGS__) + +/* Description : Load two vectors with 16 'byte' sized elements + Arguments : Inputs - psrc, stride + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Load 16 byte elements in 'out0' from (psrc) + Load 16 byte elements in 'out1' from (psrc + stride) +*/ +#define LD_B2(RTYPE, psrc, stride, out0, out1) { \ + out0 = LD_B(RTYPE, (psrc)); \ + out1 = LD_B(RTYPE, (psrc) + stride); \ +} +#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__) +#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__) + +#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \ + LD_B2(RTYPE, (psrc), stride, out0, out1); \ + LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \ +} +#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__) +#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__) + +/* Description : Store two vectors with stride each having 16 'byte' sized + elements + Arguments : Inputs - in0, in1, pdst, stride + Details : Store 16 byte elements from 'in0' to (pdst) + Store 16 byte elements from 'in1' to (pdst + stride) +*/ +#define ST_B2(RTYPE, in0, in1, pdst, stride) { \ + ST_B(RTYPE, in0, (pdst)); \ + ST_B(RTYPE, in1, (pdst) + stride); \ +} +#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) +#define ST_SB2(...) ST_B2(v16i8, __VA_ARGS__) + +#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \ + ST_B2(RTYPE, in0, in1, (pdst), stride); \ + ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ +} +#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) +#define ST_SB4(...) 
ST_B4(v16i8, __VA_ARGS__) + +/* Description : Shuffle byte vector elements as per mask vector + Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Byte elements from 'in0' & 'in1' are copied selectively to + 'out0' as per control vector 'mask0' +*/ +#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \ + out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \ + out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \ +} +#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) +#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ +#endif /* __MACROS_MSA_H__ */ diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 013a7e53..d1ba8919 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -372,6 +372,10 @@ extern "C" { #endif #endif +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#define HAS_MIRRORROW_MSA +#endif + #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) #if defined(VISUALC_HAS_AVX2) #define SIMD_ALIGNED(var) __declspec(align(32)) var @@ -809,11 +813,13 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); void MirrorRow_NEON(const uint8* src, uint8* dst, int width); void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width); +void MirrorRow_MSA(const uint8* src, uint8* dst, int width); void MirrorRow_C(const uint8* src, uint8* dst, int width); void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width); void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width); void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width); +void MirrorRow_Any_MSA(const uint8* src, uint8* dst, int width); void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); @@ -8,10 +8,13 @@ import("//build_overrides/build.gni") 
import("//build/config/arm.gni") +import("//build/config/mips.gni") declare_args() { libyuv_include_tests = !build_with_chromium libyuv_disable_jpeg = false libyuv_use_neon = (current_cpu == "arm64" || (current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon))) + libyuv_use_msa = (current_cpu == "mips64el" || current_cpu == "mipsel") && + mips_use_msa } @@ -26,12 +26,18 @@ # Link-Time Optimizations. 'use_lto%': 0, 'build_neon': 0, + 'build_msa': 0, 'conditions': [ ['(target_arch == "armv7" or target_arch == "armv7s" or \ (target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\ and (arm_neon == 1 or arm_neon_optional == 1)', { 'build_neon': 1, }], + ['(target_arch == "mipsel" or target_arch == "mips64el")\ + and (mips_msa == 1)', + { + 'build_msa': 1, + }], ], }, @@ -79,6 +85,11 @@ }], ], }], + ['build_msa != 0', { + 'defines': [ + 'LIBYUV_MSA', + ], + }], ['OS != "ios" and libyuv_disable_jpeg != 1', { 'defines': [ 'HAVE_JPEG' diff --git a/libyuv.gypi b/libyuv.gypi index 73fdec0a..4f68a065 100644 --- a/libyuv.gypi +++ b/libyuv.gypi @@ -18,6 +18,7 @@ 'include/libyuv/convert_from.h', 'include/libyuv/convert_from_argb.h', 'include/libyuv/cpu_id.h', + 'include/libyuv/macros_msa.h', 'include/libyuv/mjpeg_decoder.h', 'include/libyuv/planar_functions.h', 'include/libyuv/rotate.h', @@ -61,6 +62,7 @@ 'source/row_common.cc', 'source/row_gcc.cc', 'source/row_mips.cc', + 'source/row_msa.cc', 'source/row_neon.cc', 'source/row_neon64.cc', 'source/row_win.cc', diff --git a/libyuv_test.gyp b/libyuv_test.gyp index b8ceca1f..2d70ee09 100644 --- a/libyuv_test.gyp +++ b/libyuv_test.gyp @@ -86,6 +86,12 @@ 'LIBYUV_NEON' ], }], + [ '(target_arch == "mipsel" or target_arch == "mips64el") \ + and (mips_msa == 1)', { + 'defines': [ + 'LIBYUV_MSA' + ], + }], ], # conditions 'defines': [ # Enable the following 3 macros to turn off assembly for specified CPU. 
diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 84927ebc..aaf58cf0 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -161,6 +161,38 @@ int ArmCpuCaps(const char* cpuinfo_name) { return 0; } +LIBYUV_API SAFEBUFFERS +int MipsCpuCaps(const char* cpuinfo_name, const char ase[]) { + char cpuinfo_line[512]; + int len = strlen(ase); + FILE* f = fopen(cpuinfo_name, "r"); + if (!f) { + // ase enabled if /proc/cpuinfo is unavailable. + if(strcmp(ase, " msa") == 0) { + return kCpuHasMSA; + } + if(strcmp(ase, " dspr2") == 0) { + return kCpuHasDSPR2; + } + } + while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) { + if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) { + char* p = strstr(cpuinfo_line, ase); + if (p && (p[len] == ' ' || p[len] == '\n')) { + fclose(f); + if(strcmp(ase, " msa") == 0) { + return kCpuHasMSA; + } + if(strcmp(ase, " dspr2") == 0) { + return kCpuHasDSPR2; + } + } + } + } + fclose(f); + return 0; +} + // CPU detect function for SIMD instruction sets. LIBYUV_API int cpu_info_ = 0; // cpu_info is not initialized yet. 
@@ -254,10 +286,16 @@ int InitCpuFlags(void) { #if defined(__mips_dspr2) cpu_info |= kCpuHasDSPR2; #endif +#if defined(__mips_msa) + cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa"); +#endif cpu_info |= kCpuHasMIPS; if (getenv("LIBYUV_DISABLE_DSPR2")) { cpu_info &= ~kCpuHasDSPR2; } + if (getenv("LIBYUV_DISABLE_MSA")) { + cpu_info &= ~kCpuHasMSA; + } #endif #if defined(__arm__) || defined(__aarch64__) // gcc -mfpu=neon defines __ARM_NEON__ diff --git a/source/planar_functions.cc b/source/planar_functions.cc index a764f8da..71f39b3b 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -402,6 +402,14 @@ void MirrorPlane(const uint8* src_y, int src_stride_y, MirrorRow = MirrorRow_DSPR2; } #endif +#if defined(HAS_MIRRORROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + MirrorRow = MirrorRow_Any_MSA; + if (IS_ALIGNED(width, 64)) { + MirrorRow = MirrorRow_MSA; + } +} +#endif // Mirror plane for (y = 0; y < height; ++y) { diff --git a/source/rotate.cc b/source/rotate.cc index 01ea5c40..bd36e81f 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -141,6 +141,14 @@ void RotatePlane180(const uint8* src, int src_stride, MirrorRow = MirrorRow_DSPR2; } #endif +#if defined(HAS_MIRRORROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + MirrorRow = MirrorRow_Any_MSA; + if (IS_ALIGNED(width, 64)) { + MirrorRow = MirrorRow_MSA; + } +} +#endif #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { CopyRow = IS_ALIGNED(width, 32) ? 
CopyRow_SSE2 : CopyRow_Any_SSE2; diff --git a/source/row_any.cc b/source/row_any.cc index 494164fd..14a59718 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -631,6 +631,9 @@ ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15) #ifdef HAS_MIRRORROW_NEON ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15) #endif +#ifdef HAS_MIRRORROW_MSA +ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63) +#endif #ifdef HAS_ARGBMIRRORROW_AVX2 ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7) #endif diff --git a/source/row_msa.cc b/source/row_msa.cc new file mode 100644 index 00000000..6dd6f5f3 --- /dev/null +++ b/source/row_msa.cc @@ -0,0 +1,45 @@ +/* + * Copyright 2016 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "libyuv/row.h" + +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#include "libyuv/macros_msa.h" +#endif + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { + int count; + v16u8 src0, src1, src2, src3; + v16u8 dst0, dst1, dst2, dst3; + v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + + src += width - 64; + + for (count = 0; count < width; count += 64) { + LD_UB4(src, 16, src3, src2, src1, src0); + VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2); + VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0); + ST_UB4(dst0, dst1, dst2, dst3, dst, 16); + dst += 64; + src -= 64; + } +} +#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif |