aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrank Barchard <fbarchard@google.com>2016-09-22 16:12:22 -0700
committerFrank Barchard <fbarchard@google.com>2016-09-22 16:12:22 -0700
commitc5323b0fdc3428b9341043e8adc2c2715a227330 (patch)
tree66ef37541c0632264ae45e3db484befcc6901286
parent5da918b48dd42281da74ca0c84a962c89d4d1430 (diff)
downloadlibyuv-c5323b0fdc3428b9341043e8adc2c2715a227330.tar.gz
Add MIPS SIMD Arch (MSA) optimized MirrorRow function
Following the preparation patch added to the Chromium sources (2150943003: Add MIPS SIMD Arch (MSA) build flags for GYP/GN builds), this patch adds the first MSA-optimized function to the libyuv project. BUG=libyuv:634 R=fbarchard@google.com Review URL: https://codereview.chromium.org/2285683002 .
-rw-r--r--Android.mk6
-rw-r--r--BUILD.gn15
-rw-r--r--CMakeLists.txt2
-rw-r--r--docs/getting_started.md10
-rw-r--r--include/libyuv/cpu_id.h1
-rw-r--r--include/libyuv/macros_msa.h78
-rw-r--r--include/libyuv/row.h6
-rw-r--r--libyuv.gni3
-rw-r--r--libyuv.gyp11
-rw-r--r--libyuv.gypi2
-rw-r--r--libyuv_test.gyp6
-rw-r--r--source/cpu_id.cc38
-rw-r--r--source/planar_functions.cc8
-rw-r--r--source/rotate.cc8
-rw-r--r--source/row_any.cc3
-rw-r--r--source/row_msa.cc45
16 files changed, 242 insertions, 0 deletions
diff --git a/Android.mk b/Android.mk
index 4d2092ac..3988cb96 100644
--- a/Android.mk
+++ b/Android.mk
@@ -53,6 +53,12 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
source/scale_neon.cc.neon
endif
+ifeq ($(TARGET_ARCH_ABI),mips)
+ LOCAL_CFLAGS += -DLIBYUV_MSA
+ LOCAL_SRC_FILES += \
+ source/row_msa.cc
+endif
+
LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/include
LOCAL_C_INCLUDES += $(LOCAL_PATH)/include
diff --git a/BUILD.gn b/BUILD.gn
index e2deffaf..fd8e2312 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -94,6 +94,10 @@ static_library("libyuv") {
deps += [ ":libyuv_neon" ]
}
+ if (libyuv_use_msa) {
+ deps += [ ":libyuv_msa" ]
+ }
+
if (is_nacl) {
# Always enable optimization under NaCl to workaround crbug.com/538243 .
configs -= [ "//build/config/compiler:default_optimization" ]
@@ -124,6 +128,17 @@ if (libyuv_use_neon) {
}
}
+if (libyuv_use_msa) {
+ static_library("libyuv_msa") {
+ sources = [
+ # MSA Source Files
+ "source/row_msa.cc",
+ ]
+
+ public_configs = [ ":libyuv_config" ]
+ }
+}
+
if (libyuv_include_tests) {
config("libyuv_unittest_warnings_config") {
if (!is_win) {
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 718b47ad..6b7d2ab1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -40,6 +40,7 @@ set(ly_source_files
${ly_src_dir}/row_any.cc
${ly_src_dir}/row_common.cc
${ly_src_dir}/row_mips.cc
+ ${ly_src_dir}/row_msa.cc
${ly_src_dir}/row_neon.cc
${ly_src_dir}/row_neon64.cc
${ly_src_dir}/row_gcc.cc
@@ -80,6 +81,7 @@ set(ly_header_files
${ly_inc_dir}/libyuv/convert_from.h
${ly_inc_dir}/libyuv/convert_from_argb.h
${ly_inc_dir}/libyuv/cpu_id.h
+ ${ly_inc_dir}/libyuv/macros_msa.h
${ly_inc_dir}/libyuv/planar_functions.h
${ly_inc_dir}/libyuv/rotate.h
${ly_inc_dir}/libyuv/rotate_argb.h
diff --git a/docs/getting_started.md b/docs/getting_started.md
index c119a82f..4a0948e6 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -195,6 +195,16 @@ Running test with C code:
gn gen out/Official "--args=is_debug=false is_official_build=true is_chrome_branded=true"
ninja -C out/Official
+#### Building mips with GN
+
+mipsel
+ gn gen out/Default "--args=is_debug=false target_cpu=\"mipsel\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false"
+ ninja -C out/Default
+
+mips64el
+ gn gen out/Default "--args=is_debug=false target_cpu=\"mips64el\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false"
+ ninja -C out/Default
+
### Linux
GYP_DEFINES="target_arch=x64" ./gyp_libyuv
diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h
index dfb7445e..0924cb3e 100644
--- a/include/libyuv/cpu_id.h
+++ b/include/libyuv/cpu_id.h
@@ -42,6 +42,7 @@ static const int kCpuHasAVX3 = 0x2000;
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x10000;
static const int kCpuHasDSPR2 = 0x20000;
+static const int kCpuHasMSA = 0x40000;
// Internal function used to auto-init.
LIBYUV_API
diff --git a/include/libyuv/macros_msa.h b/include/libyuv/macros_msa.h
new file mode 100644
index 00000000..641fbb26
--- /dev/null
+++ b/include/libyuv/macros_msa.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2016 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef __MACROS_MSA_H__
+#define __MACROS_MSA_H__
+
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#include <stdint.h>
+#include <msa.h>
+
+/* Description : Load one vector through a pointer of type RTYPE
+   Arguments   : Inputs      - psrc
+                 Return Type - as per RTYPE
+   Details     : Dereferences (psrc) cast to (RTYPE*). The expansion is fully
+                 parenthesized so it can be used inside larger expressions.
+*/
+#define LD_B(RTYPE, psrc) (*((RTYPE*)(psrc)))
+#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
+
+/* Description : Store one vector through a pointer of type RTYPE
+   Arguments   : Inputs - in, pdst
+   Details     : Assigns 'in' to the object at (pdst) cast to (RTYPE*)
+*/
+#define ST_B(RTYPE, in, pdst) (*((RTYPE*)(pdst)) = (in))
+#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
+
+/* Description : Load two vectors with 16 'byte' sized elements
+ Arguments : Inputs - psrc, stride
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Load 16 byte elements in 'out0' from (psrc)
+ Load 16 byte elements in 'out1' from (psrc + stride)
+*/
+#define LD_B2(RTYPE, psrc, stride, out0, out1) { \
+ out0 = LD_B(RTYPE, (psrc)); \
+ out1 = LD_B(RTYPE, (psrc) + stride); \
+}
+#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
+#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__)
+
+#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \
+ LD_B2(RTYPE, (psrc), stride, out0, out1); \
+ LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \
+}
+#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
+#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__)
+
+/* Description : Store two vectors with stride each having 16 'byte' sized
+ elements
+ Arguments : Inputs - in0, in1, pdst, stride
+ Details : Store 16 byte elements from 'in0' to (pdst)
+ Store 16 byte elements from 'in1' to (pdst + stride)
+*/
+#define ST_B2(RTYPE, in0, in1, pdst, stride) { \
+ ST_B(RTYPE, in0, (pdst)); \
+ ST_B(RTYPE, in1, (pdst) + stride); \
+}
+#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
+#define ST_SB2(...) ST_B2(v16i8, __VA_ARGS__)
+
+#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \
+ ST_B2(RTYPE, in0, in1, (pdst), stride); \
+ ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
+}
+#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
+#define ST_SB4(...) ST_B4(v16i8, __VA_ARGS__)
+
+/* Description : Shuffle byte vector elements as per mask vector
+ Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Byte elements from 'in0' & 'in1' are copied selectively to
+ 'out0' as per control vector 'mask0'
+*/
+#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \
+ out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \
+ out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \
+}
+#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
+#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
+#endif /* __MACROS_MSA_H__ */
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 013a7e53..d1ba8919 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -372,6 +372,10 @@ extern "C" {
#endif
#endif
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#define HAS_MIRRORROW_MSA
+#endif
+
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
#if defined(VISUALC_HAS_AVX2)
#define SIMD_ALIGNED(var) __declspec(align(32)) var
@@ -809,11 +813,13 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width);
+void MirrorRow_MSA(const uint8* src, uint8* dst, int width);
void MirrorRow_C(const uint8* src, uint8* dst, int width);
void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width);
void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
+void MirrorRow_Any_MSA(const uint8* src, uint8* dst, int width);
void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width);
diff --git a/libyuv.gni b/libyuv.gni
index 2f6930d7..89e4d382 100644
--- a/libyuv.gni
+++ b/libyuv.gni
@@ -8,10 +8,13 @@
import("//build_overrides/build.gni")
import("//build/config/arm.gni")
+import("//build/config/mips.gni")
declare_args() {
libyuv_include_tests = !build_with_chromium
libyuv_disable_jpeg = false
libyuv_use_neon = (current_cpu == "arm64" ||
(current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon)))
+ libyuv_use_msa = (current_cpu == "mips64el" || current_cpu == "mipsel") &&
+ mips_use_msa
}
diff --git a/libyuv.gyp b/libyuv.gyp
index abd012ac..7c6c8046 100644
--- a/libyuv.gyp
+++ b/libyuv.gyp
@@ -26,12 +26,18 @@
# Link-Time Optimizations.
'use_lto%': 0,
'build_neon': 0,
+ 'build_msa': 0,
'conditions': [
['(target_arch == "armv7" or target_arch == "armv7s" or \
(target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\
and (arm_neon == 1 or arm_neon_optional == 1)', {
'build_neon': 1,
}],
+ ['(target_arch == "mipsel" or target_arch == "mips64el")\
+ and (mips_msa == 1)',
+ {
+ 'build_msa': 1,
+ }],
],
},
@@ -79,6 +85,11 @@
}],
],
}],
+ ['build_msa != 0', {
+ 'defines': [
+ 'LIBYUV_MSA',
+ ],
+ }],
['OS != "ios" and libyuv_disable_jpeg != 1', {
'defines': [
'HAVE_JPEG'
diff --git a/libyuv.gypi b/libyuv.gypi
index 73fdec0a..4f68a065 100644
--- a/libyuv.gypi
+++ b/libyuv.gypi
@@ -18,6 +18,7 @@
'include/libyuv/convert_from.h',
'include/libyuv/convert_from_argb.h',
'include/libyuv/cpu_id.h',
+ 'include/libyuv/macros_msa.h',
'include/libyuv/mjpeg_decoder.h',
'include/libyuv/planar_functions.h',
'include/libyuv/rotate.h',
@@ -61,6 +62,7 @@
'source/row_common.cc',
'source/row_gcc.cc',
'source/row_mips.cc',
+ 'source/row_msa.cc',
'source/row_neon.cc',
'source/row_neon64.cc',
'source/row_win.cc',
diff --git a/libyuv_test.gyp b/libyuv_test.gyp
index b8ceca1f..2d70ee09 100644
--- a/libyuv_test.gyp
+++ b/libyuv_test.gyp
@@ -86,6 +86,12 @@
'LIBYUV_NEON'
],
}],
+ [ '(target_arch == "mipsel" or target_arch == "mips64el") \
+ and (mips_msa == 1)', {
+ 'defines': [
+ 'LIBYUV_MSA'
+ ],
+ }],
], # conditions
'defines': [
# Enable the following 3 macros to turn off assembly for specified CPU.
diff --git a/source/cpu_id.cc b/source/cpu_id.cc
index 84927ebc..aaf58cf0 100644
--- a/source/cpu_id.cc
+++ b/source/cpu_id.cc
@@ -161,6 +161,38 @@ int ArmCpuCaps(const char* cpuinfo_name) {
return 0;
}
+// Reports whether the MIPS ASE named by |ase| is listed in |cpuinfo_name|.
+//   cpuinfo_name: path of the cpuinfo file to scan (e.g. "/proc/cpuinfo").
+//   ase: ASE token including its leading space; " msa" and " dspr2" are the
+//        recognised values.
+// Returns kCpuHasMSA or kCpuHasDSPR2 when a line starting with
+// "ASEs implemented" contains |ase| followed by a space or newline, and 0
+// otherwise. If the file cannot be opened, a recognised ASE is reported as
+// present. An unrecognised |ase| always yields 0.
+LIBYUV_API SAFEBUFFERS
+int MipsCpuCaps(const char* cpuinfo_name, const char ase[]) {
+  char cpuinfo_line[512];
+  int len = (int)strlen(ase);
+  int flag = 0;
+  FILE* f;
+  // Resolve the flag once so every exit path agrees on the result.
+  if (strcmp(ase, " msa") == 0) {
+    flag = kCpuHasMSA;
+  } else if (strcmp(ase, " dspr2") == 0) {
+    flag = kCpuHasDSPR2;
+  }
+  f = fopen(cpuinfo_name, "r");
+  if (!f) {
+    // ase enabled if /proc/cpuinfo is unavailable.
+    // Always return here: a NULL stream must never reach fgets/fclose.
+    return flag;
+  }
+  while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
+    // strncmp stops at the terminator, so short lines are never read past
+    // the bytes fgets actually wrote.
+    if (strncmp(cpuinfo_line, "ASEs implemented", 16) == 0) {
+      char* p = strstr(cpuinfo_line, ase);
+      // Require a delimiter after the token so a longer token sharing the
+      // same prefix is not accepted.
+      if (p && (p[len] == ' ' || p[len] == '\n')) {
+        // Return right after closing; the stream is never touched again.
+        fclose(f);
+        return flag;
+      }
+    }
+  }
+  fclose(f);
+  return 0;
+}
+
// CPU detect function for SIMD instruction sets.
LIBYUV_API
int cpu_info_ = 0; // cpu_info is not initialized yet.
@@ -254,10 +286,16 @@ int InitCpuFlags(void) {
#if defined(__mips_dspr2)
cpu_info |= kCpuHasDSPR2;
#endif
+#if defined(__mips_msa)
+  // OR the result in so the kCpuHasDSPR2 bit set just above is preserved.
+  cpu_info |= MipsCpuCaps("/proc/cpuinfo", " msa");
+#endif
cpu_info |= kCpuHasMIPS;
if (getenv("LIBYUV_DISABLE_DSPR2")) {
cpu_info &= ~kCpuHasDSPR2;
}
+ if (getenv("LIBYUV_DISABLE_MSA")) {
+ cpu_info &= ~kCpuHasMSA;
+ }
#endif
#if defined(__arm__) || defined(__aarch64__)
// gcc -mfpu=neon defines __ARM_NEON__
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index a764f8da..71f39b3b 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -402,6 +402,14 @@ void MirrorPlane(const uint8* src_y, int src_stride_y,
MirrorRow = MirrorRow_DSPR2;
}
#endif
+#if defined(HAS_MIRRORROW_MSA)
+  // With MSA available, call the MSA row function directly only when width
+  // is a multiple of 64; otherwise use the _Any_ variant.
+  if (TestCpuFlag(kCpuHasMSA)) {
+    MirrorRow = IS_ALIGNED(width, 64) ? MirrorRow_MSA : MirrorRow_Any_MSA;
+  }
+#endif
// Mirror plane
for (y = 0; y < height; ++y) {
diff --git a/source/rotate.cc b/source/rotate.cc
index 01ea5c40..bd36e81f 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -141,6 +141,14 @@ void RotatePlane180(const uint8* src, int src_stride,
MirrorRow = MirrorRow_DSPR2;
}
#endif
+#if defined(HAS_MIRRORROW_MSA)
+  // With MSA available, call the MSA row function directly only when width
+  // is a multiple of 64; otherwise use the _Any_ variant.
+  if (TestCpuFlag(kCpuHasMSA)) {
+    MirrorRow = IS_ALIGNED(width, 64) ? MirrorRow_MSA : MirrorRow_Any_MSA;
+  }
+#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
diff --git a/source/row_any.cc b/source/row_any.cc
index 494164fd..14a59718 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -631,6 +631,9 @@ ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
#ifdef HAS_MIRRORROW_NEON
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
#endif
+#ifdef HAS_MIRRORROW_MSA
+ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
+#endif
#ifdef HAS_ARGBMIRRORROW_AVX2
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
#endif
diff --git a/source/row_msa.cc b/source/row_msa.cc
new file mode 100644
index 00000000..6dd6f5f3
--- /dev/null
+++ b/source/row_msa.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2016 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#include "libyuv/macros_msa.h"
+#endif
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+// Writes the bytes of |src| to |dst| in reverse order, 64 bytes per
+// iteration. Reads start from the last 64 bytes of the source row and move
+// backwards while writes move forwards through |dst|.
+// NOTE(review): the loop always handles whole 64-byte groups, so callers are
+// presumably expected to pass a width that is a multiple of 64 - confirm.
+void MirrorRow_MSA(const uint8* src, uint8* dst, int width) {
+  int remaining;
+  v16u8 in0, in1, in2, in3;
+  v16u8 out0, out1, out2, out3;
+  // Shuffle control that reverses the 16 bytes of a vector.
+  v16i8 reverse = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+
+  // Point at the final 64-byte group of the source row.
+  src = src + (width - 64);
+
+  for (remaining = width; remaining > 0; remaining -= 64) {
+    // in3 holds the lowest-addressed 16 bytes of the group, in0 the highest.
+    LD_UB4(src, 16, in3, in2, in1, in0);
+    VSHF_B2_UB(in3, in3, in2, in2, reverse, reverse, out3, out2);
+    VSHF_B2_UB(in1, in1, in0, in0, reverse, reverse, out1, out0);
+    // Store the reversed vectors highest-addressed first.
+    ST_UB4(out0, out1, out2, out3, dst, 16);
+    src -= 64;
+    dst += 64;
+  }
+}
+#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif