author    Frank Barchard <fbarchard@google.com>    2017-12-13 17:38:52 -0800
committer Commit Bot <commit-bot@chromium.org>     2017-12-14 18:22:16 +0000
commit    3b81288ecef7ff63ca773040431cba728c9a3621 (patch)
tree      05d07d0a6af8541c11e338aab6074a6408fe66f7
parent    bb3180ae807ddf55335926d5f53b3856e2882b1c (diff)
download  libyuv-3b81288ecef7ff63ca773040431cba728c9a3621.tar.gz
Remove Mips DSPR2 code
Bug: libyuv:765
Test: build for mips still passes
Change-Id: I99105ad3951d2210c0793e3b9241c178442fdc37
Reviewed-on: https://chromium-review.googlesource.com/826404
Reviewed-by: Weiyong Yao <braveyao@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
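The removed *_DSPR2 row functions were only ever reached through libyuv's runtime CPU dispatch, the same pattern the surviving MSA and NEON paths still use. A minimal sketch of that dispatch, assuming only the public TestCpuFlag()/kCpuHas* API from include/libyuv/cpu_id.h (flags visible in the hunk for that header below):

    // Sketch: runtime SIMD selection via libyuv's cpu_id API.
    // After this change, MIPS builds can report kCpuHasMSA but the
    // kCpuHasDSPR2 flag no longer exists.
    #include <cstdio>
    #include "libyuv/cpu_id.h"

    int main() {
      if (libyuv::TestCpuFlag(libyuv::kCpuHasMSA)) {
        std::printf("using MSA row functions\n");   // MIPS SIMD Architecture
      } else if (libyuv::TestCpuFlag(libyuv::kCpuHasNEON)) {
        std::printf("using NEON row functions\n");  // Arm
      } else {
        std::printf("using portable C row functions\n");
      }
      return 0;
    }

The per-function init blocks in the hunks below (e.g. ARGBToYRow = ARGBToYRow_Any_MSA under TestCpuFlag(kCpuHasMSA)) follow this same pattern, which is why deleting the DSPR2 branches removes dispatch cases without changing any public conversion API.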
-rw-r--r--  Android.bp                        3
-rw-r--r--  Android.mk                        3
-rw-r--r--  BUILD.gn                          4
-rw-r--r--  README.md                         8
-rw-r--r--  docs/environment_variables.md     2
-rw-r--r--  docs/getting_started.md          11
-rw-r--r--  include/libyuv/cpu_id.h           3
-rw-r--r--  include/libyuv/rotate_row.h      35
-rw-r--r--  include/libyuv/row.h            194
-rw-r--r--  include/libyuv/scale_row.h       55
-rw-r--r--  libyuv.gyp                        1
-rw-r--r--  libyuv.gypi                       3
-rw-r--r--  libyuv_test.gyp                   1
-rw-r--r--  linux.mk                          3
-rw-r--r--  source/convert.cc                77
-rw-r--r--  source/convert_argb.cc           91
-rw-r--r--  source/convert_from.cc           33
-rw-r--r--  source/convert_from_argb.cc      96
-rw-r--r--  source/cpu_id.cc                 10
-rw-r--r--  source/planar_functions.cc       45
-rw-r--r--  source/rotate.cc                 35
-rw-r--r--  source/rotate_any.cc              6
-rw-r--r--  source/rotate_argb.cc             5
-rw-r--r--  source/rotate_dspr2.cc          475
-rw-r--r--  source/row_any.cc                54
-rw-r--r--  source/row_dspr2.cc            1721
-rw-r--r--  source/scale.cc                 122
-rw-r--r--  source/scale_any.cc               3
-rw-r--r--  source/scale_argb.cc             30
-rw-r--r--  source/scale_common.cc           20
-rw-r--r--  source/scale_dspr2.cc           668
-rw-r--r--  unit_test/cpu_test.cc             2
-rw-r--r--  util/cpuid.c                      4
33 files changed, 13 insertions(+), 3810 deletions(-)
diff --git a/Android.bp b/Android.bp
index a3d8d834..7d95a786 100644
--- a/Android.bp
+++ b/Android.bp
@@ -24,14 +24,12 @@ cc_library {
"source/rotate_any.cc",
"source/rotate_argb.cc",
"source/rotate_common.cc",
- "source/rotate_dspr2.cc",
"source/rotate_gcc.cc",
"source/rotate_msa.cc",
"source/rotate_neon.cc",
"source/rotate_neon64.cc",
"source/row_any.cc",
"source/row_common.cc",
- "source/row_dspr2.cc",
"source/row_gcc.cc",
"source/row_msa.cc",
"source/row_neon.cc",
@@ -40,7 +38,6 @@ cc_library {
"source/scale_any.cc",
"source/scale_argb.cc",
"source/scale_common.cc",
- "source/scale_dspr2.cc",
"source/scale_gcc.cc",
"source/scale_msa.cc",
"source/scale_neon.cc",
diff --git a/Android.mk b/Android.mk
index 85402061..dbc6cad3 100644
--- a/Android.mk
+++ b/Android.mk
@@ -24,14 +24,12 @@ LOCAL_SRC_FILES := \
source/rotate_any.cc \
source/rotate_argb.cc \
source/rotate_common.cc \
- source/rotate_dspr2.cc \
source/rotate_gcc.cc \
source/rotate_msa.cc \
source/rotate_neon.cc \
source/rotate_neon64.cc \
source/row_any.cc \
source/row_common.cc \
- source/row_dspr2.cc \
source/row_gcc.cc \
source/row_msa.cc \
source/row_neon.cc \
@@ -40,7 +38,6 @@ LOCAL_SRC_FILES := \
source/scale_any.cc \
source/scale_argb.cc \
source/scale_common.cc \
- source/scale_dspr2.cc \
source/scale_gcc.cc \
source/scale_msa.cc \
source/scale_neon.cc \
diff --git a/BUILD.gn b/BUILD.gn
index 0b28db35..f583edfe 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -110,19 +110,16 @@ static_library("libyuv_internal") {
"source/rotate_any.cc",
"source/rotate_argb.cc",
"source/rotate_common.cc",
- "source/rotate_dspr2.cc",
"source/rotate_gcc.cc",
"source/rotate_win.cc",
"source/row_any.cc",
"source/row_common.cc",
- "source/row_dspr2.cc",
"source/row_gcc.cc",
"source/row_win.cc",
"source/scale.cc",
"source/scale_any.cc",
"source/scale_argb.cc",
"source/scale_common.cc",
- "source/scale_dspr2.cc",
"source/scale_gcc.cc",
"source/scale_win.cc",
"source/video_common.cc",
@@ -302,7 +299,6 @@ if (libyuv_include_tests) {
# Enable the following 3 macros to turn off assembly for specified CPU.
# "LIBYUV_DISABLE_X86",
# "LIBYUV_DISABLE_NEON",
- # "LIBYUV_DISABLE_DSPR2",
# Enable the following macro to build libyuv as a shared library (dll).
# "LIBYUV_USING_SHARED_LIBRARY"
]
diff --git a/README.md b/README.md
index b59b71c5..7b661922 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
**libyuv** is an open source project that includes YUV scaling and conversion functionality.
* Scale YUV to prepare content for compression, with point, bilinear or box filter.
-* Convert to YUV from webcam formats.
-* Convert from YUV to formats for rendering/effects.
+* Convert to YUV from webcam formats for compression.
+* Convert to RGB formats for rendering/effects.
* Rotate by 90/180/270 degrees to adjust for mobile devices in portrait mode.
-* Optimized for SSE2/SSSE3/AVX2 on x86/x64.
+* Optimized for SSSE3/AVX2 on x86/x64.
* Optimized for Neon on Arm.
-* Optimized for DSP R2 on Mips.
+* Optimized for MSA on Mips.
### Development
diff --git a/docs/environment_variables.md b/docs/environment_variables.md
index 9071c54d..c28d83e7 100644
--- a/docs/environment_variables.md
+++ b/docs/environment_variables.md
@@ -17,7 +17,7 @@ By default the cpu is detected and the most advanced form of SIMD is used. But
LIBYUV_DISABLE_AVX512BW
LIBYUV_DISABLE_ERMS
LIBYUV_DISABLE_FMA3
- LIBYUV_DISABLE_DSPR2
+ LIBYUV_DISABLE_MSA
LIBYUV_DISABLE_NEON
# Test Width/Height/Repeat
diff --git a/docs/getting_started.md b/docs/getting_started.md
index 58e05f3c..fefffce4 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -129,15 +129,10 @@ ia32
ninja -v -C out/Debug libyuv_unittest
ninja -v -C out/Release libyuv_unittest
-mipsel
+mips
- gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mipsel\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false"
- gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mipsel\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false"
- ninja -v -C out/Debug libyuv_unittest
- ninja -v -C out/Release libyuv_unittest
-
- gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false"
- gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false"
+ gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=true"
+ gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=true"
ninja -v -C out/Debug libyuv_unittest
ninja -v -C out/Release libyuv_unittest
diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h
index c2e9bbbd..14f735f5 100644
--- a/include/libyuv/cpu_id.h
+++ b/include/libyuv/cpu_id.h
@@ -47,8 +47,7 @@ static const int kCpuHasAVX512VPOPCNTDQ = 0x100000;
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x200000;
-static const int kCpuHasDSPR2 = 0x400000;
-static const int kCpuHasMSA = 0x800000;
+static const int kCpuHasMSA = 0x400000;
// Optional init function. TestCpuFlag does an auto-init.
// Returns cpu_info flags.
diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h
index 973fc152..7e9dfd2c 100644
--- a/include/libyuv/rotate_row.h
+++ b/include/libyuv/rotate_row.h
@@ -54,12 +54,6 @@ extern "C" {
#define HAS_TRANSPOSEUVWX8_NEON
#endif
-#if !defined(LIBYUV_DISABLE_DSPR2) && !defined(__native_client__) && \
- defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_TRANSPOSEWX8_DSPR2
-#define HAS_TRANSPOSEUVWX8_DSPR2
-#endif // defined(__mips__)
-
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_TRANSPOSEWX16_MSA
#define HAS_TRANSPOSEUVWX16_MSA
@@ -97,16 +91,6 @@ void TransposeWx8_Fast_SSSE3(const uint8* src,
uint8* dst,
int dst_stride,
int width);
-void TransposeWx8_DSPR2(const uint8* src,
- int src_stride,
- uint8* dst,
- int dst_stride,
- int width);
-void TransposeWx8_Fast_DSPR2(const uint8* src,
- int src_stride,
- uint8* dst,
- int dst_stride,
- int width);
void TransposeWx16_MSA(const uint8* src,
int src_stride,
uint8* dst,
@@ -128,11 +112,6 @@ void TransposeWx8_Fast_Any_SSSE3(const uint8* src,
uint8* dst,
int dst_stride,
int width);
-void TransposeWx8_Any_DSPR2(const uint8* src,
- int src_stride,
- uint8* dst,
- int dst_stride,
- int width);
void TransposeWx16_Any_MSA(const uint8* src,
int src_stride,
uint8* dst,
@@ -176,13 +155,6 @@ void TransposeUVWx8_NEON(const uint8* src,
uint8* dst_b,
int dst_stride_b,
int width);
-void TransposeUVWx8_DSPR2(const uint8* src,
- int src_stride,
- uint8* dst_a,
- int dst_stride_a,
- uint8* dst_b,
- int dst_stride_b,
- int width);
void TransposeUVWx16_MSA(const uint8* src,
int src_stride,
uint8* dst_a,
@@ -205,13 +177,6 @@ void TransposeUVWx8_Any_NEON(const uint8* src,
uint8* dst_b,
int dst_stride_b,
int width);
-void TransposeUVWx8_Any_DSPR2(const uint8* src,
- int src_stride,
- uint8* dst_a,
- int dst_stride_a,
- uint8* dst_b,
- int dst_stride_b,
- int width);
void TransposeUVWx16_Any_MSA(const uint8* src,
int src_stride,
uint8* dst_a,
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index cb719693..7c9ca04a 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -380,37 +380,6 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#define HAS_SCALESUMSAMPLES_NEON
#endif
-
-// The following are available on Mips platforms:
-#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips__) && \
- (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
-#define HAS_COPYROW_MIPS
-#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_I422TOARGBROW_DSPR2
-#define HAS_INTERPOLATEROW_DSPR2
-#define HAS_MIRRORROW_DSPR2
-#define HAS_MIRRORUVROW_DSPR2
-#define HAS_SPLITUVROW_DSPR2
-#define HAS_RGB24TOARGBROW_DSPR2
-#define HAS_RAWTOARGBROW_DSPR2
-#define HAS_RGB565TOARGBROW_DSPR2
-#define HAS_ARGB1555TOARGBROW_DSPR2
-#define HAS_ARGB4444TOARGBROW_DSPR2
-#define HAS_I444TOARGBROW_DSPR2
-#define HAS_I422TOARGB4444ROW_DSPR2
-#define HAS_I422TOARGB1555ROW_DSPR2
-#define HAS_NV12TOARGBROW_DSPR2
-#define HAS_BGRATOUVROW_DSPR2
-#define HAS_BGRATOYROW_DSPR2
-#define HAS_ABGRTOUVROW_DSPR2
-#define HAS_ARGBTOYROW_DSPR2
-#define HAS_ABGRTOYROW_DSPR2
-#define HAS_RGBATOUVROW_DSPR2
-#define HAS_RGBATOYROW_DSPR2
-#define HAS_ARGBTOUVROW_DSPR2
-#endif
-#endif
-
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_ABGRTOUVROW_MSA
#define HAS_ABGRTOYROW_MSA
@@ -797,29 +766,6 @@ void I444ToARGBRow_MSA(const uint8* src_y,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I444ToARGBRow_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width);
-void I422ToARGB4444Row_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- const struct YuvConstants* yuvconstants,
- int width);
-void I422ToARGB1555Row_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- const struct YuvConstants* yuvconstants,
- int width);
-void NV12ToARGBRow_DSPR2(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width);
void I422ToARGBRow_MSA(const uint8* src_y,
const uint8* src_u,
@@ -1021,30 +967,6 @@ void RGB24ToYRow_MSA(const uint8* src_rgb24, uint8* dst_y, int width);
void RAWToYRow_MSA(const uint8* src_raw, uint8* dst_y, int width);
void RGB565ToYRow_MSA(const uint8* src_rgb565, uint8* dst_y, int width);
void ARGB1555ToYRow_MSA(const uint8* src_argb1555, uint8* dst_y, int width);
-void BGRAToUVRow_DSPR2(const uint8* src_bgra,
- int src_stride_bgra,
- uint8* dst_u,
- uint8* dst_v,
- int width);
-void BGRAToYRow_DSPR2(const uint8* src_bgra, uint8* dst_y, int width);
-void ABGRToUVRow_DSPR2(const uint8* src_abgr,
- int src_stride_abgr,
- uint8* dst_u,
- uint8* dst_v,
- int width);
-void ARGBToYRow_DSPR2(const uint8* src_argb, uint8* dst_y, int width);
-void ABGRToYRow_DSPR2(const uint8* src_abgr, uint8* dst_y, int width);
-void RGBAToUVRow_DSPR2(const uint8* src_rgba,
- int src_stride_rgba,
- uint8* dst_u,
- uint8* dst_v,
- int width);
-void RGBAToYRow_DSPR2(const uint8* src_rgba, uint8* dst_y, int width);
-void ARGBToUVRow_DSPR2(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
- int width);
void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width);
void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width);
void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width);
@@ -1073,10 +995,6 @@ void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555,
uint8* dst_y,
int width);
-void BGRAToYRow_Any_DSPR2(const uint8* src_bgra, uint8* dst_y, int width);
-void ARGBToYRow_Any_DSPR2(const uint8* src_argb, uint8* dst_y, int width);
-void ABGRToYRow_Any_DSPR2(const uint8* src_abgr, uint8* dst_y, int width);
-void RGBAToYRow_Any_DSPR2(const uint8* src_rgba, uint8* dst_y, int width);
void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_y,
int width);
@@ -1263,26 +1181,6 @@ void ARGB1555ToUVRow_Any_MSA(const uint8* src_argb1555,
uint8* dst_u,
uint8* dst_v,
int width);
-void BGRAToUVRow_Any_DSPR2(const uint8* src_bgra,
- int src_stride_bgra,
- uint8* dst_u,
- uint8* dst_v,
- int width);
-void ABGRToUVRow_Any_DSPR2(const uint8* src_abgr,
- int src_stride_abgr,
- uint8* dst_u,
- uint8* dst_v,
- int width);
-void RGBAToUVRow_Any_DSPR2(const uint8* src_rgba,
- int src_stride_rgba,
- uint8* dst_u,
- uint8* dst_v,
- int width);
-void ARGBToUVRow_Any_DSPR2(const uint8* src_argb,
- int src_stride_argb,
- uint8* dst_u,
- uint8* dst_v,
- int width);
void ARGBToUVRow_C(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
@@ -1361,7 +1259,6 @@ void ARGBToUV444Row_C(const uint8* src_argb,
void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width);
void MirrorRow_MSA(const uint8* src, uint8* dst, int width);
void MirrorRow_C(const uint8* src, uint8* dst, int width);
void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
@@ -1378,10 +1275,6 @@ void MirrorUVRow_NEON(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
int width);
-void MirrorUVRow_DSPR2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
- int width);
void MirrorUVRow_MSA(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
@@ -1411,10 +1304,6 @@ void SplitUVRow_NEON(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
int width);
-void SplitUVRow_DSPR2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
- int width);
void SplitUVRow_MSA(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
void SplitUVRow_Any_SSE2(const uint8* src_uv,
uint8* dst_u,
@@ -1428,10 +1317,6 @@ void SplitUVRow_Any_NEON(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
int width);
-void SplitUVRow_Any_DSPR2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
- int width);
void SplitUVRow_Any_MSA(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
@@ -1707,15 +1592,6 @@ void ARGB1555ToARGBRow_MSA(const uint8* src_argb1555,
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
uint8* dst_argb,
int width);
-void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width);
-void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width);
-void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565, uint8* dst_argb, int width);
-void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555,
- uint8* dst_argb,
- int width);
-void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444,
- uint8* dst_argb,
- int width);
void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
uint8* dst_argb,
int width);
@@ -1773,19 +1649,6 @@ void ARGB1555ToARGBRow_Any_MSA(const uint8* src_argb1555,
void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_argb,
int width);
-void RGB24ToARGBRow_Any_DSPR2(const uint8* src_rgb24,
- uint8* dst_argb,
- int width);
-void RAWToARGBRow_Any_DSPR2(const uint8* src_raw, uint8* dst_argb, int width);
-void RGB565ToARGBRow_Any_DSPR2(const uint8* src_rgb565,
- uint8* dst_argb,
- int width);
-void ARGB1555ToARGBRow_Any_DSPR2(const uint8* src_argb1555,
- uint8* dst_argb,
- int width);
-void ARGB4444ToARGBRow_Any_DSPR2(const uint8* src_argb4444,
- uint8* dst_argb,
- int width);
void ARGB4444ToARGBRow_Any_MSA(const uint8* src_argb4444,
uint8* dst_argb,
@@ -2543,53 +2406,6 @@ void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void I444ToARGBRow_Any_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width);
-void I422ToARGB4444Row_Any_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width);
-void I422ToARGBRow_Any_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width);
-void I422ToARGBRow_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width);
-void I422ToARGB1555Row_Any_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width);
-void I411ToARGBRow_Any_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width);
-void NV12ToARGBRow_Any_DSPR2(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width);
-void I422ToARGBRow_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width);
void I444ToARGBRow_Any_MSA(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
@@ -3088,11 +2904,6 @@ void InterpolateRow_NEON(uint8* dst_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
-void InterpolateRow_DSPR2(uint8* dst_ptr,
- const uint8* src_ptr,
- ptrdiff_t src_stride_ptr,
- int width,
- int source_y_fraction);
void InterpolateRow_MSA(uint8* dst_ptr,
const uint8* src_ptr,
ptrdiff_t src_stride_ptr,
@@ -3113,11 +2924,6 @@ void InterpolateRow_Any_AVX2(uint8* dst_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
-void InterpolateRow_Any_DSPR2(uint8* dst_ptr,
- const uint8* src_ptr,
- ptrdiff_t src_stride_ptr,
- int width,
- int source_y_fraction);
void InterpolateRow_Any_MSA(uint8* dst_ptr,
const uint8* src_ptr,
ptrdiff_t src_stride_ptr,
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index c4a66aa0..3db46d39 100644
--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -94,16 +94,6 @@ extern "C" {
#define HAS_SCALEARGBFILTERCOLS_NEON
#endif
-// The following are available on Mips platforms:
-#if !defined(LIBYUV_DISABLE_DSPR2) && !defined(__native_client__) && \
- defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_SCALEROWDOWN2_DSPR2
-#define HAS_SCALEROWDOWN4_DSPR2
-#define HAS_SCALEROWDOWN34_DSPR2
-#define HAS_SCALEROWDOWN38_DSPR2
-#define HAS_SCALEADDROW_DSPR2
-#endif
-
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_SCALEADDROW_MSA
#define HAS_SCALEARGBCOLS_MSA
@@ -831,51 +821,6 @@ void ScaleFilterCols_Any_NEON(uint8* dst_ptr,
int x,
int dx);
-void ScaleRowDown2_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst,
- int dst_width);
-void ScaleRowDown2Box_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst,
- int dst_width);
-void ScaleRowDown4_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst,
- int dst_width);
-void ScaleRowDown4Box_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst,
- int dst_width);
-void ScaleRowDown34_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst,
- int dst_width);
-void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* d,
- int dst_width);
-void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* d,
- int dst_width);
-void ScaleRowDown38_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst,
- int dst_width);
-void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr,
- int dst_width);
-void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr,
- int dst_width);
-void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-void ScaleAddRow_Any_DSPR2(const uint8* src_ptr,
- uint16* dst_ptr,
- int src_width);
-
void ScaleRowDown2_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
diff --git a/libyuv.gyp b/libyuv.gyp
index f73a1a4b..e853ba31 100644
--- a/libyuv.gyp
+++ b/libyuv.gyp
@@ -121,7 +121,6 @@
# Enable the following 3 macros to turn off assembly for specified CPU.
# 'LIBYUV_DISABLE_X86',
# 'LIBYUV_DISABLE_NEON',
- # 'LIBYUV_DISABLE_DSPR2',
# Enable the following macro to build libyuv as a shared library (dll).
# 'LIBYUV_USING_SHARED_LIBRARY',
# TODO(fbarchard): Make these into gyp defines.
diff --git a/libyuv.gypi b/libyuv.gypi
index ec81bc9b..9467adfc 100644
--- a/libyuv.gypi
+++ b/libyuv.gypi
@@ -55,7 +55,6 @@
'source/rotate_argb.cc',
'source/rotate_common.cc',
'source/rotate_gcc.cc',
- 'source/rotate_dspr2.cc',
'source/rotate_msa.cc',
'source/rotate_neon.cc',
'source/rotate_neon64.cc',
@@ -63,7 +62,6 @@
'source/row_any.cc',
'source/row_common.cc',
'source/row_gcc.cc',
- 'source/row_dspr2.cc',
'source/row_msa.cc',
'source/row_neon.cc',
'source/row_neon64.cc',
@@ -73,7 +71,6 @@
'source/scale_argb.cc',
'source/scale_common.cc',
'source/scale_gcc.cc',
- 'source/scale_dspr2.cc',
'source/scale_msa.cc',
'source/scale_neon.cc',
'source/scale_neon64.cc',
diff --git a/libyuv_test.gyp b/libyuv_test.gyp
index 4222cf26..5fe154c6 100644
--- a/libyuv_test.gyp
+++ b/libyuv_test.gyp
@@ -100,7 +100,6 @@
# Enable the following 3 macros to turn off assembly for specified CPU.
# 'LIBYUV_DISABLE_X86',
# 'LIBYUV_DISABLE_NEON',
- # 'LIBYUV_DISABLE_DSPR2',
# Enable the following macro to build libyuv as a shared library (dll).
# 'LIBYUV_USING_SHARED_LIBRARY',
],
diff --git a/linux.mk b/linux.mk
index 7e9aa5e4..b84c89f9 100644
--- a/linux.mk
+++ b/linux.mk
@@ -32,14 +32,12 @@ LOCAL_OBJ_FILES := \
source/rotate.o \
source/rotate_common.o \
source/rotate_gcc.o \
- source/rotate_dspr2.o \
source/rotate_neon64.o \
source/rotate_neon.o \
source/rotate_win.o \
source/row_any.o \
source/row_common.o \
source/row_gcc.o \
- source/row_dspr2.o \
source/row_neon64.o \
source/row_neon.o \
source/row_win.o \
@@ -48,7 +46,6 @@ LOCAL_OBJ_FILES := \
source/scale.o \
source/scale_common.o \
source/scale_gcc.o \
- source/scale_dspr2.o \
source/scale_neon64.o \
source/scale_neon.o \
source/scale_win.o \
diff --git a/source/convert.cc b/source/convert.cc
index dfa83a5a..ef78fb5f 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -212,11 +212,6 @@ static void CopyPlane2(const uint8* src,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
// Copy plane
for (y = 0; y < height - 1; y += 2) {
@@ -579,14 +574,6 @@ int ARGBToI420(const uint8* src_argb,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToYRow = ARGBToYRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
@@ -595,14 +582,6 @@ int ARGBToI420(const uint8* src_argb,
}
}
#endif
-#if defined(HAS_ARGBTOUVROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_ARGBTOUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToUVRow = ARGBToUVRow_Any_MSA;
@@ -680,22 +659,6 @@ int BGRAToI420(const uint8* src_bgra,
}
}
#endif
-#if defined(HAS_BGRATOYROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- BGRAToYRow = BGRAToYRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- BGRAToYRow = BGRAToYRow_DSPR2;
- }
- }
-#endif
-#if defined(HAS_BGRATOUVROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- BGRAToUVRow = BGRAToUVRow_Any_DSPR2;
- if (IS_ALIGNED(width, 16)) {
- BGRAToUVRow = BGRAToUVRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_BGRATOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
BGRAToYRow = BGRAToYRow_Any_MSA;
@@ -781,22 +744,6 @@ int ABGRToI420(const uint8* src_abgr,
}
}
#endif
-#if defined(HAS_ABGRTOYROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ABGRToYRow = ABGRToYRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- ABGRToYRow = ABGRToYRow_DSPR2;
- }
- }
-#endif
-#if defined(HAS_ABGRTOUVROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ABGRToUVRow = ABGRToUVRow_Any_DSPR2;
- if (IS_ALIGNED(width, 16)) {
- ABGRToUVRow = ABGRToUVRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_ABGRTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToYRow = ABGRToYRow_Any_MSA;
@@ -882,22 +829,6 @@ int RGBAToI420(const uint8* src_rgba,
}
}
#endif
-#if defined(HAS_RGBATOYROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- RGBAToYRow = RGBAToYRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- RGBAToYRow = RGBAToYRow_DSPR2;
- }
- }
-#endif
-#if defined(HAS_RGBATOUVROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- RGBAToUVRow = RGBAToUVRow_Any_DSPR2;
- if (IS_ALIGNED(width, 16)) {
- RGBAToUVRow = RGBAToUVRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_RGBATOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RGBAToYRow = RGBAToYRow_Any_MSA;
@@ -1287,14 +1218,6 @@ int RGB565ToI420(const uint8* src_rgb565,
}
}
#endif
-#if defined(HAS_RGB565TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
- }
- }
-#endif
#endif
{
#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA))
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index 5b6ddadb..2e7c0f8f 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -97,15 +97,6 @@ static int I420ToARGBMatrix(const uint8* src_y,
}
}
#endif
-#if defined(HAS_I422TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- I422ToARGBRow = I422ToARGBRow_DSPR2;
- }
-#endif
#if defined(HAS_I422TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGBRow = I422ToARGBRow_Any_MSA;
@@ -292,15 +283,6 @@ static int I422ToARGBMatrix(const uint8* src_y,
}
}
#endif
-#if defined(HAS_I422TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- I422ToARGBRow = I422ToARGBRow_DSPR2;
- }
-#endif
#if defined(HAS_I422TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGBRow = I422ToARGBRow_Any_MSA;
@@ -769,14 +751,6 @@ static int I444ToARGBMatrix(const uint8* src_y,
}
}
#endif
-#if defined(HAS_I444TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- I444ToARGBRow = I444ToARGBRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- I444ToARGBRow = I444ToARGBRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_I444TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I444ToARGBRow = I444ToARGBRow_Any_MSA;
@@ -905,15 +879,6 @@ static int I420AlphaToARGBMatrix(const uint8* src_y,
}
}
#endif
-#if defined(HAS_I422ALPHATOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- I422AlphaToARGBRow = I422AlphaToARGBRow_DSPR2;
- }
-#endif
#if defined(HAS_I422ALPHATOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_MSA;
@@ -1262,14 +1227,6 @@ int RGB24ToARGB(const uint8* src_rgb24,
}
}
#endif
-#if defined(HAS_RGB24TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- RGB24ToARGBRow = RGB24ToARGBRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- RGB24ToARGBRow = RGB24ToARGBRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_RGB24TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_MSA;
@@ -1329,14 +1286,6 @@ int RAWToARGB(const uint8* src_raw,
}
}
#endif
-#if defined(HAS_RAWTOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- RAWToARGBRow = RAWToARGBRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- RAWToARGBRow = RAWToARGBRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_RAWTOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RAWToARGBRow = RAWToARGBRow_Any_MSA;
@@ -1404,14 +1353,6 @@ int RGB565ToARGB(const uint8* src_rgb565,
}
}
#endif
-#if defined(HAS_RGB565TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_RGB565TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_MSA;
@@ -1479,14 +1420,6 @@ int ARGB1555ToARGB(const uint8* src_argb1555,
}
}
#endif
-#if defined(HAS_ARGB1555TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_DSPR2;
- if (IS_ALIGNED(width, 4)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_ARGB1555TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_MSA;
@@ -1554,14 +1487,6 @@ int ARGB4444ToARGB(const uint8* src_argb4444,
}
}
#endif
-#if defined(HAS_ARGB4444TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_DSPR2;
- if (IS_ALIGNED(width, 4)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_ARGB4444TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA;
@@ -1626,14 +1551,6 @@ static int NV12ToARGBMatrix(const uint8* src_y,
}
}
#endif
-#if defined(HAS_NV12TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- NV12ToARGBRow = NV12ToARGBRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_NV12TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
NV12ToARGBRow = NV12ToARGBRow_Any_MSA;
@@ -1823,14 +1740,6 @@ int M420ToARGB(const uint8* src_m420,
}
}
#endif
-#if defined(HAS_NV12TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- NV12ToARGBRow = NV12ToARGBRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_NV12TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
NV12ToARGBRow = NV12ToARGBRow_Any_MSA;
diff --git a/source/convert_from.cc b/source/convert_from.cc
index 509fe232..5c803753 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -484,15 +484,6 @@ static int I420ToRGBAMatrix(const uint8* src_y,
}
}
#endif
-#if defined(HAS_I422TORGBAROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
- I422ToRGBARow = I422ToRGBARow_DSPR2;
- }
-#endif
#if defined(HAS_I422TORGBAROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToRGBARow = I422ToRGBARow_Any_MSA;
@@ -744,14 +735,6 @@ int I420ToARGB1555(const uint8* src_y,
}
}
#endif
-#if defined(HAS_I422TOARGB1555ROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- I422ToARGB1555Row = I422ToARGB1555Row_Any_DSPR2;
- if (IS_ALIGNED(width, 4)) {
- I422ToARGB1555Row = I422ToARGB1555Row_DSPR2;
- }
- }
-#endif
#if defined(HAS_I422TOARGB1555ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA;
@@ -825,14 +808,6 @@ int I420ToARGB4444(const uint8* src_y,
}
}
#endif
-#if defined(HAS_I422TOARGB4444ROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- I422ToARGB4444Row = I422ToARGB4444Row_Any_DSPR2;
- if (IS_ALIGNED(width, 4)) {
- I422ToARGB4444Row = I422ToARGB4444Row_DSPR2;
- }
- }
-#endif
#if defined(HAS_I422TOARGB4444ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA;
@@ -1057,14 +1032,6 @@ int I420ToRGB565Dither(const uint8* src_y,
}
}
#endif
-#if defined(HAS_I422TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
- I422ToARGBRow = I422ToARGBRow_DSPR2;
- }
-#endif
#if defined(HAS_I422TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGBRow = I422ToARGBRow_Any_MSA;
diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc
index 4dca2405..02e12a12 100644
--- a/source/convert_from_argb.cc
+++ b/source/convert_from_argb.cc
@@ -100,14 +100,6 @@ int ARGBToI444(const uint8* src_argb,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToYRow = ARGBToYRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
@@ -197,22 +189,6 @@ int ARGBToI422(const uint8* src_argb,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToYRow = ARGBToYRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_DSPR2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
@@ -344,22 +320,6 @@ int ARGBToNV12(const uint8* src_argb,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToYRow = ARGBToYRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_DSPR2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_MERGEUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MergeUVRow_ = MergeUVRow_Any_MSA;
@@ -495,22 +455,6 @@ int ARGBToNV21(const uint8* src_argb,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToYRow = ARGBToYRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_DSPR2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_MERGEUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MergeUVRow_ = MergeUVRow_Any_MSA;
@@ -643,22 +587,6 @@ int ARGBToYUY2(const uint8* src_argb,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToYRow = ARGBToYRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_DSPR2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_I422TOYUY2ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToYUY2Row = I422ToYUY2Row_Any_MSA;
@@ -787,22 +715,6 @@ int ARGBToUYVY(const uint8* src_argb,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToYRow = ARGBToYRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_DSPR2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_I422TOUYVYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
@@ -880,14 +792,6 @@ int ARGBToI400(const uint8* src_argb,
}
}
#endif
-#if defined(HAS_ARGBTOYROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ARGBToYRow = ARGBToYRow_Any_DSPR2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
diff --git a/source/cpu_id.cc b/source/cpu_id.cc
index 344f3c06..d08fc365 100644
--- a/source/cpu_id.cc
+++ b/source/cpu_id.cc
@@ -179,7 +179,7 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,
if (strcmp(ase, " msa") == 0) {
return kCpuHasMSA;
}
- return kCpuHasDSPR2;
+ return 0;
}
while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) {
@@ -189,7 +189,7 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,
if (strcmp(ase, " msa") == 0) {
return kCpuHasMSA;
}
- return kCpuHasDSPR2;
+ return 0;
}
}
}
@@ -290,16 +290,10 @@ static SAFEBUFFERS int GetCpuFlags(void) {
#endif
#if defined(__mips__) && defined(__linux__)
-#if defined(__mips_dspr2)
- cpu_info |= kCpuHasDSPR2;
-#endif
#if defined(__mips_msa)
cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa");
#endif
cpu_info |= kCpuHasMIPS;
- if (getenv("LIBYUV_DISABLE_DSPR2")) {
- cpu_info &= ~kCpuHasDSPR2;
- }
if (getenv("LIBYUV_DISABLE_MSA")) {
cpu_info &= ~kCpuHasMSA;
}
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index dd311d1f..e65f1788 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -70,11 +70,6 @@ void CopyPlane(const uint8* src_y,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
// Copy plane
for (y = 0; y < height; ++y) {
@@ -116,11 +111,6 @@ void CopyPlane_16(const uint16* src_y,
CopyRow = CopyRow_16_NEON;
}
#endif
-#if defined(HAS_COPYROW_16_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_16_MIPS;
- }
-#endif
// Copy plane
for (y = 0; y < height; ++y) {
@@ -311,16 +301,6 @@ void SplitUVPlane(const uint8* src_uv,
}
}
#endif
-#if defined(HAS_SPLITUVROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_u, 4) &&
- IS_ALIGNED(dst_stride_u, 4) && IS_ALIGNED(dst_v, 4) &&
- IS_ALIGNED(dst_stride_v, 4)) {
- SplitUVRow = SplitUVRow_Any_DSPR2;
- if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_SPLITUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
SplitUVRow = SplitUVRow_Any_MSA;
@@ -562,14 +542,6 @@ void MirrorPlane(const uint8* src_y,
}
}
#endif
-// TODO(fbarchard): Mirror on mips handle unaligned memory.
-#if defined(HAS_MIRRORROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_y, 4) &&
- IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(dst_y, 4) &&
- IS_ALIGNED(dst_stride_y, 4)) {
- MirrorRow = MirrorRow_DSPR2;
- }
-#endif
#if defined(HAS_MIRRORROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MirrorRow = MirrorRow_Any_MSA;
@@ -1473,15 +1445,6 @@ static int I422ToRGBAMatrix(const uint8* src_y,
}
}
#endif
-#if defined(HAS_I422TORGBAROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
- I422ToRGBARow = I422ToRGBARow_DSPR2;
- }
-#endif
#if defined(HAS_I422TORGBAROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToRGBARow = I422ToRGBARow_Any_MSA;
@@ -2534,14 +2497,6 @@ int InterpolatePlane(const uint8* src0,
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src0, 4) &&
- IS_ALIGNED(src_stride0, 4) && IS_ALIGNED(src1, 4) &&
- IS_ALIGNED(src_stride1, 4) && IS_ALIGNED(dst, 4) &&
- IS_ALIGNED(dst_stride, 4) && IS_ALIGNED(width, 4)) {
- InterpolateRow = InterpolateRow_DSPR2;
- }
-#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
diff --git a/source/rotate.cc b/source/rotate.cc
index b16af507..1f74cd07 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -57,16 +57,6 @@ void TransposePlane(const uint8* src,
}
}
#endif
-#if defined(HAS_TRANSPOSEWX8_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- if (IS_ALIGNED(width, 4) && IS_ALIGNED(src, 4) &&
- IS_ALIGNED(src_stride, 4)) {
- TransposeWx8 = TransposeWx8_Fast_DSPR2;
- } else {
- TransposeWx8 = TransposeWx8_DSPR2;
- }
- }
-#endif
#if defined(HAS_TRANSPOSEWX16_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
TransposeWx16 = TransposeWx16_Any_MSA;
@@ -168,14 +158,6 @@ void RotatePlane180(const uint8* src,
}
}
#endif
-// TODO(fbarchard): Mirror on mips handle unaligned memory.
-#if defined(HAS_MIRRORROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src, 4) &&
- IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst, 4) &&
- IS_ALIGNED(dst_stride, 4)) {
- MirrorRow = MirrorRow_DSPR2;
- }
-#endif
#if defined(HAS_MIRRORROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MirrorRow = MirrorRow_Any_MSA;
@@ -204,11 +186,6 @@ void RotatePlane180(const uint8* src,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
@@ -255,12 +232,6 @@ void TransposeUV(const uint8* src,
}
}
#endif
-#if defined(HAS_TRANSPOSEUVWX8_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) && IS_ALIGNED(src, 4) &&
- IS_ALIGNED(src_stride, 4)) {
- TransposeUVWx8 = TransposeUVWx8_DSPR2;
- }
-#endif
#if defined(HAS_TRANSPOSEUVWX16_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
TransposeUVWx16 = TransposeUVWx16_Any_MSA;
@@ -355,12 +326,6 @@ void RotateUV180(const uint8* src,
MirrorUVRow = MirrorUVRow_SSSE3;
}
#endif
-#if defined(HAS_MIRRORUVROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src, 4) &&
- IS_ALIGNED(src_stride, 4)) {
- MirrorUVRow = MirrorUVRow_DSPR2;
- }
-#endif
#if defined(HAS_MIRRORUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 32)) {
MirrorUVRow = MirrorUVRow_MSA;
diff --git a/source/rotate_any.cc b/source/rotate_any.cc
index 562096b9..eb4f7418 100644
--- a/source/rotate_any.cc
+++ b/source/rotate_any.cc
@@ -38,9 +38,6 @@ TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
#endif
-#ifdef HAS_TRANSPOSEWX8_DSPR2
-TANY(TransposeWx8_Any_DSPR2, TransposeWx8_DSPR2, 7)
-#endif
#ifdef HAS_TRANSPOSEWX16_MSA
TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15)
#endif
@@ -64,9 +61,6 @@ TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
#ifdef HAS_TRANSPOSEUVWX8_SSE2
TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
#endif
-#ifdef HAS_TRANSPOSEUVWX8_DSPR2
-TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7)
-#endif
#ifdef HAS_TRANSPOSEUVWX16_MSA
TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7)
#endif
diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc
index ede4eafa..f6a2bf69 100644
--- a/source/rotate_argb.cc
+++ b/source/rotate_argb.cc
@@ -173,11 +173,6 @@ void ARGBRotate180(const uint8* src,
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
diff --git a/source/rotate_dspr2.cc b/source/rotate_dspr2.cc
deleted file mode 100644
index 5d2338de..00000000
--- a/source/rotate_dspr2.cc
+++ /dev/null
@@ -1,475 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/rotate_row.h"
-#include "libyuv/row.h"
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) && \
- (__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32)
-
-void TransposeWx8_DSPR2(const uint8* src,
- int src_stride,
- uint8* dst,
- int dst_stride,
- int width) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
- "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
- "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
- "addu $t3, $t2, %[src_stride] \n"
- "addu $t5, $t4, %[src_stride] \n"
- "addu $t6, $t2, $t4 \n"
- "andi $t0, %[dst], 0x3 \n"
- "andi $t1, %[dst_stride], 0x3 \n"
- "or $t0, $t0, $t1 \n"
- "bnez $t0, 11f \n"
- " subu $t7, $t9, %[src_stride] \n"
- // dst + dst_stride word aligned
- "1: \n"
- "lbu $t0, 0(%[src]) \n"
- "lbux $t1, %[src_stride](%[src]) \n"
- "lbux $t8, $t2(%[src]) \n"
- "lbux $t9, $t3(%[src]) \n"
- "sll $t1, $t1, 16 \n"
- "sll $t9, $t9, 16 \n"
- "or $t0, $t0, $t1 \n"
- "or $t8, $t8, $t9 \n"
- "precr.qb.ph $s0, $t8, $t0 \n"
- "lbux $t0, $t4(%[src]) \n"
- "lbux $t1, $t5(%[src]) \n"
- "lbux $t8, $t6(%[src]) \n"
- "lbux $t9, $t7(%[src]) \n"
- "sll $t1, $t1, 16 \n"
- "sll $t9, $t9, 16 \n"
- "or $t0, $t0, $t1 \n"
- "or $t8, $t8, $t9 \n"
- "precr.qb.ph $s1, $t8, $t0 \n"
- "sw $s0, 0(%[dst]) \n"
- "addiu %[width], -1 \n"
- "addiu %[src], 1 \n"
- "sw $s1, 4(%[dst]) \n"
- "bnez %[width], 1b \n"
- " addu %[dst], %[dst], %[dst_stride] \n"
- "b 2f \n"
- // dst + dst_stride unaligned
- "11: \n"
- "lbu $t0, 0(%[src]) \n"
- "lbux $t1, %[src_stride](%[src]) \n"
- "lbux $t8, $t2(%[src]) \n"
- "lbux $t9, $t3(%[src]) \n"
- "sll $t1, $t1, 16 \n"
- "sll $t9, $t9, 16 \n"
- "or $t0, $t0, $t1 \n"
- "or $t8, $t8, $t9 \n"
- "precr.qb.ph $s0, $t8, $t0 \n"
- "lbux $t0, $t4(%[src]) \n"
- "lbux $t1, $t5(%[src]) \n"
- "lbux $t8, $t6(%[src]) \n"
- "lbux $t9, $t7(%[src]) \n"
- "sll $t1, $t1, 16 \n"
- "sll $t9, $t9, 16 \n"
- "or $t0, $t0, $t1 \n"
- "or $t8, $t8, $t9 \n"
- "precr.qb.ph $s1, $t8, $t0 \n"
- "swr $s0, 0(%[dst]) \n"
- "swl $s0, 3(%[dst]) \n"
- "addiu %[width], -1 \n"
- "addiu %[src], 1 \n"
- "swr $s1, 4(%[dst]) \n"
- "swl $s1, 7(%[dst]) \n"
- "bnez %[width], 11b \n"
- "addu %[dst], %[dst], %[dst_stride] \n"
- "2: \n"
- ".set pop \n"
- : [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width)
- : [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1");
-}
-
-void TransposeWx8_Fast_DSPR2(const uint8* src,
- int src_stride,
- uint8* dst,
- int dst_stride,
- int width) {
- __asm__ __volatile__(
- ".set noat \n"
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
- "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
- "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
- "addu $t3, $t2, %[src_stride] \n"
- "addu $t5, $t4, %[src_stride] \n"
- "addu $t6, $t2, $t4 \n"
-
- "srl $AT, %[width], 0x2 \n"
- "andi $t0, %[dst], 0x3 \n"
- "andi $t1, %[dst_stride], 0x3 \n"
- "or $t0, $t0, $t1 \n"
- "bnez $t0, 11f \n"
- " subu $t7, $t9, %[src_stride] \n"
- // dst + dst_stride word aligned
- "1: \n"
- "lw $t0, 0(%[src]) \n"
- "lwx $t1, %[src_stride](%[src]) \n"
- "lwx $t8, $t2(%[src]) \n"
- "lwx $t9, $t3(%[src]) \n"
-
- // t0 = | 30 | 20 | 10 | 00 |
- // t1 = | 31 | 21 | 11 | 01 |
- // t8 = | 32 | 22 | 12 | 02 |
- // t9 = | 33 | 23 | 13 | 03 |
-
- "precr.qb.ph $s0, $t1, $t0 \n"
- "precr.qb.ph $s1, $t9, $t8 \n"
- "precrq.qb.ph $s2, $t1, $t0 \n"
- "precrq.qb.ph $s3, $t9, $t8 \n"
-
- // s0 = | 21 | 01 | 20 | 00 |
- // s1 = | 23 | 03 | 22 | 02 |
- // s2 = | 31 | 11 | 30 | 10 |
- // s3 = | 33 | 13 | 32 | 12 |
-
- "precr.qb.ph $s4, $s1, $s0 \n"
- "precrq.qb.ph $s5, $s1, $s0 \n"
- "precr.qb.ph $s6, $s3, $s2 \n"
- "precrq.qb.ph $s7, $s3, $s2 \n"
-
- // s4 = | 03 | 02 | 01 | 00 |
- // s5 = | 23 | 22 | 21 | 20 |
- // s6 = | 13 | 12 | 11 | 10 |
- // s7 = | 33 | 32 | 31 | 30 |
-
- "lwx $t0, $t4(%[src]) \n"
- "lwx $t1, $t5(%[src]) \n"
- "lwx $t8, $t6(%[src]) \n"
- "lwx $t9, $t7(%[src]) \n"
-
- // t0 = | 34 | 24 | 14 | 04 |
- // t1 = | 35 | 25 | 15 | 05 |
- // t8 = | 36 | 26 | 16 | 06 |
- // t9 = | 37 | 27 | 17 | 07 |
-
- "precr.qb.ph $s0, $t1, $t0 \n"
- "precr.qb.ph $s1, $t9, $t8 \n"
- "precrq.qb.ph $s2, $t1, $t0 \n"
- "precrq.qb.ph $s3, $t9, $t8 \n"
-
- // s0 = | 25 | 05 | 24 | 04 |
- // s1 = | 27 | 07 | 26 | 06 |
- // s2 = | 35 | 15 | 34 | 14 |
- // s3 = | 37 | 17 | 36 | 16 |
-
- "precr.qb.ph $t0, $s1, $s0 \n"
- "precrq.qb.ph $t1, $s1, $s0 \n"
- "precr.qb.ph $t8, $s3, $s2 \n"
- "precrq.qb.ph $t9, $s3, $s2 \n"
-
- // t0 = | 07 | 06 | 05 | 04 |
- // t1 = | 27 | 26 | 25 | 24 |
- // t8 = | 17 | 16 | 15 | 14 |
- // t9 = | 37 | 36 | 35 | 34 |
-
- "addu $s0, %[dst], %[dst_stride] \n"
- "addu $s1, $s0, %[dst_stride] \n"
- "addu $s2, $s1, %[dst_stride] \n"
-
- "sw $s4, 0(%[dst]) \n"
- "sw $t0, 4(%[dst]) \n"
- "sw $s6, 0($s0) \n"
- "sw $t8, 4($s0) \n"
- "sw $s5, 0($s1) \n"
- "sw $t1, 4($s1) \n"
- "sw $s7, 0($s2) \n"
- "sw $t9, 4($s2) \n"
-
- "addiu $AT, -1 \n"
- "addiu %[src], 4 \n"
-
- "bnez $AT, 1b \n"
- " addu %[dst], $s2, %[dst_stride] \n"
- "b 2f \n"
- // dst + dst_stride unaligned
- "11: \n"
- "lw $t0, 0(%[src]) \n"
- "lwx $t1, %[src_stride](%[src]) \n"
- "lwx $t8, $t2(%[src]) \n"
- "lwx $t9, $t3(%[src]) \n"
-
- // t0 = | 30 | 20 | 10 | 00 |
- // t1 = | 31 | 21 | 11 | 01 |
- // t8 = | 32 | 22 | 12 | 02 |
- // t9 = | 33 | 23 | 13 | 03 |
-
- "precr.qb.ph $s0, $t1, $t0 \n"
- "precr.qb.ph $s1, $t9, $t8 \n"
- "precrq.qb.ph $s2, $t1, $t0 \n"
- "precrq.qb.ph $s3, $t9, $t8 \n"
-
- // s0 = | 21 | 01 | 20 | 00 |
- // s1 = | 23 | 03 | 22 | 02 |
- // s2 = | 31 | 11 | 30 | 10 |
- // s3 = | 33 | 13 | 32 | 12 |
-
- "precr.qb.ph $s4, $s1, $s0 \n"
- "precrq.qb.ph $s5, $s1, $s0 \n"
- "precr.qb.ph $s6, $s3, $s2 \n"
- "precrq.qb.ph $s7, $s3, $s2 \n"
-
- // s4 = | 03 | 02 | 01 | 00 |
- // s5 = | 23 | 22 | 21 | 20 |
- // s6 = | 13 | 12 | 11 | 10 |
- // s7 = | 33 | 32 | 31 | 30 |
-
- "lwx $t0, $t4(%[src]) \n"
- "lwx $t1, $t5(%[src]) \n"
- "lwx $t8, $t6(%[src]) \n"
- "lwx $t9, $t7(%[src]) \n"
-
- // t0 = | 34 | 24 | 14 | 04 |
- // t1 = | 35 | 25 | 15 | 05 |
- // t8 = | 36 | 26 | 16 | 06 |
- // t9 = | 37 | 27 | 17 | 07 |
-
- "precr.qb.ph $s0, $t1, $t0 \n"
- "precr.qb.ph $s1, $t9, $t8 \n"
- "precrq.qb.ph $s2, $t1, $t0 \n"
- "precrq.qb.ph $s3, $t9, $t8 \n"
-
- // s0 = | 25 | 05 | 24 | 04 |
- // s1 = | 27 | 07 | 26 | 06 |
- // s2 = | 35 | 15 | 34 | 14 |
- // s3 = | 37 | 17 | 36 | 16 |
-
- "precr.qb.ph $t0, $s1, $s0 \n"
- "precrq.qb.ph $t1, $s1, $s0 \n"
- "precr.qb.ph $t8, $s3, $s2 \n"
- "precrq.qb.ph $t9, $s3, $s2 \n"
-
- // t0 = | 07 | 06 | 05 | 04 |
- // t1 = | 27 | 26 | 25 | 24 |
- // t8 = | 17 | 16 | 15 | 14 |
- // t9 = | 37 | 36 | 35 | 34 |
-
- "addu $s0, %[dst], %[dst_stride] \n"
- "addu $s1, $s0, %[dst_stride] \n"
- "addu $s2, $s1, %[dst_stride] \n"
-
- "swr $s4, 0(%[dst]) \n"
- "swl $s4, 3(%[dst]) \n"
- "swr $t0, 4(%[dst]) \n"
- "swl $t0, 7(%[dst]) \n"
- "swr $s6, 0($s0) \n"
- "swl $s6, 3($s0) \n"
- "swr $t8, 4($s0) \n"
- "swl $t8, 7($s0) \n"
- "swr $s5, 0($s1) \n"
- "swl $s5, 3($s1) \n"
- "swr $t1, 4($s1) \n"
- "swl $t1, 7($s1) \n"
- "swr $s7, 0($s2) \n"
- "swl $s7, 3($s2) \n"
- "swr $t9, 4($s2) \n"
- "swl $t9, 7($s2) \n"
-
- "addiu $AT, -1 \n"
- "addiu %[src], 4 \n"
-
- "bnez $AT, 11b \n"
- " addu %[dst], $s2, %[dst_stride] \n"
- "2: \n"
- ".set pop \n"
- ".set at \n"
- : [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width)
- : [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1",
- "s2", "s3", "s4", "s5", "s6", "s7");
-}
-
-void TransposeUVWx8_DSPR2(const uint8* src,
- int src_stride,
- uint8* dst_a,
- int dst_stride_a,
- uint8* dst_b,
- int dst_stride_b,
- int width) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
- "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
- "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
- "addu $t3, $t2, %[src_stride] \n"
- "addu $t5, $t4, %[src_stride] \n"
- "addu $t6, $t2, $t4 \n"
- "subu $t7, $t9, %[src_stride] \n"
- "srl $t1, %[width], 1 \n"
-
- // check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b
- "andi $t0, %[dst_a], 0x3 \n"
- "andi $t8, %[dst_b], 0x3 \n"
- "or $t0, $t0, $t8 \n"
- "andi $t8, %[dst_stride_a], 0x3 \n"
- "andi $s5, %[dst_stride_b], 0x3 \n"
- "or $t8, $t8, $s5 \n"
- "or $t0, $t0, $t8 \n"
- "bnez $t0, 11f \n"
- " nop \n"
- // dst + dst_stride word aligned (both, a & b dst addresses)
- "1: \n"
- "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
- "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
- "addu $s5, %[dst_a], %[dst_stride_a] \n"
- "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
- "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
- "addu $s6, %[dst_b], %[dst_stride_b] \n"
-
- "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
- "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
- "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
-
- "sll $t0, $t0, 16 \n"
- "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
- "sll $t9, $t9, 16 \n"
- "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
-
- "sw $s3, 0($s5) \n"
- "sw $s4, 0($s6) \n"
-
- "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
-
- "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
- "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
- "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
- "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
- "sw $s3, 0(%[dst_a]) \n"
- "sw $s4, 0(%[dst_b]) \n"
-
- "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
- "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
- "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
-
- "sll $t0, $t0, 16 \n"
- "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
- "sll $t9, $t9, 16 \n"
- "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
- "sw $s3, 4($s5) \n"
- "sw $s4, 4($s6) \n"
-
- "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
-
- "addiu %[src], 4 \n"
- "addiu $t1, -1 \n"
- "sll $t0, %[dst_stride_a], 1 \n"
- "sll $t8, %[dst_stride_b], 1 \n"
- "sw $s3, 4(%[dst_a]) \n"
- "sw $s4, 4(%[dst_b]) \n"
- "addu %[dst_a], %[dst_a], $t0 \n"
- "bnez $t1, 1b \n"
- " addu %[dst_b], %[dst_b], $t8 \n"
- "b 2f \n"
- " nop \n"
-
- // dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
- "11: \n"
- "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
- "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
- "addu $s5, %[dst_a], %[dst_stride_a] \n"
- "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
- "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
- "addu $s6, %[dst_b], %[dst_stride_b] \n"
-
- "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
- "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
- "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
-
- "sll $t0, $t0, 16 \n"
- "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
- "sll $t9, $t9, 16 \n"
- "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
-
- "swr $s3, 0($s5) \n"
- "swl $s3, 3($s5) \n"
- "swr $s4, 0($s6) \n"
- "swl $s4, 3($s6) \n"
-
- "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
-
- "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
- "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
- "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
- "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
- "swr $s3, 0(%[dst_a]) \n"
- "swl $s3, 3(%[dst_a]) \n"
- "swr $s4, 0(%[dst_b]) \n"
- "swl $s4, 3(%[dst_b]) \n"
-
- "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
- "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
- "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
-
- "sll $t0, $t0, 16 \n"
- "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
- "sll $t9, $t9, 16 \n"
- "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
-
- "swr $s3, 4($s5) \n"
- "swl $s3, 7($s5) \n"
- "swr $s4, 4($s6) \n"
- "swl $s4, 7($s6) \n"
-
- "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
-
- "addiu %[src], 4 \n"
- "addiu $t1, -1 \n"
- "sll $t0, %[dst_stride_a], 1 \n"
- "sll $t8, %[dst_stride_b], 1 \n"
- "swr $s3, 4(%[dst_a]) \n"
- "swl $s3, 7(%[dst_a]) \n"
- "swr $s4, 4(%[dst_b]) \n"
- "swl $s4, 7(%[dst_b]) \n"
- "addu %[dst_a], %[dst_a], $t0 \n"
- "bnez $t1, 11b \n"
- " addu %[dst_b], %[dst_b], $t8 \n"
-
- "2: \n"
- ".set pop \n"
- : [src] "+r"(src), [dst_a] "+r"(dst_a), [dst_b] "+r"(dst_b),
- [width] "+r"(width), [src_stride] "+r"(src_stride)
- : [dst_stride_a] "r"(dst_stride_a), [dst_stride_b] "r"(dst_stride_b)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1",
- "s2", "s3", "s4", "s5", "s6");
-}
-
-#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/source/row_any.cc b/source/row_any.cc
index 6d65ca7d..cc9fb50c 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -183,12 +183,6 @@ ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
-#ifdef HAS_I422TOARGBROW_DSPR2
-ANY31C(I444ToARGBRow_Any_DSPR2, I444ToARGBRow_DSPR2, 0, 0, 4, 7)
-ANY31C(I422ToARGBRow_Any_DSPR2, I422ToARGBRow_DSPR2, 1, 0, 4, 7)
-ANY31C(I422ToARGB4444Row_Any_DSPR2, I422ToARGB4444Row_DSPR2, 1, 0, 2, 7)
-ANY31C(I422ToARGB1555Row_Any_DSPR2, I422ToARGB1555Row_DSPR2, 1, 0, 2, 7)
-#endif
#ifdef HAS_I422TOARGBROW_MSA
ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
@@ -326,9 +320,6 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#ifdef HAS_NV12TOARGBROW_NEON
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
-#ifdef HAS_NV12TOARGBROW_DSPR2
-ANY21C(NV12ToARGBRow_Any_DSPR2, NV12ToARGBRow_DSPR2, 1, 1, 2, 4, 7)
-#endif
#ifdef HAS_NV12TOARGBROW_MSA
ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
@@ -578,33 +569,6 @@ ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
#ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif
-#ifdef HAS_RGB24TOARGBROW_DSPR2
-ANY11(RGB24ToARGBRow_Any_DSPR2, RGB24ToARGBRow_DSPR2, 0, 3, 4, 7)
-#endif
-#ifdef HAS_RAWTOARGBROW_DSPR2
-ANY11(RAWToARGBRow_Any_DSPR2, RAWToARGBRow_DSPR2, 0, 3, 4, 7)
-#endif
-#ifdef HAS_RGB565TOARGBROW_DSPR2
-ANY11(RGB565ToARGBRow_Any_DSPR2, RGB565ToARGBRow_DSPR2, 0, 2, 4, 7)
-#endif
-#ifdef HAS_ARGB1555TOARGBROW_DSPR2
-ANY11(ARGB1555ToARGBRow_Any_DSPR2, ARGB1555ToARGBRow_DSPR2, 0, 2, 4, 7)
-#endif
-#ifdef HAS_ARGB4444TOARGBROW_DSPR2
-ANY11(ARGB4444ToARGBRow_Any_DSPR2, ARGB4444ToARGBRow_DSPR2, 0, 2, 4, 7)
-#endif
-#ifdef HAS_BGRATOYROW_DSPR2
-ANY11(BGRAToYRow_Any_DSPR2, BGRAToYRow_DSPR2, 0, 4, 1, 7)
-#endif
-#ifdef HAS_ARGBTOYROW_DSPR2
-ANY11(ARGBToYRow_Any_DSPR2, ARGBToYRow_DSPR2, 0, 4, 1, 7)
-#endif
-#ifdef HAS_ABGRTOYROW_DSPR2
-ANY11(ABGRToYRow_Any_DSPR2, ABGRToYRow_DSPR2, 0, 4, 1, 7)
-#endif
-#ifdef HAS_RGBATOYROW_DSPR2
-ANY11(RGBAToYRow_Any_DSPR2, RGBAToYRow_DSPR2, 0, 4, 1, 7)
-#endif
#ifdef HAS_ARGB4444TOARGBROW_MSA
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
#endif
@@ -851,9 +815,6 @@ ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
#ifdef HAS_INTERPOLATEROW_NEON
ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
#endif
-#ifdef HAS_INTERPOLATEROW_DSPR2
-ANY11T(InterpolateRow_Any_DSPR2, InterpolateRow_DSPR2, 1, 1, 3)
-#endif
#ifdef HAS_INTERPOLATEROW_MSA
ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
#endif
@@ -952,9 +913,6 @@ ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
#ifdef HAS_SPLITUVROW_NEON
ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
#endif
-#ifdef HAS_SPLITUVROW_DSPR2
-ANY12(SplitUVRow_Any_DSPR2, SplitUVRow_DSPR2, 0, 2, 0, 15)
-#endif
#ifdef HAS_SPLITUVROW_MSA
ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31)
#endif
@@ -1116,18 +1074,6 @@ ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#ifdef HAS_UYVYTOUVROW_NEON
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
-#ifdef HAS_BGRATOUVROW_DSPR2
-ANY12S(BGRAToUVRow_Any_DSPR2, BGRAToUVRow_DSPR2, 0, 4, 15)
-#endif
-#ifdef HAS_ABGRTOUVROW_DSPR2
-ANY12S(ABGRToUVRow_Any_DSPR2, ABGRToUVRow_DSPR2, 0, 4, 15)
-#endif
-#ifdef HAS_RGBATOUVROW_DSPR2
-ANY12S(RGBAToUVRow_Any_DSPR2, RGBAToUVRow_DSPR2, 0, 4, 15)
-#endif
-#ifdef HAS_ARGBTOUVROW_DSPR2
-ANY12S(ARGBToUVRow_Any_DSPR2, ARGBToUVRow_DSPR2, 0, 4, 15)
-#endif
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
diff --git a/source/row_dspr2.cc b/source/row_dspr2.cc
deleted file mode 100644
index 11f78e0d..00000000
--- a/source/row_dspr2.cc
+++ /dev/null
@@ -1,1721 +0,0 @@
-/*
- * Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// The following are available on Mips platforms:
-#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips__) && \
- (_MIPS_SIM == _MIPS_SIM_ABI32)
-
-#ifdef HAS_COPYROW_MIPS
-void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
- __asm__ __volatile__(
- ".set noreorder \n"
- ".set noat \n"
- "slti $at, %[count], 8 \n"
- "bne $at ,$zero, $last8 \n"
- "xor $t8, %[src], %[dst] \n"
- "andi $t8, $t8, 0x3 \n"
-
- "bne $t8, $zero, unaligned \n"
- "negu $a3, %[dst] \n"
- // make dst/src aligned
- "andi $a3, $a3, 0x3 \n"
- "beq $a3, $zero, $chk16w \n"
-      // word-aligned now; count is the remaining byte count
- "subu %[count], %[count], $a3 \n"
-
- "lwr $t8, 0(%[src]) \n"
- "addu %[src], %[src], $a3 \n"
- "swr $t8, 0(%[dst]) \n"
- "addu %[dst], %[dst], $a3 \n"
-
- // Now the dst/src are mutually word-aligned with word-aligned addresses
- "$chk16w: \n"
- "andi $t8, %[count], 0x3f \n" // whole 64-B chunks?
- // t8 is the byte count after 64-byte chunks
- "beq %[count], $t8, chk8w \n"
- // There will be at most 1 32-byte chunk after it
-      "subu      $a3, %[count], $t8              \n"  // the remainder
- // Here a3 counts bytes in 16w chunks
- "addu $a3, %[dst], $a3 \n"
- // Now a3 is the final dst after 64-byte chunks
- "addu $t0, %[dst], %[count] \n"
- // t0 is the "past the end" address
-
- // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be
- // past
- // the "t0-32" address
- // This means: for x=128 the last "safe" a1 address is "t0-160"
- // Alternatively, for x=64 the last "safe" a1 address is "t0-96"
- // we will use "pref 30,128(a1)", so "t0-160" is the limit
- "subu $t9, $t0, 160 \n"
- // t9 is the "last safe pref 30,128(a1)" address
- "pref 0, 0(%[src]) \n" // first line of src
- "pref 0, 32(%[src]) \n" // second line of src
- "pref 0, 64(%[src]) \n"
- "pref 30, 32(%[dst]) \n"
- // In case the a1 > t9 don't use "pref 30" at all
- "sltu $v1, $t9, %[dst] \n"
- "bgtz $v1, $loop16w \n"
- "nop \n"
- // otherwise, start with using pref30
- "pref 30, 64(%[dst]) \n"
- "$loop16w: \n"
- "pref 0, 96(%[src]) \n"
- "lw $t0, 0(%[src]) \n"
- "bgtz $v1, $skip_pref30_96 \n" // skip
- "lw $t1, 4(%[src]) \n"
- "pref 30, 96(%[dst]) \n" // continue
- "$skip_pref30_96: \n"
- "lw $t2, 8(%[src]) \n"
- "lw $t3, 12(%[src]) \n"
- "lw $t4, 16(%[src]) \n"
- "lw $t5, 20(%[src]) \n"
- "lw $t6, 24(%[src]) \n"
- "lw $t7, 28(%[src]) \n"
- "pref 0, 128(%[src]) \n"
- // bring the next lines of src, addr 128
- "sw $t0, 0(%[dst]) \n"
- "sw $t1, 4(%[dst]) \n"
- "sw $t2, 8(%[dst]) \n"
- "sw $t3, 12(%[dst]) \n"
- "sw $t4, 16(%[dst]) \n"
- "sw $t5, 20(%[dst]) \n"
- "sw $t6, 24(%[dst]) \n"
- "sw $t7, 28(%[dst]) \n"
- "lw $t0, 32(%[src]) \n"
- "bgtz $v1, $skip_pref30_128 \n" // skip pref 30,128(a1)
- "lw $t1, 36(%[src]) \n"
- "pref 30, 128(%[dst]) \n" // set dest, addr 128
- "$skip_pref30_128: \n"
- "lw $t2, 40(%[src]) \n"
- "lw $t3, 44(%[src]) \n"
- "lw $t4, 48(%[src]) \n"
- "lw $t5, 52(%[src]) \n"
- "lw $t6, 56(%[src]) \n"
- "lw $t7, 60(%[src]) \n"
- "pref 0, 160(%[src]) \n"
- // bring the next lines of src, addr 160
- "sw $t0, 32(%[dst]) \n"
- "sw $t1, 36(%[dst]) \n"
- "sw $t2, 40(%[dst]) \n"
- "sw $t3, 44(%[dst]) \n"
- "sw $t4, 48(%[dst]) \n"
- "sw $t5, 52(%[dst]) \n"
- "sw $t6, 56(%[dst]) \n"
- "sw $t7, 60(%[dst]) \n"
-
- "addiu %[dst], %[dst], 64 \n" // adding 64 to dest
- "sltu $v1, $t9, %[dst] \n"
- "bne %[dst], $a3, $loop16w \n"
- " addiu %[src], %[src], 64 \n" // adding 64 to src
- "move %[count], $t8 \n"
-
- // Here we have src and dest word-aligned but less than 64-bytes to go
-
- "chk8w: \n"
- "pref 0, 0x0(%[src]) \n"
- "andi $t8, %[count], 0x1f \n" // 32-byte chunk?
-      // t8 is the remainder count past the 32-byte chunks
- "beq %[count], $t8, chk1w \n"
- // count=t8,no 32-byte chunk
- " nop \n"
-
- "lw $t0, 0(%[src]) \n"
- "lw $t1, 4(%[src]) \n"
- "lw $t2, 8(%[src]) \n"
- "lw $t3, 12(%[src]) \n"
- "lw $t4, 16(%[src]) \n"
- "lw $t5, 20(%[src]) \n"
- "lw $t6, 24(%[src]) \n"
- "lw $t7, 28(%[src]) \n"
- "addiu %[src], %[src], 32 \n"
-
- "sw $t0, 0(%[dst]) \n"
- "sw $t1, 4(%[dst]) \n"
- "sw $t2, 8(%[dst]) \n"
- "sw $t3, 12(%[dst]) \n"
- "sw $t4, 16(%[dst]) \n"
- "sw $t5, 20(%[dst]) \n"
- "sw $t6, 24(%[dst]) \n"
- "sw $t7, 28(%[dst]) \n"
- "addiu %[dst], %[dst], 32 \n"
-
- "chk1w: \n"
- "andi %[count], $t8, 0x3 \n"
-      // now count is the remainder past 1w chunks
- "beq %[count], $t8, $last8 \n"
- " subu $a3, $t8, %[count] \n"
- // a3 is count of bytes in 1w chunks
- "addu $a3, %[dst], $a3 \n"
- // now a3 is the dst address past the 1w chunks
- // copying in words (4-byte chunks)
- "$wordCopy_loop: \n"
- "lw $t3, 0(%[src]) \n"
-      // the first t3 may be equal to t0 ... optimize?
- "addiu %[src], %[src],4 \n"
- "addiu %[dst], %[dst],4 \n"
- "bne %[dst], $a3,$wordCopy_loop \n"
- " sw $t3, -4(%[dst]) \n"
-
- // For the last (<8) bytes
- "$last8: \n"
- "blez %[count], leave \n"
-      " addu     $a3, %[dst], %[count]           \n"  // a3 = last dst address
- "$last8loop: \n"
- "lb $v1, 0(%[src]) \n"
- "addiu %[src], %[src], 1 \n"
- "addiu %[dst], %[dst], 1 \n"
- "bne %[dst], $a3, $last8loop \n"
- " sb $v1, -1(%[dst]) \n"
-
- "leave: \n"
- " j $ra \n"
- " nop \n"
-
- //
- // UNALIGNED case
- //
-
- "unaligned: \n"
- // got here with a3="negu a1"
- "andi $a3, $a3, 0x3 \n" // a1 is word aligned?
- "beqz $a3, $ua_chk16w \n"
- " subu %[count], %[count], $a3 \n"
- // bytes left after initial a3 bytes
- "lwr $v1, 0(%[src]) \n"
- "lwl $v1, 3(%[src]) \n"
- "addu %[src], %[src], $a3 \n" // a3 may be 1, 2 or 3
- "swr $v1, 0(%[dst]) \n"
- "addu %[dst], %[dst], $a3 \n"
- // below the dst will be word aligned (NOTE1)
- "$ua_chk16w: \n"
- "andi $t8, %[count], 0x3f \n" // whole 64-B chunks?
- // t8 is the byte count after 64-byte chunks
- "beq %[count], $t8, ua_chk8w \n"
- // if a2==t8, no 64-byte chunks
- // There will be at most 1 32-byte chunk after it
-      "subu      $a3, %[count], $t8              \n"  // the remainder
- // Here a3 counts bytes in 16w chunks
- "addu $a3, %[dst], $a3 \n"
- // Now a3 is the final dst after 64-byte chunks
- "addu $t0, %[dst], %[count] \n" // t0 "past the end"
- "subu $t9, $t0, 160 \n"
- // t9 is the "last safe pref 30,128(a1)" address
- "pref 0, 0(%[src]) \n" // first line of src
- "pref 0, 32(%[src]) \n" // second line addr 32
- "pref 0, 64(%[src]) \n"
- "pref 30, 32(%[dst]) \n"
- // safe, as we have at least 64 bytes ahead
- // In case the a1 > t9 don't use "pref 30" at all
- "sltu $v1, $t9, %[dst] \n"
- "bgtz $v1, $ua_loop16w \n"
- // skip "pref 30,64(a1)" for too short arrays
- " nop \n"
- // otherwise, start with using pref30
- "pref 30, 64(%[dst]) \n"
- "$ua_loop16w: \n"
- "pref 0, 96(%[src]) \n"
- "lwr $t0, 0(%[src]) \n"
- "lwl $t0, 3(%[src]) \n"
- "lwr $t1, 4(%[src]) \n"
- "bgtz $v1, $ua_skip_pref30_96 \n"
- " lwl $t1, 7(%[src]) \n"
- "pref 30, 96(%[dst]) \n"
- // continue setting up the dest, addr 96
- "$ua_skip_pref30_96: \n"
- "lwr $t2, 8(%[src]) \n"
- "lwl $t2, 11(%[src]) \n"
- "lwr $t3, 12(%[src]) \n"
- "lwl $t3, 15(%[src]) \n"
- "lwr $t4, 16(%[src]) \n"
- "lwl $t4, 19(%[src]) \n"
- "lwr $t5, 20(%[src]) \n"
- "lwl $t5, 23(%[src]) \n"
- "lwr $t6, 24(%[src]) \n"
- "lwl $t6, 27(%[src]) \n"
- "lwr $t7, 28(%[src]) \n"
- "lwl $t7, 31(%[src]) \n"
- "pref 0, 128(%[src]) \n"
- // bring the next lines of src, addr 128
- "sw $t0, 0(%[dst]) \n"
- "sw $t1, 4(%[dst]) \n"
- "sw $t2, 8(%[dst]) \n"
- "sw $t3, 12(%[dst]) \n"
- "sw $t4, 16(%[dst]) \n"
- "sw $t5, 20(%[dst]) \n"
- "sw $t6, 24(%[dst]) \n"
- "sw $t7, 28(%[dst]) \n"
- "lwr $t0, 32(%[src]) \n"
- "lwl $t0, 35(%[src]) \n"
- "lwr $t1, 36(%[src]) \n"
- "bgtz $v1, ua_skip_pref30_128 \n"
- " lwl $t1, 39(%[src]) \n"
- "pref 30, 128(%[dst]) \n"
- // continue setting up the dest, addr 128
- "ua_skip_pref30_128: \n"
-
- "lwr $t2, 40(%[src]) \n"
- "lwl $t2, 43(%[src]) \n"
- "lwr $t3, 44(%[src]) \n"
- "lwl $t3, 47(%[src]) \n"
- "lwr $t4, 48(%[src]) \n"
- "lwl $t4, 51(%[src]) \n"
- "lwr $t5, 52(%[src]) \n"
- "lwl $t5, 55(%[src]) \n"
- "lwr $t6, 56(%[src]) \n"
- "lwl $t6, 59(%[src]) \n"
- "lwr $t7, 60(%[src]) \n"
- "lwl $t7, 63(%[src]) \n"
- "pref 0, 160(%[src]) \n"
- // bring the next lines of src, addr 160
- "sw $t0, 32(%[dst]) \n"
- "sw $t1, 36(%[dst]) \n"
- "sw $t2, 40(%[dst]) \n"
- "sw $t3, 44(%[dst]) \n"
- "sw $t4, 48(%[dst]) \n"
- "sw $t5, 52(%[dst]) \n"
- "sw $t6, 56(%[dst]) \n"
- "sw $t7, 60(%[dst]) \n"
-
- "addiu %[dst],%[dst],64 \n" // adding 64 to dest
- "sltu $v1,$t9,%[dst] \n"
- "bne %[dst],$a3,$ua_loop16w \n"
- " addiu %[src],%[src],64 \n" // adding 64 to src
- "move %[count],$t8 \n"
-
- // Here we have src and dest word-aligned but less than 64-bytes to go
-
- "ua_chk8w: \n"
- "pref 0, 0x0(%[src]) \n"
- "andi $t8, %[count], 0x1f \n" // 32-byte chunk?
-      // t8 is the remainder count
- "beq %[count], $t8, $ua_chk1w \n"
- // when count==t8, no 32-byte chunk
-
- "lwr $t0, 0(%[src]) \n"
- "lwl $t0, 3(%[src]) \n"
- "lwr $t1, 4(%[src]) \n"
- "lwl $t1, 7(%[src]) \n"
- "lwr $t2, 8(%[src]) \n"
- "lwl $t2, 11(%[src]) \n"
- "lwr $t3, 12(%[src]) \n"
- "lwl $t3, 15(%[src]) \n"
- "lwr $t4, 16(%[src]) \n"
- "lwl $t4, 19(%[src]) \n"
- "lwr $t5, 20(%[src]) \n"
- "lwl $t5, 23(%[src]) \n"
- "lwr $t6, 24(%[src]) \n"
- "lwl $t6, 27(%[src]) \n"
- "lwr $t7, 28(%[src]) \n"
- "lwl $t7, 31(%[src]) \n"
- "addiu %[src], %[src], 32 \n"
-
- "sw $t0, 0(%[dst]) \n"
- "sw $t1, 4(%[dst]) \n"
- "sw $t2, 8(%[dst]) \n"
- "sw $t3, 12(%[dst]) \n"
- "sw $t4, 16(%[dst]) \n"
- "sw $t5, 20(%[dst]) \n"
- "sw $t6, 24(%[dst]) \n"
- "sw $t7, 28(%[dst]) \n"
- "addiu %[dst], %[dst], 32 \n"
-
- "$ua_chk1w: \n"
- "andi %[count], $t8, 0x3 \n"
-      // now count is the remainder past 1w chunks
- "beq %[count], $t8, ua_smallCopy \n"
- "subu $a3, $t8, %[count] \n"
- // a3 is count of bytes in 1w chunks
- "addu $a3, %[dst], $a3 \n"
- // now a3 is the dst address past the 1w chunks
-
- // copying in words (4-byte chunks)
- "$ua_wordCopy_loop: \n"
- "lwr $v1, 0(%[src]) \n"
- "lwl $v1, 3(%[src]) \n"
- "addiu %[src], %[src], 4 \n"
- "addiu %[dst], %[dst], 4 \n"
- // note: dst=a1 is word aligned here, see NOTE1
- "bne %[dst], $a3, $ua_wordCopy_loop \n"
- " sw $v1,-4(%[dst]) \n"
-
- // Now less than 4 bytes (value in count) left to copy
- "ua_smallCopy: \n"
- "beqz %[count], leave \n"
- " addu $a3, %[dst], %[count] \n" // a3 = last dst address
- "$ua_smallCopy_loop: \n"
- "lb $v1, 0(%[src]) \n"
- "addiu %[src], %[src], 1 \n"
- "addiu %[dst], %[dst], 1 \n"
- "bne %[dst],$a3,$ua_smallCopy_loop \n"
- " sb $v1, -1(%[dst]) \n"
-
- "j $ra \n"
- " nop \n"
- ".set at \n"
- ".set reorder \n"
- : [dst] "+r"(dst), [src] "+r"(src)
- : [count] "r"(count)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "a3", "v1",
- "at");
-}
-#endif // HAS_COPYROW_MIPS
-
-// DSPR2 functions
-#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) && \
- (__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32) && \
- (__mips_isa_rev < 6)
-
-void SplitUVRow_DSPR2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
- int width) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-      "srl       $t4, %[width], 4                \n"  // multiples of 16
- "blez $t4, 2f \n"
- " andi %[width], %[width], 0xf \n" // residual
-
- "1: \n"
- "addiu $t4, $t4, -1 \n"
- "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
- "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2
- "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4
- "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6
- "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8
- "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 |
- // U10
- "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 |
- // U12
- "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 |
- // U14
- "addiu %[src_uv], %[src_uv], 32 \n"
- "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
- "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
- "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
- "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
- "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
- "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
- "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 |
- // V12
- "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 |
- // U12
- "sw $t9, 0(%[dst_v]) \n"
- "sw $t0, 0(%[dst_u]) \n"
- "sw $t1, 4(%[dst_v]) \n"
- "sw $t2, 4(%[dst_u]) \n"
- "sw $t3, 8(%[dst_v]) \n"
- "sw $t5, 8(%[dst_u]) \n"
- "sw $t6, 12(%[dst_v]) \n"
- "sw $t7, 12(%[dst_u]) \n"
- "addiu %[dst_v], %[dst_v], 16 \n"
- "bgtz $t4, 1b \n"
- " addiu %[dst_u], %[dst_u], 16 \n"
-
- "beqz %[width], 3f \n"
- " nop \n"
-
- "2: \n"
- "lbu $t0, 0(%[src_uv]) \n"
- "lbu $t1, 1(%[src_uv]) \n"
- "addiu %[src_uv], %[src_uv], 2 \n"
- "addiu %[width], %[width], -1 \n"
- "sb $t0, 0(%[dst_u]) \n"
- "sb $t1, 0(%[dst_v]) \n"
- "addiu %[dst_u], %[dst_u], 1 \n"
- "bgtz %[width], 2b \n"
- " addiu %[dst_v], %[dst_v], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_uv] "+r"(src_uv), [width] "+r"(width), [dst_u] "+r"(dst_u),
- [dst_v] "+r"(dst_v)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
-}
-
-void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-
-      "srl       $t4, %[width], 4                \n"  // multiples of 16
- "andi $t5, %[width], 0xf \n"
- "blez $t4, 2f \n"
- " addu %[src], %[src], %[width] \n" // src += width
-
- "1: \n"
- "lw $t0, -16(%[src]) \n" // |3|2|1|0|
- "lw $t1, -12(%[src]) \n" // |7|6|5|4|
- "lw $t2, -8(%[src]) \n" // |11|10|9|8|
- "lw $t3, -4(%[src]) \n" // |15|14|13|12|
- "wsbh $t0, $t0 \n" // |2|3|0|1|
- "wsbh $t1, $t1 \n" // |6|7|4|5|
- "wsbh $t2, $t2 \n" // |10|11|8|9|
- "wsbh $t3, $t3 \n" // |14|15|12|13|
- "rotr $t0, $t0, 16 \n" // |0|1|2|3|
- "rotr $t1, $t1, 16 \n" // |4|5|6|7|
- "rotr $t2, $t2, 16 \n" // |8|9|10|11|
- "rotr $t3, $t3, 16 \n" // |12|13|14|15|
- "addiu %[src], %[src], -16 \n"
- "addiu $t4, $t4, -1 \n"
- "sw $t3, 0(%[dst]) \n" // |15|14|13|12|
- "sw $t2, 4(%[dst]) \n" // |11|10|9|8|
- "sw $t1, 8(%[dst]) \n" // |7|6|5|4|
- "sw $t0, 12(%[dst]) \n" // |3|2|1|0|
- "bgtz $t4, 1b \n"
- " addiu %[dst], %[dst], 16 \n"
- "beqz $t5, 3f \n"
- " nop \n"
-
- "2: \n"
- "lbu $t0, -1(%[src]) \n"
- "addiu $t5, $t5, -1 \n"
- "addiu %[src], %[src], -1 \n"
- "sb $t0, 0(%[dst]) \n"
- "bgez $t5, 2b \n"
- " addiu %[dst], %[dst], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src] "+r"(src), [dst] "+r"(dst)
- : [width] "r"(width)
- : "t0", "t1", "t2", "t3", "t4", "t5");
-}
-
-void MirrorUVRow_DSPR2(const uint8* src_uv,
- uint8* dst_u,
- uint8* dst_v,
- int width) {
- int x;
- int y;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-
- "addu $t4, %[width], %[width] \n"
- "srl %[x], %[width], 4 \n"
- "andi %[y], %[width], 0xf \n"
- "blez %[x], 2f \n"
- " addu %[src_uv], %[src_uv], $t4 \n"
-
- "1: \n"
- "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
- "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
- "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8|
- "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12|
- "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16|
- "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20|
- "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24|
- "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28|
-
- "rotr $t0, $t0, 16 \n" // |1|0|3|2|
- "rotr $t1, $t1, 16 \n" // |5|4|7|6|
- "rotr $t2, $t2, 16 \n" // |9|8|11|10|
- "rotr $t3, $t3, 16 \n" // |13|12|15|14|
- "rotr $t4, $t4, 16 \n" // |17|16|19|18|
- "rotr $t6, $t6, 16 \n" // |21|20|23|22|
- "rotr $t7, $t7, 16 \n" // |25|24|27|26|
- "rotr $t8, $t8, 16 \n" // |29|28|31|30|
- "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6|
- "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7|
- "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14|
- "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15|
- "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22|
- "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23|
- "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30|
- "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31|
- "addiu %[src_uv], %[src_uv], -32 \n"
- "addiu %[x], %[x], -1 \n"
- "swr $t4, 0(%[dst_u]) \n"
- "swl $t4, 3(%[dst_u]) \n" // |30|28|26|24|
- "swr $t6, 0(%[dst_v]) \n"
- "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25|
- "swr $t2, 4(%[dst_u]) \n"
- "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16|
- "swr $t3, 4(%[dst_v]) \n"
- "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17|
- "swr $t0, 8(%[dst_u]) \n"
- "swl $t0, 11(%[dst_u]) \n" // |14|12|10|8|
- "swr $t1, 8(%[dst_v]) \n"
- "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9|
- "swr $t9, 12(%[dst_u]) \n"
- "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0|
- "swr $t5, 12(%[dst_v]) \n"
- "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1|
- "addiu %[dst_v], %[dst_v], 16 \n"
- "bgtz %[x], 1b \n"
- " addiu %[dst_u], %[dst_u], 16 \n"
- "beqz %[y], 3f \n"
- " nop \n"
- "b 2f \n"
- " nop \n"
-
- "2: \n"
- "lbu $t0, -2(%[src_uv]) \n"
- "lbu $t1, -1(%[src_uv]) \n"
- "addiu %[src_uv], %[src_uv], -2 \n"
- "addiu %[y], %[y], -1 \n"
- "sb $t0, 0(%[dst_u]) \n"
- "sb $t1, 0(%[dst_v]) \n"
- "addiu %[dst_u], %[dst_u], 1 \n"
- "bgtz %[y], 2b \n"
- " addiu %[dst_v], %[dst_v], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_uv] "+r"(src_uv), [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v),
- [x] "=&r"(x), [y] "=&r"(y)
- : [width] "r"(width)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t7", "t8", "t9");
-}
-
-void I422ToARGBRow_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- int x;
- uint32 tmp_ub = yuvconstants->kUVToB[0];
- uint32 tmp_ug = yuvconstants->kUVToG[0];
- uint32 tmp_vg = yuvconstants->kUVToG[1];
- uint32 tmp_vr = yuvconstants->kUVToR[1];
- uint32 tmp_bb = yuvconstants->kUVBiasB[0];
- uint32 tmp_bg = yuvconstants->kUVBiasG[0];
- uint32 tmp_br = yuvconstants->kUVBiasR[0];
- uint32 yg = yuvconstants->kYToRgb[0];
- uint32 tmp_yg;
- uint32 tmp_mask = 0x7fff7fff;
- tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
- tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
- tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
- tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
- tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
- tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
- tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
- tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
- yg = yg * 0x0101;
-
- for (x = 0; x < width - 1; x += 2) {
- uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lbu %[tmp_t7], 0(%[src_y]) \n"
- "lbu %[tmp_t1], 1(%[src_y]) \n"
- "mul %[tmp_t7], %[tmp_t7], %[yg] \n"
- "mul %[tmp_t1], %[tmp_t1], %[yg] \n"
- "lbu %[tmp_t2], 0(%[src_u]) \n"
- "lbu %[tmp_t3], 0(%[src_v]) \n"
- "replv.ph %[tmp_t2], %[tmp_t2] \n"
- "replv.ph %[tmp_t3], %[tmp_t3] \n"
- "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
- "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
- "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
- "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
- "srl %[tmp_t7], %[tmp_t7], 16 \n"
- "ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
- "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
- "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
- "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
- "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
- "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
- "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
- "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
- "shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
- "shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
- "shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
- "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
- "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
- "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
- "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
- "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
- "precrq.ph.w %[tmp_t9], %[tmp_t8], %[tmp_t7] \n"
- "ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
- "precr.qb.ph %[tmp_t8], %[tmp_t9], %[tmp_t7] \n"
- "precrq.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
- "sw %[tmp_t8], 0(%[rgb_buf]) \n"
- "sw %[tmp_t7], 4(%[rgb_buf]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
- : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v),
- [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), [yg] "r"(yg),
- [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb),
- [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg),
- [rgb_buf] "r"(rgb_buf), [tmp_mask] "r"(tmp_mask));
- src_y += 2;
- src_u += 1;
- src_v += 1;
-    rgb_buf += 8;  // Advance 2 pixels.
- }
-}
-
-// Bilinear filter 8x2 -> 8x1
-void InterpolateRow_DSPR2(uint8* dst_ptr,
- const uint8* src_ptr,
- ptrdiff_t src_stride,
- int dst_width,
- int source_y_fraction) {
- int y0_fraction = 256 - source_y_fraction;
- const uint8* src_ptr1 = src_ptr + src_stride;
-
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-
- "replv.ph $t0, %[y0_fraction] \n"
- "replv.ph $t1, %[source_y_fraction] \n"
-
- "1: \n"
- "lw $t2, 0(%[src_ptr]) \n"
- "lw $t3, 0(%[src_ptr1]) \n"
- "lw $t4, 4(%[src_ptr]) \n"
- "lw $t5, 4(%[src_ptr1]) \n"
- "muleu_s.ph.qbl $t6, $t2, $t0 \n"
- "muleu_s.ph.qbr $t7, $t2, $t0 \n"
- "muleu_s.ph.qbl $t8, $t3, $t1 \n"
- "muleu_s.ph.qbr $t9, $t3, $t1 \n"
- "muleu_s.ph.qbl $t2, $t4, $t0 \n"
- "muleu_s.ph.qbr $t3, $t4, $t0 \n"
- "muleu_s.ph.qbl $t4, $t5, $t1 \n"
- "muleu_s.ph.qbr $t5, $t5, $t1 \n"
- "addq.ph $t6, $t6, $t8 \n"
- "addq.ph $t7, $t7, $t9 \n"
- "addq.ph $t2, $t2, $t4 \n"
- "addq.ph $t3, $t3, $t5 \n"
- "shra_r.ph $t6, $t6, 8 \n"
- "shra_r.ph $t7, $t7, 8 \n"
- "shra_r.ph $t2, $t2, 8 \n"
- "shra_r.ph $t3, $t3, 8 \n"
- "precr.qb.ph $t6, $t6, $t7 \n"
- "precr.qb.ph $t2, $t2, $t3 \n"
- "addiu %[src_ptr], %[src_ptr], 8 \n"
- "addiu %[src_ptr1], %[src_ptr1], 8 \n"
- "addiu %[dst_width], %[dst_width], -8 \n"
- "sw $t6, 0(%[dst_ptr]) \n"
- "sw $t2, 4(%[dst_ptr]) \n"
- "bgtz %[dst_width], 1b \n"
- " addiu %[dst_ptr], %[dst_ptr], 8 \n"
-
- ".set pop \n"
- : [dst_ptr] "+r"(dst_ptr), [src_ptr1] "+r"(src_ptr1),
- [src_ptr] "+r"(src_ptr), [dst_width] "+r"(dst_width)
- : [source_y_fraction] "r"(source_y_fraction),
- [y0_fraction] "r"(y0_fraction), [src_stride] "r"(src_stride)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
-}
-#include <stdio.h>
-void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width) {
- int x;
- uint32 tmp_mask = 0xff;
- uint32 tmp_t1;
- for (x = 0; x < (width - 1); ++x) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "ulw %[tmp_t1], 0(%[src_rgb24]) \n"
- "addiu %[dst_argb], %[dst_argb], 4 \n"
- "addiu %[src_rgb24], %[src_rgb24], 3 \n"
- "ins %[tmp_t1], %[tmp_mask], 24, 8 \n"
- "sw %[tmp_t1], -4(%[dst_argb]) \n"
- ".set pop \n"
- : [src_rgb24] "+r"(src_rgb24), [dst_argb] "+r"(dst_argb),
- [tmp_t1] "=&r"(tmp_t1)
- : [tmp_mask] "r"(tmp_mask)
- : "memory");
- }
- uint8 b = src_rgb24[0];
- uint8 g = src_rgb24[1];
- uint8 r = src_rgb24[2];
- dst_argb[0] = b;
- dst_argb[1] = g;
- dst_argb[2] = r;
- dst_argb[3] = 255u;
-}
-
-void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width) {
- int x;
- uint32 tmp_mask = 0xff;
- uint32 tmp_t1, tmp_t2;
- for (x = 0; x < (width - 1); ++x) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "ulw %[tmp_t1], 0(%[src_raw]) \n"
- "addiu %[dst_argb], %[dst_argb], 4 \n"
- "addiu %[src_raw], %[src_raw], 3 \n"
- "srl %[tmp_t2], %[tmp_t1], 16 \n"
- "ins %[tmp_t1], %[tmp_mask], 24, 8 \n"
- "ins %[tmp_t1], %[tmp_t1], 16, 8 \n"
- "ins %[tmp_t1], %[tmp_t2], 0, 8 \n"
- "sw %[tmp_t1], -4(%[dst_argb]) \n"
- ".set pop \n"
- : [src_raw] "+r"(src_raw), [dst_argb] "+r"(dst_argb),
- [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2)
- : [tmp_mask] "r"(tmp_mask)
- : "memory");
- }
- uint8 r = src_raw[0];
- uint8 g = src_raw[1];
- uint8 b = src_raw[2];
- dst_argb[0] = b;
- dst_argb[1] = g;
- dst_argb[2] = r;
- dst_argb[3] = 255u;
-}
-
-void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565,
- uint8* dst_argb,
- int width) {
- int x;
- uint32 tmp_mask = 0xff;
- uint32 tmp_t1, tmp_t2, tmp_t3;
- for (x = 0; x < width; ++x) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lhu %[tmp_t1], 0(%[src_rgb565]) \n"
- "addiu %[dst_argb], %[dst_argb], 4 \n"
- "addiu %[src_rgb565], %[src_rgb565], 2 \n"
- "sll %[tmp_t2], %[tmp_t1], 8 \n"
- "ins %[tmp_t2], %[tmp_mask], 24,8 \n"
- "ins %[tmp_t2], %[tmp_t1], 3, 16 \n"
- "ins %[tmp_t2], %[tmp_t1], 5, 11 \n"
- "srl %[tmp_t3], %[tmp_t1], 9 \n"
- "ins %[tmp_t2], %[tmp_t3], 8, 2 \n"
- "ins %[tmp_t2], %[tmp_t1], 3, 5 \n"
- "srl %[tmp_t3], %[tmp_t1], 2 \n"
- "ins %[tmp_t2], %[tmp_t3], 0, 3 \n"
- "sw %[tmp_t2], -4(%[dst_argb]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [src_rgb565] "+r"(src_rgb565),
- [dst_argb] "+r"(dst_argb)
- : [tmp_mask] "r"(tmp_mask));
- }
-}
-
-void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555,
- uint8* dst_argb,
- int width) {
- int x;
- uint32 tmp_t1, tmp_t2, tmp_t3;
- for (x = 0; x < width; ++x) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lh %[tmp_t1], 0(%[src_argb1555]) \n"
- "addiu %[dst_argb], %[dst_argb], 4 \n"
- "addiu %[src_argb1555], %[src_argb1555], 2 \n"
- "sll %[tmp_t2], %[tmp_t1], 9 \n"
- "ins %[tmp_t2], %[tmp_t1], 4, 15 \n"
- "ins %[tmp_t2], %[tmp_t1], 6, 10 \n"
- "srl %[tmp_t3], %[tmp_t1], 7 \n"
- "ins %[tmp_t2], %[tmp_t3], 8, 3 \n"
- "ins %[tmp_t2], %[tmp_t1], 3, 5 \n"
- "srl %[tmp_t3], %[tmp_t1], 2 \n"
- "ins %[tmp_t2], %[tmp_t3], 0, 3 \n"
- "sw %[tmp_t2], -4(%[dst_argb]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [src_argb1555] "+r"(src_argb1555),
- [dst_argb] "+r"(dst_argb)
- :);
- }
-}
-
-void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444,
- uint8* dst_argb,
- int width) {
- int x;
- uint32 tmp_t1;
- for (x = 0; x < width; ++x) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lh %[tmp_t1], 0(%[src_argb4444]) \n"
- "addiu %[dst_argb], %[dst_argb], 4 \n"
- "addiu %[src_argb4444], %[src_argb4444], 2 \n"
- "ins %[tmp_t1], %[tmp_t1], 16, 16 \n"
- "ins %[tmp_t1], %[tmp_t1], 12, 16 \n"
- "ins %[tmp_t1], %[tmp_t1], 8, 12 \n"
- "ins %[tmp_t1], %[tmp_t1], 4, 8 \n"
- "sw %[tmp_t1], -4(%[dst_argb]) \n"
- ".set pop \n"
- : [src_argb4444] "+r"(src_argb4444), [dst_argb] "+r"(dst_argb),
- [tmp_t1] "=&r"(tmp_t1));
- }
-}
-
-void I444ToARGBRow_DSPR2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- int x;
- uint32 tmp_ub = yuvconstants->kUVToB[0];
- uint32 tmp_ug = yuvconstants->kUVToG[0];
- uint32 tmp_vg = yuvconstants->kUVToG[1];
- uint32 tmp_vr = yuvconstants->kUVToR[1];
- uint32 tmp_bb = yuvconstants->kUVBiasB[0];
- uint32 tmp_bg = yuvconstants->kUVBiasG[0];
- uint32 tmp_br = yuvconstants->kUVBiasR[0];
- uint32 yg = yuvconstants->kYToRgb[0];
- uint32 tmp_mask = 0x7fff7fff;
- uint32 tmp_yg;
-
- tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
- tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
- tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
- tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
- tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
- tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
- tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
- tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
- yg = yg * 0x0101;
-
- for (x = 0; x < width - 1; x += 2) {
- uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lbu %[tmp_t7], 0(%[y_buf]) \n"
- "lbu %[tmp_t1], 1(%[y_buf]) \n"
- "mul %[tmp_t7], %[tmp_t7], %[yg] \n"
- "mul %[tmp_t1], %[tmp_t1], %[yg] \n"
- "lh %[tmp_t2], 0(%[u_buf]) \n"
- "lh %[tmp_t3], 0(%[v_buf]) \n"
- "preceu.ph.qbr %[tmp_t2], %[tmp_t2] \n"
- "preceu.ph.qbr %[tmp_t3], %[tmp_t3] \n"
- "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
- "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
- "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
- "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
- "srl %[tmp_t7], %[tmp_t7], 16 \n"
- "ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
- "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
- "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
- "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
- "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
- "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
- "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
- "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
- "shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
- "shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
- "shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
- "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
- "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
- "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
- "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
- "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
- "precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n"
- "ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
- "precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n"
- "precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n"
- "sw %[tmp_t8], 0(%[rgb_buf]) \n"
- "sw %[tmp_t7], 4(%[rgb_buf]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
- : [y_buf] "r"(y_buf), [yg] "r"(yg), [u_buf] "r"(u_buf),
- [v_buf] "r"(v_buf), [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug),
- [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb),
- [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg),
- [rgb_buf] "r"(rgb_buf), [tmp_mask] "r"(tmp_mask));
- y_buf += 2;
- u_buf += 2;
- v_buf += 2;
-    rgb_buf += 8;  // Advance 2 pixels.
- }
-}
-
-void I422ToARGB4444Row_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- const struct YuvConstants* yuvconstants,
- int width) {
- int x;
- uint32 tmp_ub = yuvconstants->kUVToB[0];
- uint32 tmp_ug = yuvconstants->kUVToG[0];
- uint32 tmp_vg = yuvconstants->kUVToG[1];
- uint32 tmp_vr = yuvconstants->kUVToR[1];
- uint32 tmp_bb = yuvconstants->kUVBiasB[0];
- uint32 tmp_bg = yuvconstants->kUVBiasG[0];
- uint32 tmp_br = yuvconstants->kUVBiasR[0];
- uint32 yg = yuvconstants->kYToRgb[0];
- uint32 tmp_yg;
- uint32 tmp_mask = 0x7fff7fff;
- tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
- tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
- tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
- tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
- tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
- tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
- tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
- tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
- yg = yg * 0x0101;
-
- for (x = 0; x < width - 1; x += 2) {
- uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lbu %[tmp_t7], 0(%[src_y]) \n"
- "lbu %[tmp_t1], 1(%[src_y]) \n"
- "mul %[tmp_t7], %[tmp_t7], %[yg] \n"
- "mul %[tmp_t1], %[tmp_t1], %[yg] \n"
- "lbu %[tmp_t2], 0(%[src_u]) \n"
- "lbu %[tmp_t3], 0(%[src_v]) \n"
- "replv.ph %[tmp_t2], %[tmp_t2] \n"
- "replv.ph %[tmp_t3], %[tmp_t3] \n"
- "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
- "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
- "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
- "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
- "srl %[tmp_t7], %[tmp_t7], 16 \n"
- "ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
- "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
- "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
- "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
- "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
- "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
- "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
- "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
- "shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
- "shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
- "shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
- "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
- "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
- "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
- "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
- "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
- "precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n"
- "ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
- "precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n"
- "precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n"
- "shrl.qb %[tmp_t1], %[tmp_t8], 4 \n"
- "shrl.qb %[tmp_t2], %[tmp_t7], 4 \n"
- "shrl.ph %[tmp_t8], %[tmp_t1], 4 \n"
- "shrl.ph %[tmp_t7], %[tmp_t2], 4 \n"
- "or %[tmp_t8], %[tmp_t8], %[tmp_t1] \n"
- "or %[tmp_t7], %[tmp_t7], %[tmp_t2] \n"
- "precr.qb.ph %[tmp_t8], %[tmp_t7], %[tmp_t8] \n"
- "sw %[tmp_t8], 0(%[dst_argb4444]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
- : [dst_argb4444] "r"(dst_argb4444), [yg] "r"(yg), [src_u] "r"(src_u),
- [src_v] "r"(src_v), [src_y] "r"(src_y), [tmp_ub] "r"(tmp_ub),
- [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr),
- [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br),
- [tmp_yg] "r"(tmp_yg), [tmp_mask] "r"(tmp_mask));
- src_y += 2;
- src_u += 1;
- src_v += 1;
- dst_argb4444 += 4; // Advance 2 pixels.
- }
-}
-
-void I422ToARGB1555Row_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- const struct YuvConstants* yuvconstants,
- int width) {
- int x;
- uint32 tmp_ub = yuvconstants->kUVToB[0];
- uint32 tmp_ug = yuvconstants->kUVToG[0];
- uint32 tmp_vg = yuvconstants->kUVToG[1];
- uint32 tmp_vr = yuvconstants->kUVToR[1];
- uint32 tmp_bb = yuvconstants->kUVBiasB[0];
- uint32 tmp_bg = yuvconstants->kUVBiasG[0];
- uint32 tmp_br = yuvconstants->kUVBiasR[0];
- uint32 yg = yuvconstants->kYToRgb[0];
- uint32 tmp_yg;
- uint32 tmp_mask = 0x80008000;
- tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
- tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
- tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
- tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
- tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
- tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
- tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
- tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
- yg = yg * 0x0101;
-
- for (x = 0; x < width - 1; x += 2) {
- uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lbu %[tmp_t7], 0(%[src_y]) \n"
- "lbu %[tmp_t1], 1(%[src_y]) \n"
- "mul %[tmp_t7], %[tmp_t7], %[yg] \n"
- "mul %[tmp_t1], %[tmp_t1], %[yg] \n"
- "lbu %[tmp_t2], 0(%[src_u]) \n"
- "lbu %[tmp_t3], 0(%[src_v]) \n"
- "replv.ph %[tmp_t2], %[tmp_t2] \n"
- "replv.ph %[tmp_t3], %[tmp_t3] \n"
- "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
- "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
- "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
- "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
- "srl %[tmp_t7], %[tmp_t7], 16 \n"
- "ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
- "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
- "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
- "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
- "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
- "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
- "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
- "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
- "shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
- "shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
- "shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
- "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
- "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
- "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
- "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
- "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
- "precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n"
- "ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
- "precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n"
- "precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n"
- "ins %[tmp_t3], %[tmp_t8], 7, 24 \n"
- "ins %[tmp_t3], %[tmp_t8], 10, 16 \n"
- "ins %[tmp_t3], %[tmp_t8], 13, 8 \n"
- "ins %[tmp_t4], %[tmp_t7], 7, 24 \n"
- "ins %[tmp_t4], %[tmp_t7], 10, 16 \n"
- "ins %[tmp_t4], %[tmp_t7], 13, 8 \n"
- "precrq.ph.w %[tmp_t8], %[tmp_t4], %[tmp_t3] \n"
- "or %[tmp_t8], %[tmp_t8], %[tmp_mask]\n"
- "sw %[tmp_t8], 0(%[dst_argb1555]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
- : [dst_argb1555] "r"(dst_argb1555), [yg] "r"(yg), [src_u] "r"(src_u),
- [src_v] "r"(src_v), [src_y] "r"(src_y), [tmp_ub] "r"(tmp_ub),
- [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr),
- [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br),
- [tmp_yg] "r"(tmp_yg), [tmp_mask] "r"(tmp_mask));
- src_y += 2;
- src_u += 1;
- src_v += 1;
- dst_argb1555 += 4; // Advance 2 pixels.
- }
-}
-
-void NV12ToARGBRow_DSPR2(const uint8* src_y,
- const uint8* src_uv,
- uint8* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- int x;
- uint32 tmp_ub = yuvconstants->kUVToB[0];
- uint32 tmp_ug = yuvconstants->kUVToG[0];
- uint32 tmp_vg = yuvconstants->kUVToG[1];
- uint32 tmp_vr = yuvconstants->kUVToR[1];
- uint32 tmp_bb = yuvconstants->kUVBiasB[0];
- uint32 tmp_bg = yuvconstants->kUVBiasG[0];
- uint32 tmp_br = yuvconstants->kUVBiasR[0];
- uint32 yg = yuvconstants->kYToRgb[0];
- uint32 tmp_mask = 0x7fff7fff;
- uint32 tmp_yg;
- tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
- tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
- tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
- tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
- tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
- tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
- tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
- tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
- yg = yg * 0x0101;
-
- for (x = 0; x < width - 1; x += 2) {
- uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lbu %[tmp_t7], 0(%[src_y]) \n"
- "lbu %[tmp_t1], 1(%[src_y]) \n"
- "mul %[tmp_t7], %[tmp_t7], %[yg] \n"
- "mul %[tmp_t1], %[tmp_t1], %[yg] \n"
- "lbu %[tmp_t2], 0(%[src_uv]) \n"
- "lbu %[tmp_t3], 1(%[src_uv]) \n"
- "replv.ph %[tmp_t2], %[tmp_t2] \n"
- "replv.ph %[tmp_t3], %[tmp_t3] \n"
- "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
- "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
- "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
- "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
- "srl %[tmp_t7], %[tmp_t7], 16 \n"
- "ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
- "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
- "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
- "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
- "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
- "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
- "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
- "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
- "shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
- "shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
- "shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
- "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
- "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
- "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
- "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
- "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
- "precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n"
- "ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
- "precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n"
- "precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n"
- "sw %[tmp_t8], 0(%[rgb_buf]) \n"
- "sw %[tmp_t7], 4(%[rgb_buf]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
- : [src_y] "r"(src_y), [src_uv] "r"(src_uv), [yg] "r"(yg),
- [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg),
- [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg),
- [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg), [rgb_buf] "r"(rgb_buf),
- [tmp_mask] "r"(tmp_mask));
-
- src_y += 2;
- src_uv += 2;
- rgb_buf += 8; // Advance 2 pixels.
- }
-}
-
-void BGRAToUVRow_DSPR2(const uint8* src_rgb0,
- int src_stride_rgb,
- uint8* dst_u,
- uint8* dst_v,
- int width) {
- const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;
- int x;
- int const1 = 0xffda0000;
- int const2 = 0x0070ffb6;
- int const3 = 0x00700000;
- int const4 = 0xffeeffa2;
- int const5 = 0x100;
- for (x = 0; x < width - 1; x += 2) {
- int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- int tmp_t6, tmp_t7, tmp_t8;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lw %[tmp_t1], 0(%[src_rgb0]) \n"
- "lw %[tmp_t2], 4(%[src_rgb0]) \n"
- "lw %[tmp_t3], 0(%[src_rgb1]) \n"
- "lw %[tmp_t4], 4(%[src_rgb1]) \n"
- "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
- "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
- "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
- "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
- "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
- "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
- "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
- "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
- "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n"
- "addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n"
- "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n"
- "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n"
- "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n"
- "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n"
- "shrl.ph %[tmp_t5], %[tmp_t5], 2 \n"
- "shrl.ph %[tmp_t1], %[tmp_t1], 2 \n"
- "mult $ac0, %[const5], %[const5] \n"
- "mult $ac1, %[const5], %[const5] \n"
- "dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n"
- "dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n"
- "dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n"
- "dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n"
- "extr_r.w %[tmp_t7], $ac0, 9 \n"
- "extr_r.w %[tmp_t8], $ac1, 9 \n"
- "addiu %[dst_u], %[dst_u], 1 \n"
- "addiu %[dst_v], %[dst_v], 1 \n"
- "addiu %[src_rgb0], %[src_rgb0], 8 \n"
- "addiu %[src_rgb1], %[src_rgb1], 8 \n"
- "sb %[tmp_t7], -1(%[dst_u]) \n"
- "sb %[tmp_t8], -1(%[dst_v]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
- [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1),
- [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v)
- : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3),
- [const4] "r"(const4), [const5] "r"(const5)
- : "hi", "lo", "$ac1lo", "$ac1hi");
- }
-}
-
-void BGRAToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) {
- int x;
- int const1 = 0x00420000;
- int const2 = 0x00190081;
- int const5 = 0x40;
- for (x = 0; x < width; x += 4) {
- int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- int tmp_t6, tmp_t7, tmp_t8;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lw %[tmp_t1], 0(%[src_argb0]) \n"
- "lw %[tmp_t2], 4(%[src_argb0]) \n"
- "lw %[tmp_t3], 8(%[src_argb0]) \n"
- "lw %[tmp_t4], 12(%[src_argb0]) \n"
- "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
- "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
- "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
- "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
- "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
- "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
- "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
- "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
- "mult $ac0, %[const5], %[const5] \n"
- "mult $ac1, %[const5], %[const5] \n"
- "mult $ac2, %[const5], %[const5] \n"
- "mult $ac3, %[const5], %[const5] \n"
- "dpa.w.ph $ac0, %[tmp_t5], %[const1] \n"
- "dpa.w.ph $ac1, %[tmp_t6], %[const1] \n"
- "dpa.w.ph $ac2, %[tmp_t7], %[const1] \n"
- "dpa.w.ph $ac3, %[tmp_t8], %[const1] \n"
- "dpa.w.ph $ac0, %[tmp_t1], %[const2] \n"
- "dpa.w.ph $ac1, %[tmp_t2], %[const2] \n"
- "dpa.w.ph $ac2, %[tmp_t3], %[const2] \n"
- "dpa.w.ph $ac3, %[tmp_t4], %[const2] \n"
- "extr_r.w %[tmp_t1], $ac0, 8 \n"
- "extr_r.w %[tmp_t2], $ac1, 8 \n"
- "extr_r.w %[tmp_t3], $ac2, 8 \n"
- "extr_r.w %[tmp_t4], $ac3, 8 \n"
- "addiu %[src_argb0],%[src_argb0], 16 \n"
- "addiu %[dst_y], %[dst_y], 4 \n"
- "sb %[tmp_t1], -4(%[dst_y]) \n"
- "sb %[tmp_t2], -3(%[dst_y]) \n"
- "sb %[tmp_t3], -2(%[dst_y]) \n"
- "sb %[tmp_t4], -1(%[dst_y]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
- [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y)
- : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5)
- : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
- "$ac3hi");
- }
-}
-
-void ABGRToUVRow_DSPR2(const uint8* src_rgb0,
- int src_stride_rgb,
- uint8* dst_u,
- uint8* dst_v,
- int width) {
- const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;
- int x;
- int const1 = 0xffb6ffda;
- int const2 = 0x00000070;
- int const3 = 0xffa20070;
- int const4 = 0x0000ffee;
- int const5 = 0x100;
-
- for (x = 0; x < width - 1; x += 2) {
- int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- int tmp_t6, tmp_t7, tmp_t8;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lw %[tmp_t1], 0(%[src_rgb0]) \n"
- "lw %[tmp_t2], 4(%[src_rgb0]) \n"
- "lw %[tmp_t3], 0(%[src_rgb1]) \n"
- "lw %[tmp_t4], 4(%[src_rgb1]) \n"
- "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
- "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
- "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
- "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
- "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
- "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
- "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
- "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
- "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n"
- "addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n"
- "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n"
- "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n"
- "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n"
- "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n"
- "shrl.ph %[tmp_t5], %[tmp_t5], 2 \n"
- "shrl.ph %[tmp_t1], %[tmp_t1], 2 \n"
- "mult $ac0, %[const5], %[const5] \n"
- "mult $ac1, %[const5], %[const5] \n"
- "dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n"
- "dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n"
- "dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n"
- "dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n"
- "extr_r.w %[tmp_t7], $ac0, 9 \n"
- "extr_r.w %[tmp_t8], $ac1, 9 \n"
- "addiu %[dst_u], %[dst_u], 1 \n"
- "addiu %[dst_v], %[dst_v], 1 \n"
- "addiu %[src_rgb0], %[src_rgb0], 8 \n"
- "addiu %[src_rgb1], %[src_rgb1], 8 \n"
- "sb %[tmp_t7], -1(%[dst_u]) \n"
- "sb %[tmp_t8], -1(%[dst_v]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
- [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1),
- [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v)
- : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3),
- [const4] "r"(const4), [const5] "r"(const5)
- : "hi", "lo", "$ac1lo", "$ac1hi");
- }
-}
-
-void ARGBToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) {
- int x;
- int const1 = 0x00810019;
- int const2 = 0x00000042;
- int const5 = 0x40;
- for (x = 0; x < width; x += 4) {
- int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- int tmp_t6, tmp_t7, tmp_t8;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lw %[tmp_t1], 0(%[src_argb0]) \n"
- "lw %[tmp_t2], 4(%[src_argb0]) \n"
- "lw %[tmp_t3], 8(%[src_argb0]) \n"
- "lw %[tmp_t4], 12(%[src_argb0]) \n"
- "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
- "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
- "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
- "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
- "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
- "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
- "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
- "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
- "mult $ac0, %[const5], %[const5] \n"
- "mult $ac1, %[const5], %[const5] \n"
- "mult $ac2, %[const5], %[const5] \n"
- "mult $ac3, %[const5], %[const5] \n"
- "dpa.w.ph $ac0, %[tmp_t5], %[const1] \n"
- "dpa.w.ph $ac1, %[tmp_t6], %[const1] \n"
- "dpa.w.ph $ac2, %[tmp_t7], %[const1] \n"
- "dpa.w.ph $ac3, %[tmp_t8], %[const1] \n"
- "dpa.w.ph $ac0, %[tmp_t1], %[const2] \n"
- "dpa.w.ph $ac1, %[tmp_t2], %[const2] \n"
- "dpa.w.ph $ac2, %[tmp_t3], %[const2] \n"
- "dpa.w.ph $ac3, %[tmp_t4], %[const2] \n"
- "extr_r.w %[tmp_t1], $ac0, 8 \n"
- "extr_r.w %[tmp_t2], $ac1, 8 \n"
- "extr_r.w %[tmp_t3], $ac2, 8 \n"
- "extr_r.w %[tmp_t4], $ac3, 8 \n"
- "addiu %[dst_y], %[dst_y], 4 \n"
- "addiu %[src_argb0],%[src_argb0], 16 \n"
- "sb %[tmp_t1], -4(%[dst_y]) \n"
- "sb %[tmp_t2], -3(%[dst_y]) \n"
- "sb %[tmp_t3], -2(%[dst_y]) \n"
- "sb %[tmp_t4], -1(%[dst_y]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
- [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y)
- : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5)
- : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
- "$ac3hi");
- }
-}
-
-void ABGRToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) {
- int x;
- int const1 = 0x00810042;
- int const2 = 0x00000019;
- int const5 = 0x40;
- for (x = 0; x < width; x += 4) {
- int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- int tmp_t6, tmp_t7, tmp_t8;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lw %[tmp_t1], 0(%[src_argb0]) \n"
- "lw %[tmp_t2], 4(%[src_argb0]) \n"
- "lw %[tmp_t3], 8(%[src_argb0]) \n"
- "lw %[tmp_t4], 12(%[src_argb0]) \n"
- "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
- "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
- "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
- "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
- "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
- "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
- "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
- "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
- "mult $ac0, %[const5], %[const5] \n"
- "mult $ac1, %[const5], %[const5] \n"
- "mult $ac2, %[const5], %[const5] \n"
- "mult $ac3, %[const5], %[const5] \n"
- "dpa.w.ph $ac0, %[tmp_t5], %[const1] \n"
- "dpa.w.ph $ac1, %[tmp_t6], %[const1] \n"
- "dpa.w.ph $ac2, %[tmp_t7], %[const1] \n"
- "dpa.w.ph $ac3, %[tmp_t8], %[const1] \n"
- "dpa.w.ph $ac0, %[tmp_t1], %[const2] \n"
- "dpa.w.ph $ac1, %[tmp_t2], %[const2] \n"
- "dpa.w.ph $ac2, %[tmp_t3], %[const2] \n"
- "dpa.w.ph $ac3, %[tmp_t4], %[const2] \n"
- "extr_r.w %[tmp_t1], $ac0, 8 \n"
- "extr_r.w %[tmp_t2], $ac1, 8 \n"
- "extr_r.w %[tmp_t3], $ac2, 8 \n"
- "extr_r.w %[tmp_t4], $ac3, 8 \n"
- "addiu %[src_argb0],%[src_argb0], 16 \n"
- "addiu %[dst_y], %[dst_y], 4 \n"
- "sb %[tmp_t1], -4(%[dst_y]) \n"
- "sb %[tmp_t2], -3(%[dst_y]) \n"
- "sb %[tmp_t3], -2(%[dst_y]) \n"
- "sb %[tmp_t4], -1(%[dst_y]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
- [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y)
- : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5)
- : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
- "$ac3hi");
- }
-}
-
-void RGBAToUVRow_DSPR2(const uint8* src_rgb0,
- int src_stride_rgb,
- uint8* dst_u,
- uint8* dst_v,
- int width) {
- const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;
- int x;
- int const1 = 0xffb60070;
- int const2 = 0x0000ffda;
- int const3 = 0xffa2ffee;
- int const4 = 0x00000070;
- int const5 = 0x100;
-
- for (x = 0; x < width - 1; x += 2) {
- int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- int tmp_t6, tmp_t7, tmp_t8;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "ulw %[tmp_t1], 0+1(%[src_rgb0]) \n"
- "ulw %[tmp_t2], 4+1(%[src_rgb0]) \n"
- "ulw %[tmp_t3], 0+1(%[src_rgb1]) \n"
- "ulw %[tmp_t4], 4+1(%[src_rgb1]) \n"
- "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
- "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
- "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
- "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
- "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
- "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
- "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
- "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
- "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n"
- "addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n"
- "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n"
- "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n"
- "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n"
- "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n"
- "shrl.ph %[tmp_t5], %[tmp_t5], 2 \n"
- "shrl.ph %[tmp_t1], %[tmp_t1], 2 \n"
- "mult $ac0, %[const5], %[const5] \n"
- "mult $ac1, %[const5], %[const5] \n"
- "dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n"
- "dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n"
- "dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n"
- "dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n"
- "extr_r.w %[tmp_t7], $ac0, 9 \n"
- "extr_r.w %[tmp_t8], $ac1, 9 \n"
- "addiu %[src_rgb0], %[src_rgb0], 8 \n"
- "addiu %[src_rgb1], %[src_rgb1], 8 \n"
- "addiu %[dst_u], %[dst_u], 1 \n"
- "addiu %[dst_v], %[dst_v], 1 \n"
- "sb %[tmp_t7], -1(%[dst_u]) \n"
- "sb %[tmp_t8], -1(%[dst_v]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
- [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1),
- [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v)
- : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3),
- [const4] "r"(const4), [const5] "r"(const5)
- : "hi", "lo", "$ac1lo", "$ac1hi");
- }
-}
-
-void RGBAToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) {
- int x;
- int const1 = 0x00420081;
- int const2 = 0x00190000;
- int const5 = 0x40;
- for (x = 0; x < width; x += 4) {
- int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- int tmp_t6, tmp_t7, tmp_t8;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lw %[tmp_t1], 0(%[src_argb0]) \n"
- "lw %[tmp_t2], 4(%[src_argb0]) \n"
- "lw %[tmp_t3], 8(%[src_argb0]) \n"
- "lw %[tmp_t4], 12(%[src_argb0]) \n"
- "preceu.ph.qbl %[tmp_t5], %[tmp_t1] \n"
- "preceu.ph.qbr %[tmp_t1], %[tmp_t1] \n"
- "preceu.ph.qbl %[tmp_t6], %[tmp_t2] \n"
- "preceu.ph.qbr %[tmp_t2], %[tmp_t2] \n"
- "preceu.ph.qbl %[tmp_t7], %[tmp_t3] \n"
- "preceu.ph.qbr %[tmp_t3], %[tmp_t3] \n"
- "preceu.ph.qbl %[tmp_t8], %[tmp_t4] \n"
- "preceu.ph.qbr %[tmp_t4], %[tmp_t4] \n"
- "mult $ac0, %[const5], %[const5] \n"
- "mult $ac1, %[const5], %[const5] \n"
- "mult $ac2, %[const5], %[const5] \n"
- "mult $ac3, %[const5], %[const5] \n"
- "dpa.w.ph $ac0, %[tmp_t5], %[const1] \n"
- "dpa.w.ph $ac1, %[tmp_t6], %[const1] \n"
- "dpa.w.ph $ac2, %[tmp_t7], %[const1] \n"
- "dpa.w.ph $ac3, %[tmp_t8], %[const1] \n"
- "dpa.w.ph $ac0, %[tmp_t1], %[const2] \n"
- "dpa.w.ph $ac1, %[tmp_t2], %[const2] \n"
- "dpa.w.ph $ac2, %[tmp_t3], %[const2] \n"
- "dpa.w.ph $ac3, %[tmp_t4], %[const2] \n"
- "extr_r.w %[tmp_t1], $ac0, 8 \n"
- "extr_r.w %[tmp_t2], $ac1, 8 \n"
- "extr_r.w %[tmp_t3], $ac2, 8 \n"
- "extr_r.w %[tmp_t4], $ac3, 8 \n"
- "addiu %[dst_y], %[dst_y], 4 \n"
- "addiu %[src_argb0],%[src_argb0], 16 \n"
- "sb %[tmp_t1], -4(%[dst_y]) \n"
- "sb %[tmp_t2], -3(%[dst_y]) \n"
- "sb %[tmp_t3], -2(%[dst_y]) \n"
- "sb %[tmp_t4], -1(%[dst_y]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
- [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y)
- : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5)
- : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
- "$ac3hi");
- }
-}
-
-void ARGBToUVRow_DSPR2(const uint8* src_rgb0,
- int src_stride_rgb,
- uint8* dst_u,
- uint8* dst_v,
- int width) {
- const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;
- int x;
- int const1 = 0xffb60070;
- int const2 = 0x0000ffda;
- int const3 = 0xffa2ffee;
- int const4 = 0x00000070;
- int const5 = 0x100;
-
- for (x = 0; x < width - 1; x += 2) {
- int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
- int tmp_t6, tmp_t7, tmp_t8;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lw %[tmp_t1], 0(%[src_rgb0]) \n"
- "lw %[tmp_t2], 4(%[src_rgb0]) \n"
- "lw %[tmp_t3], 0(%[src_rgb1]) \n"
- "lw %[tmp_t4], 4(%[src_rgb1]) \n"
- "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
- "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
- "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
- "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
- "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
- "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
- "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
- "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
- "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n"
- "addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n"
- "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n"
- "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n"
- "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n"
- "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n"
- "shrl.ph %[tmp_t5], %[tmp_t5], 2 \n"
- "shrl.ph %[tmp_t1], %[tmp_t1], 2 \n"
- "mult $ac0, %[const5], %[const5] \n"
- "mult $ac1, %[const5], %[const5] \n"
- "dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n"
- "dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n"
- "dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n"
- "dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n"
- "extr_r.w %[tmp_t7], $ac0, 9 \n"
- "extr_r.w %[tmp_t8], $ac1, 9 \n"
- "addiu %[src_rgb0], %[src_rgb0], 8 \n"
- "addiu %[src_rgb1], %[src_rgb1], 8 \n"
- "addiu %[dst_u], %[dst_u], 1 \n"
- "addiu %[dst_v], %[dst_v], 1 \n"
- "sb %[tmp_t7], -1(%[dst_u]) \n"
- "sb %[tmp_t8], -1(%[dst_v]) \n"
- ".set pop \n"
- : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
- [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
- [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
- [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1),
- [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v)
- : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3),
- [const4] "r"(const4), [const5] "r"(const5)
- : "hi", "lo", "$ac1lo", "$ac1hi");
- }
-}
-
-#endif // __mips_dsp_rev >= 2
-
-#endif // defined(__mips__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
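
Note on the row kernels deleted above: the *ToYRow_DSPR2 functions all evaluate the same BT.601 studio-swing luma sum on the DSP accumulators; only the packing of the coefficients differs per pixel format. In ABGRToYRow_DSPR2, for instance, const1 = 0x00810042 = (129 << 16) | 66 multiplies the (G, R) halfword pair via dpa.w.ph, const2 = 25 multiplies B, and "mult $ac, 0x40, 0x40" seeds each accumulator with 16 << 8 before extr_r.w rounds and shifts by 8. A per-pixel sketch of the result (a sketch only; the portable path libyuv keeps is RGBToY in source/row_common.cc):

#include <stdint.h>

// Sketch only: what one luma byte of the deleted *ToYRow_DSPR2 kernels works
// out to, assuming BT.601 coefficients 66/129/25 and a +16 luma offset.
static inline uint8_t RGBToY_sketch(uint8_t r, uint8_t g, uint8_t b) {
  // 0x1080 = (16 << 8) + 128: the DSPR2 code split this into the 0x40*0x40
  // accumulator seed plus the rounding applied by "extr_r.w ..., 8".
  return (uint8_t)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}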
diff --git a/source/scale.cc b/source/scale.cc
index 9104acb9..6951d8fb 100644
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -103,13 +103,6 @@ static void ScalePlaneDown2(int src_width,
}
}
#endif
-#if defined(HAS_SCALEROWDOWN2_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) &&
- IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- ScaleRowDown2 = filtering ? ScaleRowDown2Box_DSPR2 : ScaleRowDown2_DSPR2;
- }
-#endif
#if defined(HAS_SCALEROWDOWN2_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleRowDown2 =
@@ -176,14 +169,6 @@ static void ScalePlaneDown2_16(int src_width,
: ScaleRowDown2Box_16_SSE2);
}
#endif
-#if defined(HAS_SCALEROWDOWN2_16_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) &&
- IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- ScaleRowDown2 =
- filtering ? ScaleRowDown2Box_16_DSPR2 : ScaleRowDown2_16_DSPR2;
- }
-#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@@ -247,13 +232,6 @@ static void ScalePlaneDown4(int src_width,
}
}
#endif
-#if defined(HAS_SCALEROWDOWN4_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_DSPR2 : ScaleRowDown4_DSPR2;
- }
-#endif
#if defined(HAS_SCALEROWDOWN4_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleRowDown4 =
@@ -306,14 +284,6 @@ static void ScalePlaneDown4_16(int src_width,
filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
}
#endif
-#if defined(HAS_SCALEROWDOWN4_16_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- ScaleRowDown4 =
- filtering ? ScaleRowDown4Box_16_DSPR2 : ScaleRowDown4_16_DSPR2;
- }
-#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@@ -411,19 +381,6 @@ static void ScalePlaneDown34(int src_width,
}
}
#endif
-#if defined(HAS_SCALEROWDOWN34_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_DSPR2;
- ScaleRowDown34_1 = ScaleRowDown34_DSPR2;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_DSPR2;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_DSPR2;
- }
- }
-#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
@@ -495,19 +452,6 @@ static void ScalePlaneDown34_16(int src_width,
}
}
#endif
-#if defined(HAS_SCALEROWDOWN34_16_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_16_DSPR2;
- ScaleRowDown34_1 = ScaleRowDown34_16_DSPR2;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_DSPR2;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_DSPR2;
- }
- }
-#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
@@ -612,19 +556,6 @@ static void ScalePlaneDown38(int src_width,
}
}
#endif
-#if defined(HAS_SCALEROWDOWN38_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_DSPR2;
- ScaleRowDown38_2 = ScaleRowDown38_DSPR2;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_DSPR2;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_DSPR2;
- }
- }
-#endif
#if defined(HAS_SCALEROWDOWN38_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
if (!filtering) {
@@ -716,19 +647,6 @@ static void ScalePlaneDown38_16(int src_width,
}
}
#endif
-#if defined(HAS_SCALEROWDOWN38_16_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_16_DSPR2;
- ScaleRowDown38_2 = ScaleRowDown38_16_DSPR2;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_DSPR2;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_DSPR2;
- }
- }
-#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
@@ -931,14 +849,6 @@ static void ScalePlaneBox(int src_width,
}
}
#endif
-#if defined(HAS_SCALEADDROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- ScaleAddRow = ScaleAddRow_Any_DSPR2;
- if (IS_ALIGNED(src_width, 16)) {
- ScaleAddRow = ScaleAddRow_DSPR2;
- }
- }
-#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
@@ -1070,14 +980,6 @@ void ScalePlaneBilinearDown(int src_width,
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- InterpolateRow = InterpolateRow_Any_DSPR2;
- if (IS_ALIGNED(src_width, 4)) {
- InterpolateRow = InterpolateRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
@@ -1193,14 +1095,6 @@ void ScalePlaneBilinearDown_16(int src_width,
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_16_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- InterpolateRow = InterpolateRow_Any_16_DSPR2;
- if (IS_ALIGNED(src_width, 4)) {
- InterpolateRow = InterpolateRow_16_DSPR2;
- }
- }
-#endif
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
@@ -1281,14 +1175,6 @@ void ScalePlaneBilinearUp(int src_width,
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- InterpolateRow = InterpolateRow_Any_DSPR2;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_DSPR2;
- }
- }
-#endif
if (filtering && src_width >= 32768) {
ScaleFilterCols = ScaleFilterCols64_C;
@@ -1432,14 +1318,6 @@ void ScalePlaneBilinearUp_16(int src_width,
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_16_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2)) {
- InterpolateRow = InterpolateRow_Any_16_DSPR2;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_16_DSPR2;
- }
- }
-#endif
if (filtering && src_width >= 32768) {
ScaleFilterCols = ScaleFilterCols64_16_C;
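
Each scale.cc hunk above drops one runtime-dispatch block: the row-function pointer is initialised to a portable _C variant and each #if block may retarget it when the matching CPU flag and the kernel's alignment requirements hold, so removing the DSPR2 branch leaves whatever was selected before it (the C row by default) in effect on MIPS. The general shape of such a block, sketched with placeholder _SOMEISA names rather than real libyuv symbols:

// Illustrative only; the _SOMEISA names are placeholders for this sketch.
#if defined(HAS_SCALEROWDOWN2_SOMEISA)
  if (TestCpuFlag(kCpuHasSOMEISA) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_ptr, 4) &&
      IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 =
        filtering ? ScaleRowDown2Box_SOMEISA : ScaleRowDown2_SOMEISA;
  }
#endif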
diff --git a/source/scale_any.cc b/source/scale_any.cc
index c4d6626a..8604c233 100644
--- a/source/scale_any.cc
+++ b/source/scale_any.cc
@@ -456,9 +456,6 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#ifdef HAS_SCALEADDROW_MSA
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
#endif
-#ifdef HAS_SCALEADDROW_DSPR2
-SAANY(ScaleAddRow_Any_DSPR2, ScaleAddRow_DSPR2, ScaleAddRow_C, 15)
-#endif
#undef SAANY
#ifdef __cplusplus
diff --git a/source/scale_argb.cc b/source/scale_argb.cc
index c3ec7d6b..cd4683b3 100644
--- a/source/scale_argb.cc
+++ b/source/scale_argb.cc
@@ -306,15 +306,6 @@ static void ScaleARGBBilinearDown(int src_width,
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
- IS_ALIGNED(src_stride, 4)) {
- InterpolateRow = InterpolateRow_Any_DSPR2;
- if (IS_ALIGNED(clip_src_width, 4)) {
- InterpolateRow = InterpolateRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
@@ -419,12 +410,6 @@ static void ScaleARGBBilinearUp(int src_width,
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) &&
- IS_ALIGNED(dst_stride, 4)) {
- InterpolateRow = InterpolateRow_DSPR2;
- }
-#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
@@ -587,15 +572,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
-#if defined(HAS_I422TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- I422ToARGBRow = I422ToARGBRow_DSPR2;
- }
-#endif
#if defined(HAS_I422TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGBRow = I422ToARGBRow_Any_MSA;
@@ -632,12 +608,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) &&
- IS_ALIGNED(dst_stride_argb, 4)) {
- InterpolateRow = InterpolateRow_DSPR2;
- }
-#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
diff --git a/source/scale_common.cc b/source/scale_common.cc
index fefb027b..e060c3cb 100644
--- a/source/scale_common.cc
+++ b/source/scale_common.cc
@@ -1063,16 +1063,6 @@ void ScalePlaneVertical(int src_height,
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
- IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
- IS_ALIGNED(dst_stride, 4)) {
- InterpolateRow = InterpolateRow_Any_DSPR2;
- if (IS_ALIGNED(dst_width_bytes, 4)) {
- InterpolateRow = InterpolateRow_DSPR2;
- }
- }
-#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
@@ -1151,16 +1141,6 @@ void ScalePlaneVertical_16(int src_height,
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_16_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
- IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
- IS_ALIGNED(dst_stride, 4)) {
- InterpolateRow = InterpolateRow_Any_16_DSPR2;
- if (IS_ALIGNED(dst_width_bytes, 4)) {
- InterpolateRow = InterpolateRow_16_DSPR2;
- }
- }
-#endif
for (j = 0; j < dst_height; ++j) {
int yi;
int yf;
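
The scale_common.cc hunks above remove the DSPR2 candidates from the InterpolateRow dispatch used by ScalePlaneVertical; the kernel blends two source rows with an 8.8 fixed-point vertical fraction. A per-pixel sketch, assuming the blend the portable InterpolateRow_C performs (that C path is what remains selected on MIPS after this change):

#include <stdint.h>

// Sketch only: one output byte of the vertical row blend, with y1_fraction
// in [0, 256) taken from the vertical scale position.
static inline uint8_t InterpolatePixel_sketch(uint8_t above, uint8_t below,
                                              int y1_fraction) {
  return (uint8_t)((above * (256 - y1_fraction) + below * y1_fraction) >> 8);
}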
diff --git a/source/scale_dspr2.cc b/source/scale_dspr2.cc
deleted file mode 100644
index ddedcbf4..00000000
--- a/source/scale_dspr2.cc
+++ /dev/null
@@ -1,668 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC MIPS DSPR2
-#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) && \
- (__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32)
-
-void ScaleRowDown2_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst,
- int dst_width) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t9, %[dst_width], 4 \n" // iterations -> by 16
- "beqz $t9, 2f \n"
- " nop \n"
-
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
- "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
- "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
- // TODO(fbarchard): Use odd pixels instead of even.
- "precrq.qb.ph $t8, $t1, $t0 \n" // |7|5|3|1|
- "precrq.qb.ph $t0, $t3, $t2 \n" // |15|13|11|9|
- "precrq.qb.ph $t1, $t5, $t4 \n" // |23|21|19|17|
- "precrq.qb.ph $t2, $t7, $t6 \n" // |31|29|27|25|
- "addiu %[src_ptr], %[src_ptr], 32 \n"
- "addiu $t9, $t9, -1 \n"
- "sw $t8, 0(%[dst]) \n"
- "sw $t0, 4(%[dst]) \n"
- "sw $t1, 8(%[dst]) \n"
- "sw $t2, 12(%[dst]) \n"
- "bgtz $t9, 1b \n"
- " addiu %[dst], %[dst], 16 \n"
-
- "2: \n"
- "andi $t9, %[dst_width], 0xf \n" // residue
- "beqz $t9, 3f \n"
- " nop \n"
-
- "21: \n"
- "lbu $t0, 1(%[src_ptr]) \n"
- "addiu %[src_ptr], %[src_ptr], 2 \n"
- "addiu $t9, $t9, -1 \n"
- "sb $t0, 0(%[dst]) \n"
- "bgtz $t9, 21b \n"
- " addiu %[dst], %[dst], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst)
- : [dst_width] "r"(dst_width)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
-}
-
-void ScaleRowDown2Box_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst,
- int dst_width) {
- const uint8* t = src_ptr + src_stride;
-
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t9, %[dst_width], 3 \n" // iterations -> step 8
- "bltz $t9, 2f \n"
- " nop \n"
-
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t4, 0(%[t]) \n" // |19|18|17|16|
- "lw $t5, 4(%[t]) \n" // |23|22|21|20|
- "lw $t6, 8(%[t]) \n" // |27|26|25|24|
- "lw $t7, 12(%[t]) \n" // |31|30|29|28|
- "addiu $t9, $t9, -1 \n"
- "srl $t8, $t0, 16 \n" // |X|X|3|2|
- "ins $t0, $t4, 16, 16 \n" // |17|16|1|0|
- "ins $t4, $t8, 0, 16 \n" // |19|18|3|2|
- "raddu.w.qb $t0, $t0 \n" // |17+16+1+0|
- "raddu.w.qb $t4, $t4 \n" // |19+18+3+2|
- "shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2
- "shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2
- "srl $t8, $t1, 16 \n" // |X|X|7|6|
- "ins $t1, $t5, 16, 16 \n" // |21|20|5|4|
- "ins $t5, $t8, 0, 16 \n" // |22|23|7|6|
- "raddu.w.qb $t1, $t1 \n" // |21+20+5+4|
- "raddu.w.qb $t5, $t5 \n" // |23+22+7+6|
- "shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2
- "shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2
- "srl $t8, $t2, 16 \n" // |X|X|11|10|
- "ins $t2, $t6, 16, 16 \n" // |25|24|9|8|
- "ins $t6, $t8, 0, 16 \n" // |27|26|11|10|
- "raddu.w.qb $t2, $t2 \n" // |25+24+9+8|
- "raddu.w.qb $t6, $t6 \n" // |27+26+11+10|
- "shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2
- "shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2
- "srl $t8, $t3, 16 \n" // |X|X|15|14|
- "ins $t3, $t7, 16, 16 \n" // |29|28|13|12|
- "ins $t7, $t8, 0, 16 \n" // |31|30|15|14|
- "raddu.w.qb $t3, $t3 \n" // |29+28+13+12|
- "raddu.w.qb $t7, $t7 \n" // |31+30+15+14|
- "shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2
- "shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2
- "addiu %[src_ptr], %[src_ptr], 16 \n"
- "addiu %[t], %[t], 16 \n"
- "sb $t0, 0(%[dst]) \n"
- "sb $t4, 1(%[dst]) \n"
- "sb $t1, 2(%[dst]) \n"
- "sb $t5, 3(%[dst]) \n"
- "sb $t2, 4(%[dst]) \n"
- "sb $t6, 5(%[dst]) \n"
- "sb $t3, 6(%[dst]) \n"
- "sb $t7, 7(%[dst]) \n"
- "bgtz $t9, 1b \n"
- " addiu %[dst], %[dst], 8 \n"
-
- "2: \n"
- "andi $t9, %[dst_width], 0x7 \n" // x = residue
- "beqz $t9, 3f \n"
- " nop \n"
-
- "21: \n"
- "lwr $t1, 0(%[src_ptr]) \n"
- "lwl $t1, 3(%[src_ptr]) \n"
- "lwr $t2, 0(%[t]) \n"
- "lwl $t2, 3(%[t]) \n"
- "srl $t8, $t1, 16 \n"
- "ins $t1, $t2, 16, 16 \n"
- "ins $t2, $t8, 0, 16 \n"
- "raddu.w.qb $t1, $t1 \n"
- "raddu.w.qb $t2, $t2 \n"
- "shra_r.w $t1, $t1, 2 \n"
- "shra_r.w $t2, $t2, 2 \n"
- "sb $t1, 0(%[dst]) \n"
- "sb $t2, 1(%[dst]) \n"
- "addiu %[src_ptr], %[src_ptr], 4 \n"
- "addiu $t9, $t9, -2 \n"
- "addiu %[t], %[t], 4 \n"
- "bgtz $t9, 21b \n"
- " addiu %[dst], %[dst], 2 \n"
-
- "3: \n"
- ".set pop \n"
-
- : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [t] "+r"(t)
- : [dst_width] "r"(dst_width)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
-}
-
-void ScaleRowDown4_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst,
- int dst_width) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t9, %[dst_width], 3 \n"
- "beqz $t9, 2f \n"
- " nop \n"
-
- "1: \n"
- "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
- "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
- "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
- "precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0|
- "precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
- "precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
- "precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
- "precrq.qb.ph $t1, $t2, $t1 \n" // |14|10|6|2|
- "precrq.qb.ph $t5, $t6, $t5 \n" // |30|26|22|18|
- "addiu %[src_ptr], %[src_ptr], 32 \n"
- "addiu $t9, $t9, -1 \n"
- "sw $t1, 0(%[dst]) \n"
- "sw $t5, 4(%[dst]) \n"
- "bgtz $t9, 1b \n"
- " addiu %[dst], %[dst], 8 \n"
-
- "2: \n"
- "andi $t9, %[dst_width], 7 \n" // residue
- "beqz $t9, 3f \n"
- " nop \n"
-
- "21: \n"
- "lbu $t1, 2(%[src_ptr]) \n"
- "addiu %[src_ptr], %[src_ptr], 4 \n"
- "addiu $t9, $t9, -1 \n"
- "sb $t1, 0(%[dst]) \n"
- "bgtz $t9, 21b \n"
- " addiu %[dst], %[dst], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst)
- : [dst_width] "r"(dst_width)
- : "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
-}
-
-void ScaleRowDown4Box_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst,
- int dst_width) {
- intptr_t stride = src_stride;
- const uint8* s1 = src_ptr + stride;
- const uint8* s2 = s1 + stride;
- const uint8* s3 = s2 + stride;
-
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t9, %[dst_width], 1 \n"
- "andi $t8, %[dst_width], 1 \n"
-
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 0(%[s1]) \n" // |7|6|5|4|
- "lw $t2, 0(%[s2]) \n" // |11|10|9|8|
- "lw $t3, 0(%[s3]) \n" // |15|14|13|12|
- "lw $t4, 4(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t5, 4(%[s1]) \n" // |23|22|21|20|
- "lw $t6, 4(%[s2]) \n" // |27|26|25|24|
- "lw $t7, 4(%[s3]) \n" // |31|30|29|28|
- "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
- "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
- "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
- "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
- "raddu.w.qb $t4, $t4 \n" // |19 + 18 + 17 + 16|
- "raddu.w.qb $t5, $t5 \n" // |23 + 22 + 21 + 20|
- "raddu.w.qb $t6, $t6 \n" // |27 + 26 + 25 + 24|
- "raddu.w.qb $t7, $t7 \n" // |31 + 30 + 29 + 28|
- "add $t0, $t0, $t1 \n"
- "add $t1, $t2, $t3 \n"
- "add $t0, $t0, $t1 \n"
- "add $t4, $t4, $t5 \n"
- "add $t6, $t6, $t7 \n"
- "add $t4, $t4, $t6 \n"
- "shra_r.w $t0, $t0, 4 \n"
- "shra_r.w $t4, $t4, 4 \n"
- "sb $t0, 0(%[dst]) \n"
- "sb $t4, 1(%[dst]) \n"
- "addiu %[src_ptr], %[src_ptr], 8 \n"
- "addiu %[s1], %[s1], 8 \n"
- "addiu %[s2], %[s2], 8 \n"
- "addiu %[s3], %[s3], 8 \n"
- "addiu $t9, $t9, -1 \n"
- "bgtz $t9, 1b \n"
- " addiu %[dst], %[dst], 2 \n"
- "beqz $t8, 2f \n"
- " nop \n"
-
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 0(%[s1]) \n" // |7|6|5|4|
- "lw $t2, 0(%[s2]) \n" // |11|10|9|8|
- "lw $t3, 0(%[s3]) \n" // |15|14|13|12|
- "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
- "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
- "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
- "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
- "add $t0, $t0, $t1 \n"
- "add $t1, $t2, $t3 \n"
- "add $t0, $t0, $t1 \n"
- "shra_r.w $t0, $t0, 4 \n"
- "sb $t0, 0(%[dst]) \n"
-
- "2: \n"
- ".set pop \n"
-
- : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [s1] "+r"(s1), [s2] "+r"(s2),
- [s3] "+r"(s3)
- : [dst_width] "r"(dst_width)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
-}
-
-void ScaleRowDown34_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst,
- int dst_width) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "1: \n"
- "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
- "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
- "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
- "precrq.qb.ph $t0, $t2, $t4 \n" // |7|5|15|13|
- "precrq.qb.ph $t9, $t6, $t8 \n" // |23|21|31|30|
- "addiu %[dst_width], %[dst_width], -24 \n"
- "ins $t1, $t1, 8, 16 \n" // |3|1|0|X|
- "ins $t4, $t0, 8, 16 \n" // |X|15|13|12|
- "ins $t5, $t5, 8, 16 \n" // |19|17|16|X|
- "ins $t8, $t9, 8, 16 \n" // |X|31|29|28|
- "addiu %[src_ptr], %[src_ptr], 32 \n"
- "packrl.ph $t0, $t3, $t0 \n" // |9|8|7|5|
- "packrl.ph $t9, $t7, $t9 \n" // |25|24|23|21|
- "prepend $t1, $t2, 8 \n" // |4|3|1|0|
- "prepend $t3, $t4, 24 \n" // |15|13|12|11|
- "prepend $t5, $t6, 8 \n" // |20|19|17|16|
- "prepend $t7, $t8, 24 \n" // |31|29|28|27|
- "sw $t1, 0(%[dst]) \n"
- "sw $t0, 4(%[dst]) \n"
- "sw $t3, 8(%[dst]) \n"
- "sw $t5, 12(%[dst]) \n"
- "sw $t9, 16(%[dst]) \n"
- "sw $t7, 20(%[dst]) \n"
- "bnez %[dst_width], 1b \n"
- " addiu %[dst], %[dst], 24 \n"
- ".set pop \n"
- : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [dst_width] "+r"(dst_width)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
-}
-
-void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* d,
- int dst_width) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "repl.ph $t3, 3 \n" // 0x00030003
-
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
- "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
- "rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1|
- "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
- "muleu_s.ph.qbl $t4, $t2, $t3 \n" // |S0*3|S3*3|
- "muleu_s.ph.qbl $t5, $t6, $t3 \n" // |T0*3|T3*3|
- "andi $t0, $t2, 0xFFFF \n" // |0|0|S2|S1|
- "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
- "raddu.w.qb $t0, $t0 \n"
- "raddu.w.qb $t1, $t1 \n"
- "shra_r.w $t0, $t0, 1 \n"
- "shra_r.w $t1, $t1, 1 \n"
- "preceu.ph.qbr $t2, $t2 \n" // |0|S2|0|S1|
- "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
- "rotr $t2, $t2, 16 \n" // |0|S1|0|S2|
- "rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
- "addu.ph $t2, $t2, $t4 \n"
- "addu.ph $t6, $t6, $t5 \n"
- "sll $t5, $t0, 1 \n"
- "add $t0, $t5, $t0 \n"
- "shra_r.ph $t2, $t2, 2 \n"
- "shra_r.ph $t6, $t6, 2 \n"
- "shll.ph $t4, $t2, 1 \n"
- "addq.ph $t4, $t4, $t2 \n"
- "addu $t0, $t0, $t1 \n"
- "addiu %[src_ptr], %[src_ptr], 4 \n"
- "shra_r.w $t0, $t0, 2 \n"
- "addu.ph $t6, $t6, $t4 \n"
- "shra_r.ph $t6, $t6, 2 \n"
- "srl $t1, $t6, 16 \n"
- "addiu %[dst_width], %[dst_width], -3 \n"
- "sb $t1, 0(%[d]) \n"
- "sb $t0, 1(%[d]) \n"
- "sb $t6, 2(%[d]) \n"
- "bgtz %[dst_width], 1b \n"
- " addiu %[d], %[d], 3 \n"
- "3: \n"
- ".set pop \n"
- : [src_ptr] "+r"(src_ptr), [src_stride] "+r"(src_stride), [d] "+r"(d),
- [dst_width] "+r"(dst_width)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6");
-}
-
-void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* d,
- int dst_width) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "repl.ph $t2, 3 \n" // 0x00030003
-
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
- "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
- "rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1|
- "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
- "muleu_s.ph.qbl $t3, $t4, $t2 \n" // |S0*3|S3*3|
- "muleu_s.ph.qbl $t5, $t6, $t2 \n" // |T0*3|T3*3|
- "andi $t0, $t4, 0xFFFF \n" // |0|0|S2|S1|
- "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
- "raddu.w.qb $t0, $t0 \n"
- "raddu.w.qb $t1, $t1 \n"
- "shra_r.w $t0, $t0, 1 \n"
- "shra_r.w $t1, $t1, 1 \n"
- "preceu.ph.qbr $t4, $t4 \n" // |0|S2|0|S1|
- "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
- "rotr $t4, $t4, 16 \n" // |0|S1|0|S2|
- "rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
- "addu.ph $t4, $t4, $t3 \n"
- "addu.ph $t6, $t6, $t5 \n"
- "shra_r.ph $t6, $t6, 2 \n"
- "shra_r.ph $t4, $t4, 2 \n"
- "addu.ph $t6, $t6, $t4 \n"
- "addiu %[src_ptr], %[src_ptr], 4 \n"
- "shra_r.ph $t6, $t6, 1 \n"
- "addu $t0, $t0, $t1 \n"
- "addiu %[dst_width], %[dst_width], -3 \n"
- "shra_r.w $t0, $t0, 1 \n"
- "srl $t1, $t6, 16 \n"
- "sb $t1, 0(%[d]) \n"
- "sb $t0, 1(%[d]) \n"
- "sb $t6, 2(%[d]) \n"
- "bgtz %[dst_width], 1b \n"
- " addiu %[d], %[d], 3 \n"
- "3: \n"
- ".set pop \n"
- : [src_ptr] "+r"(src_ptr), [src_stride] "+r"(src_stride), [d] "+r"(d),
- [dst_width] "+r"(dst_width)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6");
-}
-
-void ScaleRowDown38_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst,
- int dst_width) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
- "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
- "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
- "wsbh $t0, $t0 \n" // |2|3|0|1|
- "wsbh $t6, $t6 \n" // |26|27|24|25|
- "srl $t0, $t0, 8 \n" // |X|2|3|0|
- "srl $t3, $t3, 16 \n" // |X|X|15|14|
- "srl $t5, $t5, 16 \n" // |X|X|23|22|
- "srl $t7, $t7, 16 \n" // |X|X|31|30|
- "ins $t1, $t2, 24, 8 \n" // |8|6|5|4|
- "ins $t6, $t5, 0, 8 \n" // |26|27|24|22|
- "ins $t1, $t0, 0, 16 \n" // |8|6|3|0|
- "ins $t6, $t7, 24, 8 \n" // |30|27|24|22|
- "prepend $t2, $t3, 24 \n" // |X|15|14|11|
- "ins $t4, $t4, 16, 8 \n" // |19|16|17|X|
- "ins $t4, $t2, 0, 16 \n" // |19|16|14|11|
- "addiu %[src_ptr], %[src_ptr], 32 \n"
- "addiu %[dst_width], %[dst_width], -12 \n"
- "addiu $t8,%[dst_width], -12 \n"
- "sw $t1, 0(%[dst]) \n"
- "sw $t4, 4(%[dst]) \n"
- "sw $t6, 8(%[dst]) \n"
- "bgez $t8, 1b \n"
- " addiu %[dst], %[dst], 12 \n"
- ".set pop \n"
- : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [dst_width] "+r"(dst_width)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8");
-}
-
-void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr,
- int dst_width) {
- intptr_t stride = src_stride;
- const uint8* t = src_ptr + stride;
- const int c = 0x2AAA;
-
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
- "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
- "lw $t2, 0(%[t]) \n" // |T3|T2|T1|T0|
- "lw $t3, 4(%[t]) \n" // |T7|T6|T5|T4|
- "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
- "packrl.ph $t4, $t1, $t3 \n" // |S7|S6|T7|T6|
- "packrl.ph $t5, $t3, $t1 \n" // |T5|T4|S5|S4|
- "raddu.w.qb $t4, $t4 \n" // S7+S6+T7+T6
- "raddu.w.qb $t5, $t5 \n" // T5+T4+S5+S4
- "precrq.qb.ph $t6, $t0, $t2 \n" // |S3|S1|T3|T1|
- "precrq.qb.ph $t6, $t6, $t6 \n" // |S3|T3|S3|T3|
- "srl $t4, $t4, 2 \n" // t4 / 4
- "srl $t6, $t6, 16 \n" // |0|0|S3|T3|
- "raddu.w.qb $t6, $t6 \n" // 0+0+S3+T3
- "addu $t6, $t5, $t6 \n"
- "mul $t6, $t6, %[c] \n" // t6 * 0x2AAA
- "sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
- "sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
- "raddu.w.qb $t0, $t0 \n" // S2+S1+S0+0
- "raddu.w.qb $t2, $t2 \n" // T2+T1+T0+0
- "addu $t0, $t0, $t2 \n"
- "mul $t0, $t0, %[c] \n" // t0 * 0x2AAA
- "addiu %[src_ptr], %[src_ptr], 8 \n"
- "addiu %[t], %[t], 8 \n"
- "addiu %[dst_width], %[dst_width], -3 \n"
- "addiu %[dst_ptr], %[dst_ptr], 3 \n"
- "srl $t6, $t6, 16 \n"
- "srl $t0, $t0, 16 \n"
- "sb $t4, -1(%[dst_ptr]) \n"
- "sb $t6, -2(%[dst_ptr]) \n"
- "bgtz %[dst_width], 1b \n"
- " sb $t0, -3(%[dst_ptr]) \n"
- ".set pop \n"
- : [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr), [t] "+r"(t),
- [dst_width] "+r"(dst_width)
- : [c] "r"(c)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6");
-}
-
-void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr,
- int dst_width) {
- intptr_t stride = src_stride;
- const uint8* s1 = src_ptr + stride;
- stride += stride;
- const uint8* s2 = src_ptr + stride;
- const int c1 = 0x1C71;
- const int c2 = 0x2AAA;
-
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
- "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
- "lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0|
- "lw $t3, 4(%[s1]) \n" // |T7|T6|T5|T4|
- "lw $t4, 0(%[s2]) \n" // |R3|R2|R1|R0|
- "lw $t5, 4(%[s2]) \n" // |R7|R6|R5|R4|
- "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
- "packrl.ph $t6, $t1, $t3 \n" // |S7|S6|T7|T6|
- "raddu.w.qb $t6, $t6 \n" // S7+S6+T7+T6
- "packrl.ph $t7, $t3, $t1 \n" // |T5|T4|S5|S4|
- "raddu.w.qb $t7, $t7 \n" // T5+T4+S5+S4
- "sll $t8, $t5, 16 \n" // |R5|R4|0|0|
- "raddu.w.qb $t8, $t8 \n" // R5+R4
- "addu $t7, $t7, $t8 \n"
- "srl $t8, $t5, 16 \n" // |0|0|R7|R6|
- "raddu.w.qb $t8, $t8 \n" // R7 + R6
- "addu $t6, $t6, $t8 \n"
- "mul $t6, $t6, %[c2] \n" // t6 * 0x2AAA
- "precrq.qb.ph $t8, $t0, $t2 \n" // |S3|S1|T3|T1|
- "precrq.qb.ph $t8, $t8, $t4 \n" // |S3|T3|R3|R1|
- "srl $t8, $t8, 8 \n" // |0|S3|T3|R3|
- "raddu.w.qb $t8, $t8 \n" // S3 + T3 + R3
- "addu $t7, $t7, $t8 \n"
- "mul $t7, $t7, %[c1] \n" // t7 * 0x1C71
- "sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
- "sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
- "sll $t4, $t4, 8 \n" // |R2|R1|R0|0|
- "raddu.w.qb $t0, $t0 \n"
- "raddu.w.qb $t2, $t2 \n"
- "raddu.w.qb $t4, $t4 \n"
- "addu $t0, $t0, $t2 \n"
- "addu $t0, $t0, $t4 \n"
- "mul $t0, $t0, %[c1] \n" // t0 * 0x1C71
- "addiu %[src_ptr], %[src_ptr], 8 \n"
- "addiu %[s1], %[s1], 8 \n"
- "addiu %[s2], %[s2], 8 \n"
- "addiu %[dst_width], %[dst_width], -3 \n"
- "addiu %[dst_ptr], %[dst_ptr], 3 \n"
- "srl $t6, $t6, 16 \n"
- "srl $t7, $t7, 16 \n"
- "srl $t0, $t0, 16 \n"
- "sb $t6, -1(%[dst_ptr]) \n"
- "sb $t7, -2(%[dst_ptr]) \n"
- "bgtz %[dst_width], 1b \n"
- " sb $t0, -3(%[dst_ptr]) \n"
- ".set pop \n"
- : [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr), [s1] "+r"(s1),
- [s2] "+r"(s2), [dst_width] "+r"(dst_width)
- : [c1] "r"(c1), [c2] "r"(c2)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8");
-}
-
-void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
- int x;
- for (x = 0; x < ((src_width - 1)); x += 8) {
- uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4;
- uint32 tmp_t5, tmp_t6, tmp_t7, tmp_t8;
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
- "lw %[tmp_t5], 0(%[src_ptr]) \n"
- "lw %[tmp_t6], 4(%[src_ptr]) \n"
- "lw %[tmp_t1], 0(%[dst_ptr]) \n"
- "lw %[tmp_t2], 4(%[dst_ptr]) \n"
- "lw %[tmp_t3], 8(%[dst_ptr]) \n"
- "lw %[tmp_t4], 12(%[dst_ptr]) \n"
- "preceu.ph.qbr %[tmp_t7], %[tmp_t5] \n"
- "preceu.ph.qbl %[tmp_t8], %[tmp_t5] \n"
- "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t7] \n"
- "addu.ph %[tmp_t2], %[tmp_t2], %[tmp_t8] \n"
- "preceu.ph.qbr %[tmp_t7], %[tmp_t6] \n"
- "preceu.ph.qbl %[tmp_t8], %[tmp_t6] \n"
- "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t7] \n"
- "addu.ph %[tmp_t4], %[tmp_t4], %[tmp_t8] \n"
- "sw %[tmp_t1], 0(%[dst_ptr]) \n"
- "sw %[tmp_t2], 4(%[dst_ptr]) \n"
- "sw %[tmp_t3], 8(%[dst_ptr]) \n"
- "sw %[tmp_t4], 12(%[dst_ptr]) \n"
- ".set pop \n"
- :
- [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), [tmp_t3] "=&r"(tmp_t3),
- [tmp_t4] "=&r"(tmp_t4), [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
- [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [src_ptr] "+r"(src_ptr)
- : [dst_ptr] "r"(dst_ptr));
- src_ptr += 8;
- dst_ptr += 8;
- }
-
- if ((src_width)&7) {
- for (x = 0; x < ((src_width - 1) & 7); x += 1) {
- dst_ptr[0] += src_ptr[0];
- src_ptr += 1;
- dst_ptr += 1;
- }
- }
-}
-
-#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
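
The scale_dspr2.cc kernels deleted above are subsample and box downscalers built around raddu.w.qb (sum the four bytes of a word) and shra_r.w / shra_r.ph (rounded right shifts). ScaleRowDown2Box_DSPR2, for example, averages each 2x2 block with rounding, eight output pixels per loop iteration. A plain-C sketch of that row operation (a sketch only; ScaleRowDown2Box_C in source/scale_common.cc is the portable version that remains):

#include <stddef.h>
#include <stdint.h>

// Sketch only: the 2x2 rounded box average the deleted DSPR2 kernel computed
// eight output pixels at a time.
static void ScaleRowDown2Box_sketch(const uint8_t* src_ptr,
                                    ptrdiff_t src_stride,
                                    uint8_t* dst, int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;  // the row below
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst[x] = (uint8_t)((s[0] + s[1] + t[0] + t[1] + 2) >> 2);
    s += 2;
    t += 2;
  }
}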
diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc
index 5fd438a5..a8fb4b4a 100644
--- a/unit_test/cpu_test.cc
+++ b/unit_test/cpu_test.cc
@@ -65,8 +65,6 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
#if defined(__mips__)
int has_mips = TestCpuFlag(kCpuHasMIPS);
printf("Has MIPS %x\n", has_mips);
- int has_dspr2 = TestCpuFlag(kCpuHasDSPR2);
- printf("Has DSPR2 %x\n", has_dspr2);
int has_msa = TestCpuFlag(kCpuHasMSA);
printf("Has MSA %x\n", has_msa);
#endif
diff --git a/util/cpuid.c b/util/cpuid.c
index 9ff618e0..59c65d60 100644
--- a/util/cpuid.c
+++ b/util/cpuid.c
@@ -69,8 +69,8 @@ int main(int argc, const char* argv[]) {
printf("Has NEON %x\n", has_neon);
}
if (has_mips) {
- int has_dspr2 = TestCpuFlag(kCpuHasDSPR2);
- printf("Has DSPR2 %x\n", has_dspr2);
+ int has_msa = TestCpuFlag(kCpuHasMSA);
+ printf("Has MSA %x\n", has_msa);
}
if (has_x86) {
int has_sse2 = TestCpuFlag(kCpuHasSSE2);