diff options
author | Frank Barchard <fbarchard@google.com> | 2017-12-13 17:38:52 -0800 |
---|---|---|
committer | Commit Bot <commit-bot@chromium.org> | 2017-12-14 18:22:16 +0000 |
commit | 3b81288ecef7ff63ca773040431cba728c9a3621 (patch) | |
tree | 05d07d0a6af8541c11e338aab6074a6408fe66f7 | |
parent | bb3180ae807ddf55335926d5f53b3856e2882b1c (diff) | |
download | libyuv-3b81288ecef7ff63ca773040431cba728c9a3621.tar.gz |
Remove Mips DSPR2 code
Bug: libyuv:765
Test: build for mips still passes
Change-Id: I99105ad3951d2210c0793e3b9241c178442fdc37
Reviewed-on: https://chromium-review.googlesource.com/826404
Reviewed-by: Weiyong Yao <braveyao@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
33 files changed, 13 insertions, 3810 deletions
@@ -24,14 +24,12 @@ cc_library { "source/rotate_any.cc", "source/rotate_argb.cc", "source/rotate_common.cc", - "source/rotate_dspr2.cc", "source/rotate_gcc.cc", "source/rotate_msa.cc", "source/rotate_neon.cc", "source/rotate_neon64.cc", "source/row_any.cc", "source/row_common.cc", - "source/row_dspr2.cc", "source/row_gcc.cc", "source/row_msa.cc", "source/row_neon.cc", @@ -40,7 +38,6 @@ cc_library { "source/scale_any.cc", "source/scale_argb.cc", "source/scale_common.cc", - "source/scale_dspr2.cc", "source/scale_gcc.cc", "source/scale_msa.cc", "source/scale_neon.cc", @@ -24,14 +24,12 @@ LOCAL_SRC_FILES := \ source/rotate_any.cc \ source/rotate_argb.cc \ source/rotate_common.cc \ - source/rotate_dspr2.cc \ source/rotate_gcc.cc \ source/rotate_msa.cc \ source/rotate_neon.cc \ source/rotate_neon64.cc \ source/row_any.cc \ source/row_common.cc \ - source/row_dspr2.cc \ source/row_gcc.cc \ source/row_msa.cc \ source/row_neon.cc \ @@ -40,7 +38,6 @@ LOCAL_SRC_FILES := \ source/scale_any.cc \ source/scale_argb.cc \ source/scale_common.cc \ - source/scale_dspr2.cc \ source/scale_gcc.cc \ source/scale_msa.cc \ source/scale_neon.cc \ @@ -110,19 +110,16 @@ static_library("libyuv_internal") { "source/rotate_any.cc", "source/rotate_argb.cc", "source/rotate_common.cc", - "source/rotate_dspr2.cc", "source/rotate_gcc.cc", "source/rotate_win.cc", "source/row_any.cc", "source/row_common.cc", - "source/row_dspr2.cc", "source/row_gcc.cc", "source/row_win.cc", "source/scale.cc", "source/scale_any.cc", "source/scale_argb.cc", "source/scale_common.cc", - "source/scale_dspr2.cc", "source/scale_gcc.cc", "source/scale_win.cc", "source/video_common.cc", @@ -302,7 +299,6 @@ if (libyuv_include_tests) { # Enable the following 3 macros to turn off assembly for specified CPU. # "LIBYUV_DISABLE_X86", # "LIBYUV_DISABLE_NEON", - # "LIBYUV_DISABLE_DSPR2", # Enable the following macro to build libyuv as a shared library (dll). # "LIBYUV_USING_SHARED_LIBRARY" ] @@ -1,12 +1,12 @@ **libyuv** is an open source project that includes YUV scaling and conversion functionality. * Scale YUV to prepare content for compression, with point, bilinear or box filter. -* Convert to YUV from webcam formats. -* Convert from YUV to formats for rendering/effects. +* Convert to YUV from webcam formats for compression. +* Convert to RGB formats for rendering/effects. * Rotate by 90/180/270 degrees to adjust for mobile devices in portrait mode. -* Optimized for SSE2/SSSE3/AVX2 on x86/x64. +* Optimized for SSSE3/AVX2 on x86/x64. * Optimized for Neon on Arm. -* Optimized for DSP R2 on Mips. +* Optimized for MSA on Mips. ### Development diff --git a/docs/environment_variables.md b/docs/environment_variables.md index 9071c54d..c28d83e7 100644 --- a/docs/environment_variables.md +++ b/docs/environment_variables.md @@ -17,7 +17,7 @@ By default the cpu is detected and the most advanced form of SIMD is used. But LIBYUV_DISABLE_AVX512BW LIBYUV_DISABLE_ERMS LIBYUV_DISABLE_FMA3 - LIBYUV_DISABLE_DSPR2 + LIBYUV_DISABLE_MSA LIBYUV_DISABLE_NEON # Test Width/Height/Repeat diff --git a/docs/getting_started.md b/docs/getting_started.md index 58e05f3c..fefffce4 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -129,15 +129,10 @@ ia32 ninja -v -C out/Debug libyuv_unittest ninja -v -C out/Release libyuv_unittest -mipsel +mips - gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mipsel\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false" - gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mipsel\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false" - ninja -v -C out/Debug libyuv_unittest - ninja -v -C out/Release libyuv_unittest - - gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false" - gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false" + gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=true" + gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=true" ninja -v -C out/Debug libyuv_unittest ninja -v -C out/Release libyuv_unittest diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index c2e9bbbd..14f735f5 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -47,8 +47,7 @@ static const int kCpuHasAVX512VPOPCNTDQ = 0x100000; // These flags are only valid on MIPS processors. static const int kCpuHasMIPS = 0x200000; -static const int kCpuHasDSPR2 = 0x400000; -static const int kCpuHasMSA = 0x800000; +static const int kCpuHasMSA = 0x400000; // Optional init function. TestCpuFlag does an auto-init. // Returns cpu_info flags. diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h index 973fc152..7e9dfd2c 100644 --- a/include/libyuv/rotate_row.h +++ b/include/libyuv/rotate_row.h @@ -54,12 +54,6 @@ extern "C" { #define HAS_TRANSPOSEUVWX8_NEON #endif -#if !defined(LIBYUV_DISABLE_DSPR2) && !defined(__native_client__) && \ - defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_TRANSPOSEWX8_DSPR2 -#define HAS_TRANSPOSEUVWX8_DSPR2 -#endif // defined(__mips__) - #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #define HAS_TRANSPOSEWX16_MSA #define HAS_TRANSPOSEUVWX16_MSA @@ -97,16 +91,6 @@ void TransposeWx8_Fast_SSSE3(const uint8* src, uint8* dst, int dst_stride, int width); -void TransposeWx8_DSPR2(const uint8* src, - int src_stride, - uint8* dst, - int dst_stride, - int width); -void TransposeWx8_Fast_DSPR2(const uint8* src, - int src_stride, - uint8* dst, - int dst_stride, - int width); void TransposeWx16_MSA(const uint8* src, int src_stride, uint8* dst, @@ -128,11 +112,6 @@ void TransposeWx8_Fast_Any_SSSE3(const uint8* src, uint8* dst, int dst_stride, int width); -void TransposeWx8_Any_DSPR2(const uint8* src, - int src_stride, - uint8* dst, - int dst_stride, - int width); void TransposeWx16_Any_MSA(const uint8* src, int src_stride, uint8* dst, @@ -176,13 +155,6 @@ void TransposeUVWx8_NEON(const uint8* src, uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_DSPR2(const uint8* src, - int src_stride, - uint8* dst_a, - int dst_stride_a, - uint8* dst_b, - int dst_stride_b, - int width); void TransposeUVWx16_MSA(const uint8* src, int src_stride, uint8* dst_a, @@ -205,13 +177,6 @@ void TransposeUVWx8_Any_NEON(const uint8* src, uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_Any_DSPR2(const uint8* src, - int src_stride, - uint8* dst_a, - int dst_stride_a, - uint8* dst_b, - int dst_stride_b, - int width); void TransposeUVWx16_Any_MSA(const uint8* src, int src_stride, uint8* dst_a, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index cb719693..7c9ca04a 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -380,37 +380,6 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #define HAS_SCALESUMSAMPLES_NEON #endif - -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips__) && \ - (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6) -#define HAS_COPYROW_MIPS -#if defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_I422TOARGBROW_DSPR2 -#define HAS_INTERPOLATEROW_DSPR2 -#define HAS_MIRRORROW_DSPR2 -#define HAS_MIRRORUVROW_DSPR2 -#define HAS_SPLITUVROW_DSPR2 -#define HAS_RGB24TOARGBROW_DSPR2 -#define HAS_RAWTOARGBROW_DSPR2 -#define HAS_RGB565TOARGBROW_DSPR2 -#define HAS_ARGB1555TOARGBROW_DSPR2 -#define HAS_ARGB4444TOARGBROW_DSPR2 -#define HAS_I444TOARGBROW_DSPR2 -#define HAS_I422TOARGB4444ROW_DSPR2 -#define HAS_I422TOARGB1555ROW_DSPR2 -#define HAS_NV12TOARGBROW_DSPR2 -#define HAS_BGRATOUVROW_DSPR2 -#define HAS_BGRATOYROW_DSPR2 -#define HAS_ABGRTOUVROW_DSPR2 -#define HAS_ARGBTOYROW_DSPR2 -#define HAS_ABGRTOYROW_DSPR2 -#define HAS_RGBATOUVROW_DSPR2 -#define HAS_RGBATOYROW_DSPR2 -#define HAS_ARGBTOUVROW_DSPR2 -#endif -#endif - #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #define HAS_ABGRTOUVROW_MSA #define HAS_ABGRTOYROW_MSA @@ -797,29 +766,6 @@ void I444ToARGBRow_MSA(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGB4444Row_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGB1555Row_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, - const struct YuvConstants* yuvconstants, - int width); -void NV12ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u, @@ -1021,30 +967,6 @@ void RGB24ToYRow_MSA(const uint8* src_rgb24, uint8* dst_y, int width); void RAWToYRow_MSA(const uint8* src_raw, uint8* dst_y, int width); void RGB565ToYRow_MSA(const uint8* src_rgb565, uint8* dst_y, int width); void ARGB1555ToYRow_MSA(const uint8* src_argb1555, uint8* dst_y, int width); -void BGRAToUVRow_DSPR2(const uint8* src_bgra, - int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, - int width); -void BGRAToYRow_DSPR2(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToUVRow_DSPR2(const uint8* src_abgr, - int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, - int width); -void ARGBToYRow_DSPR2(const uint8* src_argb, uint8* dst_y, int width); -void ABGRToYRow_DSPR2(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToUVRow_DSPR2(const uint8* src_rgba, - int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, - int width); -void RGBAToYRow_DSPR2(const uint8* src_rgba, uint8* dst_y, int width); -void ARGBToUVRow_DSPR2(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, - int width); void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width); void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width); void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width); @@ -1073,10 +995,6 @@ void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width); void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int width); -void BGRAToYRow_Any_DSPR2(const uint8* src_bgra, uint8* dst_y, int width); -void ARGBToYRow_Any_DSPR2(const uint8* src_argb, uint8* dst_y, int width); -void ABGRToYRow_Any_DSPR2(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_Any_DSPR2(const uint8* src_rgba, uint8* dst_y, int width); void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int width); @@ -1263,26 +1181,6 @@ void ARGB1555ToUVRow_Any_MSA(const uint8* src_argb1555, uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Any_DSPR2(const uint8* src_bgra, - int src_stride_bgra, - uint8* dst_u, - uint8* dst_v, - int width); -void ABGRToUVRow_Any_DSPR2(const uint8* src_abgr, - int src_stride_abgr, - uint8* dst_u, - uint8* dst_v, - int width); -void RGBAToUVRow_Any_DSPR2(const uint8* src_rgba, - int src_stride_rgba, - uint8* dst_u, - uint8* dst_v, - int width); -void ARGBToUVRow_Any_DSPR2(const uint8* src_argb, - int src_stride_argb, - uint8* dst_u, - uint8* dst_v, - int width); void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb, uint8* dst_u, @@ -1361,7 +1259,6 @@ void ARGBToUV444Row_C(const uint8* src_argb, void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); void MirrorRow_NEON(const uint8* src, uint8* dst, int width); -void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width); void MirrorRow_MSA(const uint8* src, uint8* dst, int width); void MirrorRow_C(const uint8* src, uint8* dst, int width); void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); @@ -1378,10 +1275,6 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); -void MirrorUVRow_DSPR2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, - int width); void MirrorUVRow_MSA(const uint8* src_uv, uint8* dst_u, uint8* dst_v, @@ -1411,10 +1304,6 @@ void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); -void SplitUVRow_DSPR2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, - int width); void SplitUVRow_MSA(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, @@ -1428,10 +1317,6 @@ void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); -void SplitUVRow_Any_DSPR2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, - int width); void SplitUVRow_Any_MSA(const uint8* src_uv, uint8* dst_u, uint8* dst_v, @@ -1707,15 +1592,6 @@ void ARGB1555ToARGBRow_MSA(const uint8* src_argb1555, void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, int width); -void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width); -void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width); -void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565, uint8* dst_argb, int width); -void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555, - uint8* dst_argb, - int width); -void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444, - uint8* dst_argb, - int width); void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, uint8* dst_argb, int width); @@ -1773,19 +1649,6 @@ void ARGB1555ToARGBRow_Any_MSA(const uint8* src_argb1555, void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb, int width); -void RGB24ToARGBRow_Any_DSPR2(const uint8* src_rgb24, - uint8* dst_argb, - int width); -void RAWToARGBRow_Any_DSPR2(const uint8* src_raw, uint8* dst_argb, int width); -void RGB565ToARGBRow_Any_DSPR2(const uint8* src_rgb565, - uint8* dst_argb, - int width); -void ARGB1555ToARGBRow_Any_DSPR2(const uint8* src_argb1555, - uint8* dst_argb, - int width); -void ARGB4444ToARGBRow_Any_DSPR2(const uint8* src_argb4444, - uint8* dst_argb, - int width); void ARGB4444ToARGBRow_Any_MSA(const uint8* src_argb4444, uint8* dst_argb, @@ -2543,53 +2406,6 @@ void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_Any_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGB4444Row_Any_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGBRow_Any_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGB1555Row_Any_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I411ToARGBRow_Any_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void NV12ToARGBRow_Any_DSPR2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I444ToARGBRow_Any_MSA(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -3088,11 +2904,6 @@ void InterpolateRow_NEON(uint8* dst_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_DSPR2(uint8* dst_ptr, - const uint8* src_ptr, - ptrdiff_t src_stride_ptr, - int width, - int source_y_fraction); void InterpolateRow_MSA(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, @@ -3113,11 +2924,6 @@ void InterpolateRow_Any_AVX2(uint8* dst_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_Any_DSPR2(uint8* dst_ptr, - const uint8* src_ptr, - ptrdiff_t src_stride_ptr, - int width, - int source_y_fraction); void InterpolateRow_Any_MSA(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index c4a66aa0..3db46d39 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -94,16 +94,6 @@ extern "C" { #define HAS_SCALEARGBFILTERCOLS_NEON #endif -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_DSPR2) && !defined(__native_client__) && \ - defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_SCALEROWDOWN2_DSPR2 -#define HAS_SCALEROWDOWN4_DSPR2 -#define HAS_SCALEROWDOWN34_DSPR2 -#define HAS_SCALEROWDOWN38_DSPR2 -#define HAS_SCALEADDROW_DSPR2 -#endif - #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #define HAS_SCALEADDROW_MSA #define HAS_SCALEARGBCOLS_MSA @@ -831,51 +821,6 @@ void ScaleFilterCols_Any_NEON(uint8* dst_ptr, int x, int dx); -void ScaleRowDown2_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width); -void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width); -void ScaleRowDown4_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width); -void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width); -void ScaleRowDown34_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width); -void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* d, - int dst_width); -void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* d, - int dst_width); -void ScaleRowDown38_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width); -void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, - int dst_width); -void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, - int dst_width); -void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_Any_DSPR2(const uint8* src_ptr, - uint16* dst_ptr, - int src_width); - void ScaleRowDown2_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, @@ -121,7 +121,6 @@ # Enable the following 3 macros to turn off assembly for specified CPU. # 'LIBYUV_DISABLE_X86', # 'LIBYUV_DISABLE_NEON', - # 'LIBYUV_DISABLE_DSPR2', # Enable the following macro to build libyuv as a shared library (dll). # 'LIBYUV_USING_SHARED_LIBRARY', # TODO(fbarchard): Make these into gyp defines. diff --git a/libyuv.gypi b/libyuv.gypi index ec81bc9b..9467adfc 100644 --- a/libyuv.gypi +++ b/libyuv.gypi @@ -55,7 +55,6 @@ 'source/rotate_argb.cc', 'source/rotate_common.cc', 'source/rotate_gcc.cc', - 'source/rotate_dspr2.cc', 'source/rotate_msa.cc', 'source/rotate_neon.cc', 'source/rotate_neon64.cc', @@ -63,7 +62,6 @@ 'source/row_any.cc', 'source/row_common.cc', 'source/row_gcc.cc', - 'source/row_dspr2.cc', 'source/row_msa.cc', 'source/row_neon.cc', 'source/row_neon64.cc', @@ -73,7 +71,6 @@ 'source/scale_argb.cc', 'source/scale_common.cc', 'source/scale_gcc.cc', - 'source/scale_dspr2.cc', 'source/scale_msa.cc', 'source/scale_neon.cc', 'source/scale_neon64.cc', diff --git a/libyuv_test.gyp b/libyuv_test.gyp index 4222cf26..5fe154c6 100644 --- a/libyuv_test.gyp +++ b/libyuv_test.gyp @@ -100,7 +100,6 @@ # Enable the following 3 macros to turn off assembly for specified CPU. # 'LIBYUV_DISABLE_X86', # 'LIBYUV_DISABLE_NEON', - # 'LIBYUV_DISABLE_DSPR2', # Enable the following macro to build libyuv as a shared library (dll). # 'LIBYUV_USING_SHARED_LIBRARY', ], @@ -32,14 +32,12 @@ LOCAL_OBJ_FILES := \ source/rotate.o \ source/rotate_common.o \ source/rotate_gcc.o \ - source/rotate_dspr2.o \ source/rotate_neon64.o \ source/rotate_neon.o \ source/rotate_win.o \ source/row_any.o \ source/row_common.o \ source/row_gcc.o \ - source/row_dspr2.o \ source/row_neon64.o \ source/row_neon.o \ source/row_win.o \ @@ -48,7 +46,6 @@ LOCAL_OBJ_FILES := \ source/scale.o \ source/scale_common.o \ source/scale_gcc.o \ - source/scale_dspr2.o \ source/scale_neon64.o \ source/scale_neon.o \ source/scale_win.o \ diff --git a/source/convert.cc b/source/convert.cc index dfa83a5a..ef78fb5f 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -212,11 +212,6 @@ static void CopyPlane2(const uint8* src, CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif // Copy plane for (y = 0; y < height - 1; y += 2) { @@ -579,14 +574,6 @@ int ARGBToI420(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; @@ -595,14 +582,6 @@ int ARGBToI420(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToUVRow = ARGBToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_DSPR2; - } - } -#endif #if defined(HAS_ARGBTOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUVRow = ARGBToUVRow_Any_MSA; @@ -680,22 +659,6 @@ int BGRAToI420(const uint8* src_bgra, } } #endif -#if defined(HAS_BGRATOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - BGRAToYRow = BGRAToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - BGRAToYRow = BGRAToYRow_DSPR2; - } - } -#endif -#if defined(HAS_BGRATOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - BGRAToUVRow = BGRAToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - BGRAToUVRow = BGRAToUVRow_DSPR2; - } - } -#endif #if defined(HAS_BGRATOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { BGRAToYRow = BGRAToYRow_Any_MSA; @@ -781,22 +744,6 @@ int ABGRToI420(const uint8* src_abgr, } } #endif -#if defined(HAS_ABGRTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ABGRToYRow = ABGRToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ABGRToYRow = ABGRToYRow_DSPR2; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ABGRToUVRow = ABGRToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_DSPR2; - } - } -#endif #if defined(HAS_ABGRTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ABGRToYRow = ABGRToYRow_Any_MSA; @@ -882,22 +829,6 @@ int RGBAToI420(const uint8* src_rgba, } } #endif -#if defined(HAS_RGBATOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - RGBAToYRow = RGBAToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - RGBAToYRow = RGBAToYRow_DSPR2; - } - } -#endif -#if defined(HAS_RGBATOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - RGBAToUVRow = RGBAToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - RGBAToUVRow = RGBAToUVRow_DSPR2; - } - } -#endif #if defined(HAS_RGBATOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGBAToYRow = RGBAToYRow_Any_MSA; @@ -1287,14 +1218,6 @@ int RGB565ToI420(const uint8* src_rgb565, } } #endif -#if defined(HAS_RGB565TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - RGB565ToARGBRow = RGB565ToARGBRow_DSPR2; - } - } -#endif #endif { #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 5b6ddadb..2e7c0f8f 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -97,15 +97,6 @@ static int I420ToARGBMatrix(const uint8* src_y, } } #endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_DSPR2; - } -#endif #if defined(HAS_I422TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGBRow = I422ToARGBRow_Any_MSA; @@ -292,15 +283,6 @@ static int I422ToARGBMatrix(const uint8* src_y, } } #endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_DSPR2; - } -#endif #if defined(HAS_I422TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGBRow = I422ToARGBRow_Any_MSA; @@ -769,14 +751,6 @@ static int I444ToARGBMatrix(const uint8* src_y, } } #endif -#if defined(HAS_I444TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - I444ToARGBRow = I444ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_I444TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I444ToARGBRow = I444ToARGBRow_Any_MSA; @@ -905,15 +879,6 @@ static int I420AlphaToARGBMatrix(const uint8* src_y, } } #endif -#if defined(HAS_I422ALPHATOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422AlphaToARGBRow = I422AlphaToARGBRow_DSPR2; - } -#endif #if defined(HAS_I422ALPHATOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422AlphaToARGBRow = I422AlphaToARGBRow_Any_MSA; @@ -1262,14 +1227,6 @@ int RGB24ToARGB(const uint8* src_rgb24, } } #endif -#if defined(HAS_RGB24TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - RGB24ToARGBRow = RGB24ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_RGB24TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB24ToARGBRow = RGB24ToARGBRow_Any_MSA; @@ -1329,14 +1286,6 @@ int RAWToARGB(const uint8* src_raw, } } #endif -#if defined(HAS_RAWTOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - RAWToARGBRow = RAWToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - RAWToARGBRow = RAWToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_RAWTOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RAWToARGBRow = RAWToARGBRow_Any_MSA; @@ -1404,14 +1353,6 @@ int RGB565ToARGB(const uint8* src_rgb565, } } #endif -#if defined(HAS_RGB565TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - RGB565ToARGBRow = RGB565ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_RGB565TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB565ToARGBRow = RGB565ToARGBRow_Any_MSA; @@ -1479,14 +1420,6 @@ int ARGB1555ToARGB(const uint8* src_argb1555, } } #endif -#if defined(HAS_ARGB1555TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 4)) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_ARGB1555TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_MSA; @@ -1554,14 +1487,6 @@ int ARGB4444ToARGB(const uint8* src_argb4444, } } #endif -#if defined(HAS_ARGB4444TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 4)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_ARGB4444TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA; @@ -1626,14 +1551,6 @@ static int NV12ToARGBMatrix(const uint8* src_y, } } #endif -#if defined(HAS_NV12TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - NV12ToARGBRow = NV12ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_NV12TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { NV12ToARGBRow = NV12ToARGBRow_Any_MSA; @@ -1823,14 +1740,6 @@ int M420ToARGB(const uint8* src_m420, } } #endif -#if defined(HAS_NV12TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - NV12ToARGBRow = NV12ToARGBRow_DSPR2; - } - } -#endif #if defined(HAS_NV12TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { NV12ToARGBRow = NV12ToARGBRow_Any_MSA; diff --git a/source/convert_from.cc b/source/convert_from.cc index 509fe232..5c803753 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -484,15 +484,6 @@ static int I420ToRGBAMatrix(const uint8* src_y, } } #endif -#if defined(HAS_I422TORGBAROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) { - I422ToRGBARow = I422ToRGBARow_DSPR2; - } -#endif #if defined(HAS_I422TORGBAROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToRGBARow = I422ToRGBARow_Any_MSA; @@ -744,14 +735,6 @@ int I420ToARGB1555(const uint8* src_y, } } #endif -#if defined(HAS_I422TOARGB1555ROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - I422ToARGB1555Row = I422ToARGB1555Row_Any_DSPR2; - if (IS_ALIGNED(width, 4)) { - I422ToARGB1555Row = I422ToARGB1555Row_DSPR2; - } - } -#endif #if defined(HAS_I422TOARGB1555ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA; @@ -825,14 +808,6 @@ int I420ToARGB4444(const uint8* src_y, } } #endif -#if defined(HAS_I422TOARGB4444ROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - I422ToARGB4444Row = I422ToARGB4444Row_Any_DSPR2; - if (IS_ALIGNED(width, 4)) { - I422ToARGB4444Row = I422ToARGB4444Row_DSPR2; - } - } -#endif #if defined(HAS_I422TOARGB4444ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA; @@ -1057,14 +1032,6 @@ int I420ToRGB565Dither(const uint8* src_y, } } #endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) { - I422ToARGBRow = I422ToARGBRow_DSPR2; - } -#endif #if defined(HAS_I422TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGBRow = I422ToARGBRow_Any_MSA; diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 4dca2405..02e12a12 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -100,14 +100,6 @@ int ARGBToI444(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; @@ -197,22 +189,6 @@ int ARGBToI422(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToUVRow = ARGBToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_DSPR2; - } - } -#endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { @@ -344,22 +320,6 @@ int ARGBToNV12(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToUVRow = ARGBToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_DSPR2; - } - } -#endif #if defined(HAS_MERGEUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MergeUVRow_ = MergeUVRow_Any_MSA; @@ -495,22 +455,6 @@ int ARGBToNV21(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToUVRow = ARGBToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_DSPR2; - } - } -#endif #if defined(HAS_MERGEUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MergeUVRow_ = MergeUVRow_Any_MSA; @@ -643,22 +587,6 @@ int ARGBToYUY2(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToUVRow = ARGBToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_DSPR2; - } - } -#endif #if defined(HAS_I422TOYUY2ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToYUY2Row = I422ToYUY2Row_Any_MSA; @@ -787,22 +715,6 @@ int ARGBToUYVY(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToUVRow = ARGBToUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_DSPR2; - } - } -#endif #if defined(HAS_I422TOUYVYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToUYVYRow = I422ToUYVYRow_Any_MSA; @@ -880,14 +792,6 @@ int ARGBToI400(const uint8* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ARGBToYRow = ARGBToYRow_Any_DSPR2; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_DSPR2; - } - } -#endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 344f3c06..d08fc365 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -179,7 +179,7 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name, if (strcmp(ase, " msa") == 0) { return kCpuHasMSA; } - return kCpuHasDSPR2; + return 0; } while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) { if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) { @@ -189,7 +189,7 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name, if (strcmp(ase, " msa") == 0) { return kCpuHasMSA; } - return kCpuHasDSPR2; + return 0; } } } @@ -290,16 +290,10 @@ static SAFEBUFFERS int GetCpuFlags(void) { #endif #if defined(__mips__) && defined(__linux__) -#if defined(__mips_dspr2) - cpu_info |= kCpuHasDSPR2; -#endif #if defined(__mips_msa) cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa"); #endif cpu_info |= kCpuHasMIPS; - if (getenv("LIBYUV_DISABLE_DSPR2")) { - cpu_info &= ~kCpuHasDSPR2; - } if (getenv("LIBYUV_DISABLE_MSA")) { cpu_info &= ~kCpuHasMSA; } diff --git a/source/planar_functions.cc b/source/planar_functions.cc index dd311d1f..e65f1788 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -70,11 +70,6 @@ void CopyPlane(const uint8* src_y, CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif // Copy plane for (y = 0; y < height; ++y) { @@ -116,11 +111,6 @@ void CopyPlane_16(const uint16* src_y, CopyRow = CopyRow_16_NEON; } #endif -#if defined(HAS_COPYROW_16_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_16_MIPS; - } -#endif // Copy plane for (y = 0; y < height; ++y) { @@ -311,16 +301,6 @@ void SplitUVPlane(const uint8* src_uv, } } #endif -#if defined(HAS_SPLITUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_u, 4) && - IS_ALIGNED(dst_stride_u, 4) && IS_ALIGNED(dst_v, 4) && - IS_ALIGNED(dst_stride_v, 4)) { - SplitUVRow = SplitUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - SplitUVRow = SplitUVRow_DSPR2; - } - } -#endif #if defined(HAS_SPLITUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { SplitUVRow = SplitUVRow_Any_MSA; @@ -562,14 +542,6 @@ void MirrorPlane(const uint8* src_y, } } #endif -// TODO(fbarchard): Mirror on mips handle unaligned memory. -#if defined(HAS_MIRRORROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_y, 4) && - IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(dst_y, 4) && - IS_ALIGNED(dst_stride_y, 4)) { - MirrorRow = MirrorRow_DSPR2; - } -#endif #if defined(HAS_MIRRORROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MirrorRow = MirrorRow_Any_MSA; @@ -1473,15 +1445,6 @@ static int I422ToRGBAMatrix(const uint8* src_y, } } #endif -#if defined(HAS_I422TORGBAROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) { - I422ToRGBARow = I422ToRGBARow_DSPR2; - } -#endif #if defined(HAS_I422TORGBAROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToRGBARow = I422ToRGBARow_Any_MSA; @@ -2534,14 +2497,6 @@ int InterpolatePlane(const uint8* src0, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src0, 4) && - IS_ALIGNED(src_stride0, 4) && IS_ALIGNED(src1, 4) && - IS_ALIGNED(src_stride1, 4) && IS_ALIGNED(dst, 4) && - IS_ALIGNED(dst_stride, 4) && IS_ALIGNED(width, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } -#endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; diff --git a/source/rotate.cc b/source/rotate.cc index b16af507..1f74cd07 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -57,16 +57,6 @@ void TransposePlane(const uint8* src, } } #endif -#if defined(HAS_TRANSPOSEWX8_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - if (IS_ALIGNED(width, 4) && IS_ALIGNED(src, 4) && - IS_ALIGNED(src_stride, 4)) { - TransposeWx8 = TransposeWx8_Fast_DSPR2; - } else { - TransposeWx8 = TransposeWx8_DSPR2; - } - } -#endif #if defined(HAS_TRANSPOSEWX16_MSA) if (TestCpuFlag(kCpuHasMSA)) { TransposeWx16 = TransposeWx16_Any_MSA; @@ -168,14 +158,6 @@ void RotatePlane180(const uint8* src, } } #endif -// TODO(fbarchard): Mirror on mips handle unaligned memory. -#if defined(HAS_MIRRORROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst, 4) && - IS_ALIGNED(dst_stride, 4)) { - MirrorRow = MirrorRow_DSPR2; - } -#endif #if defined(HAS_MIRRORROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MirrorRow = MirrorRow_Any_MSA; @@ -204,11 +186,6 @@ void RotatePlane180(const uint8* src, CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif // Odd height will harmlessly mirror the middle row twice. for (y = 0; y < half_height; ++y) { @@ -255,12 +232,6 @@ void TransposeUV(const uint8* src, } } #endif -#if defined(HAS_TRANSPOSEUVWX8_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) && IS_ALIGNED(src, 4) && - IS_ALIGNED(src_stride, 4)) { - TransposeUVWx8 = TransposeUVWx8_DSPR2; - } -#endif #if defined(HAS_TRANSPOSEUVWX16_MSA) if (TestCpuFlag(kCpuHasMSA)) { TransposeUVWx16 = TransposeUVWx16_Any_MSA; @@ -355,12 +326,6 @@ void RotateUV180(const uint8* src, MirrorUVRow = MirrorUVRow_SSSE3; } #endif -#if defined(HAS_MIRRORUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src, 4) && - IS_ALIGNED(src_stride, 4)) { - MirrorUVRow = MirrorUVRow_DSPR2; - } -#endif #if defined(HAS_MIRRORUVROW_MSA) if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 32)) { MirrorUVRow = MirrorUVRow_MSA; diff --git a/source/rotate_any.cc b/source/rotate_any.cc index 562096b9..eb4f7418 100644 --- a/source/rotate_any.cc +++ b/source/rotate_any.cc @@ -38,9 +38,6 @@ TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7) #ifdef HAS_TRANSPOSEWX8_FAST_SSSE3 TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15) #endif -#ifdef HAS_TRANSPOSEWX8_DSPR2 -TANY(TransposeWx8_Any_DSPR2, TransposeWx8_DSPR2, 7) -#endif #ifdef HAS_TRANSPOSEWX16_MSA TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15) #endif @@ -64,9 +61,6 @@ TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7) #ifdef HAS_TRANSPOSEUVWX8_SSE2 TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7) #endif -#ifdef HAS_TRANSPOSEUVWX8_DSPR2 -TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7) -#endif #ifdef HAS_TRANSPOSEUVWX16_MSA TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7) #endif diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc index ede4eafa..f6a2bf69 100644 --- a/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -173,11 +173,6 @@ void ARGBRotate180(const uint8* src, CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif // Odd height will harmlessly mirror the middle row twice. for (y = 0; y < half_height; ++y) { diff --git a/source/rotate_dspr2.cc b/source/rotate_dspr2.cc deleted file mode 100644 index 5d2338de..00000000 --- a/source/rotate_dspr2.cc +++ /dev/null @@ -1,475 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/rotate_row.h" -#include "libyuv/row.h" - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) && \ - (__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32) - -void TransposeWx8_DSPR2(const uint8* src, - int src_stride, - uint8* dst, - int dst_stride, - int width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 - "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 - "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 - "addu $t3, $t2, %[src_stride] \n" - "addu $t5, $t4, %[src_stride] \n" - "addu $t6, $t2, $t4 \n" - "andi $t0, %[dst], 0x3 \n" - "andi $t1, %[dst_stride], 0x3 \n" - "or $t0, $t0, $t1 \n" - "bnez $t0, 11f \n" - " subu $t7, $t9, %[src_stride] \n" - // dst + dst_stride word aligned - "1: \n" - "lbu $t0, 0(%[src]) \n" - "lbux $t1, %[src_stride](%[src]) \n" - "lbux $t8, $t2(%[src]) \n" - "lbux $t9, $t3(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s0, $t8, $t0 \n" - "lbux $t0, $t4(%[src]) \n" - "lbux $t1, $t5(%[src]) \n" - "lbux $t8, $t6(%[src]) \n" - "lbux $t9, $t7(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s1, $t8, $t0 \n" - "sw $s0, 0(%[dst]) \n" - "addiu %[width], -1 \n" - "addiu %[src], 1 \n" - "sw $s1, 4(%[dst]) \n" - "bnez %[width], 1b \n" - " addu %[dst], %[dst], %[dst_stride] \n" - "b 2f \n" - // dst + dst_stride unaligned - "11: \n" - "lbu $t0, 0(%[src]) \n" - "lbux $t1, %[src_stride](%[src]) \n" - "lbux $t8, $t2(%[src]) \n" - "lbux $t9, $t3(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s0, $t8, $t0 \n" - "lbux $t0, $t4(%[src]) \n" - "lbux $t1, $t5(%[src]) \n" - "lbux $t8, $t6(%[src]) \n" - "lbux $t9, $t7(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s1, $t8, $t0 \n" - "swr $s0, 0(%[dst]) \n" - "swl $s0, 3(%[dst]) \n" - "addiu %[width], -1 \n" - "addiu %[src], 1 \n" - "swr $s1, 4(%[dst]) \n" - "swl $s1, 7(%[dst]) \n" - "bnez %[width], 11b \n" - "addu %[dst], %[dst], %[dst_stride] \n" - "2: \n" - ".set pop \n" - : [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width) - : [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1"); -} - -void TransposeWx8_Fast_DSPR2(const uint8* src, - int src_stride, - uint8* dst, - int dst_stride, - int width) { - __asm__ __volatile__( - ".set noat \n" - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 - "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 - "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 - "addu $t3, $t2, %[src_stride] \n" - "addu $t5, $t4, %[src_stride] \n" - "addu $t6, $t2, $t4 \n" - - "srl $AT, %[width], 0x2 \n" - "andi $t0, %[dst], 0x3 \n" - "andi $t1, %[dst_stride], 0x3 \n" - "or $t0, $t0, $t1 \n" - "bnez $t0, 11f \n" - " subu $t7, $t9, %[src_stride] \n" - // dst + dst_stride word aligned - "1: \n" - "lw $t0, 0(%[src]) \n" - "lwx $t1, %[src_stride](%[src]) \n" - "lwx $t8, $t2(%[src]) \n" - "lwx $t9, $t3(%[src]) \n" - - // t0 = | 30 | 20 | 10 | 00 | - // t1 = | 31 | 21 | 11 | 01 | - // t8 = | 32 | 22 | 12 | 02 | - // t9 = | 33 | 23 | 13 | 03 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 21 | 01 | 20 | 00 | - // s1 = | 23 | 03 | 22 | 02 | - // s2 = | 31 | 11 | 30 | 10 | - // s3 = | 33 | 13 | 32 | 12 | - - "precr.qb.ph $s4, $s1, $s0 \n" - "precrq.qb.ph $s5, $s1, $s0 \n" - "precr.qb.ph $s6, $s3, $s2 \n" - "precrq.qb.ph $s7, $s3, $s2 \n" - - // s4 = | 03 | 02 | 01 | 00 | - // s5 = | 23 | 22 | 21 | 20 | - // s6 = | 13 | 12 | 11 | 10 | - // s7 = | 33 | 32 | 31 | 30 | - - "lwx $t0, $t4(%[src]) \n" - "lwx $t1, $t5(%[src]) \n" - "lwx $t8, $t6(%[src]) \n" - "lwx $t9, $t7(%[src]) \n" - - // t0 = | 34 | 24 | 14 | 04 | - // t1 = | 35 | 25 | 15 | 05 | - // t8 = | 36 | 26 | 16 | 06 | - // t9 = | 37 | 27 | 17 | 07 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 25 | 05 | 24 | 04 | - // s1 = | 27 | 07 | 26 | 06 | - // s2 = | 35 | 15 | 34 | 14 | - // s3 = | 37 | 17 | 36 | 16 | - - "precr.qb.ph $t0, $s1, $s0 \n" - "precrq.qb.ph $t1, $s1, $s0 \n" - "precr.qb.ph $t8, $s3, $s2 \n" - "precrq.qb.ph $t9, $s3, $s2 \n" - - // t0 = | 07 | 06 | 05 | 04 | - // t1 = | 27 | 26 | 25 | 24 | - // t8 = | 17 | 16 | 15 | 14 | - // t9 = | 37 | 36 | 35 | 34 | - - "addu $s0, %[dst], %[dst_stride] \n" - "addu $s1, $s0, %[dst_stride] \n" - "addu $s2, $s1, %[dst_stride] \n" - - "sw $s4, 0(%[dst]) \n" - "sw $t0, 4(%[dst]) \n" - "sw $s6, 0($s0) \n" - "sw $t8, 4($s0) \n" - "sw $s5, 0($s1) \n" - "sw $t1, 4($s1) \n" - "sw $s7, 0($s2) \n" - "sw $t9, 4($s2) \n" - - "addiu $AT, -1 \n" - "addiu %[src], 4 \n" - - "bnez $AT, 1b \n" - " addu %[dst], $s2, %[dst_stride] \n" - "b 2f \n" - // dst + dst_stride unaligned - "11: \n" - "lw $t0, 0(%[src]) \n" - "lwx $t1, %[src_stride](%[src]) \n" - "lwx $t8, $t2(%[src]) \n" - "lwx $t9, $t3(%[src]) \n" - - // t0 = | 30 | 20 | 10 | 00 | - // t1 = | 31 | 21 | 11 | 01 | - // t8 = | 32 | 22 | 12 | 02 | - // t9 = | 33 | 23 | 13 | 03 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 21 | 01 | 20 | 00 | - // s1 = | 23 | 03 | 22 | 02 | - // s2 = | 31 | 11 | 30 | 10 | - // s3 = | 33 | 13 | 32 | 12 | - - "precr.qb.ph $s4, $s1, $s0 \n" - "precrq.qb.ph $s5, $s1, $s0 \n" - "precr.qb.ph $s6, $s3, $s2 \n" - "precrq.qb.ph $s7, $s3, $s2 \n" - - // s4 = | 03 | 02 | 01 | 00 | - // s5 = | 23 | 22 | 21 | 20 | - // s6 = | 13 | 12 | 11 | 10 | - // s7 = | 33 | 32 | 31 | 30 | - - "lwx $t0, $t4(%[src]) \n" - "lwx $t1, $t5(%[src]) \n" - "lwx $t8, $t6(%[src]) \n" - "lwx $t9, $t7(%[src]) \n" - - // t0 = | 34 | 24 | 14 | 04 | - // t1 = | 35 | 25 | 15 | 05 | - // t8 = | 36 | 26 | 16 | 06 | - // t9 = | 37 | 27 | 17 | 07 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 25 | 05 | 24 | 04 | - // s1 = | 27 | 07 | 26 | 06 | - // s2 = | 35 | 15 | 34 | 14 | - // s3 = | 37 | 17 | 36 | 16 | - - "precr.qb.ph $t0, $s1, $s0 \n" - "precrq.qb.ph $t1, $s1, $s0 \n" - "precr.qb.ph $t8, $s3, $s2 \n" - "precrq.qb.ph $t9, $s3, $s2 \n" - - // t0 = | 07 | 06 | 05 | 04 | - // t1 = | 27 | 26 | 25 | 24 | - // t8 = | 17 | 16 | 15 | 14 | - // t9 = | 37 | 36 | 35 | 34 | - - "addu $s0, %[dst], %[dst_stride] \n" - "addu $s1, $s0, %[dst_stride] \n" - "addu $s2, $s1, %[dst_stride] \n" - - "swr $s4, 0(%[dst]) \n" - "swl $s4, 3(%[dst]) \n" - "swr $t0, 4(%[dst]) \n" - "swl $t0, 7(%[dst]) \n" - "swr $s6, 0($s0) \n" - "swl $s6, 3($s0) \n" - "swr $t8, 4($s0) \n" - "swl $t8, 7($s0) \n" - "swr $s5, 0($s1) \n" - "swl $s5, 3($s1) \n" - "swr $t1, 4($s1) \n" - "swl $t1, 7($s1) \n" - "swr $s7, 0($s2) \n" - "swl $s7, 3($s2) \n" - "swr $t9, 4($s2) \n" - "swl $t9, 7($s2) \n" - - "addiu $AT, -1 \n" - "addiu %[src], 4 \n" - - "bnez $AT, 11b \n" - " addu %[dst], $s2, %[dst_stride] \n" - "2: \n" - ".set pop \n" - ".set at \n" - : [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width) - : [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1", - "s2", "s3", "s4", "s5", "s6", "s7"); -} - -void TransposeUVWx8_DSPR2(const uint8* src, - int src_stride, - uint8* dst_a, - int dst_stride_a, - uint8* dst_b, - int dst_stride_b, - int width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 - "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 - "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 - "addu $t3, $t2, %[src_stride] \n" - "addu $t5, $t4, %[src_stride] \n" - "addu $t6, $t2, $t4 \n" - "subu $t7, $t9, %[src_stride] \n" - "srl $t1, %[width], 1 \n" - - // check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b - "andi $t0, %[dst_a], 0x3 \n" - "andi $t8, %[dst_b], 0x3 \n" - "or $t0, $t0, $t8 \n" - "andi $t8, %[dst_stride_a], 0x3 \n" - "andi $s5, %[dst_stride_b], 0x3 \n" - "or $t8, $t8, $s5 \n" - "or $t0, $t0, $t8 \n" - "bnez $t0, 11f \n" - " nop \n" - // dst + dst_stride word aligned (both, a & b dst addresses) - "1: \n" - "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0| - "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1| - "addu $s5, %[dst_a], %[dst_stride_a] \n" - "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2| - "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3| - "addu $s6, %[dst_b], %[dst_stride_b] \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0| - "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2| - "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2| - - "sw $s3, 0($s5) \n" - "sw $s4, 0($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0| - - "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4| - "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5| - "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6| - "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7| - "sw $s3, 0(%[dst_a]) \n" - "sw $s4, 0(%[dst_b]) \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4| - "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7| - "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6| - "sw $s3, 4($s5) \n" - "sw $s4, 4($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4| - - "addiu %[src], 4 \n" - "addiu $t1, -1 \n" - "sll $t0, %[dst_stride_a], 1 \n" - "sll $t8, %[dst_stride_b], 1 \n" - "sw $s3, 4(%[dst_a]) \n" - "sw $s4, 4(%[dst_b]) \n" - "addu %[dst_a], %[dst_a], $t0 \n" - "bnez $t1, 1b \n" - " addu %[dst_b], %[dst_b], $t8 \n" - "b 2f \n" - " nop \n" - - // dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned - "11: \n" - "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0| - "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1| - "addu $s5, %[dst_a], %[dst_stride_a] \n" - "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2| - "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3| - "addu $s6, %[dst_b], %[dst_stride_b] \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0| - "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2| - "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2| - - "swr $s3, 0($s5) \n" - "swl $s3, 3($s5) \n" - "swr $s4, 0($s6) \n" - "swl $s4, 3($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0| - - "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4| - "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5| - "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6| - "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7| - "swr $s3, 0(%[dst_a]) \n" - "swl $s3, 3(%[dst_a]) \n" - "swr $s4, 0(%[dst_b]) \n" - "swl $s4, 3(%[dst_b]) \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4| - "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7| - "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6| - - "swr $s3, 4($s5) \n" - "swl $s3, 7($s5) \n" - "swr $s4, 4($s6) \n" - "swl $s4, 7($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4| - - "addiu %[src], 4 \n" - "addiu $t1, -1 \n" - "sll $t0, %[dst_stride_a], 1 \n" - "sll $t8, %[dst_stride_b], 1 \n" - "swr $s3, 4(%[dst_a]) \n" - "swl $s3, 7(%[dst_a]) \n" - "swr $s4, 4(%[dst_b]) \n" - "swl $s4, 7(%[dst_b]) \n" - "addu %[dst_a], %[dst_a], $t0 \n" - "bnez $t1, 11b \n" - " addu %[dst_b], %[dst_b], $t8 \n" - - "2: \n" - ".set pop \n" - : [src] "+r"(src), [dst_a] "+r"(dst_a), [dst_b] "+r"(dst_b), - [width] "+r"(width), [src_stride] "+r"(src_stride) - : [dst_stride_a] "r"(dst_stride_a), [dst_stride_b] "r"(dst_stride_b) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1", - "s2", "s3", "s4", "s5", "s6"); -} - -#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/source/row_any.cc b/source/row_any.cc index 6d65ca7d..cc9fb50c 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -183,12 +183,6 @@ ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) #endif -#ifdef HAS_I422TOARGBROW_DSPR2 -ANY31C(I444ToARGBRow_Any_DSPR2, I444ToARGBRow_DSPR2, 0, 0, 4, 7) -ANY31C(I422ToARGBRow_Any_DSPR2, I422ToARGBRow_DSPR2, 1, 0, 4, 7) -ANY31C(I422ToARGB4444Row_Any_DSPR2, I422ToARGB4444Row_DSPR2, 1, 0, 2, 7) -ANY31C(I422ToARGB1555Row_Any_DSPR2, I422ToARGB1555Row_DSPR2, 1, 0, 2, 7) -#endif #ifdef HAS_I422TOARGBROW_MSA ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7) ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7) @@ -326,9 +320,6 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) #ifdef HAS_NV12TOARGBROW_NEON ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) #endif -#ifdef HAS_NV12TOARGBROW_DSPR2 -ANY21C(NV12ToARGBRow_Any_DSPR2, NV12ToARGBRow_DSPR2, 1, 1, 2, 4, 7) -#endif #ifdef HAS_NV12TOARGBROW_MSA ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7) #endif @@ -578,33 +569,6 @@ ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15) #ifdef HAS_ARGB4444TOARGBROW_NEON ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7) #endif -#ifdef HAS_RGB24TOARGBROW_DSPR2 -ANY11(RGB24ToARGBRow_Any_DSPR2, RGB24ToARGBRow_DSPR2, 0, 3, 4, 7) -#endif -#ifdef HAS_RAWTOARGBROW_DSPR2 -ANY11(RAWToARGBRow_Any_DSPR2, RAWToARGBRow_DSPR2, 0, 3, 4, 7) -#endif -#ifdef HAS_RGB565TOARGBROW_DSPR2 -ANY11(RGB565ToARGBRow_Any_DSPR2, RGB565ToARGBRow_DSPR2, 0, 2, 4, 7) -#endif -#ifdef HAS_ARGB1555TOARGBROW_DSPR2 -ANY11(ARGB1555ToARGBRow_Any_DSPR2, ARGB1555ToARGBRow_DSPR2, 0, 2, 4, 7) -#endif -#ifdef HAS_ARGB4444TOARGBROW_DSPR2 -ANY11(ARGB4444ToARGBRow_Any_DSPR2, ARGB4444ToARGBRow_DSPR2, 0, 2, 4, 7) -#endif -#ifdef HAS_BGRATOYROW_DSPR2 -ANY11(BGRAToYRow_Any_DSPR2, BGRAToYRow_DSPR2, 0, 4, 1, 7) -#endif -#ifdef HAS_ARGBTOYROW_DSPR2 -ANY11(ARGBToYRow_Any_DSPR2, ARGBToYRow_DSPR2, 0, 4, 1, 7) -#endif -#ifdef HAS_ABGRTOYROW_DSPR2 -ANY11(ABGRToYRow_Any_DSPR2, ABGRToYRow_DSPR2, 0, 4, 1, 7) -#endif -#ifdef HAS_RGBATOYROW_DSPR2 -ANY11(RGBAToYRow_Any_DSPR2, RGBAToYRow_DSPR2, 0, 4, 1, 7) -#endif #ifdef HAS_ARGB4444TOARGBROW_MSA ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15) #endif @@ -851,9 +815,6 @@ ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15) #ifdef HAS_INTERPOLATEROW_NEON ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15) #endif -#ifdef HAS_INTERPOLATEROW_DSPR2 -ANY11T(InterpolateRow_Any_DSPR2, InterpolateRow_DSPR2, 1, 1, 3) -#endif #ifdef HAS_INTERPOLATEROW_MSA ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31) #endif @@ -952,9 +913,6 @@ ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31) #ifdef HAS_SPLITUVROW_NEON ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15) #endif -#ifdef HAS_SPLITUVROW_DSPR2 -ANY12(SplitUVRow_Any_DSPR2, SplitUVRow_DSPR2, 0, 2, 0, 15) -#endif #ifdef HAS_SPLITUVROW_MSA ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31) #endif @@ -1116,18 +1074,6 @@ ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15) #ifdef HAS_UYVYTOUVROW_NEON ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15) #endif -#ifdef HAS_BGRATOUVROW_DSPR2 -ANY12S(BGRAToUVRow_Any_DSPR2, BGRAToUVRow_DSPR2, 0, 4, 15) -#endif -#ifdef HAS_ABGRTOUVROW_DSPR2 -ANY12S(ABGRToUVRow_Any_DSPR2, ABGRToUVRow_DSPR2, 0, 4, 15) -#endif -#ifdef HAS_RGBATOUVROW_DSPR2 -ANY12S(RGBAToUVRow_Any_DSPR2, RGBAToUVRow_DSPR2, 0, 4, 15) -#endif -#ifdef HAS_ARGBTOUVROW_DSPR2 -ANY12S(ARGBToUVRow_Any_DSPR2, ARGBToUVRow_DSPR2, 0, 4, 15) -#endif #ifdef HAS_YUY2TOUVROW_MSA ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31) #endif diff --git a/source/row_dspr2.cc b/source/row_dspr2.cc deleted file mode 100644 index 11f78e0d..00000000 --- a/source/row_dspr2.cc +++ /dev/null @@ -1,1721 +0,0 @@ -/* - * Copyright (c) 2012 The LibYuv project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips__) && \ - (_MIPS_SIM == _MIPS_SIM_ABI32) - -#ifdef HAS_COPYROW_MIPS -void CopyRow_MIPS(const uint8* src, uint8* dst, int count) { - __asm__ __volatile__( - ".set noreorder \n" - ".set noat \n" - "slti $at, %[count], 8 \n" - "bne $at ,$zero, $last8 \n" - "xor $t8, %[src], %[dst] \n" - "andi $t8, $t8, 0x3 \n" - - "bne $t8, $zero, unaligned \n" - "negu $a3, %[dst] \n" - // make dst/src aligned - "andi $a3, $a3, 0x3 \n" - "beq $a3, $zero, $chk16w \n" - // word-aligned now count is the remining bytes count - "subu %[count], %[count], $a3 \n" - - "lwr $t8, 0(%[src]) \n" - "addu %[src], %[src], $a3 \n" - "swr $t8, 0(%[dst]) \n" - "addu %[dst], %[dst], $a3 \n" - - // Now the dst/src are mutually word-aligned with word-aligned addresses - "$chk16w: \n" - "andi $t8, %[count], 0x3f \n" // whole 64-B chunks? - // t8 is the byte count after 64-byte chunks - "beq %[count], $t8, chk8w \n" - // There will be at most 1 32-byte chunk after it - "subu $a3, %[count], $t8 \n" // the reminder - // Here a3 counts bytes in 16w chunks - "addu $a3, %[dst], $a3 \n" - // Now a3 is the final dst after 64-byte chunks - "addu $t0, %[dst], %[count] \n" - // t0 is the "past the end" address - - // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be - // past - // the "t0-32" address - // This means: for x=128 the last "safe" a1 address is "t0-160" - // Alternatively, for x=64 the last "safe" a1 address is "t0-96" - // we will use "pref 30,128(a1)", so "t0-160" is the limit - "subu $t9, $t0, 160 \n" - // t9 is the "last safe pref 30,128(a1)" address - "pref 0, 0(%[src]) \n" // first line of src - "pref 0, 32(%[src]) \n" // second line of src - "pref 0, 64(%[src]) \n" - "pref 30, 32(%[dst]) \n" - // In case the a1 > t9 don't use "pref 30" at all - "sltu $v1, $t9, %[dst] \n" - "bgtz $v1, $loop16w \n" - "nop \n" - // otherwise, start with using pref30 - "pref 30, 64(%[dst]) \n" - "$loop16w: \n" - "pref 0, 96(%[src]) \n" - "lw $t0, 0(%[src]) \n" - "bgtz $v1, $skip_pref30_96 \n" // skip - "lw $t1, 4(%[src]) \n" - "pref 30, 96(%[dst]) \n" // continue - "$skip_pref30_96: \n" - "lw $t2, 8(%[src]) \n" - "lw $t3, 12(%[src]) \n" - "lw $t4, 16(%[src]) \n" - "lw $t5, 20(%[src]) \n" - "lw $t6, 24(%[src]) \n" - "lw $t7, 28(%[src]) \n" - "pref 0, 128(%[src]) \n" - // bring the next lines of src, addr 128 - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "lw $t0, 32(%[src]) \n" - "bgtz $v1, $skip_pref30_128 \n" // skip pref 30,128(a1) - "lw $t1, 36(%[src]) \n" - "pref 30, 128(%[dst]) \n" // set dest, addr 128 - "$skip_pref30_128: \n" - "lw $t2, 40(%[src]) \n" - "lw $t3, 44(%[src]) \n" - "lw $t4, 48(%[src]) \n" - "lw $t5, 52(%[src]) \n" - "lw $t6, 56(%[src]) \n" - "lw $t7, 60(%[src]) \n" - "pref 0, 160(%[src]) \n" - // bring the next lines of src, addr 160 - "sw $t0, 32(%[dst]) \n" - "sw $t1, 36(%[dst]) \n" - "sw $t2, 40(%[dst]) \n" - "sw $t3, 44(%[dst]) \n" - "sw $t4, 48(%[dst]) \n" - "sw $t5, 52(%[dst]) \n" - "sw $t6, 56(%[dst]) \n" - "sw $t7, 60(%[dst]) \n" - - "addiu %[dst], %[dst], 64 \n" // adding 64 to dest - "sltu $v1, $t9, %[dst] \n" - "bne %[dst], $a3, $loop16w \n" - " addiu %[src], %[src], 64 \n" // adding 64 to src - "move %[count], $t8 \n" - - // Here we have src and dest word-aligned but less than 64-bytes to go - - "chk8w: \n" - "pref 0, 0x0(%[src]) \n" - "andi $t8, %[count], 0x1f \n" // 32-byte chunk? - // the t8 is the reminder count past 32-bytes - "beq %[count], $t8, chk1w \n" - // count=t8,no 32-byte chunk - " nop \n" - - "lw $t0, 0(%[src]) \n" - "lw $t1, 4(%[src]) \n" - "lw $t2, 8(%[src]) \n" - "lw $t3, 12(%[src]) \n" - "lw $t4, 16(%[src]) \n" - "lw $t5, 20(%[src]) \n" - "lw $t6, 24(%[src]) \n" - "lw $t7, 28(%[src]) \n" - "addiu %[src], %[src], 32 \n" - - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "addiu %[dst], %[dst], 32 \n" - - "chk1w: \n" - "andi %[count], $t8, 0x3 \n" - // now count is the reminder past 1w chunks - "beq %[count], $t8, $last8 \n" - " subu $a3, $t8, %[count] \n" - // a3 is count of bytes in 1w chunks - "addu $a3, %[dst], $a3 \n" - // now a3 is the dst address past the 1w chunks - // copying in words (4-byte chunks) - "$wordCopy_loop: \n" - "lw $t3, 0(%[src]) \n" - // the first t3 may be equal t0 ... optimize? - "addiu %[src], %[src],4 \n" - "addiu %[dst], %[dst],4 \n" - "bne %[dst], $a3,$wordCopy_loop \n" - " sw $t3, -4(%[dst]) \n" - - // For the last (<8) bytes - "$last8: \n" - "blez %[count], leave \n" - " addu $a3, %[dst], %[count] \n" // a3 -last dst address - "$last8loop: \n" - "lb $v1, 0(%[src]) \n" - "addiu %[src], %[src], 1 \n" - "addiu %[dst], %[dst], 1 \n" - "bne %[dst], $a3, $last8loop \n" - " sb $v1, -1(%[dst]) \n" - - "leave: \n" - " j $ra \n" - " nop \n" - - // - // UNALIGNED case - // - - "unaligned: \n" - // got here with a3="negu a1" - "andi $a3, $a3, 0x3 \n" // a1 is word aligned? - "beqz $a3, $ua_chk16w \n" - " subu %[count], %[count], $a3 \n" - // bytes left after initial a3 bytes - "lwr $v1, 0(%[src]) \n" - "lwl $v1, 3(%[src]) \n" - "addu %[src], %[src], $a3 \n" // a3 may be 1, 2 or 3 - "swr $v1, 0(%[dst]) \n" - "addu %[dst], %[dst], $a3 \n" - // below the dst will be word aligned (NOTE1) - "$ua_chk16w: \n" - "andi $t8, %[count], 0x3f \n" // whole 64-B chunks? - // t8 is the byte count after 64-byte chunks - "beq %[count], $t8, ua_chk8w \n" - // if a2==t8, no 64-byte chunks - // There will be at most 1 32-byte chunk after it - "subu $a3, %[count], $t8 \n" // the reminder - // Here a3 counts bytes in 16w chunks - "addu $a3, %[dst], $a3 \n" - // Now a3 is the final dst after 64-byte chunks - "addu $t0, %[dst], %[count] \n" // t0 "past the end" - "subu $t9, $t0, 160 \n" - // t9 is the "last safe pref 30,128(a1)" address - "pref 0, 0(%[src]) \n" // first line of src - "pref 0, 32(%[src]) \n" // second line addr 32 - "pref 0, 64(%[src]) \n" - "pref 30, 32(%[dst]) \n" - // safe, as we have at least 64 bytes ahead - // In case the a1 > t9 don't use "pref 30" at all - "sltu $v1, $t9, %[dst] \n" - "bgtz $v1, $ua_loop16w \n" - // skip "pref 30,64(a1)" for too short arrays - " nop \n" - // otherwise, start with using pref30 - "pref 30, 64(%[dst]) \n" - "$ua_loop16w: \n" - "pref 0, 96(%[src]) \n" - "lwr $t0, 0(%[src]) \n" - "lwl $t0, 3(%[src]) \n" - "lwr $t1, 4(%[src]) \n" - "bgtz $v1, $ua_skip_pref30_96 \n" - " lwl $t1, 7(%[src]) \n" - "pref 30, 96(%[dst]) \n" - // continue setting up the dest, addr 96 - "$ua_skip_pref30_96: \n" - "lwr $t2, 8(%[src]) \n" - "lwl $t2, 11(%[src]) \n" - "lwr $t3, 12(%[src]) \n" - "lwl $t3, 15(%[src]) \n" - "lwr $t4, 16(%[src]) \n" - "lwl $t4, 19(%[src]) \n" - "lwr $t5, 20(%[src]) \n" - "lwl $t5, 23(%[src]) \n" - "lwr $t6, 24(%[src]) \n" - "lwl $t6, 27(%[src]) \n" - "lwr $t7, 28(%[src]) \n" - "lwl $t7, 31(%[src]) \n" - "pref 0, 128(%[src]) \n" - // bring the next lines of src, addr 128 - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "lwr $t0, 32(%[src]) \n" - "lwl $t0, 35(%[src]) \n" - "lwr $t1, 36(%[src]) \n" - "bgtz $v1, ua_skip_pref30_128 \n" - " lwl $t1, 39(%[src]) \n" - "pref 30, 128(%[dst]) \n" - // continue setting up the dest, addr 128 - "ua_skip_pref30_128: \n" - - "lwr $t2, 40(%[src]) \n" - "lwl $t2, 43(%[src]) \n" - "lwr $t3, 44(%[src]) \n" - "lwl $t3, 47(%[src]) \n" - "lwr $t4, 48(%[src]) \n" - "lwl $t4, 51(%[src]) \n" - "lwr $t5, 52(%[src]) \n" - "lwl $t5, 55(%[src]) \n" - "lwr $t6, 56(%[src]) \n" - "lwl $t6, 59(%[src]) \n" - "lwr $t7, 60(%[src]) \n" - "lwl $t7, 63(%[src]) \n" - "pref 0, 160(%[src]) \n" - // bring the next lines of src, addr 160 - "sw $t0, 32(%[dst]) \n" - "sw $t1, 36(%[dst]) \n" - "sw $t2, 40(%[dst]) \n" - "sw $t3, 44(%[dst]) \n" - "sw $t4, 48(%[dst]) \n" - "sw $t5, 52(%[dst]) \n" - "sw $t6, 56(%[dst]) \n" - "sw $t7, 60(%[dst]) \n" - - "addiu %[dst],%[dst],64 \n" // adding 64 to dest - "sltu $v1,$t9,%[dst] \n" - "bne %[dst],$a3,$ua_loop16w \n" - " addiu %[src],%[src],64 \n" // adding 64 to src - "move %[count],$t8 \n" - - // Here we have src and dest word-aligned but less than 64-bytes to go - - "ua_chk8w: \n" - "pref 0, 0x0(%[src]) \n" - "andi $t8, %[count], 0x1f \n" // 32-byte chunk? - // the t8 is the reminder count - "beq %[count], $t8, $ua_chk1w \n" - // when count==t8, no 32-byte chunk - - "lwr $t0, 0(%[src]) \n" - "lwl $t0, 3(%[src]) \n" - "lwr $t1, 4(%[src]) \n" - "lwl $t1, 7(%[src]) \n" - "lwr $t2, 8(%[src]) \n" - "lwl $t2, 11(%[src]) \n" - "lwr $t3, 12(%[src]) \n" - "lwl $t3, 15(%[src]) \n" - "lwr $t4, 16(%[src]) \n" - "lwl $t4, 19(%[src]) \n" - "lwr $t5, 20(%[src]) \n" - "lwl $t5, 23(%[src]) \n" - "lwr $t6, 24(%[src]) \n" - "lwl $t6, 27(%[src]) \n" - "lwr $t7, 28(%[src]) \n" - "lwl $t7, 31(%[src]) \n" - "addiu %[src], %[src], 32 \n" - - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "addiu %[dst], %[dst], 32 \n" - - "$ua_chk1w: \n" - "andi %[count], $t8, 0x3 \n" - // now count is the reminder past 1w chunks - "beq %[count], $t8, ua_smallCopy \n" - "subu $a3, $t8, %[count] \n" - // a3 is count of bytes in 1w chunks - "addu $a3, %[dst], $a3 \n" - // now a3 is the dst address past the 1w chunks - - // copying in words (4-byte chunks) - "$ua_wordCopy_loop: \n" - "lwr $v1, 0(%[src]) \n" - "lwl $v1, 3(%[src]) \n" - "addiu %[src], %[src], 4 \n" - "addiu %[dst], %[dst], 4 \n" - // note: dst=a1 is word aligned here, see NOTE1 - "bne %[dst], $a3, $ua_wordCopy_loop \n" - " sw $v1,-4(%[dst]) \n" - - // Now less than 4 bytes (value in count) left to copy - "ua_smallCopy: \n" - "beqz %[count], leave \n" - " addu $a3, %[dst], %[count] \n" // a3 = last dst address - "$ua_smallCopy_loop: \n" - "lb $v1, 0(%[src]) \n" - "addiu %[src], %[src], 1 \n" - "addiu %[dst], %[dst], 1 \n" - "bne %[dst],$a3,$ua_smallCopy_loop \n" - " sb $v1, -1(%[dst]) \n" - - "j $ra \n" - " nop \n" - ".set at \n" - ".set reorder \n" - : [dst] "+r"(dst), [src] "+r"(src) - : [count] "r"(count) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "a3", "v1", - "at"); -} -#endif // HAS_COPYROW_MIPS - -// DSPR2 functions -#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) && \ - (__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32) && \ - (__mips_isa_rev < 6) - -void SplitUVRow_DSPR2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, - int width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "srl $t4, %[width], 4 \n" // multiplies of 16 - "blez $t4, 2f \n" - " andi %[width], %[width], 0xf \n" // residual - - "1: \n" - "addiu $t4, $t4, -1 \n" - "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0 - "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2 - "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4 - "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6 - "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8 - "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | - // U10 - "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | - // U12 - "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | - // U14 - "addiu %[src_uv], %[src_uv], 32 \n" - "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0 - "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0 - "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4 - "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4 - "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8 - "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8 - "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | - // V12 - "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | - // U12 - "sw $t9, 0(%[dst_v]) \n" - "sw $t0, 0(%[dst_u]) \n" - "sw $t1, 4(%[dst_v]) \n" - "sw $t2, 4(%[dst_u]) \n" - "sw $t3, 8(%[dst_v]) \n" - "sw $t5, 8(%[dst_u]) \n" - "sw $t6, 12(%[dst_v]) \n" - "sw $t7, 12(%[dst_u]) \n" - "addiu %[dst_v], %[dst_v], 16 \n" - "bgtz $t4, 1b \n" - " addiu %[dst_u], %[dst_u], 16 \n" - - "beqz %[width], 3f \n" - " nop \n" - - "2: \n" - "lbu $t0, 0(%[src_uv]) \n" - "lbu $t1, 1(%[src_uv]) \n" - "addiu %[src_uv], %[src_uv], 2 \n" - "addiu %[width], %[width], -1 \n" - "sb $t0, 0(%[dst_u]) \n" - "sb $t1, 0(%[dst_v]) \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "bgtz %[width], 2b \n" - " addiu %[dst_v], %[dst_v], 1 \n" - - "3: \n" - ".set pop \n" - : [src_uv] "+r"(src_uv), [width] "+r"(width), [dst_u] "+r"(dst_u), - [dst_v] "+r"(dst_v) - : - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} - -void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "srl $t4, %[width], 4 \n" // multiplies of 16 - "andi $t5, %[width], 0xf \n" - "blez $t4, 2f \n" - " addu %[src], %[src], %[width] \n" // src += width - - "1: \n" - "lw $t0, -16(%[src]) \n" // |3|2|1|0| - "lw $t1, -12(%[src]) \n" // |7|6|5|4| - "lw $t2, -8(%[src]) \n" // |11|10|9|8| - "lw $t3, -4(%[src]) \n" // |15|14|13|12| - "wsbh $t0, $t0 \n" // |2|3|0|1| - "wsbh $t1, $t1 \n" // |6|7|4|5| - "wsbh $t2, $t2 \n" // |10|11|8|9| - "wsbh $t3, $t3 \n" // |14|15|12|13| - "rotr $t0, $t0, 16 \n" // |0|1|2|3| - "rotr $t1, $t1, 16 \n" // |4|5|6|7| - "rotr $t2, $t2, 16 \n" // |8|9|10|11| - "rotr $t3, $t3, 16 \n" // |12|13|14|15| - "addiu %[src], %[src], -16 \n" - "addiu $t4, $t4, -1 \n" - "sw $t3, 0(%[dst]) \n" // |15|14|13|12| - "sw $t2, 4(%[dst]) \n" // |11|10|9|8| - "sw $t1, 8(%[dst]) \n" // |7|6|5|4| - "sw $t0, 12(%[dst]) \n" // |3|2|1|0| - "bgtz $t4, 1b \n" - " addiu %[dst], %[dst], 16 \n" - "beqz $t5, 3f \n" - " nop \n" - - "2: \n" - "lbu $t0, -1(%[src]) \n" - "addiu $t5, $t5, -1 \n" - "addiu %[src], %[src], -1 \n" - "sb $t0, 0(%[dst]) \n" - "bgez $t5, 2b \n" - " addiu %[dst], %[dst], 1 \n" - - "3: \n" - ".set pop \n" - : [src] "+r"(src), [dst] "+r"(dst) - : [width] "r"(width) - : "t0", "t1", "t2", "t3", "t4", "t5"); -} - -void MirrorUVRow_DSPR2(const uint8* src_uv, - uint8* dst_u, - uint8* dst_v, - int width) { - int x; - int y; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "addu $t4, %[width], %[width] \n" - "srl %[x], %[width], 4 \n" - "andi %[y], %[width], 0xf \n" - "blez %[x], 2f \n" - " addu %[src_uv], %[src_uv], $t4 \n" - - "1: \n" - "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0| - "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4| - "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8| - "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12| - "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16| - "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20| - "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24| - "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28| - - "rotr $t0, $t0, 16 \n" // |1|0|3|2| - "rotr $t1, $t1, 16 \n" // |5|4|7|6| - "rotr $t2, $t2, 16 \n" // |9|8|11|10| - "rotr $t3, $t3, 16 \n" // |13|12|15|14| - "rotr $t4, $t4, 16 \n" // |17|16|19|18| - "rotr $t6, $t6, 16 \n" // |21|20|23|22| - "rotr $t7, $t7, 16 \n" // |25|24|27|26| - "rotr $t8, $t8, 16 \n" // |29|28|31|30| - "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6| - "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7| - "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14| - "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15| - "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22| - "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23| - "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30| - "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31| - "addiu %[src_uv], %[src_uv], -32 \n" - "addiu %[x], %[x], -1 \n" - "swr $t4, 0(%[dst_u]) \n" - "swl $t4, 3(%[dst_u]) \n" // |30|28|26|24| - "swr $t6, 0(%[dst_v]) \n" - "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25| - "swr $t2, 4(%[dst_u]) \n" - "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16| - "swr $t3, 4(%[dst_v]) \n" - "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17| - "swr $t0, 8(%[dst_u]) \n" - "swl $t0, 11(%[dst_u]) \n" // |14|12|10|8| - "swr $t1, 8(%[dst_v]) \n" - "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9| - "swr $t9, 12(%[dst_u]) \n" - "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0| - "swr $t5, 12(%[dst_v]) \n" - "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1| - "addiu %[dst_v], %[dst_v], 16 \n" - "bgtz %[x], 1b \n" - " addiu %[dst_u], %[dst_u], 16 \n" - "beqz %[y], 3f \n" - " nop \n" - "b 2f \n" - " nop \n" - - "2: \n" - "lbu $t0, -2(%[src_uv]) \n" - "lbu $t1, -1(%[src_uv]) \n" - "addiu %[src_uv], %[src_uv], -2 \n" - "addiu %[y], %[y], -1 \n" - "sb $t0, 0(%[dst_u]) \n" - "sb $t1, 0(%[dst_v]) \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "bgtz %[y], 2b \n" - " addiu %[dst_v], %[dst_v], 1 \n" - - "3: \n" - ".set pop \n" - : [src_uv] "+r"(src_uv), [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v), - [x] "=&r"(x), [y] "=&r"(y) - : [width] "r"(width) - : "t0", "t1", "t2", "t3", "t4", "t5", "t7", "t8", "t9"); -} - -void I422ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint32 tmp_ub = yuvconstants->kUVToB[0]; - uint32 tmp_ug = yuvconstants->kUVToG[0]; - uint32 tmp_vg = yuvconstants->kUVToG[1]; - uint32 tmp_vr = yuvconstants->kUVToR[1]; - uint32 tmp_bb = yuvconstants->kUVBiasB[0]; - uint32 tmp_bg = yuvconstants->kUVBiasG[0]; - uint32 tmp_br = yuvconstants->kUVBiasR[0]; - uint32 yg = yuvconstants->kYToRgb[0]; - uint32 tmp_yg; - uint32 tmp_mask = 0x7fff7fff; - tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); - tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); - tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); - tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); - tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; - tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); - tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); - tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; - yg = yg * 0x0101; - - for (x = 0; x < width - 1; x += 2) { - uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lbu %[tmp_t7], 0(%[src_y]) \n" - "lbu %[tmp_t1], 1(%[src_y]) \n" - "mul %[tmp_t7], %[tmp_t7], %[yg] \n" - "mul %[tmp_t1], %[tmp_t1], %[yg] \n" - "lbu %[tmp_t2], 0(%[src_u]) \n" - "lbu %[tmp_t3], 0(%[src_v]) \n" - "replv.ph %[tmp_t2], %[tmp_t2] \n" - "replv.ph %[tmp_t3], %[tmp_t3] \n" - "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n" - "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n" - "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n" - "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n" - "srl %[tmp_t7], %[tmp_t7], 16 \n" - "ins %[tmp_t1], %[tmp_t7], 0, 16 \n" - "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n" - "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n" - "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n" - "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n" - "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n" - "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n" - "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n" - "shra.ph %[tmp_t7], %[tmp_t7], 6 \n" - "shra.ph %[tmp_t8], %[tmp_t8], 6 \n" - "shra.ph %[tmp_t9], %[tmp_t9], 6 \n" - "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n" - "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n" - "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n" - "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n" - "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n" - "precrq.ph.w %[tmp_t9], %[tmp_t8], %[tmp_t7] \n" - "ins %[tmp_t7], %[tmp_t8], 16, 16 \n" - "precr.qb.ph %[tmp_t8], %[tmp_t9], %[tmp_t7] \n" - "precrq.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n" - "sw %[tmp_t8], 0(%[rgb_buf]) \n" - "sw %[tmp_t7], 4(%[rgb_buf]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) - : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v), - [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), [yg] "r"(yg), - [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb), - [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg), - [rgb_buf] "r"(rgb_buf), [tmp_mask] "r"(tmp_mask)); - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 4 pixels. - } -} - -// Bilinear filter 8x2 -> 8x1 -void InterpolateRow_DSPR2(uint8* dst_ptr, - const uint8* src_ptr, - ptrdiff_t src_stride, - int dst_width, - int source_y_fraction) { - int y0_fraction = 256 - source_y_fraction; - const uint8* src_ptr1 = src_ptr + src_stride; - - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "replv.ph $t0, %[y0_fraction] \n" - "replv.ph $t1, %[source_y_fraction] \n" - - "1: \n" - "lw $t2, 0(%[src_ptr]) \n" - "lw $t3, 0(%[src_ptr1]) \n" - "lw $t4, 4(%[src_ptr]) \n" - "lw $t5, 4(%[src_ptr1]) \n" - "muleu_s.ph.qbl $t6, $t2, $t0 \n" - "muleu_s.ph.qbr $t7, $t2, $t0 \n" - "muleu_s.ph.qbl $t8, $t3, $t1 \n" - "muleu_s.ph.qbr $t9, $t3, $t1 \n" - "muleu_s.ph.qbl $t2, $t4, $t0 \n" - "muleu_s.ph.qbr $t3, $t4, $t0 \n" - "muleu_s.ph.qbl $t4, $t5, $t1 \n" - "muleu_s.ph.qbr $t5, $t5, $t1 \n" - "addq.ph $t6, $t6, $t8 \n" - "addq.ph $t7, $t7, $t9 \n" - "addq.ph $t2, $t2, $t4 \n" - "addq.ph $t3, $t3, $t5 \n" - "shra_r.ph $t6, $t6, 8 \n" - "shra_r.ph $t7, $t7, 8 \n" - "shra_r.ph $t2, $t2, 8 \n" - "shra_r.ph $t3, $t3, 8 \n" - "precr.qb.ph $t6, $t6, $t7 \n" - "precr.qb.ph $t2, $t2, $t3 \n" - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[src_ptr1], %[src_ptr1], 8 \n" - "addiu %[dst_width], %[dst_width], -8 \n" - "sw $t6, 0(%[dst_ptr]) \n" - "sw $t2, 4(%[dst_ptr]) \n" - "bgtz %[dst_width], 1b \n" - " addiu %[dst_ptr], %[dst_ptr], 8 \n" - - ".set pop \n" - : [dst_ptr] "+r"(dst_ptr), [src_ptr1] "+r"(src_ptr1), - [src_ptr] "+r"(src_ptr), [dst_width] "+r"(dst_width) - : [source_y_fraction] "r"(source_y_fraction), - [y0_fraction] "r"(y0_fraction), [src_stride] "r"(src_stride) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} -#include <stdio.h> -void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width) { - int x; - uint32 tmp_mask = 0xff; - uint32 tmp_t1; - for (x = 0; x < (width - 1); ++x) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "ulw %[tmp_t1], 0(%[src_rgb24]) \n" - "addiu %[dst_argb], %[dst_argb], 4 \n" - "addiu %[src_rgb24], %[src_rgb24], 3 \n" - "ins %[tmp_t1], %[tmp_mask], 24, 8 \n" - "sw %[tmp_t1], -4(%[dst_argb]) \n" - ".set pop \n" - : [src_rgb24] "+r"(src_rgb24), [dst_argb] "+r"(dst_argb), - [tmp_t1] "=&r"(tmp_t1) - : [tmp_mask] "r"(tmp_mask) - : "memory"); - } - uint8 b = src_rgb24[0]; - uint8 g = src_rgb24[1]; - uint8 r = src_rgb24[2]; - dst_argb[0] = b; - dst_argb[1] = g; - dst_argb[2] = r; - dst_argb[3] = 255u; -} - -void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width) { - int x; - uint32 tmp_mask = 0xff; - uint32 tmp_t1, tmp_t2; - for (x = 0; x < (width - 1); ++x) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "ulw %[tmp_t1], 0(%[src_raw]) \n" - "addiu %[dst_argb], %[dst_argb], 4 \n" - "addiu %[src_raw], %[src_raw], 3 \n" - "srl %[tmp_t2], %[tmp_t1], 16 \n" - "ins %[tmp_t1], %[tmp_mask], 24, 8 \n" - "ins %[tmp_t1], %[tmp_t1], 16, 8 \n" - "ins %[tmp_t1], %[tmp_t2], 0, 8 \n" - "sw %[tmp_t1], -4(%[dst_argb]) \n" - ".set pop \n" - : [src_raw] "+r"(src_raw), [dst_argb] "+r"(dst_argb), - [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2) - : [tmp_mask] "r"(tmp_mask) - : "memory"); - } - uint8 r = src_raw[0]; - uint8 g = src_raw[1]; - uint8 b = src_raw[2]; - dst_argb[0] = b; - dst_argb[1] = g; - dst_argb[2] = r; - dst_argb[3] = 255u; -} - -void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565, - uint8* dst_argb, - int width) { - int x; - uint32 tmp_mask = 0xff; - uint32 tmp_t1, tmp_t2, tmp_t3; - for (x = 0; x < width; ++x) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lhu %[tmp_t1], 0(%[src_rgb565]) \n" - "addiu %[dst_argb], %[dst_argb], 4 \n" - "addiu %[src_rgb565], %[src_rgb565], 2 \n" - "sll %[tmp_t2], %[tmp_t1], 8 \n" - "ins %[tmp_t2], %[tmp_mask], 24,8 \n" - "ins %[tmp_t2], %[tmp_t1], 3, 16 \n" - "ins %[tmp_t2], %[tmp_t1], 5, 11 \n" - "srl %[tmp_t3], %[tmp_t1], 9 \n" - "ins %[tmp_t2], %[tmp_t3], 8, 2 \n" - "ins %[tmp_t2], %[tmp_t1], 3, 5 \n" - "srl %[tmp_t3], %[tmp_t1], 2 \n" - "ins %[tmp_t2], %[tmp_t3], 0, 3 \n" - "sw %[tmp_t2], -4(%[dst_argb]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [src_rgb565] "+r"(src_rgb565), - [dst_argb] "+r"(dst_argb) - : [tmp_mask] "r"(tmp_mask)); - } -} - -void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555, - uint8* dst_argb, - int width) { - int x; - uint32 tmp_t1, tmp_t2, tmp_t3; - for (x = 0; x < width; ++x) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lh %[tmp_t1], 0(%[src_argb1555]) \n" - "addiu %[dst_argb], %[dst_argb], 4 \n" - "addiu %[src_argb1555], %[src_argb1555], 2 \n" - "sll %[tmp_t2], %[tmp_t1], 9 \n" - "ins %[tmp_t2], %[tmp_t1], 4, 15 \n" - "ins %[tmp_t2], %[tmp_t1], 6, 10 \n" - "srl %[tmp_t3], %[tmp_t1], 7 \n" - "ins %[tmp_t2], %[tmp_t3], 8, 3 \n" - "ins %[tmp_t2], %[tmp_t1], 3, 5 \n" - "srl %[tmp_t3], %[tmp_t1], 2 \n" - "ins %[tmp_t2], %[tmp_t3], 0, 3 \n" - "sw %[tmp_t2], -4(%[dst_argb]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [src_argb1555] "+r"(src_argb1555), - [dst_argb] "+r"(dst_argb) - :); - } -} - -void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444, - uint8* dst_argb, - int width) { - int x; - uint32 tmp_t1; - for (x = 0; x < width; ++x) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lh %[tmp_t1], 0(%[src_argb4444]) \n" - "addiu %[dst_argb], %[dst_argb], 4 \n" - "addiu %[src_argb4444], %[src_argb4444], 2 \n" - "ins %[tmp_t1], %[tmp_t1], 16, 16 \n" - "ins %[tmp_t1], %[tmp_t1], 12, 16 \n" - "ins %[tmp_t1], %[tmp_t1], 8, 12 \n" - "ins %[tmp_t1], %[tmp_t1], 4, 8 \n" - "sw %[tmp_t1], -4(%[dst_argb]) \n" - ".set pop \n" - : [src_argb4444] "+r"(src_argb4444), [dst_argb] "+r"(dst_argb), - [tmp_t1] "=&r"(tmp_t1)); - } -} - -void I444ToARGBRow_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint32 tmp_ub = yuvconstants->kUVToB[0]; - uint32 tmp_ug = yuvconstants->kUVToG[0]; - uint32 tmp_vg = yuvconstants->kUVToG[1]; - uint32 tmp_vr = yuvconstants->kUVToR[1]; - uint32 tmp_bb = yuvconstants->kUVBiasB[0]; - uint32 tmp_bg = yuvconstants->kUVBiasG[0]; - uint32 tmp_br = yuvconstants->kUVBiasR[0]; - uint32 yg = yuvconstants->kYToRgb[0]; - uint32 tmp_mask = 0x7fff7fff; - uint32 tmp_yg; - - tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); - tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); - tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); - tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); - tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; - tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); - tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); - tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; - yg = yg * 0x0101; - - for (x = 0; x < width - 1; x += 2) { - uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lbu %[tmp_t7], 0(%[y_buf]) \n" - "lbu %[tmp_t1], 1(%[y_buf]) \n" - "mul %[tmp_t7], %[tmp_t7], %[yg] \n" - "mul %[tmp_t1], %[tmp_t1], %[yg] \n" - "lh %[tmp_t2], 0(%[u_buf]) \n" - "lh %[tmp_t3], 0(%[v_buf]) \n" - "preceu.ph.qbr %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t3], %[tmp_t3] \n" - "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n" - "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n" - "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n" - "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n" - "srl %[tmp_t7], %[tmp_t7], 16 \n" - "ins %[tmp_t1], %[tmp_t7], 0, 16 \n" - "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n" - "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n" - "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n" - "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n" - "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n" - "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n" - "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n" - "shra.ph %[tmp_t7], %[tmp_t7], 6 \n" - "shra.ph %[tmp_t8], %[tmp_t8], 6 \n" - "shra.ph %[tmp_t9], %[tmp_t9], 6 \n" - "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n" - "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n" - "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n" - "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n" - "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n" - "precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n" - "ins %[tmp_t7], %[tmp_t8], 16, 16 \n" - "precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n" - "precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n" - "sw %[tmp_t8], 0(%[rgb_buf]) \n" - "sw %[tmp_t7], 4(%[rgb_buf]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) - : [y_buf] "r"(y_buf), [yg] "r"(yg), [u_buf] "r"(u_buf), - [v_buf] "r"(v_buf), [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), - [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb), - [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg), - [rgb_buf] "r"(rgb_buf), [tmp_mask] "r"(tmp_mask)); - y_buf += 2; - u_buf += 2; - v_buf += 2; - rgb_buf += 8; // Advance 1 pixel. - } -} - -void I422ToARGB4444Row_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint32 tmp_ub = yuvconstants->kUVToB[0]; - uint32 tmp_ug = yuvconstants->kUVToG[0]; - uint32 tmp_vg = yuvconstants->kUVToG[1]; - uint32 tmp_vr = yuvconstants->kUVToR[1]; - uint32 tmp_bb = yuvconstants->kUVBiasB[0]; - uint32 tmp_bg = yuvconstants->kUVBiasG[0]; - uint32 tmp_br = yuvconstants->kUVBiasR[0]; - uint32 yg = yuvconstants->kYToRgb[0]; - uint32 tmp_yg; - uint32 tmp_mask = 0x7fff7fff; - tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); - tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); - tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); - tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); - tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; - tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); - tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); - tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; - yg = yg * 0x0101; - - for (x = 0; x < width - 1; x += 2) { - uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lbu %[tmp_t7], 0(%[src_y]) \n" - "lbu %[tmp_t1], 1(%[src_y]) \n" - "mul %[tmp_t7], %[tmp_t7], %[yg] \n" - "mul %[tmp_t1], %[tmp_t1], %[yg] \n" - "lbu %[tmp_t2], 0(%[src_u]) \n" - "lbu %[tmp_t3], 0(%[src_v]) \n" - "replv.ph %[tmp_t2], %[tmp_t2] \n" - "replv.ph %[tmp_t3], %[tmp_t3] \n" - "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n" - "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n" - "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n" - "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n" - "srl %[tmp_t7], %[tmp_t7], 16 \n" - "ins %[tmp_t1], %[tmp_t7], 0, 16 \n" - "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n" - "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n" - "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n" - "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n" - "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n" - "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n" - "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n" - "shra.ph %[tmp_t7], %[tmp_t7], 6 \n" - "shra.ph %[tmp_t8], %[tmp_t8], 6 \n" - "shra.ph %[tmp_t9], %[tmp_t9], 6 \n" - "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n" - "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n" - "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n" - "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n" - "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n" - "precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n" - "ins %[tmp_t7], %[tmp_t8], 16, 16 \n" - "precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n" - "precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n" - "shrl.qb %[tmp_t1], %[tmp_t8], 4 \n" - "shrl.qb %[tmp_t2], %[tmp_t7], 4 \n" - "shrl.ph %[tmp_t8], %[tmp_t1], 4 \n" - "shrl.ph %[tmp_t7], %[tmp_t2], 4 \n" - "or %[tmp_t8], %[tmp_t8], %[tmp_t1] \n" - "or %[tmp_t7], %[tmp_t7], %[tmp_t2] \n" - "precr.qb.ph %[tmp_t8], %[tmp_t7], %[tmp_t8] \n" - "sw %[tmp_t8], 0(%[dst_argb4444]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) - : [dst_argb4444] "r"(dst_argb4444), [yg] "r"(yg), [src_u] "r"(src_u), - [src_v] "r"(src_v), [src_y] "r"(src_y), [tmp_ub] "r"(tmp_ub), - [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), - [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), - [tmp_yg] "r"(tmp_yg), [tmp_mask] "r"(tmp_mask)); - src_y += 2; - src_u += 1; - src_v += 1; - dst_argb4444 += 4; // Advance 2 pixels. - } -} - -void I422ToARGB1555Row_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint32 tmp_ub = yuvconstants->kUVToB[0]; - uint32 tmp_ug = yuvconstants->kUVToG[0]; - uint32 tmp_vg = yuvconstants->kUVToG[1]; - uint32 tmp_vr = yuvconstants->kUVToR[1]; - uint32 tmp_bb = yuvconstants->kUVBiasB[0]; - uint32 tmp_bg = yuvconstants->kUVBiasG[0]; - uint32 tmp_br = yuvconstants->kUVBiasR[0]; - uint32 yg = yuvconstants->kYToRgb[0]; - uint32 tmp_yg; - uint32 tmp_mask = 0x80008000; - tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); - tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); - tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); - tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); - tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; - tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); - tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); - tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; - yg = yg * 0x0101; - - for (x = 0; x < width - 1; x += 2) { - uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lbu %[tmp_t7], 0(%[src_y]) \n" - "lbu %[tmp_t1], 1(%[src_y]) \n" - "mul %[tmp_t7], %[tmp_t7], %[yg] \n" - "mul %[tmp_t1], %[tmp_t1], %[yg] \n" - "lbu %[tmp_t2], 0(%[src_u]) \n" - "lbu %[tmp_t3], 0(%[src_v]) \n" - "replv.ph %[tmp_t2], %[tmp_t2] \n" - "replv.ph %[tmp_t3], %[tmp_t3] \n" - "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n" - "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n" - "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n" - "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n" - "srl %[tmp_t7], %[tmp_t7], 16 \n" - "ins %[tmp_t1], %[tmp_t7], 0, 16 \n" - "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n" - "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n" - "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n" - "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n" - "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n" - "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n" - "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n" - "shra.ph %[tmp_t7], %[tmp_t7], 6 \n" - "shra.ph %[tmp_t8], %[tmp_t8], 6 \n" - "shra.ph %[tmp_t9], %[tmp_t9], 6 \n" - "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n" - "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n" - "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n" - "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n" - "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n" - "precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n" - "ins %[tmp_t7], %[tmp_t8], 16, 16 \n" - "precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n" - "precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n" - "ins %[tmp_t3], %[tmp_t8], 7, 24 \n" - "ins %[tmp_t3], %[tmp_t8], 10, 16 \n" - "ins %[tmp_t3], %[tmp_t8], 13, 8 \n" - "ins %[tmp_t4], %[tmp_t7], 7, 24 \n" - "ins %[tmp_t4], %[tmp_t7], 10, 16 \n" - "ins %[tmp_t4], %[tmp_t7], 13, 8 \n" - "precrq.ph.w %[tmp_t8], %[tmp_t4], %[tmp_t3] \n" - "or %[tmp_t8], %[tmp_t8], %[tmp_mask]\n" - "sw %[tmp_t8], 0(%[dst_argb1555]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) - : [dst_argb1555] "r"(dst_argb1555), [yg] "r"(yg), [src_u] "r"(src_u), - [src_v] "r"(src_v), [src_y] "r"(src_y), [tmp_ub] "r"(tmp_ub), - [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), - [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), - [tmp_yg] "r"(tmp_yg), [tmp_mask] "r"(tmp_mask)); - src_y += 2; - src_u += 1; - src_v += 1; - dst_argb1555 += 4; // Advance 2 pixels. - } -} - -void NV12ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_uv, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint32 tmp_ub = yuvconstants->kUVToB[0]; - uint32 tmp_ug = yuvconstants->kUVToG[0]; - uint32 tmp_vg = yuvconstants->kUVToG[1]; - uint32 tmp_vr = yuvconstants->kUVToR[1]; - uint32 tmp_bb = yuvconstants->kUVBiasB[0]; - uint32 tmp_bg = yuvconstants->kUVBiasG[0]; - uint32 tmp_br = yuvconstants->kUVBiasR[0]; - uint32 yg = yuvconstants->kYToRgb[0]; - uint32 tmp_mask = 0x7fff7fff; - uint32 tmp_yg; - tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); - tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); - tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); - tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); - tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; - tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); - tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); - tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; - yg = yg * 0x0101; - - for (x = 0; x < width - 1; x += 2) { - uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lbu %[tmp_t7], 0(%[src_y]) \n" - "lbu %[tmp_t1], 1(%[src_y]) \n" - "mul %[tmp_t7], %[tmp_t7], %[yg] \n" - "mul %[tmp_t1], %[tmp_t1], %[yg] \n" - "lbu %[tmp_t2], 0(%[src_uv]) \n" - "lbu %[tmp_t3], 1(%[src_uv]) \n" - "replv.ph %[tmp_t2], %[tmp_t2] \n" - "replv.ph %[tmp_t3], %[tmp_t3] \n" - "mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n" - "mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n" - "mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n" - "mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n" - "srl %[tmp_t7], %[tmp_t7], 16 \n" - "ins %[tmp_t1], %[tmp_t7], 0, 16 \n" - "addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n" - "addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n" - "addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n" - "addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n" - "addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n" - "subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n" - "addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n" - "shra.ph %[tmp_t7], %[tmp_t7], 6 \n" - "shra.ph %[tmp_t8], %[tmp_t8], 6 \n" - "shra.ph %[tmp_t9], %[tmp_t9], 6 \n" - "shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n" - "shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n" - "shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n" - "precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n" - "precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n" - "precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n" - "ins %[tmp_t7], %[tmp_t8], 16, 16 \n" - "precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n" - "precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n" - "sw %[tmp_t8], 0(%[rgb_buf]) \n" - "sw %[tmp_t7], 4(%[rgb_buf]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) - : [src_y] "r"(src_y), [src_uv] "r"(src_uv), [yg] "r"(yg), - [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), - [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), - [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg), [rgb_buf] "r"(rgb_buf), - [tmp_mask] "r"(tmp_mask)); - - src_y += 2; - src_uv += 2; - rgb_buf += 8; // Advance 2 pixels. - } -} - -void BGRAToUVRow_DSPR2(const uint8* src_rgb0, - int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, - int width) { - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; - int x; - int const1 = 0xffda0000; - int const2 = 0x0070ffb6; - int const3 = 0x00700000; - int const4 = 0xffeeffa2; - int const5 = 0x100; - for (x = 0; x < width - 1; x += 2) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_rgb0]) \n" - "lw %[tmp_t2], 4(%[src_rgb0]) \n" - "lw %[tmp_t3], 0(%[src_rgb1]) \n" - "lw %[tmp_t4], 4(%[src_rgb1]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n" - "addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n" - "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n" - "shrl.ph %[tmp_t5], %[tmp_t5], 2 \n" - "shrl.ph %[tmp_t1], %[tmp_t1], 2 \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n" - "dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n" - "extr_r.w %[tmp_t7], $ac0, 9 \n" - "extr_r.w %[tmp_t8], $ac1, 9 \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "addiu %[dst_v], %[dst_v], 1 \n" - "addiu %[src_rgb0], %[src_rgb0], 8 \n" - "addiu %[src_rgb1], %[src_rgb1], 8 \n" - "sb %[tmp_t7], -1(%[dst_u]) \n" - "sb %[tmp_t8], -1(%[dst_v]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1), - [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v) - : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3), - [const4] "r"(const4), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi"); - } -} - -void BGRAToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) { - int x; - int const1 = 0x00420000; - int const2 = 0x00190081; - int const5 = 0x40; - for (x = 0; x < width; x += 4) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_argb0]) \n" - "lw %[tmp_t2], 4(%[src_argb0]) \n" - "lw %[tmp_t3], 8(%[src_argb0]) \n" - "lw %[tmp_t4], 12(%[src_argb0]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "mult $ac2, %[const5], %[const5] \n" - "mult $ac3, %[const5], %[const5] \n" - "dpa.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpa.w.ph $ac1, %[tmp_t6], %[const1] \n" - "dpa.w.ph $ac2, %[tmp_t7], %[const1] \n" - "dpa.w.ph $ac3, %[tmp_t8], %[const1] \n" - "dpa.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpa.w.ph $ac1, %[tmp_t2], %[const2] \n" - "dpa.w.ph $ac2, %[tmp_t3], %[const2] \n" - "dpa.w.ph $ac3, %[tmp_t4], %[const2] \n" - "extr_r.w %[tmp_t1], $ac0, 8 \n" - "extr_r.w %[tmp_t2], $ac1, 8 \n" - "extr_r.w %[tmp_t3], $ac2, 8 \n" - "extr_r.w %[tmp_t4], $ac3, 8 \n" - "addiu %[src_argb0],%[src_argb0], 16 \n" - "addiu %[dst_y], %[dst_y], 4 \n" - "sb %[tmp_t1], -4(%[dst_y]) \n" - "sb %[tmp_t2], -3(%[dst_y]) \n" - "sb %[tmp_t3], -2(%[dst_y]) \n" - "sb %[tmp_t4], -1(%[dst_y]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y) - : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", - "$ac3hi"); - } -} - -void ABGRToUVRow_DSPR2(const uint8* src_rgb0, - int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, - int width) { - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; - int x; - int const1 = 0xffb6ffda; - int const2 = 0x00000070; - int const3 = 0xffa20070; - int const4 = 0x0000ffee; - int const5 = 0x100; - - for (x = 0; x < width - 1; x += 2) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_rgb0]) \n" - "lw %[tmp_t2], 4(%[src_rgb0]) \n" - "lw %[tmp_t3], 0(%[src_rgb1]) \n" - "lw %[tmp_t4], 4(%[src_rgb1]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n" - "addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n" - "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n" - "shrl.ph %[tmp_t5], %[tmp_t5], 2 \n" - "shrl.ph %[tmp_t1], %[tmp_t1], 2 \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n" - "dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n" - "extr_r.w %[tmp_t7], $ac0, 9 \n" - "extr_r.w %[tmp_t8], $ac1, 9 \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "addiu %[dst_v], %[dst_v], 1 \n" - "addiu %[src_rgb0], %[src_rgb0], 8 \n" - "addiu %[src_rgb1], %[src_rgb1], 8 \n" - "sb %[tmp_t7], -1(%[dst_u]) \n" - "sb %[tmp_t8], -1(%[dst_v]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1), - [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v) - : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3), - [const4] "r"(const4), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi"); - } -} - -void ARGBToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) { - int x; - int const1 = 0x00810019; - int const2 = 0x00000042; - int const5 = 0x40; - for (x = 0; x < width; x += 4) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_argb0]) \n" - "lw %[tmp_t2], 4(%[src_argb0]) \n" - "lw %[tmp_t3], 8(%[src_argb0]) \n" - "lw %[tmp_t4], 12(%[src_argb0]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "mult $ac2, %[const5], %[const5] \n" - "mult $ac3, %[const5], %[const5] \n" - "dpa.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpa.w.ph $ac1, %[tmp_t6], %[const1] \n" - "dpa.w.ph $ac2, %[tmp_t7], %[const1] \n" - "dpa.w.ph $ac3, %[tmp_t8], %[const1] \n" - "dpa.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpa.w.ph $ac1, %[tmp_t2], %[const2] \n" - "dpa.w.ph $ac2, %[tmp_t3], %[const2] \n" - "dpa.w.ph $ac3, %[tmp_t4], %[const2] \n" - "extr_r.w %[tmp_t1], $ac0, 8 \n" - "extr_r.w %[tmp_t2], $ac1, 8 \n" - "extr_r.w %[tmp_t3], $ac2, 8 \n" - "extr_r.w %[tmp_t4], $ac3, 8 \n" - "addiu %[dst_y], %[dst_y], 4 \n" - "addiu %[src_argb0],%[src_argb0], 16 \n" - "sb %[tmp_t1], -4(%[dst_y]) \n" - "sb %[tmp_t2], -3(%[dst_y]) \n" - "sb %[tmp_t3], -2(%[dst_y]) \n" - "sb %[tmp_t4], -1(%[dst_y]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y) - : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", - "$ac3hi"); - } -} - -void ABGRToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) { - int x; - int const1 = 0x00810042; - int const2 = 0x00000019; - int const5 = 0x40; - for (x = 0; x < width; x += 4) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_argb0]) \n" - "lw %[tmp_t2], 4(%[src_argb0]) \n" - "lw %[tmp_t3], 8(%[src_argb0]) \n" - "lw %[tmp_t4], 12(%[src_argb0]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "mult $ac2, %[const5], %[const5] \n" - "mult $ac3, %[const5], %[const5] \n" - "dpa.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpa.w.ph $ac1, %[tmp_t6], %[const1] \n" - "dpa.w.ph $ac2, %[tmp_t7], %[const1] \n" - "dpa.w.ph $ac3, %[tmp_t8], %[const1] \n" - "dpa.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpa.w.ph $ac1, %[tmp_t2], %[const2] \n" - "dpa.w.ph $ac2, %[tmp_t3], %[const2] \n" - "dpa.w.ph $ac3, %[tmp_t4], %[const2] \n" - "extr_r.w %[tmp_t1], $ac0, 8 \n" - "extr_r.w %[tmp_t2], $ac1, 8 \n" - "extr_r.w %[tmp_t3], $ac2, 8 \n" - "extr_r.w %[tmp_t4], $ac3, 8 \n" - "addiu %[src_argb0],%[src_argb0], 16 \n" - "addiu %[dst_y], %[dst_y], 4 \n" - "sb %[tmp_t1], -4(%[dst_y]) \n" - "sb %[tmp_t2], -3(%[dst_y]) \n" - "sb %[tmp_t3], -2(%[dst_y]) \n" - "sb %[tmp_t4], -1(%[dst_y]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y) - : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", - "$ac3hi"); - } -} - -void RGBAToUVRow_DSPR2(const uint8* src_rgb0, - int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, - int width) { - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; - int x; - int const1 = 0xffb60070; - int const2 = 0x0000ffda; - int const3 = 0xffa2ffee; - int const4 = 0x00000070; - int const5 = 0x100; - - for (x = 0; x < width - 1; x += 2) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "ulw %[tmp_t1], 0+1(%[src_rgb0]) \n" - "ulw %[tmp_t2], 4+1(%[src_rgb0]) \n" - "ulw %[tmp_t3], 0+1(%[src_rgb1]) \n" - "ulw %[tmp_t4], 4+1(%[src_rgb1]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n" - "addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n" - "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n" - "shrl.ph %[tmp_t5], %[tmp_t5], 2 \n" - "shrl.ph %[tmp_t1], %[tmp_t1], 2 \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n" - "dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n" - "extr_r.w %[tmp_t7], $ac0, 9 \n" - "extr_r.w %[tmp_t8], $ac1, 9 \n" - "addiu %[src_rgb0], %[src_rgb0], 8 \n" - "addiu %[src_rgb1], %[src_rgb1], 8 \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "addiu %[dst_v], %[dst_v], 1 \n" - "sb %[tmp_t7], -1(%[dst_u]) \n" - "sb %[tmp_t8], -1(%[dst_v]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1), - [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v) - : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3), - [const4] "r"(const4), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi"); - } -} - -void RGBAToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) { - int x; - int const1 = 0x00420081; - int const2 = 0x00190000; - int const5 = 0x40; - for (x = 0; x < width; x += 4) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_argb0]) \n" - "lw %[tmp_t2], 4(%[src_argb0]) \n" - "lw %[tmp_t3], 8(%[src_argb0]) \n" - "lw %[tmp_t4], 12(%[src_argb0]) \n" - "preceu.ph.qbl %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbr %[tmp_t4], %[tmp_t4] \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "mult $ac2, %[const5], %[const5] \n" - "mult $ac3, %[const5], %[const5] \n" - "dpa.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpa.w.ph $ac1, %[tmp_t6], %[const1] \n" - "dpa.w.ph $ac2, %[tmp_t7], %[const1] \n" - "dpa.w.ph $ac3, %[tmp_t8], %[const1] \n" - "dpa.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpa.w.ph $ac1, %[tmp_t2], %[const2] \n" - "dpa.w.ph $ac2, %[tmp_t3], %[const2] \n" - "dpa.w.ph $ac3, %[tmp_t4], %[const2] \n" - "extr_r.w %[tmp_t1], $ac0, 8 \n" - "extr_r.w %[tmp_t2], $ac1, 8 \n" - "extr_r.w %[tmp_t3], $ac2, 8 \n" - "extr_r.w %[tmp_t4], $ac3, 8 \n" - "addiu %[dst_y], %[dst_y], 4 \n" - "addiu %[src_argb0],%[src_argb0], 16 \n" - "sb %[tmp_t1], -4(%[dst_y]) \n" - "sb %[tmp_t2], -3(%[dst_y]) \n" - "sb %[tmp_t3], -2(%[dst_y]) \n" - "sb %[tmp_t4], -1(%[dst_y]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y) - : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", - "$ac3hi"); - } -} - -void ARGBToUVRow_DSPR2(const uint8* src_rgb0, - int src_stride_rgb, - uint8* dst_u, - uint8* dst_v, - int width) { - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; - int x; - int const1 = 0xffb60070; - int const2 = 0x0000ffda; - int const3 = 0xffa2ffee; - int const4 = 0x00000070; - int const5 = 0x100; - - for (x = 0; x < width - 1; x += 2) { - int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; - int tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t1], 0(%[src_rgb0]) \n" - "lw %[tmp_t2], 4(%[src_rgb0]) \n" - "lw %[tmp_t3], 0(%[src_rgb1]) \n" - "lw %[tmp_t4], 4(%[src_rgb1]) \n" - "preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n" - "preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n" - "preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n" - "preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n" - "preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n" - "preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n" - "preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n" - "addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n" - "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n" - "addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n" - "shrl.ph %[tmp_t5], %[tmp_t5], 2 \n" - "shrl.ph %[tmp_t1], %[tmp_t1], 2 \n" - "mult $ac0, %[const5], %[const5] \n" - "mult $ac1, %[const5], %[const5] \n" - "dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n" - "dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n" - "dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n" - "dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n" - "extr_r.w %[tmp_t7], $ac0, 9 \n" - "extr_r.w %[tmp_t8], $ac1, 9 \n" - "addiu %[src_rgb0], %[src_rgb0], 8 \n" - "addiu %[src_rgb1], %[src_rgb1], 8 \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "addiu %[dst_v], %[dst_v], 1 \n" - "sb %[tmp_t7], -1(%[dst_u]) \n" - "sb %[tmp_t8], -1(%[dst_v]) \n" - ".set pop \n" - : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), - [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), - [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), - [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1), - [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v) - : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3), - [const4] "r"(const4), [const5] "r"(const5) - : "hi", "lo", "$ac1lo", "$ac1hi"); - } -} - -#endif // __mips_dsp_rev >= 2 - -#endif // defined(__mips__) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/source/scale.cc b/source/scale.cc index 9104acb9..6951d8fb 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -103,13 +103,6 @@ static void ScalePlaneDown2(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN2_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown2 = filtering ? ScaleRowDown2Box_DSPR2 : ScaleRowDown2_DSPR2; - } -#endif #if defined(HAS_SCALEROWDOWN2_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleRowDown2 = @@ -176,14 +169,6 @@ static void ScalePlaneDown2_16(int src_width, : ScaleRowDown2Box_16_SSE2); } #endif -#if defined(HAS_SCALEROWDOWN2_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown2 = - filtering ? ScaleRowDown2Box_16_DSPR2 : ScaleRowDown2_16_DSPR2; - } -#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -247,13 +232,6 @@ static void ScalePlaneDown4(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN4_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown4 = filtering ? ScaleRowDown4Box_DSPR2 : ScaleRowDown4_DSPR2; - } -#endif #if defined(HAS_SCALEROWDOWN4_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleRowDown4 = @@ -306,14 +284,6 @@ static void ScalePlaneDown4_16(int src_width, filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2; } #endif -#if defined(HAS_SCALEROWDOWN4_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown4 = - filtering ? ScaleRowDown4Box_16_DSPR2 : ScaleRowDown4_16_DSPR2; - } -#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -411,19 +381,6 @@ static void ScalePlaneDown34(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN34_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_DSPR2; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Box_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_1_Box_DSPR2; - } - } -#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); @@ -495,19 +452,6 @@ static void ScalePlaneDown34_16(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN34_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_16_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_16_DSPR2; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_DSPR2; - } - } -#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); @@ -612,19 +556,6 @@ static void ScalePlaneDown38(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN38_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_DSPR2; - } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Box_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_2_Box_DSPR2; - } - } -#endif #if defined(HAS_SCALEROWDOWN38_MSA) if (TestCpuFlag(kCpuHasMSA)) { if (!filtering) { @@ -716,19 +647,6 @@ static void ScalePlaneDown38_16(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN38_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_16_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_16_DSPR2; - } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_DSPR2; - } - } -#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); @@ -931,14 +849,6 @@ static void ScalePlaneBox(int src_width, } } #endif -#if defined(HAS_SCALEADDROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - ScaleAddRow = ScaleAddRow_Any_DSPR2; - if (IS_ALIGNED(src_width, 16)) { - ScaleAddRow = ScaleAddRow_DSPR2; - } - } -#endif for (j = 0; j < dst_height; ++j) { int boxheight; @@ -1070,14 +980,6 @@ void ScalePlaneBilinearDown(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(src_width, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } - } -#endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; @@ -1193,14 +1095,6 @@ void ScalePlaneBilinearDown_16(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - InterpolateRow = InterpolateRow_Any_16_DSPR2; - if (IS_ALIGNED(src_width, 4)) { - InterpolateRow = InterpolateRow_16_DSPR2; - } - } -#endif #if defined(HAS_SCALEFILTERCOLS_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { @@ -1281,14 +1175,6 @@ void ScalePlaneBilinearUp(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } - } -#endif if (filtering && src_width >= 32768) { ScaleFilterCols = ScaleFilterCols64_C; @@ -1432,14 +1318,6 @@ void ScalePlaneBilinearUp_16(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - InterpolateRow = InterpolateRow_Any_16_DSPR2; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_16_DSPR2; - } - } -#endif if (filtering && src_width >= 32768) { ScaleFilterCols = ScaleFilterCols64_16_C; diff --git a/source/scale_any.cc b/source/scale_any.cc index c4d6626a..8604c233 100644 --- a/source/scale_any.cc +++ b/source/scale_any.cc @@ -456,9 +456,6 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15) #ifdef HAS_SCALEADDROW_MSA SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15) #endif -#ifdef HAS_SCALEADDROW_DSPR2 -SAANY(ScaleAddRow_Any_DSPR2, ScaleAddRow_DSPR2, ScaleAddRow_C, 15) -#endif #undef SAANY #ifdef __cplusplus diff --git a/source/scale_argb.cc b/source/scale_argb.cc index c3ec7d6b..cd4683b3 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -306,15 +306,6 @@ static void ScaleARGBBilinearDown(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) && - IS_ALIGNED(src_stride, 4)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(clip_src_width, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } - } -#endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; @@ -419,12 +410,6 @@ static void ScaleARGBBilinearUp(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) && - IS_ALIGNED(dst_stride, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } -#endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; @@ -587,15 +572,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width, } } #endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_DSPR2; - } -#endif #if defined(HAS_I422TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGBRow = I422ToARGBRow_Any_MSA; @@ -632,12 +608,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) && - IS_ALIGNED(dst_stride_argb, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } -#endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; diff --git a/source/scale_common.cc b/source/scale_common.cc index fefb027b..e060c3cb 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -1063,16 +1063,6 @@ void ScalePlaneVertical(int src_height, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) && - IS_ALIGNED(dst_stride, 4)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(dst_width_bytes, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } - } -#endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; @@ -1151,16 +1141,6 @@ void ScalePlaneVertical_16(int src_height, } } #endif -#if defined(HAS_INTERPOLATEROW_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) && - IS_ALIGNED(dst_stride, 4)) { - InterpolateRow = InterpolateRow_Any_16_DSPR2; - if (IS_ALIGNED(dst_width_bytes, 4)) { - InterpolateRow = InterpolateRow_16_DSPR2; - } - } -#endif for (j = 0; j < dst_height; ++j) { int yi; int yf; diff --git a/source/scale_dspr2.cc b/source/scale_dspr2.cc deleted file mode 100644 index ddedcbf4..00000000 --- a/source/scale_dspr2.cc +++ /dev/null @@ -1,668 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for GCC MIPS DSPR2 -#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) && \ - (__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32) - -void ScaleRowDown2_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 4 \n" // iterations -> by 16 - "beqz $t9, 2f \n" - " nop \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28| - // TODO(fbarchard): Use odd pixels instead of even. - "precrq.qb.ph $t8, $t1, $t0 \n" // |7|5|3|1| - "precrq.qb.ph $t0, $t3, $t2 \n" // |15|13|11|9| - "precrq.qb.ph $t1, $t5, $t4 \n" // |23|21|19|17| - "precrq.qb.ph $t2, $t7, $t6 \n" // |31|29|27|25| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "addiu $t9, $t9, -1 \n" - "sw $t8, 0(%[dst]) \n" - "sw $t0, 4(%[dst]) \n" - "sw $t1, 8(%[dst]) \n" - "sw $t2, 12(%[dst]) \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 16 \n" - - "2: \n" - "andi $t9, %[dst_width], 0xf \n" // residue - "beqz $t9, 3f \n" - " nop \n" - - "21: \n" - "lbu $t0, 1(%[src_ptr]) \n" - "addiu %[src_ptr], %[src_ptr], 2 \n" - "addiu $t9, $t9, -1 \n" - "sb $t0, 0(%[dst]) \n" - "bgtz $t9, 21b \n" - " addiu %[dst], %[dst], 1 \n" - - "3: \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst) - : [dst_width] "r"(dst_width) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} - -void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width) { - const uint8* t = src_ptr + src_stride; - - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 3 \n" // iterations -> step 8 - "bltz $t9, 2f \n" - " nop \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t4, 0(%[t]) \n" // |19|18|17|16| - "lw $t5, 4(%[t]) \n" // |23|22|21|20| - "lw $t6, 8(%[t]) \n" // |27|26|25|24| - "lw $t7, 12(%[t]) \n" // |31|30|29|28| - "addiu $t9, $t9, -1 \n" - "srl $t8, $t0, 16 \n" // |X|X|3|2| - "ins $t0, $t4, 16, 16 \n" // |17|16|1|0| - "ins $t4, $t8, 0, 16 \n" // |19|18|3|2| - "raddu.w.qb $t0, $t0 \n" // |17+16+1+0| - "raddu.w.qb $t4, $t4 \n" // |19+18+3+2| - "shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2 - "shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2 - "srl $t8, $t1, 16 \n" // |X|X|7|6| - "ins $t1, $t5, 16, 16 \n" // |21|20|5|4| - "ins $t5, $t8, 0, 16 \n" // |22|23|7|6| - "raddu.w.qb $t1, $t1 \n" // |21+20+5+4| - "raddu.w.qb $t5, $t5 \n" // |23+22+7+6| - "shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2 - "shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2 - "srl $t8, $t2, 16 \n" // |X|X|11|10| - "ins $t2, $t6, 16, 16 \n" // |25|24|9|8| - "ins $t6, $t8, 0, 16 \n" // |27|26|11|10| - "raddu.w.qb $t2, $t2 \n" // |25+24+9+8| - "raddu.w.qb $t6, $t6 \n" // |27+26+11+10| - "shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2 - "shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2 - "srl $t8, $t3, 16 \n" // |X|X|15|14| - "ins $t3, $t7, 16, 16 \n" // |29|28|13|12| - "ins $t7, $t8, 0, 16 \n" // |31|30|15|14| - "raddu.w.qb $t3, $t3 \n" // |29+28+13+12| - "raddu.w.qb $t7, $t7 \n" // |31+30+15+14| - "shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2 - "shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2 - "addiu %[src_ptr], %[src_ptr], 16 \n" - "addiu %[t], %[t], 16 \n" - "sb $t0, 0(%[dst]) \n" - "sb $t4, 1(%[dst]) \n" - "sb $t1, 2(%[dst]) \n" - "sb $t5, 3(%[dst]) \n" - "sb $t2, 4(%[dst]) \n" - "sb $t6, 5(%[dst]) \n" - "sb $t3, 6(%[dst]) \n" - "sb $t7, 7(%[dst]) \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 8 \n" - - "2: \n" - "andi $t9, %[dst_width], 0x7 \n" // x = residue - "beqz $t9, 3f \n" - " nop \n" - - "21: \n" - "lwr $t1, 0(%[src_ptr]) \n" - "lwl $t1, 3(%[src_ptr]) \n" - "lwr $t2, 0(%[t]) \n" - "lwl $t2, 3(%[t]) \n" - "srl $t8, $t1, 16 \n" - "ins $t1, $t2, 16, 16 \n" - "ins $t2, $t8, 0, 16 \n" - "raddu.w.qb $t1, $t1 \n" - "raddu.w.qb $t2, $t2 \n" - "shra_r.w $t1, $t1, 2 \n" - "shra_r.w $t2, $t2, 2 \n" - "sb $t1, 0(%[dst]) \n" - "sb $t2, 1(%[dst]) \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "addiu $t9, $t9, -2 \n" - "addiu %[t], %[t], 4 \n" - "bgtz $t9, 21b \n" - " addiu %[dst], %[dst], 2 \n" - - "3: \n" - ".set pop \n" - - : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [t] "+r"(t) - : [dst_width] "r"(dst_width) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} - -void ScaleRowDown4_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 3 \n" - "beqz $t9, 2f \n" - " nop \n" - - "1: \n" - "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28| - "precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0| - "precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8| - "precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16| - "precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24| - "precrq.qb.ph $t1, $t2, $t1 \n" // |14|10|6|2| - "precrq.qb.ph $t5, $t6, $t5 \n" // |30|26|22|18| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "addiu $t9, $t9, -1 \n" - "sw $t1, 0(%[dst]) \n" - "sw $t5, 4(%[dst]) \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 8 \n" - - "2: \n" - "andi $t9, %[dst_width], 7 \n" // residue - "beqz $t9, 3f \n" - " nop \n" - - "21: \n" - "lbu $t1, 2(%[src_ptr]) \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "addiu $t9, $t9, -1 \n" - "sb $t1, 0(%[dst]) \n" - "bgtz $t9, 21b \n" - " addiu %[dst], %[dst], 1 \n" - - "3: \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst) - : [dst_width] "r"(dst_width) - : "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} - -void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width) { - intptr_t stride = src_stride; - const uint8* s1 = src_ptr + stride; - const uint8* s2 = s1 + stride; - const uint8* s3 = s2 + stride; - - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 1 \n" - "andi $t8, %[dst_width], 1 \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 0(%[s1]) \n" // |7|6|5|4| - "lw $t2, 0(%[s2]) \n" // |11|10|9|8| - "lw $t3, 0(%[s3]) \n" // |15|14|13|12| - "lw $t4, 4(%[src_ptr]) \n" // |19|18|17|16| - "lw $t5, 4(%[s1]) \n" // |23|22|21|20| - "lw $t6, 4(%[s2]) \n" // |27|26|25|24| - "lw $t7, 4(%[s3]) \n" // |31|30|29|28| - "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0| - "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4| - "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8| - "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12| - "raddu.w.qb $t4, $t4 \n" // |19 + 18 + 17 + 16| - "raddu.w.qb $t5, $t5 \n" // |23 + 22 + 21 + 20| - "raddu.w.qb $t6, $t6 \n" // |27 + 26 + 25 + 24| - "raddu.w.qb $t7, $t7 \n" // |31 + 30 + 29 + 28| - "add $t0, $t0, $t1 \n" - "add $t1, $t2, $t3 \n" - "add $t0, $t0, $t1 \n" - "add $t4, $t4, $t5 \n" - "add $t6, $t6, $t7 \n" - "add $t4, $t4, $t6 \n" - "shra_r.w $t0, $t0, 4 \n" - "shra_r.w $t4, $t4, 4 \n" - "sb $t0, 0(%[dst]) \n" - "sb $t4, 1(%[dst]) \n" - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[s1], %[s1], 8 \n" - "addiu %[s2], %[s2], 8 \n" - "addiu %[s3], %[s3], 8 \n" - "addiu $t9, $t9, -1 \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 2 \n" - "beqz $t8, 2f \n" - " nop \n" - - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 0(%[s1]) \n" // |7|6|5|4| - "lw $t2, 0(%[s2]) \n" // |11|10|9|8| - "lw $t3, 0(%[s3]) \n" // |15|14|13|12| - "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0| - "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4| - "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8| - "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12| - "add $t0, $t0, $t1 \n" - "add $t1, $t2, $t3 \n" - "add $t0, $t0, $t1 \n" - "shra_r.w $t0, $t0, 4 \n" - "sb $t0, 0(%[dst]) \n" - - "2: \n" - ".set pop \n" - - : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [s1] "+r"(s1), [s2] "+r"(s2), - [s3] "+r"(s3) - : [dst_width] "r"(dst_width) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} - -void ScaleRowDown34_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "1: \n" - "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28| - "precrq.qb.ph $t0, $t2, $t4 \n" // |7|5|15|13| - "precrq.qb.ph $t9, $t6, $t8 \n" // |23|21|31|30| - "addiu %[dst_width], %[dst_width], -24 \n" - "ins $t1, $t1, 8, 16 \n" // |3|1|0|X| - "ins $t4, $t0, 8, 16 \n" // |X|15|13|12| - "ins $t5, $t5, 8, 16 \n" // |19|17|16|X| - "ins $t8, $t9, 8, 16 \n" // |X|31|29|28| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "packrl.ph $t0, $t3, $t0 \n" // |9|8|7|5| - "packrl.ph $t9, $t7, $t9 \n" // |25|24|23|21| - "prepend $t1, $t2, 8 \n" // |4|3|1|0| - "prepend $t3, $t4, 24 \n" // |15|13|12|11| - "prepend $t5, $t6, 8 \n" // |20|19|17|16| - "prepend $t7, $t8, 24 \n" // |31|29|28|27| - "sw $t1, 0(%[dst]) \n" - "sw $t0, 4(%[dst]) \n" - "sw $t3, 8(%[dst]) \n" - "sw $t5, 12(%[dst]) \n" - "sw $t9, 16(%[dst]) \n" - "sw $t7, 20(%[dst]) \n" - "bnez %[dst_width], 1b \n" - " addiu %[dst], %[dst], 24 \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [dst_width] "+r"(dst_width) - : - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); -} - -void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* d, - int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "repl.ph $t3, 3 \n" // 0x00030003 - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| - "rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1| - "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| - "muleu_s.ph.qbl $t4, $t2, $t3 \n" // |S0*3|S3*3| - "muleu_s.ph.qbl $t5, $t6, $t3 \n" // |T0*3|T3*3| - "andi $t0, $t2, 0xFFFF \n" // |0|0|S2|S1| - "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1| - "raddu.w.qb $t0, $t0 \n" - "raddu.w.qb $t1, $t1 \n" - "shra_r.w $t0, $t0, 1 \n" - "shra_r.w $t1, $t1, 1 \n" - "preceu.ph.qbr $t2, $t2 \n" // |0|S2|0|S1| - "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1| - "rotr $t2, $t2, 16 \n" // |0|S1|0|S2| - "rotr $t6, $t6, 16 \n" // |0|T1|0|T2| - "addu.ph $t2, $t2, $t4 \n" - "addu.ph $t6, $t6, $t5 \n" - "sll $t5, $t0, 1 \n" - "add $t0, $t5, $t0 \n" - "shra_r.ph $t2, $t2, 2 \n" - "shra_r.ph $t6, $t6, 2 \n" - "shll.ph $t4, $t2, 1 \n" - "addq.ph $t4, $t4, $t2 \n" - "addu $t0, $t0, $t1 \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "shra_r.w $t0, $t0, 2 \n" - "addu.ph $t6, $t6, $t4 \n" - "shra_r.ph $t6, $t6, 2 \n" - "srl $t1, $t6, 16 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "sb $t1, 0(%[d]) \n" - "sb $t0, 1(%[d]) \n" - "sb $t6, 2(%[d]) \n" - "bgtz %[dst_width], 1b \n" - " addiu %[d], %[d], 3 \n" - "3: \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [src_stride] "+r"(src_stride), [d] "+r"(d), - [dst_width] "+r"(dst_width) - : - : "t0", "t1", "t2", "t3", "t4", "t5", "t6"); -} - -void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* d, - int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "repl.ph $t2, 3 \n" // 0x00030003 - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| - "rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1| - "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| - "muleu_s.ph.qbl $t3, $t4, $t2 \n" // |S0*3|S3*3| - "muleu_s.ph.qbl $t5, $t6, $t2 \n" // |T0*3|T3*3| - "andi $t0, $t4, 0xFFFF \n" // |0|0|S2|S1| - "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1| - "raddu.w.qb $t0, $t0 \n" - "raddu.w.qb $t1, $t1 \n" - "shra_r.w $t0, $t0, 1 \n" - "shra_r.w $t1, $t1, 1 \n" - "preceu.ph.qbr $t4, $t4 \n" // |0|S2|0|S1| - "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1| - "rotr $t4, $t4, 16 \n" // |0|S1|0|S2| - "rotr $t6, $t6, 16 \n" // |0|T1|0|T2| - "addu.ph $t4, $t4, $t3 \n" - "addu.ph $t6, $t6, $t5 \n" - "shra_r.ph $t6, $t6, 2 \n" - "shra_r.ph $t4, $t4, 2 \n" - "addu.ph $t6, $t6, $t4 \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "shra_r.ph $t6, $t6, 1 \n" - "addu $t0, $t0, $t1 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "shra_r.w $t0, $t0, 1 \n" - "srl $t1, $t6, 16 \n" - "sb $t1, 0(%[d]) \n" - "sb $t0, 1(%[d]) \n" - "sb $t6, 2(%[d]) \n" - "bgtz %[dst_width], 1b \n" - " addiu %[d], %[d], 3 \n" - "3: \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [src_stride] "+r"(src_stride), [d] "+r"(d), - [dst_width] "+r"(dst_width) - : - : "t0", "t1", "t2", "t3", "t4", "t5", "t6"); -} - -void ScaleRowDown38_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst, - int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28| - "wsbh $t0, $t0 \n" // |2|3|0|1| - "wsbh $t6, $t6 \n" // |26|27|24|25| - "srl $t0, $t0, 8 \n" // |X|2|3|0| - "srl $t3, $t3, 16 \n" // |X|X|15|14| - "srl $t5, $t5, 16 \n" // |X|X|23|22| - "srl $t7, $t7, 16 \n" // |X|X|31|30| - "ins $t1, $t2, 24, 8 \n" // |8|6|5|4| - "ins $t6, $t5, 0, 8 \n" // |26|27|24|22| - "ins $t1, $t0, 0, 16 \n" // |8|6|3|0| - "ins $t6, $t7, 24, 8 \n" // |30|27|24|22| - "prepend $t2, $t3, 24 \n" // |X|15|14|11| - "ins $t4, $t4, 16, 8 \n" // |19|16|17|X| - "ins $t4, $t2, 0, 16 \n" // |19|16|14|11| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "addiu %[dst_width], %[dst_width], -12 \n" - "addiu $t8,%[dst_width], -12 \n" - "sw $t1, 0(%[dst]) \n" - "sw $t4, 4(%[dst]) \n" - "sw $t6, 8(%[dst]) \n" - "bgez $t8, 1b \n" - " addiu %[dst], %[dst], 12 \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [dst_width] "+r"(dst_width) - : - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8"); -} - -void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, - int dst_width) { - intptr_t stride = src_stride; - const uint8* t = src_ptr + stride; - const int c = 0x2AAA; - - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4| - "lw $t2, 0(%[t]) \n" // |T3|T2|T1|T0| - "lw $t3, 4(%[t]) \n" // |T7|T6|T5|T4| - "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6| - "packrl.ph $t4, $t1, $t3 \n" // |S7|S6|T7|T6| - "packrl.ph $t5, $t3, $t1 \n" // |T5|T4|S5|S4| - "raddu.w.qb $t4, $t4 \n" // S7+S6+T7+T6 - "raddu.w.qb $t5, $t5 \n" // T5+T4+S5+S4 - "precrq.qb.ph $t6, $t0, $t2 \n" // |S3|S1|T3|T1| - "precrq.qb.ph $t6, $t6, $t6 \n" // |S3|T3|S3|T3| - "srl $t4, $t4, 2 \n" // t4 / 4 - "srl $t6, $t6, 16 \n" // |0|0|S3|T3| - "raddu.w.qb $t6, $t6 \n" // 0+0+S3+T3 - "addu $t6, $t5, $t6 \n" - "mul $t6, $t6, %[c] \n" // t6 * 0x2AAA - "sll $t0, $t0, 8 \n" // |S2|S1|S0|0| - "sll $t2, $t2, 8 \n" // |T2|T1|T0|0| - "raddu.w.qb $t0, $t0 \n" // S2+S1+S0+0 - "raddu.w.qb $t2, $t2 \n" // T2+T1+T0+0 - "addu $t0, $t0, $t2 \n" - "mul $t0, $t0, %[c] \n" // t0 * 0x2AAA - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[t], %[t], 8 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "addiu %[dst_ptr], %[dst_ptr], 3 \n" - "srl $t6, $t6, 16 \n" - "srl $t0, $t0, 16 \n" - "sb $t4, -1(%[dst_ptr]) \n" - "sb $t6, -2(%[dst_ptr]) \n" - "bgtz %[dst_width], 1b \n" - " sb $t0, -3(%[dst_ptr]) \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr), [t] "+r"(t), - [dst_width] "+r"(dst_width) - : [c] "r"(c) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6"); -} - -void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, - int dst_width) { - intptr_t stride = src_stride; - const uint8* s1 = src_ptr + stride; - stride += stride; - const uint8* s2 = src_ptr + stride; - const int c1 = 0x1C71; - const int c2 = 0x2AAA; - - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4| - "lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0| - "lw $t3, 4(%[s1]) \n" // |T7|T6|T5|T4| - "lw $t4, 0(%[s2]) \n" // |R3|R2|R1|R0| - "lw $t5, 4(%[s2]) \n" // |R7|R6|R5|R4| - "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6| - "packrl.ph $t6, $t1, $t3 \n" // |S7|S6|T7|T6| - "raddu.w.qb $t6, $t6 \n" // S7+S6+T7+T6 - "packrl.ph $t7, $t3, $t1 \n" // |T5|T4|S5|S4| - "raddu.w.qb $t7, $t7 \n" // T5+T4+S5+S4 - "sll $t8, $t5, 16 \n" // |R5|R4|0|0| - "raddu.w.qb $t8, $t8 \n" // R5+R4 - "addu $t7, $t7, $t8 \n" - "srl $t8, $t5, 16 \n" // |0|0|R7|R6| - "raddu.w.qb $t8, $t8 \n" // R7 + R6 - "addu $t6, $t6, $t8 \n" - "mul $t6, $t6, %[c2] \n" // t6 * 0x2AAA - "precrq.qb.ph $t8, $t0, $t2 \n" // |S3|S1|T3|T1| - "precrq.qb.ph $t8, $t8, $t4 \n" // |S3|T3|R3|R1| - "srl $t8, $t8, 8 \n" // |0|S3|T3|R3| - "raddu.w.qb $t8, $t8 \n" // S3 + T3 + R3 - "addu $t7, $t7, $t8 \n" - "mul $t7, $t7, %[c1] \n" // t7 * 0x1C71 - "sll $t0, $t0, 8 \n" // |S2|S1|S0|0| - "sll $t2, $t2, 8 \n" // |T2|T1|T0|0| - "sll $t4, $t4, 8 \n" // |R2|R1|R0|0| - "raddu.w.qb $t0, $t0 \n" - "raddu.w.qb $t2, $t2 \n" - "raddu.w.qb $t4, $t4 \n" - "addu $t0, $t0, $t2 \n" - "addu $t0, $t0, $t4 \n" - "mul $t0, $t0, %[c1] \n" // t0 * 0x1C71 - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[s1], %[s1], 8 \n" - "addiu %[s2], %[s2], 8 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "addiu %[dst_ptr], %[dst_ptr], 3 \n" - "srl $t6, $t6, 16 \n" - "srl $t7, $t7, 16 \n" - "srl $t0, $t0, 16 \n" - "sb $t6, -1(%[dst_ptr]) \n" - "sb $t7, -2(%[dst_ptr]) \n" - "bgtz %[dst_width], 1b \n" - " sb $t0, -3(%[dst_ptr]) \n" - ".set pop \n" - : [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr), [s1] "+r"(s1), - [s2] "+r"(s2), [dst_width] "+r"(dst_width) - : [c1] "r"(c1), [c2] "r"(c2) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8"); -} - -void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { - int x; - for (x = 0; x < ((src_width - 1)); x += 8) { - uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4; - uint32 tmp_t5, tmp_t6, tmp_t7, tmp_t8; - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - "lw %[tmp_t5], 0(%[src_ptr]) \n" - "lw %[tmp_t6], 4(%[src_ptr]) \n" - "lw %[tmp_t1], 0(%[dst_ptr]) \n" - "lw %[tmp_t2], 4(%[dst_ptr]) \n" - "lw %[tmp_t3], 8(%[dst_ptr]) \n" - "lw %[tmp_t4], 12(%[dst_ptr]) \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t5] \n" - "preceu.ph.qbl %[tmp_t8], %[tmp_t5] \n" - "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t7] \n" - "addu.ph %[tmp_t2], %[tmp_t2], %[tmp_t8] \n" - "preceu.ph.qbr %[tmp_t7], %[tmp_t6] \n" - "preceu.ph.qbl %[tmp_t8], %[tmp_t6] \n" - "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t7] \n" - "addu.ph %[tmp_t4], %[tmp_t4], %[tmp_t8] \n" - "sw %[tmp_t1], 0(%[dst_ptr]) \n" - "sw %[tmp_t2], 4(%[dst_ptr]) \n" - "sw %[tmp_t3], 8(%[dst_ptr]) \n" - "sw %[tmp_t4], 12(%[dst_ptr]) \n" - ".set pop \n" - : - [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), [tmp_t3] "=&r"(tmp_t3), - [tmp_t4] "=&r"(tmp_t4), [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), - [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [src_ptr] "+r"(src_ptr) - : [dst_ptr] "r"(dst_ptr)); - src_ptr += 8; - dst_ptr += 8; - } - - if ((src_width)&7) { - for (x = 0; x < ((src_width - 1) & 7); x += 1) { - dst_ptr[0] += src_ptr[0]; - src_ptr += 1; - dst_ptr += 1; - } - } -} - -#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index 5fd438a5..a8fb4b4a 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -65,8 +65,6 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { #if defined(__mips__) int has_mips = TestCpuFlag(kCpuHasMIPS); printf("Has MIPS %x\n", has_mips); - int has_dspr2 = TestCpuFlag(kCpuHasDSPR2); - printf("Has DSPR2 %x\n", has_dspr2); int has_msa = TestCpuFlag(kCpuHasMSA); printf("Has MSA %x\n", has_msa); #endif diff --git a/util/cpuid.c b/util/cpuid.c index 9ff618e0..59c65d60 100644 --- a/util/cpuid.c +++ b/util/cpuid.c @@ -69,8 +69,8 @@ int main(int argc, const char* argv[]) { printf("Has NEON %x\n", has_neon); } if (has_mips) { - int has_dspr2 = TestCpuFlag(kCpuHasDSPR2); - printf("Has DSPR2 %x\n", has_dspr2); + int has_msa = TestCpuFlag(kCpuHasMSA); + printf("Has MSA %x\n", has_msa); } if (has_x86) { int has_sse2 = TestCpuFlag(kCpuHasSSE2); |