aboutsummaryrefslogtreecommitdiff
path: root/silk
diff options
context:
space:
mode:
author Linfeng Zhang <linfengz@google.com> 2016-09-01 13:44:11 -0700
committer Jean-Marc Valin <jmvalin@jmvalin.ca> 2017-05-17 15:55:42 -0400
commit 60eb7d88b4eace91395e553cf70fc4578a950353 (patch)
tree 489d13c37dbb73fe5ad385df7057e5cef49d4d5b /silk
parent 1eaa67c0dce1f642af872cc26b7ff4b057a55bfb (diff)
download libopus-60eb7d88b4eace91395e553cf70fc4578a950353.tar.gz
Update silk_biquad_alt()
Split silk_biquad_alt() into silk_biquad_alt_stride1() and silk_biquad_alt_stride2(), so that it can be optimized more efficiently when the stride is 2. This change to the C code is bit-exact with the original. Change-Id: Idaefe670397016ace2a489e3435ac61b7dbe79d5 Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
Diffstat (limited to 'silk')
-rw-r--r-- silk/LP_variable_cutoff.c 2
-rw-r--r-- silk/SigProc_FIX.h 14
-rw-r--r-- silk/biquad_alt.c 53
3 files changed, 60 insertions, 9 deletions
diff --git a/silk/LP_variable_cutoff.c b/silk/LP_variable_cutoff.c
index f639e1f8..79112ad3 100644
--- a/silk/LP_variable_cutoff.c
+++ b/silk/LP_variable_cutoff.c
@@ -130,6 +130,6 @@ void silk_LP_variable_cutoff(
/* ARMA low-pass filtering */
silk_assert( TRANSITION_NB == 3 && TRANSITION_NA == 2 );
- silk_biquad_alt( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length, 1);
+ silk_biquad_alt_stride1( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length);
}
}
diff --git a/silk/SigProc_FIX.h b/silk/SigProc_FIX.h
index e0c39679..43f9ffac 100644
--- a/silk/SigProc_FIX.h
+++ b/silk/SigProc_FIX.h
@@ -100,14 +100,22 @@ void silk_resampler_down2_3(
* slower than biquad() but uses more precise coefficients
* can handle (slowly) varying coefficients
*/
-void silk_biquad_alt(
+void silk_biquad_alt_stride1(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [2] */
opus_int16 *out, /* O output signal */
- const opus_int32 len, /* I signal length (must be even) */
- opus_int stride /* I Operate on interleaved signal if > 1 */
+ const opus_int32 len /* I signal length (must be even) */
+);
+
+void silk_biquad_alt_stride2(
+ const opus_int16 *in, /* I input signal */
+ const opus_int32 *B_Q28, /* I MA coefficients [3] */
+ const opus_int32 *A_Q28, /* I AR coefficients [2] */
+ opus_int32 *S, /* I/O State vector [4] */
+ opus_int16 *out, /* O output signal */
+ const opus_int32 len /* I signal length per interleaved channel (must be even); in/out hold 2*len samples */
);
/* Variable order MA prediction error filter. */
diff --git a/silk/biquad_alt.c b/silk/biquad_alt.c
index d55f5ee9..73381294 100644
--- a/silk/biquad_alt.c
+++ b/silk/biquad_alt.c
@@ -39,14 +39,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include "SigProc_FIX.h"
/* Second order ARMA filter, alternative implementation */
-void silk_biquad_alt(
+void silk_biquad_alt_stride1(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [2] */
opus_int16 *out, /* O output signal */
- const opus_int32 len, /* I signal length (must be even) */
- opus_int stride /* I Operate on interleaved signal if > 1 */
+ const opus_int32 len /* I signal length (must be even) */
)
{
/* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */
@@ -61,7 +60,7 @@ void silk_biquad_alt(
for( k = 0; k < len; k++ ) {
/* S[ 0 ], S[ 1 ]: Q12 */
- inval = in[ k * stride ];
+ inval = in[ k ];
out32_Q14 = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], inval ), 2 );
S[ 0 ] = S[1] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A0_L_Q28 ), 14 );
@@ -73,6 +72,50 @@ void silk_biquad_alt(
S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], inval );
/* Scale back to Q0 and saturate */
- out[ k * stride ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) );
+ out[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) );
+ }
+}
+
+void silk_biquad_alt_stride2(
+ const opus_int16 *in, /* I input signal */
+ const opus_int32 *B_Q28, /* I MA coefficients [3] */
+ const opus_int32 *A_Q28, /* I AR coefficients [2] */
+ opus_int32 *S, /* I/O State vector [4] */
+ opus_int16 *out, /* O output signal */
+ const opus_int32 len /* I signal length per interleaved channel (must be even); in/out hold 2*len samples */
+)
+{
+ /* DIRECT FORM II TRANSPOSED (uses 4 element state vector: S[ 0 ], S[ 1 ] for even samples, S[ 2 ], S[ 3 ] for odd samples) */
+ opus_int k;
+ opus_int32 A0_U_Q28, A0_L_Q28, A1_U_Q28, A1_L_Q28, out32_Q14[ 2 ];
+
+ /* Negate A_Q28 values and split in two parts */
+ A0_L_Q28 = ( -A_Q28[ 0 ] ) & 0x00003FFF; /* lower part */
+ A0_U_Q28 = silk_RSHIFT( -A_Q28[ 0 ], 14 ); /* upper part */
+ A1_L_Q28 = ( -A_Q28[ 1 ] ) & 0x00003FFF; /* lower part */
+ A1_U_Q28 = silk_RSHIFT( -A_Q28[ 1 ], 14 ); /* upper part */
+
+ for( k = 0; k < len; k++ ) {
+ /* S[ 0 ], S[ 1 ], S[ 2 ], S[ 3 ]: Q12 */
+ out32_Q14[ 0 ] = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], in[ 2 * k + 0 ] ), 2 );
+ out32_Q14[ 1 ] = silk_LSHIFT( silk_SMLAWB( S[ 2 ], B_Q28[ 0 ], in[ 2 * k + 1 ] ), 2 );
+
+ S[ 0 ] = S[ 1 ] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 0 ], A0_L_Q28 ), 14 );
+ S[ 2 ] = S[ 3 ] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 1 ], A0_L_Q28 ), 14 );
+ S[ 0 ] = silk_SMLAWB( S[ 0 ], out32_Q14[ 0 ], A0_U_Q28 );
+ S[ 2 ] = silk_SMLAWB( S[ 2 ], out32_Q14[ 1 ], A0_U_Q28 );
+ S[ 0 ] = silk_SMLAWB( S[ 0 ], B_Q28[ 1 ], in[ 2 * k + 0 ] );
+ S[ 2 ] = silk_SMLAWB( S[ 2 ], B_Q28[ 1 ], in[ 2 * k + 1 ] );
+
+ S[ 1 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 0 ], A1_L_Q28 ), 14 );
+ S[ 3 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 1 ], A1_L_Q28 ), 14 );
+ S[ 1 ] = silk_SMLAWB( S[ 1 ], out32_Q14[ 0 ], A1_U_Q28 );
+ S[ 3 ] = silk_SMLAWB( S[ 3 ], out32_Q14[ 1 ], A1_U_Q28 );
+ S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], in[ 2 * k + 0 ] );
+ S[ 3 ] = silk_SMLAWB( S[ 3 ], B_Q28[ 2 ], in[ 2 * k + 1 ] );
+
+ /* Scale back to Q0 and saturate */
+ out[ 2 * k + 0 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14[ 0 ] + (1<<14) - 1, 14 ) );
+ out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14[ 1 ] + (1<<14) - 1, 14 ) );
}
}