diff options
author | Linfeng Zhang <linfengz@google.com> | 2016-09-01 13:44:11 -0700 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2017-05-17 15:55:42 -0400 |
commit | 60eb7d88b4eace91395e553cf70fc4578a950353 (patch) | |
tree | 489d13c37dbb73fe5ad385df7057e5cef49d4d5b /silk | |
parent | 1eaa67c0dce1f642af872cc26b7ff4b057a55bfb (diff) | |
download | libopus-60eb7d88b4eace91395e553cf70fc4578a950353.tar.gz |
Update silk_biquad_alt()
Split to silk_biquad_alt_stride1() and silk_biquad_alt_stride2(),
so that it can be optimized more efficiently when stride is 2.
This change in C code is bit exact with the origin.
Change-Id: Idaefe670397016ace2a489e3435ac61b7dbe79d5
Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
Diffstat (limited to 'silk')
-rw-r--r-- | silk/LP_variable_cutoff.c | 2 | ||||
-rw-r--r-- | silk/SigProc_FIX.h | 14 | ||||
-rw-r--r-- | silk/biquad_alt.c | 53 |
3 files changed, 60 insertions, 9 deletions
diff --git a/silk/LP_variable_cutoff.c b/silk/LP_variable_cutoff.c index f639e1f8..79112ad3 100644 --- a/silk/LP_variable_cutoff.c +++ b/silk/LP_variable_cutoff.c @@ -130,6 +130,6 @@ void silk_LP_variable_cutoff( /* ARMA low-pass filtering */ silk_assert( TRANSITION_NB == 3 && TRANSITION_NA == 2 ); - silk_biquad_alt( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length, 1); + silk_biquad_alt_stride1( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length); } } diff --git a/silk/SigProc_FIX.h b/silk/SigProc_FIX.h index e0c39679..43f9ffac 100644 --- a/silk/SigProc_FIX.h +++ b/silk/SigProc_FIX.h @@ -100,14 +100,22 @@ void silk_resampler_down2_3( * slower than biquad() but uses more precise coefficients * can handle (slowly) varying coefficients */ -void silk_biquad_alt( +void silk_biquad_alt_stride1( const opus_int16 *in, /* I input signal */ const opus_int32 *B_Q28, /* I MA coefficients [3] */ const opus_int32 *A_Q28, /* I AR coefficients [2] */ opus_int32 *S, /* I/O State vector [2] */ opus_int16 *out, /* O output signal */ - const opus_int32 len, /* I signal length (must be even) */ - opus_int stride /* I Operate on interleaved signal if > 1 */ + const opus_int32 len /* I signal length (must be even) */ +); + +void silk_biquad_alt_stride2( + const opus_int16 *in, /* I input signal */ + const opus_int32 *B_Q28, /* I MA coefficients [3] */ + const opus_int32 *A_Q28, /* I AR coefficients [2] */ + opus_int32 *S, /* I/O State vector [4] */ + opus_int16 *out, /* O output signal */ + const opus_int32 len /* I signal length (must be even) */ ); /* Variable order MA prediction error filter. */ diff --git a/silk/biquad_alt.c b/silk/biquad_alt.c index d55f5ee9..73381294 100644 --- a/silk/biquad_alt.c +++ b/silk/biquad_alt.c @@ -39,14 +39,13 @@ POSSIBILITY OF SUCH DAMAGE. #include "SigProc_FIX.h" /* Second order ARMA filter, alternative implementation */ -void silk_biquad_alt( +void silk_biquad_alt_stride1( const opus_int16 *in, /* I input signal */ const opus_int32 *B_Q28, /* I MA coefficients [3] */ const opus_int32 *A_Q28, /* I AR coefficients [2] */ opus_int32 *S, /* I/O State vector [2] */ opus_int16 *out, /* O output signal */ - const opus_int32 len, /* I signal length (must be even) */ - opus_int stride /* I Operate on interleaved signal if > 1 */ + const opus_int32 len /* I signal length (must be even) */ ) { /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ @@ -61,7 +60,7 @@ void silk_biquad_alt( for( k = 0; k < len; k++ ) { /* S[ 0 ], S[ 1 ]: Q12 */ - inval = in[ k * stride ]; + inval = in[ k ]; out32_Q14 = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], inval ), 2 ); S[ 0 ] = S[1] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A0_L_Q28 ), 14 ); @@ -73,6 +72,50 @@ void silk_biquad_alt( S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], inval ); /* Scale back to Q0 and saturate */ - out[ k * stride ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) ); + out[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) ); + } +} + +void silk_biquad_alt_stride2( + const opus_int16 *in, /* I input signal */ + const opus_int32 *B_Q28, /* I MA coefficients [3] */ + const opus_int32 *A_Q28, /* I AR coefficients [2] */ + opus_int32 *S, /* I/O State vector [4] */ + opus_int16 *out, /* O output signal */ + const opus_int32 len /* I signal length (must be even) */ +) +{ + /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ + opus_int k; + opus_int32 A0_U_Q28, A0_L_Q28, A1_U_Q28, A1_L_Q28, out32_Q14[ 2 ]; + + /* Negate A_Q28 values and split in two parts */ + A0_L_Q28 = ( -A_Q28[ 0 ] ) & 0x00003FFF; /* lower part */ + A0_U_Q28 = silk_RSHIFT( -A_Q28[ 0 ], 14 ); /* upper part */ + A1_L_Q28 = ( -A_Q28[ 1 ] ) & 0x00003FFF; /* lower part */ + A1_U_Q28 = silk_RSHIFT( -A_Q28[ 1 ], 14 ); /* upper part */ + + for( k = 0; k < len; k++ ) { + /* S[ 0 ], S[ 1 ], S[ 2 ], S[ 3 ]: Q12 */ + out32_Q14[ 0 ] = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], in[ 2 * k + 0 ] ), 2 ); + out32_Q14[ 1 ] = silk_LSHIFT( silk_SMLAWB( S[ 2 ], B_Q28[ 0 ], in[ 2 * k + 1 ] ), 2 ); + + S[ 0 ] = S[ 1 ] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 0 ], A0_L_Q28 ), 14 ); + S[ 2 ] = S[ 3 ] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 1 ], A0_L_Q28 ), 14 ); + S[ 0 ] = silk_SMLAWB( S[ 0 ], out32_Q14[ 0 ], A0_U_Q28 ); + S[ 2 ] = silk_SMLAWB( S[ 2 ], out32_Q14[ 1 ], A0_U_Q28 ); + S[ 0 ] = silk_SMLAWB( S[ 0 ], B_Q28[ 1 ], in[ 2 * k + 0 ] ); + S[ 2 ] = silk_SMLAWB( S[ 2 ], B_Q28[ 1 ], in[ 2 * k + 1 ] ); + + S[ 1 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 0 ], A1_L_Q28 ), 14 ); + S[ 3 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 1 ], A1_L_Q28 ), 14 ); + S[ 1 ] = silk_SMLAWB( S[ 1 ], out32_Q14[ 0 ], A1_U_Q28 ); + S[ 3 ] = silk_SMLAWB( S[ 3 ], out32_Q14[ 1 ], A1_U_Q28 ); + S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], in[ 2 * k + 0 ] ); + S[ 3 ] = silk_SMLAWB( S[ 3 ], B_Q28[ 2 ], in[ 2 * k + 1 ] ); + + /* Scale back to Q0 and saturate */ + out[ 2 * k + 0 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14[ 0 ] + (1<<14) - 1, 14 ) ); + out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14[ 1 ] + (1<<14) - 1, 14 ) ); } } |