diff options
author | Timothy B. Terriberry <tterribe@xiph.org> | 2013-11-18 13:30:13 -0500 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2013-11-18 13:41:17 -0500 |
commit | 39386e0b85ec0f978aa104d312604badb9047d58 (patch) | |
tree | e1171628bb638ec1b770b049e39609d7a268c584 /silk | |
parent | 530198f955e49571b3f890b4da4d933a4cd5df4e (diff) | |
download | libopus-39386e0b85ec0f978aa104d312604badb9047d58.tar.gz |
Adds Neon assembly for correlation/convolution
Optimizing celt_pitch_xcorr()/xcorr_kernel() which also speeds up
FIRs, IIRs and auto-correlations
Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
Diffstat (limited to 'silk')
-rw-r--r-- | silk/API.h | 1 | ||||
-rw-r--r-- | silk/SigProc_FIX.h | 13 | ||||
-rw-r--r-- | silk/enc_API.c | 10 | ||||
-rw-r--r-- | silk/fixed/autocorr_FIX.c | 5 | ||||
-rw-r--r-- | silk/fixed/burg_modified_FIX.c | 5 | ||||
-rw-r--r-- | silk/fixed/encode_frame_FIX.c | 4 | ||||
-rw-r--r-- | silk/fixed/find_LPC_FIX.c | 4 | ||||
-rw-r--r-- | silk/fixed/find_pitch_lags_FIX.c | 8 | ||||
-rw-r--r-- | silk/fixed/main_FIX.h | 9 | ||||
-rw-r--r-- | silk/fixed/noise_shape_analysis_FIX.c | 5 | ||||
-rw-r--r-- | silk/fixed/pitch_analysis_core_FIX.c | 15 | ||||
-rw-r--r-- | silk/float/SigProc_FLP.h | 3 | ||||
-rw-r--r-- | silk/float/encode_frame_FLP.c | 2 | ||||
-rw-r--r-- | silk/float/find_pitch_lags_FLP.c | 5 | ||||
-rw-r--r-- | silk/float/main_FLP.h | 6 | ||||
-rw-r--r-- | silk/float/pitch_analysis_core_FLP.c | 15 | ||||
-rw-r--r-- | silk/init_encoder.c | 6 | ||||
-rw-r--r-- | silk/macros.h | 4 | ||||
-rw-r--r-- | silk/structs.h | 2 |
19 files changed, 75 insertions, 47 deletions
@@ -64,6 +64,7 @@ opus_int silk_Get_Encoder_Size( /* O Returns error co /*************************/ opus_int silk_InitEncoder( /* O Returns error code */ void *encState, /* I/O State */ + int arch, /* I Run-time architecture */ silk_EncControlStruct *encStatus /* O Encoder Status */ ); diff --git a/silk/SigProc_FIX.h b/silk/SigProc_FIX.h index 97f79d45..1b580579 100644 --- a/silk/SigProc_FIX.h +++ b/silk/SigProc_FIX.h @@ -227,7 +227,8 @@ void silk_autocorr( opus_int *scale, /* O Scaling of the correlation vector */ const opus_int16 *inputData, /* I Input data to correlate */ const opus_int inputDataSize, /* I Length of input */ - const opus_int correlationCount /* I Number of correlation taps to compute */ + const opus_int correlationCount, /* I Number of correlation taps to compute */ + int arch /* I Run-time architecture */ ); void silk_decode_pitch( @@ -249,7 +250,8 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */ const opus_int Fs_kHz, /* I Sample frequency (kHz) */ const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr /* I number of 5 ms subframes */ + const opus_int nb_subfr, /* I number of 5 ms subframes */ + int arch /* I Run-time architecture */ ); /* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients */ @@ -309,7 +311,8 @@ void silk_burg_modified( const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D /* I Order */ + const opus_int D, /* I Order */ + int arch /* I Run-time architecture */ ); /* Copy and multiply a vector by a constant */ @@ -576,11 +579,11 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) #include "MacroCount.h" #include "MacroDebug.h" -#ifdef ARMv4_ASM +#ifdef OPUS_ARM_INLINE_ASM #include "arm/SigProc_FIX_armv4.h" #endif -#ifdef ARMv5E_ASM +#ifdef OPUS_ARM_INLINE_EDSP #include "arm/SigProc_FIX_armv5e.h" #endif diff --git a/silk/enc_API.c b/silk/enc_API.c index b3424872..43739efc 100644 --- a/silk/enc_API.c +++ b/silk/enc_API.c @@ -69,6 +69,7 @@ opus_int silk_Get_Encoder_Size( /* O Returns error co /*************************/ opus_int silk_InitEncoder( /* O Returns error code */ void *encState, /* I/O State */ + int arch, /* I Run-time architecture */ silk_EncControlStruct *encStatus /* O Encoder Status */ ) { @@ -80,7 +81,7 @@ opus_int silk_InitEncoder( /* O Returns error co /* Reset encoder */ silk_memset( psEnc, 0, sizeof( silk_encoder ) ); for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) { - if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ] ) ) { + if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) { silk_assert( 0 ); } } @@ -174,7 +175,7 @@ opus_int silk_Encode( /* O Returns error co if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) { /* Mono -> Stereo transition: init state of second channel and stereo state */ - ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ] ); + ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch ); silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) ); silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) ); psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0; @@ -206,9 +207,8 @@ opus_int silk_Encode( /* O Returns error co } /* Reset Encoder */ for( n = 0; n < encControl->nChannelsInternal; n++ ) { - if( (ret = silk_init_encoder( &psEnc->state_Fxx[ n ] ) ) != 0 ) { - silk_assert( 0 ); - } + ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch ); + silk_assert( !ret ); } tmp_payloadSize_ms = encControl->payloadSize_ms; encControl->payloadSize_ms = 10; diff --git a/silk/fixed/autocorr_FIX.c b/silk/fixed/autocorr_FIX.c index dec3cc0b..de95c986 100644 --- a/silk/fixed/autocorr_FIX.c +++ b/silk/fixed/autocorr_FIX.c @@ -38,10 +38,11 @@ void silk_autocorr( opus_int *scale, /* O Scaling of the correlation vector */ const opus_int16 *inputData, /* I Input data to correlate */ const opus_int inputDataSize, /* I Length of input */ - const opus_int correlationCount /* I Number of correlation taps to compute */ + const opus_int correlationCount, /* I Number of correlation taps to compute */ + int arch /* I Run-time architecture */ ) { opus_int corrCount; corrCount = silk_min_int( inputDataSize, correlationCount ); - *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize); + *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize, arch); } diff --git a/silk/fixed/burg_modified_FIX.c b/silk/fixed/burg_modified_FIX.c index f5112b64..db348295 100644 --- a/silk/fixed/burg_modified_FIX.c +++ b/silk/fixed/burg_modified_FIX.c @@ -50,7 +50,8 @@ void silk_burg_modified( const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D /* I Order */ + const opus_int D, /* I Order */ + int arch /* I Run-time architecture */ ) { opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; @@ -98,7 +99,7 @@ void silk_burg_modified( int i; opus_int32 d; x_ptr = x + s * subfr_length; - celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D ); + celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch ); for( n = 1; n < D + 1; n++ ) { for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ ) d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] ); diff --git a/silk/fixed/encode_frame_FIX.c b/silk/fixed/encode_frame_FIX.c index 9c57a10a..b490986b 100644 --- a/silk/fixed/encode_frame_FIX.c +++ b/silk/fixed/encode_frame_FIX.c @@ -132,12 +132,12 @@ opus_int silk_encode_frame_FIX( /*****************************************/ /* Find pitch lags, initial LPC analysis */ /*****************************************/ - silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame ); + silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); /************************/ /* Noise shape analysis */ /************************/ - silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame ); + silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, psEnc->sCmn.arch ); /***************************************************/ /* Find linear prediction coefficients (LPC + LTP) */ diff --git a/silk/fixed/find_LPC_FIX.c b/silk/fixed/find_LPC_FIX.c index 70cefb6b..783d32e2 100644 --- a/silk/fixed/find_LPC_FIX.c +++ b/silk/fixed/find_LPC_FIX.c @@ -60,13 +60,13 @@ void silk_find_LPC_FIX( psEncC->indices.NLSFInterpCoef_Q2 = 4; /* Burg AR analysis for the full frame */ - silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder ); + silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, psEncC->arch ); if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) { VARDECL( opus_int16, LPC_res ); /* Optimal solution for last 10 ms */ - silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder ); + silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder, psEncC->arch ); /* subtract residual energy here, as that's easier than adding it to the */ /* residual energy of the first 10 ms in each iteration of the search below */ diff --git a/silk/fixed/find_pitch_lags_FIX.c b/silk/fixed/find_pitch_lags_FIX.c index f60b4364..620f8dcd 100644 --- a/silk/fixed/find_pitch_lags_FIX.c +++ b/silk/fixed/find_pitch_lags_FIX.c @@ -38,7 +38,8 @@ void silk_find_pitch_lags_FIX( silk_encoder_state_FIX *psEnc, /* I/O encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ opus_int16 res[], /* O residual */ - const opus_int16 x[] /* I Speech signal */ + const opus_int16 x[], /* I Speech signal */ + int arch /* I Run-time architecture */ ) { opus_int buf_len, i, scale; @@ -86,7 +87,7 @@ void silk_find_pitch_lags_FIX( silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch ); /* Calculate autocorrelation sequence */ - silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 ); + silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch ); /* Add white noise, as fraction of energy */ auto_corr[ 0 ] = silk_SMLAWB( auto_corr[ 0 ], auto_corr[ 0 ], SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ) + 1; @@ -127,7 +128,8 @@ void silk_find_pitch_lags_FIX( /*****************************************/ if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16, - (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 ) + (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, + psEnc->sCmn.arch) == 0 ) { psEnc->sCmn.indices.signalType = TYPE_VOICED; } else { diff --git a/silk/fixed/main_FIX.h b/silk/fixed/main_FIX.h index d999b13f..a56ca07a 100644 --- a/silk/fixed/main_FIX.h +++ b/silk/fixed/main_FIX.h @@ -73,7 +73,8 @@ opus_int silk_encode_frame_FIX( /* Initializes the Silk encoder state */ opus_int silk_init_encoder( - silk_encoder_state_Fxx *psEnc /* I/O Pointer to Silk FIX encoder state */ + silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */ + int arch /* I Run-time architecture */ ); /* Control the Silk encoder */ @@ -104,7 +105,8 @@ void silk_noise_shape_analysis_FIX( silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ - const opus_int16 *x /* I Input signal [ frame_length + la_shape ] */ + const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ + int arch /* I Run-time architecture */ ); /* Autocorrelations for a warped frequency axis */ @@ -132,7 +134,8 @@ void silk_find_pitch_lags_FIX( silk_encoder_state_FIX *psEnc, /* I/O encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ opus_int16 res[], /* O residual */ - const opus_int16 x[] /* I Speech signal */ + const opus_int16 x[], /* I Speech signal */ + int arch /* I Run-time architecture */ ); /* Find LPC and LTP coefficients */ diff --git a/silk/fixed/noise_shape_analysis_FIX.c b/silk/fixed/noise_shape_analysis_FIX.c index d381fa3d..e24d2e9d 100644 --- a/silk/fixed/noise_shape_analysis_FIX.c +++ b/silk/fixed/noise_shape_analysis_FIX.c @@ -145,7 +145,8 @@ void silk_noise_shape_analysis_FIX( silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ - const opus_int16 *x /* I Input signal [ frame_length + la_shape ] */ + const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ + int arch /* I Run-time architecture */ ) { silk_shape_state_FIX *psShapeSt = &psEnc->sShape; @@ -281,7 +282,7 @@ void silk_noise_shape_analysis_FIX( silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); } else { /* Calculate regular auto correlation */ - silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 ); + silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); } /* Add white noise, as a fraction of energy */ diff --git a/silk/fixed/pitch_analysis_core_FIX.c b/silk/fixed/pitch_analysis_core_FIX.c index b2c6cbb4..1641a0fb 100644 --- a/silk/fixed/pitch_analysis_core_FIX.c +++ b/silk/fixed/pitch_analysis_core_FIX.c @@ -62,7 +62,8 @@ static void silk_P_Ana_calc_corr_st3( opus_int start_lag, /* I lag offset to search around */ opus_int sf_length, /* I length of a 5 ms subframe */ opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ ); static void silk_P_Ana_calc_energy_st3( @@ -88,7 +89,8 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */ const opus_int Fs_kHz, /* I Sample frequency (kHz) */ const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr /* I number of 5 ms subframes */ + const opus_int nb_subfr, /* I number of 5 ms subframes */ + int arch /* I Run-time architecture */ ) { VARDECL( opus_int16, frame_8kHz ); @@ -189,7 +191,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 silk_assert( basis_ptr >= frame_4kHz ); silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); - celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1 ); + celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch ); /* Calculate first vector products before loop */ cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ]; @@ -516,7 +518,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 /* Calculate the correlations and energies needed in stage 3 */ ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); - silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity ); + silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity ); lag_counter = 0; @@ -597,7 +599,8 @@ static void silk_P_Ana_calc_corr_st3( opus_int start_lag, /* I lag offset to search around */ opus_int sf_length, /* I length of a 5 ms subframe */ opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ ) { const opus_int16 *target_ptr; @@ -634,7 +637,7 @@ static void silk_P_Ana_calc_corr_st3( lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); - celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1 ); + celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch ); for( j = lag_low; j <= lag_high; j++ ) { silk_assert( lag_counter < SCRATCH_SIZE ); scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ]; diff --git a/silk/float/SigProc_FLP.h b/silk/float/SigProc_FLP.h index e7131f14..f0cb3733 100644 --- a/silk/float/SigProc_FLP.h +++ b/silk/float/SigProc_FLP.h @@ -94,7 +94,8 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, const silk_float search_thres2, /* I Final threshold for lag candidates 0 - 1 */ const opus_int Fs_kHz, /* I sample frequency (kHz) */ const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr /* I Number of 5 ms subframes */ + const opus_int nb_subfr, /* I Number of 5 ms subframes */ + int arch /* I Run-time architecture */ ); void silk_insertion_sort_decreasing_FLP( diff --git a/silk/float/encode_frame_FLP.c b/silk/float/encode_frame_FLP.c index df75db91..d54e2686 100644 --- a/silk/float/encode_frame_FLP.c +++ b/silk/float/encode_frame_FLP.c @@ -129,7 +129,7 @@ opus_int silk_encode_frame_FLP( /*****************************************/ /* Find pitch lags, initial LPC analysis */ /*****************************************/ - silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame ); + silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); /************************/ /* Noise shape analysis */ diff --git a/silk/float/find_pitch_lags_FLP.c b/silk/float/find_pitch_lags_FLP.c index 455db82f..f3b22d25 100644 --- a/silk/float/find_pitch_lags_FLP.c +++ b/silk/float/find_pitch_lags_FLP.c @@ -37,7 +37,8 @@ void silk_find_pitch_lags_FLP( silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ silk_float res[], /* O Residual */ - const silk_float x[] /* I Speech signal */ + const silk_float x[], /* I Speech signal */ + int arch /* I Run-time architecture */ ) { opus_int buf_len; @@ -116,7 +117,7 @@ void silk_find_pitch_lags_FLP( /*****************************************/ if( silk_pitch_analysis_core_FLP( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16 / 65536.0f, - thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 ) + thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, arch ) == 0 ) { psEnc->sCmn.indices.signalType = TYPE_VOICED; } else { diff --git a/silk/float/main_FLP.h b/silk/float/main_FLP.h index dbd34aef..71d58c20 100644 --- a/silk/float/main_FLP.h +++ b/silk/float/main_FLP.h @@ -71,7 +71,8 @@ opus_int silk_encode_frame_FLP( /* Initializes the Silk encoder state */ opus_int silk_init_encoder( - silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */ + silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ + int arch /* I Run-tim architecture */ ); /* Control the Silk encoder */ @@ -129,7 +130,8 @@ void silk_find_pitch_lags_FLP( silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ silk_float res[], /* O Residual */ - const silk_float x[] /* I Speech signal */ + const silk_float x[], /* I Speech signal */ + int arch /* I Run-time architecture */ ); /* Find LPC and LTP coefficients */ diff --git a/silk/float/pitch_analysis_core_FLP.c b/silk/float/pitch_analysis_core_FLP.c index b6bafe81..e58f041b 100644 --- a/silk/float/pitch_analysis_core_FLP.c +++ b/silk/float/pitch_analysis_core_FLP.c @@ -48,7 +48,8 @@ static void silk_P_Ana_calc_corr_st3( opus_int start_lag, /* I start lag */ opus_int sf_length, /* I sub frame length */ opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ ); static void silk_P_Ana_calc_energy_st3( @@ -74,7 +75,8 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, const silk_float search_thres2, /* I Final threshold for lag candidates 0 - 1 */ const opus_int Fs_kHz, /* I sample frequency (kHz) */ const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr /* I Number of 5 ms subframes */ + const opus_int nb_subfr, /* I Number of 5 ms subframes */ + int arch /* I Run-time architecture */ ) { opus_int i, k, d, j; @@ -176,7 +178,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, silk_assert( basis_ptr >= frame_4kHz ); silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); - celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1 ); + celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch ); /* Calculate first vector products before loop */ cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ]; @@ -409,7 +411,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, CCmax = -1000.0f; /* Calculate the correlations and energies needed in stage 3 */ - silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity ); + silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch ); silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity ); lag_counter = 0; @@ -493,7 +495,8 @@ static void silk_P_Ana_calc_corr_st3( opus_int start_lag, /* I start lag */ opus_int sf_length, /* I sub frame length */ opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ ) { const silk_float *target_ptr; @@ -527,7 +530,7 @@ static void silk_P_Ana_calc_corr_st3( lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); - celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1 ); + celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1, arch ); for( j = lag_low; j <= lag_high; j++ ) { silk_assert( lag_counter < SCRATCH_SIZE ); scratch_mem[ lag_counter ] = xcorr[ lag_high - j ]; diff --git a/silk/init_encoder.c b/silk/init_encoder.c index 7a4d918c..65995c33 100644 --- a/silk/init_encoder.c +++ b/silk/init_encoder.c @@ -34,12 +34,14 @@ POSSIBILITY OF SUCH DAMAGE. #include "main_FLP.h" #endif #include "tuning_parameters.h" +#include "cpu_support.h" /*********************************/ /* Initialize Silk Encoder state */ /*********************************/ opus_int silk_init_encoder( - silk_encoder_state_Fxx *psEnc /* I/O Pointer to Silk FIX encoder state */ + silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */ + int arch /* I Run-time architecture */ ) { opus_int ret = 0; @@ -47,6 +49,8 @@ opus_int silk_init_encoder( /* Clear the entire encoder state */ silk_memset( psEnc, 0, sizeof( silk_encoder_state_Fxx ) ); + psEnc->sCmn.arch = arch; + psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ), 8 ); psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15; diff --git a/silk/macros.h b/silk/macros.h index 976f3b7d..a84e5a5d 100644 --- a/silk/macros.h +++ b/silk/macros.h @@ -103,11 +103,11 @@ static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32) (*((Matrix_base_adr) + ((row)+(M)*(column)))) #endif -#ifdef ARMv4_ASM +#ifdef OPUS_ARM_INLINE_ASM #include "arm/macros_armv4.h" #endif -#ifdef ARMv5E_ASM +#ifdef OPUS_ARM_INLINE_EDSP #include "arm/macros_armv5e.h" #endif diff --git a/silk/structs.h b/silk/structs.h index 4ccd0c1a..aa84a528 100644 --- a/silk/structs.h +++ b/silk/structs.h @@ -191,6 +191,8 @@ typedef struct { SideInfoIndices indices; opus_int8 pulses[ MAX_FRAME_LENGTH ]; + int arch; + /* Input/output buffering */ opus_int16 inputBuf[ MAX_FRAME_LENGTH + 2 ]; /* Buffer containing input signal */ opus_int inputBufIx; |