aboutsummaryrefslogtreecommitdiff
path: root/silk/x86
diff options
context:
space:
mode:
authorFelicia Lim <flim@google.com>2017-07-05 17:36:56 -0700
committerFelicia Lim <flim@google.com>2017-07-07 09:42:55 -0700
commit0c2090c324e4f2ba2a8621c8b083559bab74c7c5 (patch)
treefea1ab0038bc4102569d1ab4ee57a0f973895570 /silk/x86
parenta7703b70699299f078a189e19b6915120cded732 (diff)
downloadlibopus-0c2090c324e4f2ba2a8621c8b083559bab74c7c5.tar.gz
Change-Id: I551f1de5c5e121ac1275334e67c7e0f96ab18114 Test: - verified builds for arm*/mips*/x86* - checked functionality using an emulator and stagefright
Diffstat (limited to 'silk/x86')
-rw-r--r--silk/x86/NSQ_del_dec_sse.c18
-rw-r--r--silk/x86/NSQ_sse.c3
-rw-r--r--silk/x86/main_sse.h45
-rw-r--r--silk/x86/x86_silk_map.c26
4 files changed, 27 insertions, 65 deletions
diff --git a/silk/x86/NSQ_del_dec_sse.c b/silk/x86/NSQ_del_dec_sse.c
index 21d4a8bc..c5212bee 100644
--- a/silk/x86/NSQ_del_dec_sse.c
+++ b/silk/x86/NSQ_del_dec_sse.c
@@ -107,12 +107,12 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
opus_int predictLPCOrder, /* I Prediction filter order */
opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
- opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
+ opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */
opus_int decisionDelay /* I */
);
void silk_NSQ_del_dec_sse4_1(
- const silk_encoder_state *psEncC, /* I/O Encoder State */
+ const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
@@ -234,7 +234,8 @@ void silk_NSQ_del_dec_sse4_1(
psDD = &psDelDec[ Winner_ind ];
last_smple_idx = smpl_buf_idx + decisionDelay;
for( i = 0; i < decisionDelay; i++ ) {
- last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
+ last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY;
+ if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY;
pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) );
@@ -285,7 +286,8 @@ void silk_NSQ_del_dec_sse4_1(
last_smple_idx = smpl_buf_idx + decisionDelay;
Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 );
for( i = 0; i < decisionDelay; i++ ) {
- last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
+ last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY;
+ if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY;
pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) );
@@ -299,7 +301,6 @@ void silk_NSQ_del_dec_sse4_1(
NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
/* Save quantized speech signal */
- /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */
silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
RESTORE_STACK;
@@ -333,7 +334,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
opus_int predictLPCOrder, /* I Prediction filter order */
opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
- opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
+ opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */
opus_int decisionDelay /* I */
)
{
@@ -638,8 +639,9 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
psSS[ 1 ].xq_Q14 = xq_Q14;
}
}
- *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */
- last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */
+ *smpl_buf_idx = ( *smpl_buf_idx - 1 ) % DECISION_DELAY;
+ if( *smpl_buf_idx < 0 ) *smpl_buf_idx += DECISION_DELAY;
+ last_smple_idx = ( *smpl_buf_idx + decisionDelay ) % DECISION_DELAY;
/* Find winner */
RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
diff --git a/silk/x86/NSQ_sse.c b/silk/x86/NSQ_sse.c
index bb3c5f19..6a9e6e96 100644
--- a/silk/x86/NSQ_sse.c
+++ b/silk/x86/NSQ_sse.c
@@ -71,7 +71,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
);
void silk_NSQ_sse4_1(
- const silk_encoder_state *psEncC, /* I/O Encoder State */
+ const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
@@ -233,7 +233,6 @@ void silk_NSQ_sse4_1(
NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
/* Save quantized speech and noise shaping signals */
- /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */
silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
RESTORE_STACK;
diff --git a/silk/x86/main_sse.h b/silk/x86/main_sse.h
index d8d61310..2f15d448 100644
--- a/silk/x86/main_sse.h
+++ b/silk/x86/main_sse.h
@@ -34,6 +34,7 @@
# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */
# define OVERRIDE_silk_VQ_WMat_EC
void silk_VQ_WMat_EC_sse4_1(
@@ -79,11 +80,13 @@ extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
mu_Q9, max_gain_Q7, L))
#endif
+#endif
+#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */
# define OVERRIDE_silk_NSQ
void silk_NSQ_sse4_1(
- const silk_encoder_state *psEncC, /* I/O Encoder State */
+ const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
@@ -110,7 +113,7 @@ void silk_NSQ_sse4_1(
#else
extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
- const silk_encoder_state *psEncC, /* I/O Encoder State */
+ const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
@@ -137,7 +140,7 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
# define OVERRIDE_silk_NSQ_del_dec
void silk_NSQ_del_dec_sse4_1(
- const silk_encoder_state *psEncC, /* I/O Encoder State */
+ const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
@@ -164,7 +167,7 @@ void silk_NSQ_del_dec_sse4_1(
#else
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
- const silk_encoder_state *psEncC, /* I/O Encoder State */
+ const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
@@ -187,6 +190,7 @@ extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
#endif
+#endif
void silk_noise_shape_quantizer(
silk_nsq_state *NSQ, /* I/O NSQ state */
@@ -238,39 +242,6 @@ extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
silk_encoder_state *psEnC,
const opus_int16 pIn[]);
-# define OVERRIDE_silk_warped_LPC_analysis_filter_FIX
-
-#endif
-
-void silk_warped_LPC_analysis_filter_FIX_sse4_1(
- opus_int32 state[], /* I/O State [order + 1] */
- opus_int32 res_Q2[], /* O Residual signal [length] */
- const opus_int16 coef_Q13[], /* I Coefficients [order] */
- const opus_int16 input[], /* I Input signal [length] */
- const opus_int16 lambda_Q16, /* I Warping factor */
- const opus_int length, /* I Length of input signal */
- const opus_int order /* I Filter order (even) */
-);
-
-#if defined(OPUS_X86_PRESUME_SSE4_1)
-#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \
- ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order))
-
-#else
-
-extern void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[OPUS_ARCHMASK + 1])(
- opus_int32 state[], /* I/O State [order + 1] */
- opus_int32 res_Q2[], /* O Residual signal [length] */
- const opus_int16 coef_Q13[], /* I Coefficients [order] */
- const opus_int16 input[], /* I Input signal [length] */
- const opus_int16 lambda_Q16, /* I Warping factor */
- const opus_int length, /* I Length of input signal */
- const opus_int order /* I Filter order (even) */
-);
-
-# define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \
- ((*SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[(arch) & OPUS_ARCHMASK])(state, res_Q2, coef_Q13, input, lambda_Q16, length, order))
-
#endif
# endif
diff --git a/silk/x86/x86_silk_map.c b/silk/x86/x86_silk_map.c
index 818841f2..32dcc3ca 100644
--- a/silk/x86/x86_silk_map.c
+++ b/silk/x86/x86_silk_map.c
@@ -66,8 +66,9 @@ opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )(
MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */
};
+#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */
void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )(
- const silk_encoder_state *psEncC, /* I/O Encoder State */
+ const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
@@ -89,7 +90,9 @@ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )(
MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */
MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */
};
+#endif
+#if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */
void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )(
opus_int8 *ind, /* O index of best codebook vector */
opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */
@@ -109,9 +112,11 @@ void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )(
MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */
MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) /* avx */
};
+#endif
+#if 0 /* FIXME: SSE disabled until the NSQ code gets updated. */
void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )(
- const silk_encoder_state *psEncC, /* I/O Encoder State */
+ const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
@@ -133,25 +138,10 @@ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )(
MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */
MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) /* avx */
};
+#endif
#if defined(FIXED_POINT)
-void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[ OPUS_ARCHMASK + 1 ] )(
- opus_int32 state[], /* I/O State [order + 1] */
- opus_int32 res_Q2[], /* O Residual signal [length] */
- const opus_int16 coef_Q13[], /* I Coefficients [order] */
- const opus_int16 input[], /* I Input signal [length] */
- const opus_int16 lambda_Q16, /* I Warping factor */
- const opus_int length, /* I Length of input signal */
- const opus_int order /* I Filter order (even) */
-) = {
- silk_warped_LPC_analysis_filter_FIX_c, /* non-sse */
- silk_warped_LPC_analysis_filter_FIX_c,
- silk_warped_LPC_analysis_filter_FIX_c,
- MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ), /* sse4.1 */
- MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ) /* avx */
-};
-
void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )(
opus_int32 *res_nrg, /* O Residual energy */
opus_int *res_nrg_Q, /* O Residual energy Q value */