From 89c5e06d4bd5a5e41dda53478f9f706d5196fdef Mon Sep 17 00:00:00 2001 From: Felicia Lim Date: Mon, 10 Feb 2020 14:36:35 -0800 Subject: Revert "Fixes to the the activity flag that is passed to Silk so it represents the final activity flag used in the DTX decision" This reverts commit ea3b30f946d0e3a8d5b88d1b71cac56fb87955fd. --- src/opus_encoder.c | 54 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 1ae5598a..844b08dd 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -892,15 +892,34 @@ static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, in #endif /* Decides if DTX should be turned on (=1) or off (=0) */ -static int decide_dtx_mode(opus_int activity, /* indicates if this frame contains speech/music */ - int *nb_no_activity_frames /* number of consecutive frames with no activity */ - ) - +static int decide_dtx_mode(float activity_probability, /* probability that current frame contains speech/music */ + int *nb_no_activity_frames, /* number of consecutive frames with no activity */ + opus_val32 peak_signal_energy, /* peak energy of desired signal detected so far */ + const opus_val16 *pcm, /* input pcm signal */ + int frame_size, /* frame size */ + int channels, + int is_silence, /* only digital silence detected in this frame */ + int arch + ) { - if (!activity) + opus_val32 noise_energy; + + if (!is_silence) + { + if (activity_probability < DTX_ACTIVITY_THRESHOLD) /* is noise */ + { + noise_energy = compute_frame_energy(pcm, frame_size, channels, arch); + + /* but is sufficiently quiet */ + is_silence = peak_signal_energy >= (PSEUDO_SNR_THRESHOLD * noise_energy); + } + } + + if (is_silence) { /* The number of consecutive DTX frames should be within the allowed bounds */ (*nb_no_activity_frames)++; + if (*nb_no_activity_frames > NB_SPEECH_FRAMES_BEFORE_DTX) { if (*nb_no_activity_frames <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX)) @@ -1083,8 +1102,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ int analysis_read_subframe_bak=-1; int is_silence = 0; #endif - opus_int activity = VAD_NO_DECISION; - VARDECL(opus_val16, tmp_prefill); ALLOC_STACK; @@ -1152,17 +1169,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (!is_silence) st->voice_ratio = -1; - if (analysis_info.valid) { - activity = !is_silence && analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD; - if (!activity) { - /* Mark as active if this noise frame is sufficiently loud */ - opus_val32 noise_energy = compute_frame_energy(pcm, frame_size, st->channels, st->arch); - activity = st->peak_signal_energy < (PSEUDO_SNR_THRESHOLD * noise_energy); - } - } else { - activity = !is_silence; - } - st->detected_bandwidth = 0; if (analysis_info.valid) { @@ -1662,6 +1668,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->mode != MODE_CELT_ONLY) { opus_int32 total_bitRate, celt_rate; + opus_int activity; #ifdef FIXED_POINT const opus_int16 *pcm_silk; #else @@ -1669,6 +1676,14 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ ALLOC(pcm_silk, st->channels*frame_size, opus_int16); #endif + activity = VAD_NO_DECISION; +#ifndef DISABLE_FLOAT_API + if( analysis_info.valid ) { + /* Inform SILK about the Opus VAD decision */ + activity = ( analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD ); + } +#endif + /* Distribute bits between SILK and CELT */ total_bitRate = 8 * bytes_target * frame_rate; if( st->mode == MODE_HYBRID ) { @@ -2129,7 +2144,8 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ #ifndef DISABLE_FLOAT_API if (st->use_dtx && (analysis_info.valid || is_silence)) { - if (decide_dtx_mode(activity, &st->nb_no_activity_frames)) + if (decide_dtx_mode(analysis_info.activity_probability, &st->nb_no_activity_frames, + st->peak_signal_energy, pcm, frame_size, st->channels, is_silence, st->arch)) { st->rangeFinal = 0; data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); -- cgit v1.2.3