diff options
author | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2019-02-12 19:33:26 -0500 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2019-02-13 13:51:08 -0500 |
commit | 843121b356685ff5a8c40211951f392f77f689cc (patch) | |
tree | f8896b370ce4e45f01ca4ca4b50b065a464b7888 /src/analysis.c | |
parent | 0619d0848520ce2cd45ec3c9fc3a2c9743b4608e (diff) | |
download | libopus-843121b356685ff5a8c40211951f392f77f689cc.tar.gz |
Fixes analysis buffering for silence and complexity changes
The previous code would go out of sync in those cases.
Diffstat (limited to 'src/analysis.c')
-rw-r--r-- | src/analysis.c | 48 |
1 file changed, 39 insertions, 9 deletions
```diff
diff --git a/src/analysis.c b/src/analysis.c
index 23f6fa56..6907a631 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -249,6 +249,15 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
    if (curr_lookahead<0)
       curr_lookahead += DETECT_SIZE;
 
+   tonal->read_subframe += len/(tonal->Fs/400);
+   while (tonal->read_subframe>=8)
+   {
+      tonal->read_subframe -= 8;
+      tonal->read_pos++;
+   }
+   if (tonal->read_pos>=DETECT_SIZE)
+      tonal->read_pos-=DETECT_SIZE;
+
    /* On long frames, look at the second analysis window rather than the first. */
    if (len > tonal->Fs/50 && pos != tonal->write_pos)
    {
@@ -262,6 +271,8 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
       pos = DETECT_SIZE-1;
    pos0 = pos;
    OPUS_COPY(info_out, &tonal->info[pos], 1);
+   if (!info_out->valid)
+      return;
    tonality_max = tonality_avg = info_out->tonality;
    tonality_count = 1;
    /* Look at the neighbouring frames and pick largest bandwidth found (to be safe). */
@@ -393,14 +404,6 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
    info_out->music_prob_max = prob_max;
 
    /* printf("%f %f %f %f %f\n", prob_min, prob_max, prob_avg/prob_count, vad_prob, info_out->music_prob); */
-   tonal->read_subframe += len/(tonal->Fs/400);
-   while (tonal->read_subframe>=8)
-   {
-      tonal->read_subframe -= 8;
-      tonal->read_pos++;
-   }
-   if (tonal->read_pos>=DETECT_SIZE)
-      tonal->read_pos-=DETECT_SIZE;
 }
 
 static const float std_feature_bias[9] = {
@@ -420,6 +423,24 @@ static const float std_feature_bias[9] = {
 #define SCALE_ENER(e) (e)
 #endif
 
+#ifdef FIXED_POINT
+static int is_digital_silence32(const opus_val32* pcm, int frame_size, int channels, int lsb_depth)
+{
+   int silence = 0;
+   opus_val32 sample_max = 0;
+#ifdef MLP_TRAINING
+   return 0;
+#endif
+   sample_max = celt_maxabs32(pcm, frame_size*channels);
+   silence = (sample_max == 0);
+   (void)lsb_depth;
+   return silence;
+}
+#else
+#define is_digital_silence32(pcm, frame_size, channels, lsb_depth) \
+   is_digital_silence(pcm, frame_size, channels, lsb_depth)
+#endif
+
 static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
 {
     int i, b;
@@ -464,8 +485,10 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
     float layer_out[MAX_NEURONS];
     float below_max_pitch;
     float above_max_pitch;
+    int is_silence;
     SAVE_STACK;
 
+    tonal->initialized = 1;
     alpha = 1.f/IMIN(10, 1+tonal->count);
     alphaE = 1.f/IMIN(25, 1+tonal->count);
     /* Noise floor related decay for bandwidth detection: -2.2 dB/second */
@@ -500,6 +523,8 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
     if (tonal->write_pos>=DETECT_SIZE)
        tonal->write_pos-=DETECT_SIZE;
 
+    is_silence = is_digital_silence32(tonal->inmem, ANALYSIS_BUF_SIZE, 1, lsb_depth);
+
     ALLOC(in, 480, kiss_fft_cpx);
     ALLOC(out, 480, kiss_fft_cpx);
     ALLOC(tonality, 240, float);
@@ -518,6 +543,12 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
           &tonal->inmem[240], tonal->downmix_state, remaining,
           offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C, tonal->Fs);
     tonal->mem_fill = 240 + remaining;
+    if (is_silence)
+    {
+       info->valid = 0;
+       RESTORE_STACK;
+       return;
+    }
     opus_fft(kfft, in, out, tonal->arch);
 #ifndef FIXED_POINT
     /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */
@@ -938,7 +969,6 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co
       analysis->analysis_offset -= frame_size;
    }
 
-   analysis_info->valid = 0;
    tonality_get_info(analysis, analysis_info, frame_size);
 }
 
```