Add RNN for VAD and speech/music classification

The network is built from two dense layers with a GRU layer in between.
author: Jean-Marc Valin <jmvalin@jmvalin.ca> 2017-07-12 16:55:28 -0400
committer: Jean-Marc Valin <jmvalin@jmvalin.ca> 2017-10-05 17:40:27 -0400
commit: af93fbd55fd5c23a2492166816311d9f67df1b24 (patch)
tree: 7221fd8dd284dd593e4b3eb1a3ed9cee3b4fc926 /src/analysis.h
parent: f3cff05eeb83ec8c055b7331338d705af220358d (diff)
download: libopus-af93fbd55fd5c23a2492166816311d9f67df1b24.tar.gz
1 file changed, 2 insertions, 13 deletions
diff --git a/src/analysis.h b/src/analysis.h
index cac51dfa..289c845e 100644
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -30,6 +30,7 @@
 
 #include "celt.h"
 #include "opus_private.h"
+#include "mlp.h"
 
 #define NB_FRAMES 8
 #define NB_TBANDS 18
@@ -64,28 +65,16 @@ typedef struct {
    float mem[32];
    float cmean[8];
    float std[9];
-   float music_prob;
-   float vad_prob;
    float Etracker;
    float lowECount;
    int E_count;
-   int last_music;
    int count;
    int analysis_offset;
-   /** Probability of having speech for time i to DETECT_SIZE-1 (and music before).
-       pspeech[0] is the probability that all frames in the window are speech. */
-   float pspeech[DETECT_SIZE];
-   /** Probability of having music for time i to DETECT_SIZE-1 (and speech before).
-       pmusic[0] is the probability that all frames in the window are music. */
-   float pmusic[DETECT_SIZE];
-   float speech_confidence;
-   float music_confidence;
-   int speech_confidence_count;
-   int music_confidence_count;
    int write_pos;
    int read_pos;
    int read_subframe;
    float hp_ener_accum;
+   float rnn_state[MAX_NEURONS];
    opus_val32 downmix_state[3];
    AnalysisInfo info[DETECT_SIZE];
 } TonalityAnalysisState;
author	Jean-Marc Valin <jmvalin@jmvalin.ca>	2017-07-12 16:55:28 -0400
committer	Jean-Marc Valin <jmvalin@jmvalin.ca>	2017-10-05 17:40:27 -0400
commit	af93fbd55fd5c23a2492166816311d9f67df1b24 (patch)
tree	7221fd8dd284dd593e4b3eb1a3ed9cee3b4fc926 /src/analysis.h
parent	f3cff05eeb83ec8c055b7331338d705af220358d (diff)
download	libopus-af93fbd55fd5c23a2492166816311d9f67df1b24.tar.gz