Upgrade Opus to v1.1.2android-wear-n-preview-3 android-wear-n-preview-1 android-n-preview-3

Change-Id: I8211751bab026ab236a612c6e0873f8bdbcd6c98
author: flim <flim@google.com> 2016-01-26 14:33:44 +0100
committer: flim <flim@google.com> 2016-03-04 17:40:32 +0100
commit: c91ee5b5642fcc4969150f73d5f6848f88bf1638 (patch)
tree: 900cb39b975dfed729e37e5810fef61fd92d7a70 /src
parent: 1391dbf0ccd121ce7a49d30e2142d36c8d404990 (diff)
download: libopus-c91ee5b5642fcc4969150f73d5f6848f88bf1638.tar.gz
13 files changed, 377 insertions, 199 deletions
diff --git a/src/analysis.c b/src/analysis.c
index 778a62aa..360ebcc8 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -39,8 +39,6 @@
 #include "mlp.h"
 #include "stack_alloc.h"
 
-extern const MLP net;
-
 #ifndef M_PI
 #define M_PI 3.141592653
 #endif
@@ -140,6 +138,21 @@ static OPUS_INLINE float fast_atan2f(float y, float x) {
    }
 }
 
+void tonality_analysis_init(TonalityAnalysisState *tonal)
+{
+  /* Initialize reusable fields. */
+  tonal->arch = opus_select_arch();
+  /* Clear remaining fields. */
+  tonality_analysis_reset(tonal);
+}
+
+void tonality_analysis_reset(TonalityAnalysisState *tonal)
+{
+  /* Clear non-reusable fields. */
+  char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START;
+  OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal));
+}
+
 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
 {
    int pos;
@@ -189,7 +202,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
    info_out->music_prob = psum;
 }
 
-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
+static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
 {
     int i, b;
     const kiss_fft_state *kfft;
@@ -262,7 +275,16 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
     remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
     downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
     tonal->mem_fill = 240 + remaining;
-    opus_fft(kfft, in, out);
+    opus_fft(kfft, in, out, tonal->arch);
+#ifndef FIXED_POINT
+    /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */
+    if (celt_isnan(out[0].r))
+    {
+       info->valid = 0;
+       RESTORE_STACK;
+       return;
+    }
+#endif
 
     for (i=1;i<N2;i++)
     {
@@ -334,6 +356,16 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
           tE += binE*tonality[i];
           nE += binE*2.f*(.5f-noisiness[i]);
        }
+#ifndef FIXED_POINT
+       /* Check for extreme band energies that could cause NaNs later. */
+       if (!(E<1e9f) || celt_isnan(E))
+       {
+          info->valid = 0;
+          RESTORE_STACK;
+          return;
+       }
+#endif
+
        tonal->E[tonal->E_count][b] = E;
        frame_noisiness += nE/(1e-15f+E);
 
@@ -611,8 +643,6 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
     /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
     info->noisiness = frame_noisiness;
     info->valid = 1;
-    if (info_out!=NULL)
-       OPUS_COPY(info_out, info, 1);
     RESTORE_STACK;
 }
 
@@ -631,7 +661,7 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co
       pcm_len = analysis_frame_size - analysis->analysis_offset;
       offset = analysis->analysis_offset;
       do {
-         tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+         tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
          offset += 480;
          pcm_len -= 480;
       } while (pcm_len>0);
diff --git a/src/analysis.h b/src/analysis.h
index be0388fa..9eae56a5 100644
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -39,6 +39,8 @@
 #define DETECT_SIZE 200
 
 typedef struct {
+   int arch;
+#define TONALITY_ANALYSIS_RESET_START angle
    float angle[240];
    float d_angle[240];
    float d2_angle[240];
@@ -78,8 +80,19 @@ typedef struct {
    AnalysisInfo info[DETECT_SIZE];
 } TonalityAnalysisState;
 
-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
-     const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix);
+/** Initialize a TonalityAnalysisState struct.
+ *
+ * This performs some possibly slow initialization steps which should
+ * not be repeated every analysis step. No allocated memory is retained
+ * by the state struct, so no cleanup call is required.
+ */
+void tonality_analysis_init(TonalityAnalysisState *analysis);
+
+/** Reset a TonalityAnalysisState stuct.
+ *
+ * Call this when there's a discontinuity in the data.
+ */
+void tonality_analysis_reset(TonalityAnalysisState *analysis);
 
 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);
 
diff --git a/src/mlp.c b/src/mlp.c
index 46386026..ff9e50df 100644
--- a/src/mlp.c
+++ b/src/mlp.c
@@ -41,77 +41,82 @@
 #if 0
 static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
 {
-	int i;
-	opus_val16 xx; /* Q11 */
-	/*double x, y;*/
-	opus_val16 dy, yy; /* Q14 */
-	/*x = 1.9073e-06*_x;*/
-	if (_x>=QCONST32(8,19))
-		return QCONST32(1.,14);
-	if (_x<=-QCONST32(8,19))
-		return -QCONST32(1.,14);
-	xx = EXTRACT16(SHR32(_x, 8));
-	/*i = lrint(25*x);*/
-	i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
-	/*x -= .04*i;*/
-	xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
-	/*x = xx*(1./2048);*/
-	/*y = tansig_table[250+i];*/
-	yy = tansig_table[250+i];
-	/*y = yy*(1./16384);*/
-	dy = 16384-MULT16_16_Q14(yy,yy);
-	yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
-	return yy;
+    int i;
+    opus_val16 xx; /* Q11 */
+    /*double x, y;*/
+    opus_val16 dy, yy; /* Q14 */
+    /*x = 1.9073e-06*_x;*/
+    if (_x>=QCONST32(8,19))
+        return QCONST32(1.,14);
+    if (_x<=-QCONST32(8,19))
+        return -QCONST32(1.,14);
+    xx = EXTRACT16(SHR32(_x, 8));
+    /*i = lrint(25*x);*/
+    i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
+    /*x -= .04*i;*/
+    xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
+    /*x = xx*(1./2048);*/
+    /*y = tansig_table[250+i];*/
+    yy = tansig_table[250+i];
+    /*y = yy*(1./16384);*/
+    dy = 16384-MULT16_16_Q14(yy,yy);
+    yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
+    return yy;
 }
 #else
 /*extern const float tansig_table[501];*/
 static OPUS_INLINE float tansig_approx(float x)
 {
-	int i;
-	float y, dy;
-	float sign=1;
-	/* Tests are reversed to catch NaNs */
+    int i;
+    float y, dy;
+    float sign=1;
+    /* Tests are reversed to catch NaNs */
     if (!(x<8))
         return 1;
     if (!(x>-8))
         return -1;
-	if (x<0)
-	{
-	   x=-x;
-	   sign=-1;
-	}
-	i = (int)floor(.5f+25*x);
-	x -= .04f*i;
-	y = tansig_table[i];
-	dy = 1-y*y;
-	y = y + x*dy*(1 - y*x);
-	return sign*y;
+#ifndef FIXED_POINT
+    /* Another check in case of -ffast-math */
+    if (celt_isnan(x))
+       return 0;
+#endif
+    if (x<0)
+    {
+       x=-x;
+       sign=-1;
+    }
+    i = (int)floor(.5f+25*x);
+    x -= .04f*i;
+    y = tansig_table[i];
+    dy = 1-y*y;
+    y = y + x*dy*(1 - y*x);
+    return sign*y;
 }
 #endif
 
 #if 0
 void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
 {
-	int j;
-	opus_val16 hidden[MAX_NEURONS];
-	const opus_val16 *W = m->weights;
-	/* Copy to tmp_in */
-	for (j=0;j<m->topo[1];j++)
-	{
-		int k;
-		opus_val32 sum = SHL32(EXTEND32(*W++),8);
-		for (k=0;k<m->topo[0];k++)
-			sum = MAC16_16(sum, in[k],*W++);
-		hidden[j] = tansig_approx(sum);
-	}
-	for (j=0;j<m->topo[2];j++)
-	{
-		int k;
-		opus_val32 sum = SHL32(EXTEND32(*W++),14);
-		for (k=0;k<m->topo[1];k++)
-			sum = MAC16_16(sum, hidden[k], *W++);
-		out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
-	}
+    int j;
+    opus_val16 hidden[MAX_NEURONS];
+    const opus_val16 *W = m->weights;
+    /* Copy to tmp_in */
+    for (j=0;j<m->topo[1];j++)
+    {
+        int k;
+        opus_val32 sum = SHL32(EXTEND32(*W++),8);
+        for (k=0;k<m->topo[0];k++)
+            sum = MAC16_16(sum, in[k],*W++);
+        hidden[j] = tansig_approx(sum);
+    }
+    for (j=0;j<m->topo[2];j++)
+    {
+        int k;
+        opus_val32 sum = SHL32(EXTEND32(*W++),14);
+        for (k=0;k<m->topo[1];k++)
+            sum = MAC16_16(sum, hidden[k], *W++);
+        out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
+    }
 }
 #else
 void mlp_process(const MLP *m, const float *in, float *out)
diff --git a/src/mlp.h b/src/mlp.h
index 86c8e061..618e246e 100644
--- a/src/mlp.h
+++ b/src/mlp.h
@@ -31,11 +31,13 @@
 #include "arch.h"
 
 typedef struct {
-	int layers;
-	const int *topo;
-	const float *weights;
+    int layers;
+    const int *topo;
+    const float *weights;
 } MLP;
 
+extern const MLP net;
+
 void mlp_process(const MLP *m, const float *in, float *out);
 
 #endif /* _MLP_H_ */
diff --git a/src/mlp_data.c b/src/mlp_data.c
index 401c4c02..c2fda4e2 100644
--- a/src/mlp_data.c
+++ b/src/mlp_data.c
@@ -1,6 +1,10 @@
 /* The contents of this file was automatically generated by mlp_train.c
    It contains multi-layer perceptron (MLP) weights. */
 
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
 #include "mlp.h"
 
 /* RMS error was 0.138320, seed was 1361535663 */
diff --git a/src/opus.c b/src/opus.c
index 30890b9c..e9ce93b3 100644
--- a/src/opus.c
+++ b/src/opus.c
@@ -166,6 +166,27 @@ static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *siz
    }
 }
 
+int opus_packet_get_samples_per_frame(const unsigned char *data,
+      opus_int32 Fs)
+{
+   int audiosize;
+   if (data[0]&0x80)
+   {
+      audiosize = ((data[0]>>3)&0x3);
+      audiosize = (Fs<<audiosize)/400;
+   } else if ((data[0]&0x60) == 0x60)
+   {
+      audiosize = (data[0]&0x08) ? Fs/50 : Fs/100;
+   } else {
+      audiosize = ((data[0]>>3)&0x3);
+      if (audiosize == 3)
+         audiosize = Fs*60/1000;
+      else
+         audiosize = (Fs<<audiosize)/100;
+   }
+   return audiosize;
+}
+
 int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
       int self_delimited, unsigned char *out_toc,
       const unsigned char *frames[48], opus_int16 size[48],
diff --git a/src/opus_decoder.c b/src/opus_decoder.c
index 919ba521..080bec50 100644
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -33,7 +33,7 @@
 # error "OPUS_BUILD _MUST_ be defined to build Opus. This probably means you need other defines as well, as in a config.h. See the included build files for details."
 #endif
 
-#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__)
+#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__) && !defined(OPUS_WILL_BE_SLOW)
 # pragma message "You appear to be compiling without optimization, if so opus will be very slow."
 #endif
 
@@ -59,6 +59,7 @@ struct OpusDecoder {
    opus_int32   Fs;          /** Sampling rate (at the API level) */
    silk_DecControlStruct DecControl;
    int          decode_gain;
+   int          arch;
 
    /* Everything beyond this point gets cleared on a reset */
 #define OPUS_DECODER_RESET_START stream_channels
@@ -77,12 +78,6 @@ struct OpusDecoder {
    opus_uint32  rangeFinal;
 };
 
-#ifdef FIXED_POINT
-static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
-   return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
-}
-#endif
-
 
 int opus_decoder_get_size(int channels)
 {
@@ -137,6 +132,7 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels)
 
    st->prev_mode = 0;
    st->frame_size = Fs/400;
+   st->arch = opus_select_arch();
    return OPUS_OK;
 }
 
@@ -215,7 +211,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
    VARDECL(opus_val16, pcm_transition_silk);
    int pcm_transition_celt_size;
    VARDECL(opus_val16, pcm_transition_celt);
-   opus_val16 *pcm_transition;
+   opus_val16 *pcm_transition=NULL;
    int redundant_audio_size;
    VARDECL(opus_val16, redundant_audio);
 
@@ -230,6 +226,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
    int F2_5, F5, F10, F20;
    const opus_val16 *window;
    opus_uint32 redundant_rng = 0;
+   int celt_accum;
    ALLOC_STACK;
 
    silk_dec = (char*)st+st->silk_dec_offset;
@@ -295,6 +292,14 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
       }
    }
 
+   /* In fixed-point, we can tell CELT to do the accumulation on top of the
+      SILK PCM buffer. This saves some stack space. */
+#ifdef FIXED_POINT
+   celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10);
+#else
+   celt_accum = 0;
+#endif
+
    pcm_transition_silk_size = ALLOC_NONE;
    pcm_transition_celt_size = ALLOC_NONE;
    if (data!=NULL && st->prev_mode > 0 && (
@@ -325,14 +330,20 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
    }
 
    /* Don't allocate any memory when in CELT-only mode */
-   pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;
+   pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;
    ALLOC(pcm_silk, pcm_silk_size, opus_int16);
 
    /* SILK processing */
    if (mode != MODE_CELT_ONLY)
    {
       int lost_flag, decoded_samples;
-      opus_int16 *pcm_ptr = pcm_silk;
+      opus_int16 *pcm_ptr;
+#ifdef FIXED_POINT
+      if (celt_accum)
+         pcm_ptr = pcm;
+      else
+#endif
+         pcm_ptr = pcm_silk;
 
       if (st->prev_mode==MODE_CELT_ONLY)
          silk_InitDecoder( silk_dec );
@@ -366,7 +377,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
         /* Call SILK decoder */
         int first_frame = decoded_samples == 0;
         silk_ret = silk_Decode( silk_dec, &st->DecControl,
-                                lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size );
+                                lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size, st->arch );
         if( silk_ret ) {
            if (lost_flag) {
               /* PLC failure should not be fatal */
@@ -462,7 +473,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
    {
       celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
       celt_decode_with_ec(celt_dec, data+len, redundancy_bytes,
-                          redundant_audio, F5, NULL);
+                          redundant_audio, F5, NULL, 0);
       celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));
    }
 
@@ -477,25 +488,28 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
          celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
       /* Decode CELT */
       celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data,
-                                     len, pcm, celt_frame_size, &dec);
+                                     len, pcm, celt_frame_size, &dec, celt_accum);
    } else {
       unsigned char silence[2] = {0xFF, 0xFF};
-      for (i=0;i<frame_size*st->channels;i++)
-         pcm[i] = 0;
+      if (!celt_accum)
+      {
+         for (i=0;i<frame_size*st->channels;i++)
+            pcm[i] = 0;
+      }
       /* For hybrid -> SILK transitions, we let the CELT MDCT
          do a fade-out by decoding a silence frame */
       if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) )
       {
          celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
-         celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL);
+         celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum);
       }
    }
 
-   if (mode != MODE_CELT_ONLY)
+   if (mode != MODE_CELT_ONLY && !celt_accum)
    {
 #ifdef FIXED_POINT
       for (i=0;i<frame_size*st->channels;i++)
-         pcm[i] = SAT16(pcm[i] + pcm_silk[i]);
+         pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i]));
 #else
       for (i=0;i<frame_size*st->channels;i++)
          pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]);
@@ -514,7 +528,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
       celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
       celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
 
-      celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL);
+      celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0);
       celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));
       smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5,
                   pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs);
@@ -710,6 +724,7 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data,
 {
    VARDECL(opus_int16, out);
    int ret, i;
+   int nb_samples;
    ALLOC_STACK;
 
    if(frame_size<=0)
@@ -717,6 +732,14 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data,
       RESTORE_STACK;
       return OPUS_BAD_ARG;
    }
+   if (data != NULL && len > 0 && !decode_fec)
+   {
+      nb_samples = opus_decoder_get_nb_samples(st, data, len);
+      if (nb_samples>0)
+         frame_size = IMIN(frame_size, nb_samples);
+      else
+         return OPUS_INVALID_PACKET;
+   }
    ALLOC(out, frame_size*st->channels, opus_int16);
 
    ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0);
@@ -737,6 +760,7 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
 {
    VARDECL(float, out);
    int ret, i;
+   int nb_samples;
    ALLOC_STACK;
 
    if(frame_size<=0)
@@ -745,6 +769,14 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
       return OPUS_BAD_ARG;
    }
 
+   if (data != NULL && len > 0 && !decode_fec)
+   {
+      nb_samples = opus_decoder_get_nb_samples(st, data, len);
+      if (nb_samples>0)
+         frame_size = IMIN(frame_size, nb_samples);
+      else
+         return OPUS_INVALID_PACKET;
+   }
    ALLOC(out, frame_size*st->channels, float);
 
    ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1);
@@ -904,27 +936,6 @@ int opus_packet_get_bandwidth(const unsigned char *data)
    return bandwidth;
 }
 
-int opus_packet_get_samples_per_frame(const unsigned char *data,
-      opus_int32 Fs)
-{
-   int audiosize;
-   if (data[0]&0x80)
-   {
-      audiosize = ((data[0]>>3)&0x3);
-      audiosize = (Fs<<audiosize)/400;
-   } else if ((data[0]&0x60) == 0x60)
-   {
-      audiosize = (data[0]&0x08) ? Fs/50 : Fs/100;
-   } else {
-      audiosize = ((data[0]>>3)&0x3);
-      if (audiosize == 3)
-         audiosize = Fs*60/1000;
-      else
-         audiosize = (Fs<<audiosize)/100;
-   }
-   return audiosize;
-}
-
 int opus_packet_get_nb_channels(const unsigned char *data)
 {
    return (data[0]&0x4) ? 2 : 1;
diff --git a/src/opus_demo.c b/src/opus_demo.c
index f8cdf031..9e99a3b2 100644
--- a/src/opus_demo.c
+++ b/src/opus_demo.c
@@ -48,7 +48,7 @@ void print_usage( char* argv[] )
         "<bits per second>  [options] <input> <output>\n", argv[0]);
     fprintf(stderr, "       %s -d <sampling rate (Hz)> <channels (1/2)> "
         "[options] <input> <output>\n\n", argv[0]);
-    fprintf(stderr, "mode: voip | audio | restricted-lowdelay\n" );
+    fprintf(stderr, "application: voip | audio | restricted-lowdelay\n" );
     fprintf(stderr, "options:\n" );
     fprintf(stderr, "-e                   : only runs the encoder (output the bit-stream)\n" );
     fprintf(stderr, "-d                   : only runs the decoder (reads the bit-stream as input)\n" );
@@ -245,14 +245,14 @@ int main(int argc, char *argv[])
     double bits=0.0, bits_max=0.0, bits_act=0.0, bits2=0.0, nrg;
     double tot_samples=0;
     opus_uint64 tot_in, tot_out;
-    int bandwidth=-1;
+    int bandwidth=OPUS_AUTO;
     const char *bandwidth_string;
     int lost = 0, lost_prev = 1;
     int toggle = 0;
     opus_uint32 enc_final_range[2];
     opus_uint32 dec_final_range;
     int encode_only=0, decode_only=0;
-    int max_frame_size = 960*6;
+    int max_frame_size = 48000*2;
     int curr_read=0;
     int sweep_bps = 0;
     int random_framesize=0, newsize=0, delayed_celt=0;
@@ -336,15 +336,12 @@ int main(int argc, char *argv[])
 
     /* defaults: */
     use_vbr = 1;
-    bandwidth = OPUS_AUTO;
     max_payload_bytes = MAX_PACKET;
     complexity = 10;
     use_inbandfec = 0;
     forcechannels = OPUS_AUTO;
     use_dtx = 0;
     packet_loss_perc = 0;
-    max_frame_size = 2*48000;
-    curr_read=0;
 
     while( args < argc - 2 ) {
         /* process command line options */
@@ -576,7 +573,7 @@ int main(int argc, char *argv[])
          bandwidth_string = "fullband";
          break;
     case OPUS_AUTO:
-         bandwidth_string = "auto";
+         bandwidth_string = "auto bandwidth";
          break;
     default:
          bandwidth_string = "unknown";
@@ -588,7 +585,7 @@ int main(int argc, char *argv[])
                        (long)sampling_rate, channels);
     else
        fprintf(stderr, "Encoding %ld Hz input at %.3f kb/s "
-                       "in %s mode with %d-sample frames.\n",
+                       "in %s with %d-sample frames.\n",
                        (long)sampling_rate, bitrate_bps*0.001,
                        bandwidth_string, frame_size);
 
@@ -637,7 +634,7 @@ int main(int argc, char *argv[])
             case 4: newsize=sampling_rate/25; break;
             case 5: newsize=3*sampling_rate/50; break;
             }
-            while (newsize < sampling_rate/25 && bitrate_bps-fabs(sweep_bps) <= 3*12*sampling_rate/newsize)
+            while (newsize < sampling_rate/25 && bitrate_bps-abs(sweep_bps) <= 3*12*sampling_rate/newsize)
                newsize*=2;
             if (newsize < sampling_rate/100 && frame_size >= sampling_rate/100)
             {
@@ -837,35 +834,39 @@ int main(int argc, char *argv[])
         }
 
         lost_prev = lost;
-
-        /* count bits */
-        bits += len[toggle]*8;
-        bits_max = ( len[toggle]*8 > bits_max ) ? len[toggle]*8 : bits_max;
         if( count >= use_inbandfec ) {
-            nrg = 0.0;
+            /* count bits */
+            bits += len[toggle]*8;
+            bits_max = ( len[toggle]*8 > bits_max ) ? len[toggle]*8 : bits_max;
+            bits2 += len[toggle]*len[toggle]*64;
             if (!decode_only)
             {
+                nrg = 0.0;
                 for ( k = 0; k < frame_size * channels; k++ ) {
                     nrg += in[ k ] * (double)in[ k ];
                 }
+                nrg /= frame_size * channels;
+                if( nrg > 1e5 ) {
+                    bits_act += len[toggle]*8;
+                    count_act++;
+                }
             }
-            if ( ( nrg / ( frame_size * channels ) ) > 1e5 ) {
-                bits_act += len[toggle]*8;
-                count_act++;
-            }
-            /* Variance */
-            bits2 += len[toggle]*len[toggle]*64;
         }
         count++;
         toggle = (toggle + use_inbandfec) & 1;
     }
+
+    /* Print out bitrate statistics */
+    if(decode_only)
+        frame_size = (int)(tot_samples / count);
+    count -= use_inbandfec;
     fprintf (stderr, "average bitrate:             %7.3f kb/s\n",
                      1e-3*bits*sampling_rate/tot_samples);
     fprintf (stderr, "maximum bitrate:             %7.3f kb/s\n",
                      1e-3*bits_max*sampling_rate/frame_size);
     if (!decode_only)
        fprintf (stderr, "active bitrate:              %7.3f kb/s\n",
-               1e-3*bits_act*sampling_rate/(frame_size*(double)count_act));
+               1e-3*bits_act*sampling_rate/(1e-15+frame_size*(double)count_act));
     fprintf (stderr, "bitrate standard deviation:  %7.3f kb/s\n",
             1e-3*sqrt(bits2/count - bits*bits/(count*(double)count))*sampling_rate/frame_size);
     /* Close any files to which intermediate results were stored */
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index fbd3de63..a7e19127 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -38,6 +38,7 @@
 #include "float_cast.h"
 #include "opus.h"
 #include "arch.h"
+#include "pitch.h"
 #include "opus_private.h"
 #include "os_support.h"
 #include "cpu_support.h"
@@ -80,6 +81,10 @@ struct OpusEncoder {
     int          lsb_depth;
     int          encoder_buffer;
     int          lfe;
+    int          arch;
+#ifndef DISABLE_FLOAT_API
+    TonalityAnalysisState analysis;
+#endif
 
 #define OPUS_ENCODER_RESET_START stream_channels
     int          stream_channels;
@@ -99,12 +104,9 @@ struct OpusEncoder {
     StereoWidthState width_mem;
     opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];
 #ifndef DISABLE_FLOAT_API
-    TonalityAnalysisState analysis;
     int          detected_bandwidth;
-    int          analysis_offset;
 #endif
     opus_uint32  rangeFinal;
-    int          arch;
 };
 
 /* Transition tables for the voice and music. First column is the
@@ -231,7 +233,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
     st->lsb_depth = 24;
     st->variable_duration = OPUS_FRAMESIZE_ARG;
 
-    /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead 
+    /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead
        + 1.5 ms for SILK resamplers and stereo prediction) */
     st->delay_compensation = st->Fs/250;
 
@@ -242,6 +244,10 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
     st->mode = MODE_HYBRID;
     st->bandwidth = OPUS_BANDWIDTH_FULLBAND;
 
+#ifndef DISABLE_FLOAT_API
+    tonality_analysis_init(&st->analysis);
+#endif
+
     return OPUS_OK;
 }
 
@@ -648,7 +654,7 @@ static int transient_viterbi(const float *E, const float *E_1, int N, int frame_
    return best_state;
 }
 
-int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+static int optimize_framesize(const void *x, int len, int C, opus_int32 Fs,
                 int bitrate, opus_val16 tonality, float *mem, int buffering,
                 downmix_func downmix)
 {
@@ -660,6 +666,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
    int bestLM=0;
    int subframe;
    int pos;
+   int offset;
    VARDECL(opus_val32, sub);
 
    subframe = Fs/400;
@@ -670,9 +677,8 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
    {
       /* Consider the CELT delay when not in restricted-lowdelay */
       /* We assume the buffering is between 2.5 and 5 ms */
-      int offset = 2*subframe - buffering;
+      offset = 2*subframe - buffering;
       celt_assert(offset>=0 && offset <= subframe);
-      x += C*offset;
       len -= offset;
       e[1]=mem[1];
       e_1[1]=1.f/(EPSILON+mem[1]);
@@ -681,6 +687,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
       pos = 3;
    } else {
       pos=1;
+      offset=0;
    }
    N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE);
    /* Just silencing a warning, it's really initialized later */
@@ -692,7 +699,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
       int j;
       tmp=EPSILON;
 
-      downmix(x, sub, subframe, i*subframe, 0, -2, C);
+      downmix(x, sub, subframe, i*subframe+offset, 0, -2, C);
       if (i==0)
          memx = sub[0];
       for (j=0;j<subframe;j++)
@@ -836,6 +843,12 @@ opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
          LM--;
       frame_size = (Fs/400<<LM);
    } else
+#else
+   (void)analysis_pcm;
+   (void)C;
+   (void)bitrate_bps;
+   (void)delay_compensation;
+   (void)downmix;
 #endif
    {
       frame_size = frame_size_select(frame_size, variable_duration, Fs);
@@ -924,7 +937,8 @@ opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int3
 
 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
                 unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
-                const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix)
+                const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,
+                int analysis_channels, downmix_func downmix, int float_api)
 {
     void *silk_enc;
     CELTEncoder *celt_enc;
@@ -954,9 +968,11 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     int total_buffer;
     opus_val16 stereo_width;
     const CELTMode *celt_mode;
+#ifndef DISABLE_FLOAT_API
     AnalysisInfo analysis_info;
     int analysis_read_pos_bak=-1;
     int analysis_read_subframe_bak=-1;
+#endif
     VARDECL(opus_val16, tmp_prefill);
 
     ALLOC_STACK;
@@ -982,9 +998,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
 
     lsb_depth = IMIN(lsb_depth, st->lsb_depth);
 
-    analysis_info.valid = 0;
     celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
 #ifndef DISABLE_FLOAT_API
+    analysis_info.valid = 0;
 #ifdef FIXED_POINT
     if (st->silk_mode.complexity >= 10 && st->Fs==48000)
 #else
@@ -997,6 +1013,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
              c1, c2, analysis_channels, st->Fs,
              lsb_depth, downmix, &analysis_info);
     }
+#else
+    (void)analysis_pcm;
+    (void)analysis_size;
 #endif
 
     st->voice_ratio = -1;
@@ -1377,7 +1396,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
              st->user_forced_mode = MODE_CELT_ONLY;
           tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50,
                 tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth,
-                NULL, 0, c1, c2, analysis_channels, downmix);
+                NULL, 0, c1, c2, analysis_channels, downmix, float_api);
           if (tmp_len<0)
           {
              RESTORE_STACK;
@@ -1424,8 +1443,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     ec_enc_init(&enc, data, max_data_bytes-1);
 
     ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16);
-    for (i=0;i<total_buffer*st->channels;i++)
-       pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i];
+    OPUS_COPY(pcm_buf, &st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels], total_buffer*st->channels);
 
     if (st->mode == MODE_CELT_ONLY)
        hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
@@ -1444,7 +1462,20 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     } else {
        dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
     }
-
+#ifndef FIXED_POINT
+    if (float_api)
+    {
+       opus_val32 sum;
+       sum = celt_inner_prod(&pcm_buf[total_buffer*st->channels], &pcm_buf[total_buffer*st->channels], frame_size*st->channels, st->arch);
+       /* This should filter out both NaNs and ridiculous signals that could
+          cause NaNs further down. */
+       if (!(sum < 1e9f) || celt_isnan(sum))
+       {
+          OPUS_CLEAR(&pcm_buf[total_buffer*st->channels], frame_size*st->channels);
+          st->hp_mem[0] = st->hp_mem[1] = st->hp_mem[2] = st->hp_mem[3] = 0;
+       }
+    }
+#endif
 
 
     /* SILK processing */
@@ -1599,8 +1630,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
             prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400);
             gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset,
                   0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs);
-            for(i=0;i<prefill_offset;i++)
-               st->delay_buffer[i]=0;
+            OPUS_CLEAR(st->delay_buffer, prefill_offset);
 #ifdef FIXED_POINT
             pcm_silk = st->delay_buffer;
 #else
@@ -1727,15 +1757,18 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16);
     if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
     {
-       for (i=0;i<st->channels*st->Fs/400;i++)
-          tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
+       OPUS_COPY(tmp_prefill, &st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels], st->channels*st->Fs/400);
     }
 
-    for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)
-        st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size];
-    for (;i<st->encoder_buffer*st->channels;i++)
-        st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i];
-
+    if (st->channels*(st->encoder_buffer-(frame_size+total_buffer)) > 0)
+    {
+       OPUS_MOVE(st->delay_buffer, &st->delay_buffer[st->channels*frame_size], st->channels*(st->encoder_buffer-frame_size-total_buffer));
+       OPUS_COPY(&st->delay_buffer[st->channels*(st->encoder_buffer-frame_size-total_buffer)],
+             &pcm_buf[0],
+             (frame_size+total_buffer)*st->channels);
+    } else {
+       OPUS_COPY(st->delay_buffer, &pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels], st->encoder_buffer*st->channels);
+    }
     /* gain_fade() and stereo_fade() need to be after the buffer copying
        because we don't want any of this to affect the SILK part */
     if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) {
@@ -1955,7 +1988,8 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra
 
    for (i=0;i<frame_size*st->channels;i++)
       in[i] = FLOAT2INT16(pcm[i]);
-   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16,
+                            pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1);
    RESTORE_STACK;
    return ret;
 }
@@ -1977,7 +2011,8 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram
          , st->analysis.subframe_mem
 #endif
          );
-   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16,
+                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
 }
 
 #else
@@ -2002,7 +2037,8 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram
 
    for (i=0;i<frame_size*st->channels;i++)
       in[i] = (1.0f/32768)*pcm[i];
-   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16,
+                            pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
    RESTORE_STACK;
    return ret;
 }
@@ -2019,7 +2055,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra
          st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
          delay_compensation, downmix_float, st->analysis.subframe_mem);
    return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24,
-                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
+                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1);
 }
 #endif
 
@@ -2108,7 +2144,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
         case OPUS_SET_MAX_BANDWIDTH_REQUEST:
         {
             opus_int32 value = va_arg(ap, opus_int32);
-            if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) 
+            if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND)
             {
                goto bad_arg;
             }
@@ -2418,11 +2454,14 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
         {
            void *silk_enc;
            silk_EncControlStruct dummy;
+           char *start;
            silk_enc = (char*)st+st->silk_enc_offset;
+#ifndef DISABLE_FLOAT_API
+           tonality_analysis_reset(&st->analysis);
+#endif
 
-           OPUS_CLEAR((char*)&st->OPUS_ENCODER_RESET_START,
-                 sizeof(OpusEncoder)-
-                 ((char*)&st->OPUS_ENCODER_RESET_START - (char*)st));
+           start = (char*)&st->OPUS_ENCODER_RESET_START;
+           OPUS_CLEAR(start, sizeof(OpusEncoder) - (start - (char*)st));
 
            celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
            silk_InitEncoder( silk_enc, st->arch, &dummy );
diff --git a/src/opus_multistream_decoder.c b/src/opus_multistream_decoder.c
index a05fa1e7..b95eaa6e 100644
--- a/src/opus_multistream_decoder.c
+++ b/src/opus_multistream_decoder.c
@@ -75,7 +75,7 @@ int opus_multistream_decoder_init(
    char *ptr;
 
    if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
-       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+       (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
       return OPUS_BAD_ARG;
 
    st->layout.nb_channels = channels;
@@ -119,7 +119,7 @@ OpusMSDecoder *opus_multistream_decoder_create(
    int ret;
    OpusMSDecoder *st;
    if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
-       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+       (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
    {
       if (error)
          *error = OPUS_BAD_ARG;
diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c
index 49e27913..9e857735 100644
--- a/src/opus_multistream_encoder.c
+++ b/src/opus_multistream_encoder.c
@@ -41,6 +41,7 @@
 #include "modes.h"
 #include "bands.h"
 #include "quant_bands.h"
+#include "pitch.h"
 
 typedef struct {
    int nb_streams;
@@ -71,6 +72,7 @@ typedef void (*opus_copy_channel_in_func)(
 
 struct OpusMSEncoder {
    ChannelLayout layout;
+   int arch;
    int lfe_stream;
    int application;
    int variable_duration;
@@ -98,7 +100,8 @@ static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st)
       else
          ptr += align(mono_size);
    }
-   return (opus_val32*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32));
+   /* void* cast avoids clang -Wcast-align warning */
+   return (opus_val32*)(void*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32));
 }
 
 static opus_val32 *ms_get_window_mem(OpusMSEncoder *st)
@@ -117,7 +120,8 @@ static opus_val32 *ms_get_window_mem(OpusMSEncoder *st)
       else
          ptr += align(mono_size);
    }
-   return (opus_val32*)ptr;
+   /* void* cast avoids clang -Wcast-align warning */
+   return (opus_val32*)(void*)ptr;
 }
 
 static int validate_encoder_layout(const ChannelLayout *layout)
@@ -199,7 +203,7 @@ static opus_val16 logSum(opus_val16 a, opus_val16 b)
       max = b;
       diff = SUB32(EXTEND32(b),EXTEND32(a));
    }
-   if (diff >= QCONST16(8.f, DB_SHIFT))
+   if (!(diff < QCONST16(8.f, DB_SHIFT)))  /* inverted to catch NaNs */
       return max;
 #ifdef FIXED_POINT
    low = SHR32(diff, DB_SHIFT-1);
@@ -218,7 +222,7 @@ opus_val16 logSum(opus_val16 a, opus_val16 b)
 #endif
 
 void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem,
-      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in
+      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in, int arch
 )
 {
    int c;
@@ -257,7 +261,21 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b
       OPUS_COPY(in, mem+c*overlap, overlap);
       (*copy_channel_in)(x, 1, pcm, channels, c, len);
       celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0);
-      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);
+#ifndef FIXED_POINT
+      {
+         opus_val32 sum;
+         sum = celt_inner_prod(in, in, frame_size+overlap, 0);
+         /* This should filter out both NaNs and ridiculous signals that could
+            cause NaNs further down. */
+         if (!(sum < 1e9f) || celt_isnan(sum))
+         {
+            OPUS_CLEAR(in, frame_size+overlap);
+            preemph_mem[c] = 0;
+         }
+      }
+#endif
+      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window,
+            overlap, celt_mode->maxLM-LM, 1, arch);
       if (upsample != 1)
       {
          int bound = len;
@@ -267,7 +285,7 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b
             freq[i] = 0;
       }
 
-      compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<<LM);
+      compute_band_energies(celt_mode, freq, bandE, 21, 1, LM);
       amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1);
       /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */
       for (i=1;i<21;i++)
@@ -408,9 +426,10 @@ static int opus_multistream_encoder_init_impl(
    char *ptr;
 
    if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
-       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+       (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
       return OPUS_BAD_ARG;
 
+   st->arch = opus_select_arch();
    st->layout.nb_channels = channels;
    st->layout.nb_streams = streams;
    st->layout.nb_coupled_streams = coupled_streams;
@@ -530,7 +549,7 @@ OpusMSEncoder *opus_multistream_encoder_create(
    int ret;
    OpusMSEncoder *st;
    if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
-       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+       (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
    {
       if (error)
          *error = OPUS_BAD_ARG;
@@ -566,6 +585,7 @@ OpusMSEncoder *opus_multistream_surround_encoder_create(
 )
 {
    int ret;
+   opus_int32 size;
    OpusMSEncoder *st;
    if ((channels>255) || (channels<1))
    {
@@ -573,7 +593,14 @@ OpusMSEncoder *opus_multistream_surround_encoder_create(
          *error = OPUS_BAD_ARG;
       return NULL;
    }
-   st = (OpusMSEncoder *)opus_alloc(opus_multistream_surround_encoder_get_size(channels, mapping_family));
+   size = opus_multistream_surround_encoder_get_size(channels, mapping_family);
+   if (!size)
+   {
+      if (error)
+         *error = OPUS_UNIMPLEMENTED;
+      return NULL;
+   }
+   st = (OpusMSEncoder *)opus_alloc(size);
    if (st==NULL)
    {
       if (error)
@@ -591,7 +618,7 @@ OpusMSEncoder *opus_multistream_surround_encoder_create(
    return st;
 }
 
-static void surround_rate_allocation(
+static opus_int32 surround_rate_allocation(
       OpusMSEncoder *st,
       opus_int32 *rate,
       int frame_size
@@ -605,6 +632,7 @@ static void surround_rate_allocation(
    int lfe_offset;
    int coupled_ratio; /* Q8 */
    int lfe_ratio;     /* Q8 */
+   opus_int32 rate_sum=0;
 
    ptr = (char*)st + align(sizeof(OpusMSEncoder));
    opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
@@ -660,7 +688,10 @@ static void surround_rate_allocation(
          rate[i] = stream_offset+channel_rate;
       else
          rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8);
+      rate[i] = IMAX(rate[i], 500);
+      rate_sum += rate[i];
    }
+   return rate_sum;
 }
 
 /* Max size in case the encoder decides to return three frames */
@@ -674,7 +705,8 @@ static int opus_multistream_encode_native
     unsigned char *data,
     opus_int32 max_data_bytes,
     int lsb_depth,
-    downmix_func downmix
+    downmix_func downmix,
+    int float_api
 )
 {
    opus_int32 Fs;
@@ -694,6 +726,8 @@ static int opus_multistream_encode_native
    opus_val32 *mem = NULL;
    opus_val32 *preemph_mem=NULL;
    int frame_size;
+   opus_int32 rate_sum;
+   opus_int32 smallest_packet;
    ALLOC_STACK;
 
    if (st->surround)
@@ -737,6 +771,14 @@ static int opus_multistream_encode_native
       RESTORE_STACK;
       return OPUS_BAD_ARG;
    }
+
+   /* Smallest packet the encoder can produce. */
+   smallest_packet = st->layout.nb_streams*2-1;
+   if (max_data_bytes < smallest_packet)
+   {
+      RESTORE_STACK;
+      return OPUS_BUFFER_TOO_SMALL;
+   }
    ALLOC(buf, 2*frame_size, opus_val16);
    coupled_size = opus_encoder_get_size(2);
    mono_size = opus_encoder_get_size(1);
@@ -744,21 +786,23 @@ static int opus_multistream_encode_native
    ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
    if (st->surround)
    {
-      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in);
-   }
-
-   if (max_data_bytes < 4*st->layout.nb_streams-1)
-   {
-      RESTORE_STACK;
-      return OPUS_BUFFER_TOO_SMALL;
+      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch);
    }
 
    /* Compute bitrate allocation between streams (this could be a lot better) */
-   surround_rate_allocation(st, bitrates, frame_size);
+   rate_sum = surround_rate_allocation(st, bitrates, frame_size);
 
    if (!vbr)
-      max_data_bytes = IMIN(max_data_bytes, 3*st->bitrate_bps/(3*8*Fs/frame_size));
-
+   {
+      if (st->bitrate_bps == OPUS_AUTO)
+      {
+         max_data_bytes = IMIN(max_data_bytes, 3*rate_sum/(3*8*Fs/frame_size));
+      } else if (st->bitrate_bps != OPUS_BITRATE_MAX)
+      {
+         max_data_bytes = IMIN(max_data_bytes, IMAX(smallest_packet,
+                          3*st->bitrate_bps/(3*8*Fs/frame_size)));
+      }
+   }
    ptr = (char*)st + align(sizeof(OpusMSEncoder));
    for (s=0;s<st->layout.nb_streams;s++)
    {
@@ -843,13 +887,15 @@ static int opus_multistream_encode_native
          opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));
       /* number of bytes left (+Toc) */
       curr_max = max_data_bytes - tot_size;
-      /* Reserve three bytes for the last stream and four for the others */
-      curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1);
+      /* Reserve one byte for the last stream and two for the others */
+      curr_max -= IMAX(0,2*(st->layout.nb_streams-s-1)-1);
       curr_max = IMIN(curr_max,MS_FRAME_TMP);
+      /* Repacketizer will add one or two bytes for self-delimited frames */
+      if (s != st->layout.nb_streams-1) curr_max -=  curr_max>253 ? 2 : 1;
       if (!vbr && s == st->layout.nb_streams-1)
          opus_encoder_ctl(enc, OPUS_SET_BITRATE(curr_max*(8*Fs/frame_size)));
       len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth,
-            pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix);
+            pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix, float_api);
       if (len<0)
       {
          RESTORE_STACK;
@@ -922,7 +968,7 @@ int opus_multistream_encode(
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_short,
-      pcm, frame_size, data, max_data_bytes, 16, downmix_int);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0);
 }
 
 #ifndef DISABLE_FLOAT_API
@@ -935,7 +981,7 @@ int opus_multistream_encode_float(
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_float,
-      pcm, frame_size, data, max_data_bytes, 16, downmix_float);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_float, 1);
 }
 #endif
 
@@ -951,7 +997,7 @@ int opus_multistream_encode_float
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_float,
-      pcm, frame_size, data, max_data_bytes, 24, downmix_float);
+      pcm, frame_size, data, max_data_bytes, 24, downmix_float, 1);
 }
 
 int opus_multistream_encode(
@@ -963,7 +1009,7 @@ int opus_multistream_encode(
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_short,
-      pcm, frame_size, data, max_data_bytes, 16, downmix_int);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0);
 }
 #endif
 
diff --git a/src/opus_private.h b/src/opus_private.h
index 83225f2b..3b62eed0 100644
--- a/src/opus_private.h
+++ b/src/opus_private.h
@@ -33,6 +33,8 @@
 #include "opus.h"
 #include "celt.h"
 
+#include <stddef.h> /* offsetof */
+
 struct OpusRepacketizer {
    unsigned char toc;
    int nb_frames;
@@ -86,10 +88,6 @@ typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int
 void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
 void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
 
-int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
-                int bitrate, opus_val16 tonality, float *mem, int buffering,
-                downmix_func downmix);
-
 int encode_size(int size, unsigned char *data);
 
 opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs);
@@ -104,16 +102,23 @@ opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
 
 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
       unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
-      const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix);
+      const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,
+      int analysis_channels, downmix_func downmix, int float_api);
 
 int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
       opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
       opus_int32 *packet_offset, int soft_clip);
 
-/* Make sure everything's aligned to sizeof(void *) bytes */
+/* Make sure everything is properly aligned. */
 static OPUS_INLINE int align(int i)
 {
-    return (i+(int)sizeof(void *)-1)&-(int)sizeof(void *);
+    struct foo {char c; union { void* p; opus_int32 i; opus_val32 v; } u;};
+
+    unsigned int alignment = offsetof(struct foo, u);
+
+    /* Optimizing compilers should optimize div and multiply into and
+       for all sensible alignment values. */
+    return ((i + alignment - 1) / alignment) * alignment;
 }
 
 int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
diff --git a/src/repacketizer.c b/src/repacketizer.c
index a62675ce..f27e9ab9 100644
--- a/src/repacketizer.c
+++ b/src/repacketizer.c
@@ -219,8 +219,9 @@ opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int
    }
    if (pad)
    {
-      for (i=ptr-data;i<maxlen;i++)
-         data[i] = 0;
+      /* Fill padding with zeros. */
+      while (ptr<data+maxlen)
+         *ptr++=0;
    }
    return tot_size;
 }
author	flim <flim@google.com>	2016-01-26 14:33:44 +0100
committer	flim <flim@google.com>	2016-03-04 17:40:32 +0100
commit	c91ee5b5642fcc4969150f73d5f6848f88bf1638 (patch)
tree	900cb39b975dfed729e37e5810fef61fd92d7a70 /src
parent	1391dbf0ccd121ce7a49d30e2142d36c8d404990 (diff)
download	libopus-c91ee5b5642fcc4969150f73d5f6848f88bf1638.tar.gz