1 files changed, 105 insertions, 13 deletions
diff --git a/celt/celt_decoder.c b/celt/celt_decoder.c
index b978bb34..567d7456 100644
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -73,6 +73,7 @@ struct OpusCustomDecoder {
    int downsample;
    int start, end;
    int signalling;
+   int disable_inv;
    int arch;
 
    /* Everything beyond this point gets cleared on a reset */
@@ -163,6 +164,11 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod
    st->start = 0;
    st->end = st->mode->effEBands;
    st->signalling = 1;
+#ifdef ENABLE_UPDATE_DRAFT
+   st->disable_inv = channels == 1;
+#else
+   st->disable_inv = 0;
+#endif
    st->arch = opus_select_arch();
 
    opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
@@ -177,6 +183,36 @@ void opus_custom_decoder_destroy(CELTDecoder *st)
 }
 #endif /* CUSTOM_MODES */
 
+#ifndef CUSTOM_MODES
+/* Special case for stereo with no downsampling and no accumulation. This is
+   quite common and we can make it faster by processing both channels in the
+   same loop, reducing overhead due to the dependency loop in the IIR filter. */
+static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, const opus_val16 coef0,
+      celt_sig *mem)
+{
+   celt_sig * OPUS_RESTRICT x0;
+   celt_sig * OPUS_RESTRICT x1;
+   celt_sig m0, m1;
+   int j;
+   x0=in[0];
+   x1=in[1];
+   m0 = mem[0];
+   m1 = mem[1];
+   for (j=0;j<N;j++)
+   {
+      celt_sig tmp0, tmp1;
+      /* Add VERY_SMALL to x[] first to reduce dependency chain. */
+      tmp0 = x0[j] + VERY_SMALL + m0;
+      tmp1 = x1[j] + VERY_SMALL + m1;
+      m0 = MULT16_32_Q15(coef0, tmp0);
+      m1 = MULT16_32_Q15(coef0, tmp1);
+      pcm[2*j  ] = SCALEOUT(SIG2WORD16(tmp0));
+      pcm[2*j+1] = SCALEOUT(SIG2WORD16(tmp1));
+   }
+   mem[0] = m0;
+   mem[1] = m1;
+}
+#endif
 
 #ifndef RESYNTH
 static
@@ -190,6 +226,14 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
    opus_val16 coef0;
    VARDECL(celt_sig, scratch);
    SAVE_STACK;
+#ifndef CUSTOM_MODES
+   /* Short version for common case. */
+   if (downsample == 1 && C == 2 && !accum)
+   {
+      deemphasis_stereo_simple(in, pcm, N, coef[0], mem);
+      return;
+   }
+#endif
 #ifndef FIXED_POINT
    (void)accum;
    celt_assert(accum==0);
@@ -225,7 +269,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
          /* Shortcut for the standard (non-custom modes) case */
          for (j=0;j<N;j++)
          {
-            celt_sig tmp = x[j] + m + VERY_SMALL;
+            celt_sig tmp = x[j] + VERY_SMALL + m;
             m = MULT16_32_Q15(coef0, tmp);
             scratch[j] = tmp;
          }
@@ -246,7 +290,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
          {
             for (j=0;j<N;j++)
             {
-               celt_sig tmp = x[j] + m + VERY_SMALL;
+               celt_sig tmp = x[j] + VERY_SMALL + m;
                m = MULT16_32_Q15(coef0, tmp);
                y[j*C] = SCALEOUT(SIG2WORD16(tmp));
             }
@@ -333,7 +377,7 @@ void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
       denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M,
             downsample, silence);
       for (i=0;i<N;i++)
-         freq[i] = HALF32(ADD32(freq[i],freq2[i]));
+         freq[i] = ADD32(HALF32(freq[i]), HALF32(freq2[i]));
       for (b=0;b<B;b++)
          clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
    } else {
@@ -345,6 +389,12 @@ void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
             clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch);
       } while (++c<CC);
    }
+   /* Saturate IMDCT output so that we can't overflow in the pitch postfilter
+      or in the */
+   c=0; do {
+      for (i=0;i<N;i++)
+         out_syn[c][i] = SATURATE(out_syn[c][i], SIG_SAT);
+   } while (++c<CC);
    RESTORE_STACK;
 }
 
@@ -506,10 +556,11 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
    } else {
       /* Pitch-based PLC */
       const opus_val16 *window;
+      opus_val16 *exc;
       opus_val16 fade = Q15ONE;
       int pitch_index;
       VARDECL(opus_val32, etmp);
-      VARDECL(opus_val16, exc);
+      VARDECL(opus_val16, _exc);
 
       if (loss_count == 0)
       {
@@ -520,7 +571,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
       }
 
       ALLOC(etmp, overlap, opus_val32);
-      ALLOC(exc, MAX_PERIOD, opus_val16);
+      ALLOC(_exc, MAX_PERIOD+LPC_ORDER, opus_val16);
+      exc = _exc+LPC_ORDER;
       window = mode->window;
       c=0; do {
          opus_val16 decay;
@@ -561,6 +613,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
 #endif
             }
             _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
+#ifdef FIXED_POINT
+         /* For fixed-point, apply bandwidth expansion until we can guarantee that
+            no overflow can happen in the IIR filter. This means:
+            32768*sum(abs(filter)) < 2^31 */
+         while (1) {
+            opus_val16 tmp=Q15ONE;
+            opus_val32 sum=QCONST16(1., SIG_SHIFT);
+            for (i=0;i<LPC_ORDER;i++)
+               sum += ABS16(lpc[c*LPC_ORDER+i]);
+            if (sum < 65535) break;
+            for (i=0;i<LPC_ORDER;i++)
+            {
+               tmp = MULT16_16_Q15(QCONST16(.99f,15), tmp);
+               lpc[c*LPC_ORDER+i] = MULT16_16_Q15(lpc[c*LPC_ORDER+i], tmp);
+            }
+         }
+#endif
          }
          /* We want the excitation for 2 pitch periods in order to look for a
             decaying signal, but we can't get more than MAX_PERIOD. */
@@ -568,15 +637,14 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
          /* Initialize the LPC history with the samples just before the start
             of the region for which we're computing the excitation. */
          {
-            opus_val16 lpc_mem[LPC_ORDER];
             for (i=0;i<LPC_ORDER;i++)
             {
-               lpc_mem[i] =
-                     ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
+               exc[MAX_PERIOD-exc_length-LPC_ORDER+i] =
+                     ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-LPC_ORDER+i], SIG_SHIFT);
             }
             /* Compute the excitation for exc_length samples before the loss. */
             celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
-                  exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem, st->arch);
+                  exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, st->arch);
          }
 
          /* Check if the waveform is decaying, and if so how fast.
@@ -630,9 +698,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
             tmp = ROUND16(
                   buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j],
                   SIG_SHIFT);
-            S1 += SHR32(MULT16_16(tmp, tmp), 8);
+            S1 += SHR32(MULT16_16(tmp, tmp), 10);
          }
-
          {
             opus_val16 lpc_mem[LPC_ORDER];
             /* Copy the last decoded samples (prior to the overlap region) to
@@ -644,6 +711,10 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
             celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
                   buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
                   lpc_mem, st->arch);
+#ifdef FIXED_POINT
+            for (i=0; i < extrapolation_len; i++)
+               buf[DECODE_BUFFER_SIZE-N+i] = SATURATE(buf[DECODE_BUFFER_SIZE-N+i], SIG_SAT);
+#endif
          }
 
          /* Check if the synthesis energy is higher than expected, which can
@@ -654,7 +725,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
             for (i=0;i<extrapolation_len;i++)
             {
                opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT);
-               S2 += SHR32(MULT16_16(tmp, tmp), 8);
+               S2 += SHR32(MULT16_16(tmp, tmp), 10);
             }
             /* This checks for an "explosion" in the synthesis. */
 #ifdef FIXED_POINT
@@ -979,7 +1050,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
 
    quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
          NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
-         len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, st->arch);
+         len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, 0,
+         st->arch, st->disable_inv);
 
    if (anti_collapse_rsv > 0)
    {
@@ -1234,6 +1306,26 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
          *value=st->rng;
       }
       break;
+      case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST:
+      {
+          opus_int32 value = va_arg(ap, opus_int32);
+          if(value<0 || value>1)
+          {
+             goto bad_arg;
+          }
+          st->disable_inv = value;
+      }
+      break;
+      case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST:
+      {
+          opus_int32 *value = va_arg(ap, opus_int32*);
+          if (!value)
+          {
+             goto bad_arg;
+          }
+          *value = st->disable_inv;
+      }
+      break;
       default:
          goto bad_request;
    }