aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jean-Marc Valin <jmvalin@jmvalin.ca> 2013-10-01 17:06:09 -0400
committer Jean-Marc Valin <jmvalin@jmvalin.ca> 2013-10-01 17:06:09 -0400
commit 7c60625f808e66abe016401eb449e9940916b3c1 (patch)
tree 10e47c9416906fd08812be1d23d51a18b71fd6ee
parent c435f06b308e52d7a5d2aad9eec98a1541832b47 (diff)
parent 6b9087aa6efbb98731216d2b03cc977826fd6062 (diff)
download libopus-7c60625f808e66abe016401eb449e9940916b3c1.tar.gz
Merge branch 'exp_surround1'
-rw-r--r-- Makefile.am 3
-rw-r--r-- celt/arch.h 1
-rw-r--r-- celt/celt.h 18
-rw-r--r-- celt/celt_encoder.c 124
-rw-r--r-- celt/fixed_generic.h 1
-rw-r--r-- src/analysis.c 93
-rw-r--r-- src/analysis.h 10
-rw-r--r-- src/mlp.c 39
-rw-r--r-- src/mlp.h 4
-rw-r--r-- src/opus_encoder.c 349
-rw-r--r-- src/opus_multistream_encoder.c 562
-rw-r--r-- src/opus_private.h 17
-rw-r--r-- src/tansig_table.h 2
13 files changed, 728 insertions, 495 deletions
diff --git a/Makefile.am b/Makefile.am
index b090ca09..edbcc42e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -20,9 +20,10 @@ if FIXED_POINT
SILK_SOURCES += $(SILK_SOURCES_FIXED)
else
SILK_SOURCES += $(SILK_SOURCES_FLOAT)
-OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
endif
+OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
+
if CPU_ARM
CELT_SOURCES += $(CELT_SOURCES_ARM)
endif
diff --git a/celt/arch.h b/celt/arch.h
index 78e2635f..f9c98567 100644
--- a/celt/arch.h
+++ b/celt/arch.h
@@ -185,6 +185,7 @@ typedef float celt_ener;
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
#define MULT16_16_Q11_32(a,b) ((a)*(b))
+#define MULT16_16_Q11(a,b) ((a)*(b))
#define MULT16_16_Q13(a,b) ((a)*(b))
#define MULT16_16_Q14(a,b) ((a)*(b))
#define MULT16_16_Q15(a,b) ((a)*(b))
diff --git a/celt/celt.h b/celt/celt.h
index 0911c72f..cdb76c8b 100644
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -52,11 +52,11 @@ extern "C" {
typedef struct {
int valid;
- opus_val16 tonality;
- opus_val16 tonality_slope;
- opus_val16 noisiness;
- opus_val16 activity;
- opus_val16 music_prob;
+ float tonality;
+ float tonality_slope;
+ float noisiness;
+ float activity;
+ float music_prob;
int bandwidth;
}AnalysisInfo;
@@ -109,10 +109,7 @@ typedef struct {
#define OPUS_SET_LFE_REQUEST 10024
#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)
-#define OPUS_SET_ENERGY_SAVE_REQUEST 10026
-#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x)
-
-#define OPUS_SET_ENERGY_MASK_REQUEST 10028
+#define OPUS_SET_ENERGY_MASK_REQUEST 10026
#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
/* Encoder stuff */
@@ -193,6 +190,9 @@ extern const signed char tf_select_table[4][8];
int resampling_factor(opus_int32 rate);
+void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+ int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip);
+
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap);
diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c
index 31bea1bb..241fb372 100644
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -111,7 +111,6 @@ struct OpusCustomEncoder {
opus_val32 overlap_max;
opus_val16 stereo_saving;
int intensity;
- opus_val16 *energy_save;
opus_val16 *energy_mask;
opus_val16 spec_avg;
@@ -452,7 +451,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
}
-static void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip)
{
int i;
@@ -744,7 +743,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM,
static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
const opus_val16 *bandLogE, int end, int LM, int C, int N0,
AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
- int intensity)
+ int intensity, opus_val16 surround_trim)
{
int i;
opus_val32 diff=0;
@@ -818,11 +817,12 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
if (diff < -QCONST16(10.f, DB_SHIFT))
trim_index++;
trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
+ trim -= SHR16(surround_trim, DB_SHIFT-8);
trim -= 2*SHR16(tf_estimate, 14-8);
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
if (analysis->valid)
{
- trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05f)));
+ trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), QCONST16(2.f, 8)*(analysis->tonality_slope+.05f)));
}
#endif
@@ -877,7 +877,7 @@ static int stereo_analysis(const CELTMode *m, const celt_norm *X,
static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
- int effectiveBytes, opus_int32 *tot_boost_, int lfe)
+ int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc)
{
int i, c;
opus_int32 tot_boost=0;
@@ -940,6 +940,8 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
follower[i] = MAX16(0, bandLogE[i]-follower[i]);
}
}
+ for (i=start;i<end;i++)
+ follower[i] = MAX16(follower[i], surround_dynalloc[i]);
/* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
if ((!vbr || constrained_vbr)&&!isTransient)
{
@@ -1140,7 +1142,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
target = base_target;
/*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
if (analysis->valid && analysis->activity<.4)
target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity));
#endif
@@ -1165,7 +1167,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
QCONST16(0.02f,14) : QCONST16(0.04f,14);
target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
/* Apply tonality boost */
if (analysis->valid && !lfe)
{
@@ -1291,6 +1293,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
int transient_got_disabled=0;
opus_val16 surround_masking=0;
opus_val16 temporal_vbr=0;
+ opus_val16 surround_trim = 0;
+ VARDECL(opus_val16, surround_dynalloc);
ALLOC_STACK;
mode = st->mode;
@@ -1526,37 +1530,83 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
}
}
amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
- if (st->energy_save)
- {
- opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
-#ifdef FIXED_POINT
- /* Compensate for the 1/8 gain we apply in the fixed-point downshift to avoid overflows. */
- offset -= QCONST16(3.0f, DB_SHIFT);
-#endif
- for(i=0;i<C*nbEBands;i++)
- st->energy_save[i]=bandLogE[i]-offset;
- st->energy_save=NULL;
- }
+
+ ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
+ for(i=0;i<st->end;i++)
+ surround_dynalloc[i] = 0;
/* This computes how much masking takes place between surround channels */
- if (st->energy_mask&&!st->lfe)
+ if (st->start==0&&st->energy_mask&&!st->lfe)
{
+ int mask_end;
+ int midband;
+ int count_dynalloc;
opus_val32 mask_avg=0;
- opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
+ opus_val32 diff=0;
+ int count=0;
+ mask_end = st->lastCodedBands;
for (c=0;c<C;c++)
{
- opus_val16 followE, followMask;
- followE = followMask = -QCONST16(14.f, DB_SHIFT);
- for(i=0;i<st->end;i++)
+ for(i=0;i<mask_end;i++)
{
- /* We use a simple follower to approximate the masking spreading function. */
- followE = MAX16(followE-QCONST16(1.f, DB_SHIFT), bandLogE[nbEBands*c+i]-offset);
- followMask = MAX16(followMask-QCONST16(1.f, DB_SHIFT), st->energy_mask[nbEBands*c+i]);
- mask_avg += followE-followMask;
+ opus_val16 mask;
+ mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i],
+ QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+ if (mask > 0)
+ mask = HALF16(mask);
+ mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]);
+ count += eBands[i+1]-eBands[i];
+ diff += MULT16_16(mask, 1+2*i-mask_end);
}
}
- surround_masking = DIV32_16(mask_avg,C*st->end) + QCONST16(.7f, DB_SHIFT);
- surround_masking = MIN16(MAX16(surround_masking, -QCONST16(2.f, DB_SHIFT)), QCONST16(.2f, DB_SHIFT));
- surround_masking -= HALF16(HALF16(surround_masking));
+ mask_avg = DIV32_16(mask_avg,count);
+ mask_avg += QCONST16(.2f, DB_SHIFT);
+ diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);
+ /* Again, being conservative */
+ diff = HALF32(diff);
+ diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT));
+ /* Find the band that's in the middle of the coded spectrum */
+ for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++);
+ count_dynalloc=0;
+ for(i=0;i<mask_end;i++)
+ {
+ opus_val32 lin;
+ opus_val16 unmask;
+ lin = mask_avg + diff*(i-midband);
+ if (C==2)
+ unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]);
+ else
+ unmask = st->energy_mask[i];
+ unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT));
+ unmask -= lin;
+ if (unmask > QCONST16(.25f, DB_SHIFT))
+ {
+ surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT);
+ count_dynalloc++;
+ }
+ }
+ if (count_dynalloc>=3)
+ {
+ /* If we need dynalloc in many bands, it's probably because our
+ initial masking rate was too low. */
+ mask_avg += QCONST16(.25f, DB_SHIFT);
+ if (mask_avg>0)
+ {
+ /* Something went really wrong in the original calculations,
+ disabling masking. */
+ mask_avg = 0;
+ diff = 0;
+ for(i=0;i<mask_end;i++)
+ surround_dynalloc[i] = 0;
+ } else {
+ for(i=0;i<mask_end;i++)
+ surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
+ }
+ }
+ mask_avg += QCONST16(.2f, DB_SHIFT);
+ /* Convert to 1/64th units used for the trim */
+ surround_trim = 64*diff;
+ /*printf("%d %d ", mask_avg, surround_trim);*/
+ surround_masking = mask_avg;
}
/* Temporal VBR (but not for LFE) */
if (!st->lfe)
@@ -1683,7 +1733,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets,
st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
- eBands, LM, effectiveBytes, &tot_boost, st->lfe);
+ eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc);
/* For LFE, everything interesting is in the first band */
if (st->lfe)
offsets[0] = IMIN(8, effectiveBytes/3);
@@ -1756,7 +1806,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
alloc_trim = 5;
else
alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
- st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity);
+ st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim);
ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
tell = ec_tell_frac(enc);
}
@@ -1859,7 +1909,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
bits -= anti_collapse_rsv;
signalBandwidth = st->end-1;
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
if (st->analysis.valid)
{
int min_bandwidth;
@@ -2261,12 +2311,6 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
st->lfe = value;
}
break;
- case OPUS_SET_ENERGY_SAVE_REQUEST:
- {
- opus_val16 *value = va_arg(ap, opus_val16*);
- st->energy_save=value;
- }
- break;
case OPUS_SET_ENERGY_MASK_REQUEST:
{
opus_val16 *value = va_arg(ap, opus_val16*);
diff --git a/celt/fixed_generic.h b/celt/fixed_generic.h
index 0e77976e..657e67c8 100644
--- a/celt/fixed_generic.h
+++ b/celt/fixed_generic.h
@@ -116,6 +116,7 @@
#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
+#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
diff --git a/src/analysis.c b/src/analysis.c
index a9d2073f..53247df9 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -184,12 +184,12 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
for (;i<DETECT_SIZE;i++)
psum += tonal->pspeech[i];
psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
- /*printf("%f %f\n", psum, info_out->music_prob);*/
+ /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/
info_out->music_prob = psum;
}
-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
{
int i, b;
const kiss_fft_state *kfft;
@@ -234,7 +234,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
kfft = celt_mode->mdct.kfft[0];
if (tonal->count==0)
tonal->mem_fill = 240;
- downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);
+ downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
{
tonal->mem_fill += len;
@@ -253,14 +253,14 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
for (i=0;i<N2;i++)
{
float w = analysis_window[i];
- in[i].r = MULT16_16(w, tonal->inmem[i]);
- in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
- in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
- in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
+ in[i].r = w*tonal->inmem[i];
+ in[i].i = w*tonal->inmem[N2+i];
+ in[N-i-1].r = w*tonal->inmem[N-i-1];
+ in[N-i-1].i = w*tonal->inmem[N+N2-i-1];
}
OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
- downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);
+ downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
tonal->mem_fill = 240 + remaining;
opus_fft(kfft, in, out);
@@ -325,8 +325,12 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
float stationarity;
for (i=tbands[b];i<tbands[b+1];i++)
{
- float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
- + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+ float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+ + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
+#ifdef FIXED_POINT
+ /* FIXME: It's probably best to change the BFCC filter initial state instead */
+ binE *= 5.55e-17f;
+#endif
E += binE;
tE += binE*tonality[i];
nE += binE*2.f*(.5f-noisiness[i]);
@@ -334,7 +338,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
tonal->E[tonal->E_count][b] = E;
frame_noisiness += nE/(1e-15f+E);
- frame_loudness += celt_sqrt(E+1e-10f);
+ frame_loudness += sqrt(E+1e-10f);
logE[b] = (float)log(E+1e-10f);
tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
@@ -343,21 +347,21 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
tonal->highE[b]+=.5f;
tonal->lowE[b]-=.5f;
}
- relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
+ relativeE += (logE[b]-tonal->lowE[b])/(1e-15+tonal->highE[b]-tonal->lowE[b]);
L1=L2=0;
for (i=0;i<NB_FRAMES;i++)
{
- L1 += celt_sqrt(tonal->E[i][b]);
+ L1 += sqrt(tonal->E[i][b]);
L2 += tonal->E[i][b];
}
- stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2));
+ stationarity = MIN16(0.99f,L1/sqrt(1e-15+NB_FRAMES*L2));
stationarity *= stationarity;
stationarity *= stationarity;
frame_stationarity += stationarity;
/*band_tonality[b] = tE/(1e-15+E)*/;
- band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
+ band_tonality[b] = MAX16(tE/(1e-15+E), stationarity*tonal->prev_band_tonality[b]);
#if 0
if (b>=NB_TONAL_SKIP_BANDS)
{
@@ -379,6 +383,9 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
bandwidth = 0;
maxE = 0;
noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
+#ifdef FIXED_POINT
+ noise_floor *= 1<<(15+SIG_SHIFT);
+#endif
noise_floor *= noise_floor;
for (b=0;b<NB_TOT_BANDS;b++)
{
@@ -389,8 +396,8 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
band_end = extra_bands[b+1];
for (i=band_start;i<band_end;i++)
{
- float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
- + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+ float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+ + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
E += binE;
}
maxE = MAX32(maxE, E);
@@ -469,14 +476,14 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
tonal->mem[i] = BFCC[i];
}
for (i=0;i<9;i++)
- features[11+i] = celt_sqrt(tonal->std[i]);
+ features[11+i] = sqrt(tonal->std[i]);
features[20] = info->tonality;
features[21] = info->activity;
features[22] = frame_stationarity;
features[23] = info->tonality_slope;
features[24] = tonal->lowECount;
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
mlp_process(&net, features, frame_probs);
frame_probs[0] = .5f*(frame_probs[0]+1);
/* Curve fitting between the MLP probability and the actual probability */
@@ -611,44 +618,30 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
RESTORE_STACK;
}
-int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
- const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
- int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+ int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+ int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
{
int offset;
int pcm_len;
- /* Avoid overflow/wrap-around of the analysis buffer */
- frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);
-
- pcm_len = frame_size - analysis->analysis_offset;
- offset = 0;
- do {
- tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);
- offset += 480;
- pcm_len -= 480;
- } while (pcm_len>0);
- analysis->analysis_offset = frame_size;
-
- if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+ if (analysis_pcm != NULL)
{
- int LM = 3;
- LM = optimize_framesize((const opus_val16*)pcm, frame_size, C, Fs, bitrate_bps,
- analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);
- while ((Fs/400<<LM)>frame_size)
- LM--;
- frame_size = (Fs/400<<LM);
- } else {
- frame_size = frame_size_select(frame_size, variable_duration, Fs);
+ /* Avoid overflow/wrap-around of the analysis buffer */
+ analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);
+
+ pcm_len = analysis_frame_size - analysis->analysis_offset;
+ offset = analysis->analysis_offset;
+ do {
+ tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+ offset += 480;
+ pcm_len -= 480;
+ } while (pcm_len>0);
+ analysis->analysis_offset = analysis_frame_size;
+
+ analysis->analysis_offset -= frame_size;
}
- if (frame_size<0)
- return -1;
- analysis->analysis_offset -= frame_size;
- /* Only perform analysis up to 20-ms frames. Longer ones will be split if
- they're in CELT-only mode. */
analysis_info->valid = 0;
tonality_get_info(analysis, analysis_info, frame_size);
-
- return frame_size;
}
diff --git a/src/analysis.h b/src/analysis.h
index 8cd78883..cf37792d 100644
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -42,7 +42,7 @@ typedef struct {
float angle[240];
float d_angle[240];
float d2_angle[240];
- float inmem[ANALYSIS_BUF_SIZE];
+ opus_val32 inmem[ANALYSIS_BUF_SIZE];
int mem_fill; /* number of usable samples in the buffer */
float prev_band_tonality[NB_TBANDS];
float prev_tonality;
@@ -79,12 +79,12 @@ typedef struct {
} TonalityAnalysisState;
void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
- const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix);
+ const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix);
void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);
-int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
- const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
- int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+ int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+ int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
#endif
diff --git a/src/mlp.c b/src/mlp.c
index 90e94a5f..73b1d315 100644
--- a/src/mlp.c
+++ b/src/mlp.c
@@ -35,7 +35,7 @@
#include "tansig_table.h"
#define MAX_NEURONS 100
-#ifdef FIXED_POINT
+#if 0
static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
{
int i;
@@ -43,9 +43,9 @@ static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
/*double x, y;*/
opus_val16 dy, yy; /* Q14 */
/*x = 1.9073e-06*_x;*/
- if (_x>=QCONST32(10,19))
+ if (_x>=QCONST32(8,19))
return QCONST32(1.,14);
- if (_x<=-QCONST32(10,19))
+ if (_x<=-QCONST32(8,19))
return -QCONST32(1.,14);
xx = EXTRACT16(SHR32(_x, 8));
/*i = lrint(25*x);*/
@@ -62,11 +62,11 @@ static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
}
#else
/*extern const float tansig_table[501];*/
-static inline opus_val16 tansig_approx(opus_val16 x)
+static inline float tansig_approx(float x)
{
int i;
- opus_val16 y, dy;
- opus_val16 sign=1;
+ float y, dy;
+ float sign=1;
if (x>=8)
return 1;
if (x<=-8)
@@ -85,6 +85,7 @@ static inline opus_val16 tansig_approx(opus_val16 x)
}
#endif
+#if 0
void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
{
int j;
@@ -108,4 +109,28 @@ void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
}
}
-
+#else
+void mlp_process(const MLP *m, const float *in, float *out)
+{
+ int j;
+ float hidden[MAX_NEURONS];
+ const float *W = m->weights;
+ /* Evaluates a two-stage network: m->topo[0] values read from in[] feed
+    m->topo[1] hidden units, which in turn feed m->topo[2] outputs written
+    to out[]. W walks m->weights sequentially; every unit consumes one
+    leading additive term followed by one weight per input of its layer.
+    Nothing here checks m->topo[1] against MAX_NEURONS, so the caller must
+    keep it within the size of hidden[]. */
+ for (j=0;j<m->topo[1];j++) /* hidden layer */
+ {
+ int k;
+ float sum = *W++; /* start from the unit's additive term */
+ for (k=0;k<m->topo[0];k++)
+ sum = sum + in[k]**W++; /* in[k] * (*W++) */
+ hidden[j] = tansig_approx(sum);
+ }
+ for (j=0;j<m->topo[2];j++) /* output layer, fed by hidden[] */
+ {
+ int k;
+ float sum = *W++;
+ for (k=0;k<m->topo[1];k++)
+ sum = sum + hidden[k]**W++;
+ out[j] = tansig_approx(sum);
+ }
+}
+#endif
diff --git a/src/mlp.h b/src/mlp.h
index 68ff68d8..86c8e061 100644
--- a/src/mlp.h
+++ b/src/mlp.h
@@ -33,9 +33,9 @@
typedef struct {
int layers;
const int *topo;
- const opus_val16 *weights;
+ const float *weights;
} MLP;
-void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out);
+void mlp_process(const MLP *m, const float *in, float *out);
#endif /* _MLP_H_ */
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index ac40edac..c151fd07 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -95,10 +95,10 @@ struct OpusEncoder {
int silk_bw_switch;
/* Sampling rate (at the API level) */
int first;
- int energy_masking;
+ opus_val16 * energy_masking;
StereoWidthState width_mem;
opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2];
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
TonalityAnalysisState analysis;
int detected_bandwidth;
int analysis_offset;
@@ -201,7 +201,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
st->silk_mode.payloadSize_ms = 20;
st->silk_mode.bitRate = 25000;
st->silk_mode.packetLossPercentage = 0;
- st->silk_mode.complexity = 10;
+ st->silk_mode.complexity = 9;
st->silk_mode.useInBandFEC = 0;
st->silk_mode.useDTX = 0;
st->silk_mode.useCBR = 0;
@@ -212,7 +212,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR;
celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0));
- celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(10));
+ celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity));
st->use_vbr = 1;
/* Makes constrained VBR the default (safer for real-time use) */
@@ -551,7 +551,7 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m
return st->user_bitrate_bps;
}
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
/* Don't use more than 60 ms for the frame size analysis */
#define MAX_DYNAMIC_FRAMESIZE 24
/* Estimates how much the bitrate will be boosted based on the sub-frame energy */
@@ -685,32 +685,6 @@ static int transient_viterbi(const float *E, const float *E_1, int N, int frame_
return best_state;
}
-void downmix_float(const void *_x, float *sub, int subframe, int offset, int C)
-{
- const float *x;
- int c, j;
- x = (const float *)_x;
- for (j=0;j<subframe;j++)
- sub[j] = x[(j+offset)*C];
- for (c=1;c<C;c++)
- for (j=0;j<subframe;j++)
- sub[j] += x[(j+offset)*C+c];
-}
-
-void downmix_int(const void *_x, float *sub, int subframe, int offset, int C)
-{
- const opus_int16 *x;
- int c, j;
- x = (const opus_int16 *)_x;
- for (j=0;j<subframe;j++)
- sub[j] = x[(j+offset)*C];
- for (c=1;c<C;c++)
- for (j=0;j<subframe;j++)
- sub[j] += x[(j+offset)*C+c];
- for (j=0;j<subframe;j++)
- sub[j] *= (1.f/32768);
-}
-
int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,
downmix_func downmix)
@@ -723,10 +697,10 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
int bestLM=0;
int subframe;
int pos;
- VARDECL(opus_val16, sub);
+ VARDECL(opus_val32, sub);
subframe = Fs/400;
- ALLOC(sub, subframe, opus_val16);
+ ALLOC(sub, subframe, opus_val32);
e[0]=mem[0];
e_1[0]=1.f/(EPSILON+mem[0]);
if (buffering)
@@ -754,7 +728,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
int j;
tmp=EPSILON;
- downmix(x, sub, subframe, i*subframe, C);
+ downmix(x, sub, subframe, i*subframe, 0, -2, C);
if (i==0)
memx = sub[0];
for (j=0;j<subframe;j++)
@@ -784,6 +758,76 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
#endif
+#ifndef DISABLE_FLOAT_API
+/* Downmixes interleaved float PCM into one channel of opus_val32 samples.
+   _x:       input samples, C interleaved channels per sample frame
+   sub:      output buffer; receives `subframe` downmixed samples
+   subframe: number of sample frames to produce
+   offset:   index of the first input sample frame to read
+   c1:       channel that always contributes
+   c2:       >-1 adds that channel to c1; -2 also adds channels 1..C-1;
+             any other value adds nothing
+   C:        number of interleaved channels in _x */
+void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+{
+   const float *x;
+   opus_val32 scale;
+   int j;
+   x = (const float *)_x;
+   for (j=0;j<subframe;j++)
+      sub[j] = SCALEIN(x[(j+offset)*C+c1]);
+   if (c2>-1)
+   {
+      for (j=0;j<subframe;j++)
+         sub[j] += SCALEIN(x[(j+offset)*C+c2]);
+   } else if (c2==-2)
+   {
+      int c;
+      for (c=1;c<C;c++)
+      {
+         for (j=0;j<subframe;j++)
+            sub[j] += SCALEIN(x[(j+offset)*C+c]);
+      }
+   }
+#ifdef FIXED_POINT
+   scale = (1<<SIG_SHIFT);
+#else
+   scale = 1.f;
+#endif
+   /* Normalise by the number of channels that were summed. The all-channel
+      mode is selected by c2==-2; comparing the channel count C against -2
+      could never be true, so that mode was always divided by 2 instead of C.
+      NOTE(review): when only c1 contributes (c2 neither >-1 nor -2) the
+      sample is still halved -- confirm that attenuation is intended. */
+   if (c2==-2)
+      scale /= C;
+   else
+      scale /= 2;
+   for (j=0;j<subframe;j++)
+      sub[j] *= scale;
+}
+#endif
+
+/* Downmixes interleaved 16-bit PCM into one channel of opus_val32 samples.
+   _x:       input samples (opus_int16), C interleaved channels per frame
+   sub:      output buffer; receives `subframe` downmixed samples
+   subframe: number of sample frames to produce
+   offset:   index of the first input sample frame to read
+   c1:       channel that always contributes
+   c2:       >-1 adds that channel to c1; -2 also adds channels 1..C-1;
+             any other value adds nothing
+   C:        number of interleaved channels in _x */
+void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+{
+   const opus_int16 *x;
+   opus_val32 scale;
+   int j;
+   x = (const opus_int16 *)_x;
+   for (j=0;j<subframe;j++)
+      sub[j] = x[(j+offset)*C+c1];
+   if (c2>-1)
+   {
+      for (j=0;j<subframe;j++)
+         sub[j] += x[(j+offset)*C+c2];
+   } else if (c2==-2)
+   {
+      int c;
+      for (c=1;c<C;c++)
+      {
+         for (j=0;j<subframe;j++)
+            sub[j] += x[(j+offset)*C+c];
+      }
+   }
+#ifdef FIXED_POINT
+   scale = (1<<SIG_SHIFT);
+#else
+   /* Float build: bring 16-bit integer samples down by 1/32768. */
+   scale = 1.f/32768;
+#endif
+   /* Normalise by the number of channels that were summed. The all-channel
+      mode is selected by c2==-2; comparing the channel count C against -2
+      could never be true, so that mode was always divided by 2 instead of C.
+      NOTE(review): when only c1 contributes (c2 neither >-1 nor -2) the
+      sample is still halved -- confirm that attenuation is intended. */
+   if (c2==-2)
+      scale /= C;
+   else
+      scale /= 2;
+   for (j=0;j<subframe;j++)
+      sub[j] *= scale;
+}
+
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
{
int new_size;
@@ -805,6 +849,29 @@ opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_
return new_size;
}
+opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
+ int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+ int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem)
+{ /* Chooses the frame size to encode from the requested frame_size and the
+     variable_duration setting. Returns the chosen size, or -1 when the
+     selection produced a negative value. */
+#ifndef DISABLE_FLOAT_API
+ if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+ {
+ int LM = 3; /* initial value is overwritten by the call below */
+ LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps,
+ 0, subframe_mem, delay_compensation, downmix);
+ while ((Fs/400<<LM)>frame_size) /* lower LM until Fs/400<<LM fits in frame_size */
+ LM--;
+ frame_size = (Fs/400<<LM);
+ } else
+#endif
+ { /* every other case, and the only path when DISABLE_FLOAT_API is defined */
+ frame_size = frame_size_select(frame_size, variable_duration, Fs);
+ }
+ if (frame_size<0)
+ return -1;
+ return frame_size;
+}
+
opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem)
{
opus_val16 corr;
@@ -883,11 +950,8 @@ opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int3
}
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
- unsigned char *data, opus_int32 out_data_bytes, int lsb_depth
-#ifndef FIXED_POINT
- , AnalysisInfo *analysis_info
-#endif
- )
+ unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
+ const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix)
{
void *silk_enc;
CELTEncoder *celt_enc;
@@ -916,6 +980,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
int total_buffer;
opus_val16 stereo_width;
+ const CELTMode *celt_mode;
+ AnalysisInfo analysis_info;
+ int analysis_read_pos_bak=-1;
+ int analysis_read_subframe_bak=-1;
VARDECL(opus_val16, tmp_prefill);
ALLOC_STACK;
@@ -941,17 +1009,34 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
lsb_depth = IMIN(lsb_depth, st->lsb_depth);
+ analysis_info.valid = 0;
+ celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
+#ifndef DISABLE_FLOAT_API
+#ifdef FIXED_POINT
+ if (st->silk_mode.complexity >= 10 && st->Fs==48000)
+#else
+ if (st->silk_mode.complexity >= 7 && st->Fs==48000)
+#endif
+ {
+ analysis_read_pos_bak = st->analysis.read_pos;
+ analysis_read_subframe_bak = st->analysis.read_subframe;
+ run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size,
+ c1, c2, analysis_channels, st->Fs,
+ lsb_depth, downmix, &analysis_info);
+ }
+#endif
+
st->voice_ratio = -1;
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
st->detected_bandwidth = 0;
- if (analysis_info->valid)
+ if (analysis_info.valid)
{
int analysis_bandwidth;
if (st->signal_type == OPUS_AUTO)
- st->voice_ratio = (int)floor(.5+100*(1-analysis_info->music_prob));
+ st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
- analysis_bandwidth = analysis_info->bandwidth;
+ analysis_bandwidth = analysis_info.bandwidth;
if (analysis_bandwidth<=12)
st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
else if (analysis_bandwidth<=14)
@@ -1281,6 +1366,11 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
VARDECL(OpusRepacketizer, rp);
opus_int32 bytes_per_frame;
+ if (analysis_read_pos_bak!= -1)
+ {
+ st->analysis.read_pos = analysis_read_pos_bak;
+ st->analysis.read_subframe = analysis_read_subframe_bak;
+ }
nb_frames = frame_size > st->Fs/25 ? 3 : 2;
bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames);
@@ -1310,11 +1400,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
/* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
if (to_celt && i==nb_frames-1)
st->user_forced_mode = MODE_CELT_ONLY;
- tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth
-#ifndef FIXED_POINT
- , analysis_info
-#endif
- );
+ tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50,
+ tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth,
+ NULL, 0, c1, c2, analysis_channels, downmix);
if (tmp_len<0)
{
RESTORE_STACK;
@@ -1419,6 +1507,45 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
st->silk_mode.bitRate = total_bitRate;
}
+ /* Surround masking for SILK */
+ if (st->energy_masking && st->use_vbr && !st->lfe)
+ {
+ opus_val32 mask_sum=0;
+ opus_val16 masking_depth;
+ opus_int32 rate_offset;
+ int c;
+ int end = 17;
+ opus_int16 srate = 16000;
+ if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND)
+ {
+ end = 13;
+ srate = 8000;
+ } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
+ {
+ end = 15;
+ srate = 12000;
+ }
+ for (c=0;c<st->channels;c++)
+ {
+ for(i=0;i<end;i++)
+ {
+ opus_val16 mask;
+ mask = MAX16(MIN16(st->energy_masking[21*c+i],
+ QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+ if (mask > 0)
+ mask = HALF16(mask);
+ mask_sum += mask;
+ }
+ }
+ /* Conservative rate reduction, we cut the masking in half */
+ masking_depth = HALF16(mask_sum / end*st->channels);
+ rate_offset = PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT);
+ rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3);
+ rate_offset += QCONST16(.4f, DB_SHIFT);
+ st->silk_mode.bitRate += rate_offset;
+ bytes_target += rate_offset * frame_size / (8 * st->Fs);
+ }
+
st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs;
st->silk_mode.nChannelsAPI = st->channels;
st->silk_mode.nChannelsInternal = st->stream_channels;
@@ -1476,9 +1603,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (prefill)
{
opus_int32 zero=0;
- const CELTMode *celt_mode;
int prefill_offset;
- celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
/* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode
a discontinuity. The exact location is what we need to avoid leaving any "gap"
in the audio when mixing with the redundant CELT frame. Here we can afford to
@@ -1589,12 +1714,12 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->use_vbr)
{
opus_int32 bonus=0;
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50)
{
bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50);
- if (analysis_info->valid)
- bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info->tonality));
+ if (analysis_info.valid)
+ bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality));
}
#endif
celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
@@ -1625,9 +1750,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
/* gain_fade() and stereo_fade() need to be after the buffer copying
because we don't want any of this to affect the SILK part */
if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) {
- const CELTMode *celt_mode;
-
- celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
gain_fade(pcm_buf, pcm_buf,
st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs);
}
@@ -1638,9 +1760,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
/* Apply stereo width reduction (at low bitrates) */
if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) {
opus_val16 g1, g2;
- const CELTMode *celt_mode;
-
- celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
g1 = st->hybrid_stereo_width_Q14;
g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14);
#ifdef FIXED_POINT
@@ -1697,9 +1816,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
ec_enc_shrink(&enc, nb_compr_bytes);
}
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
if (redundancy || st->mode != MODE_SILK_ONLY)
- celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info));
+ celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
#endif
/* 5 ms redundant frame for CELT->SILK */
@@ -1825,114 +1944,88 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
#ifdef FIXED_POINT
#ifndef DISABLE_FLOAT_API
+/* Fixed-point build: encodes one frame of float PCM.
+   The frame size to encode is chosen by compute_frame_size() from the
+   caller-supplied analysis_frame_size; the samples are then converted to
+   opus_int16 with FLOAT2INT16() and handed to opus_encode_native(), together
+   with the original float buffer (pcm/analysis_frame_size) for analysis.
+   Returns the value of opus_encode_native(), or OPUS_BAD_ARG when the frame
+   size is rejected. */
-opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
+opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
unsigned char *data, opus_int32 max_data_bytes)
{
int i, ret;
+   int frame_size;
+   int delay_compensation;
VARDECL(opus_int16, in);
ALLOC_STACK;
- frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
- if(frame_size<0)
- {
- RESTORE_STACK;
- return OPUS_BAD_ARG;
- }
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_float, st->analysis.subframe_mem);
+   /* frame_size sizes the stack buffer below, so a rejected (negative) value
+      must be caught first. This mirrors the check that used to follow
+      frame_size_select(); compute_frame_size() is defined elsewhere and is
+      assumed to report errors the same way -- TODO confirm. */
+   if (frame_size<0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
ALLOC(in, frame_size*st->channels, opus_int16);
for (i=0;i<frame_size*st->channels;i++)
in[i] = FLOAT2INT16(pcm[i]);
- ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
RESTORE_STACK;
return ret;
}
#endif
+/* Fixed-point build: encodes one frame of 16-bit PCM.
+   compute_frame_size() picks the frame size to encode from the
+   caller-supplied analysis_frame_size; the samples are passed unchanged to
+   opus_encode_native(), both as the signal to encode and as the analysis
+   input. Returns the value of opus_encode_native(). */
-opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,
+opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
unsigned char *data, opus_int32 out_data_bytes)
{
- frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
- if(frame_size<0)
- {
- return OPUS_BAD_ARG;
- }
- return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16);
+   int frame_size;
+   int delay_compensation;
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   /* pcm holds opus_int16 samples, so the integer downmix callback has to be
+      used here as well as in the opus_encode_native() call below; the float
+      callback would read the buffer with the wrong sample type. */
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_int, st->analysis.subframe_mem);
+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
}
#else
+/* Float build: encodes one frame of 16-bit PCM.
+   compute_frame_size() picks the frame size to encode from the
+   caller-supplied analysis_frame_size; the samples are scaled by 1/32768 into
+   a float buffer for opus_encode_native(), while the original 16-bit buffer
+   is passed along as the analysis input.
+   Returns the value of opus_encode_native(), or OPUS_BAD_ARG when the frame
+   size is rejected. */
-opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,
+opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
unsigned char *data, opus_int32 max_data_bytes)
{
int i, ret;
- const CELTMode *celt_mode;
+   int frame_size;
int delay_compensation;
- int lsb_depth;
VARDECL(float, in);
- AnalysisInfo analysis_info;
ALLOC_STACK;
- opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));
if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
delay_compensation = 0;
else
delay_compensation = st->delay_compensation;
-
- lsb_depth = IMIN(16, st->lsb_depth);
-
- analysis_info.valid = 0;
- if (st->silk_mode.complexity >= 7 && st->Fs==48000)
- {
- frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,
- frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_int, &analysis_info);
- } else {
- frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
- }
- if(frame_size<0)
- {
- RESTORE_STACK;
- return OPUS_BAD_ARG;
- }
+   /* pcm holds opus_int16 samples, so the integer downmix callback has to be
+      used here as well as in the opus_encode_native() call below. */
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_int, st->analysis.subframe_mem);
+   /* frame_size sizes the stack buffer below, so a rejected (negative) value
+      must be caught first. This mirrors the check that used to follow
+      frame_size_select(); compute_frame_size() is defined elsewhere and is
+      assumed to report errors the same way -- TODO confirm. */
+   if (frame_size<0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
ALLOC(in, frame_size*st->channels, float);
for (i=0;i<frame_size*st->channels;i++)
in[i] = (1.0f/32768)*pcm[i];
- ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, &analysis_info);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
RESTORE_STACK;
return ret;
}
-opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
+opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, /* float build: float PCM is encoded directly, no sample conversion */
unsigned char *data, opus_int32 out_data_bytes)
{
- const CELTMode *celt_mode;
+ int frame_size; /* frame size actually encoded; set from compute_frame_size() below */
int delay_compensation;
- int lsb_depth;
- AnalysisInfo analysis_info;
-
- opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));
if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
delay_compensation = 0;
else
delay_compensation = st->delay_compensation;
-
- lsb_depth = IMIN(24, st->lsb_depth);
-
- analysis_info.valid = 0;
- if (st->silk_mode.complexity >= 7 && st->Fs==48000)
- {
- frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,
- frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_float, &analysis_info);
- } else {
- frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
- }
- if(frame_size<0)
- {
- return OPUS_BAD_ARG;
- }
-
- return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, &analysis_info);
-
+ frame_size = compute_frame_size(pcm, analysis_frame_size,
+ st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+ delay_compensation, downmix_float, st->analysis.subframe_mem);
+ return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, /* 24 is passed as lsb_depth; the same buffer serves as signal and analysis input */
+ pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
}
#endif
@@ -2349,20 +2442,10 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value));
}
break;
- case OPUS_SET_ENERGY_SAVE_REQUEST:
- {
- opus_val16 *value = va_arg(ap, opus_val16*);
- if (!value)
- {
- goto bad_arg;
- }
- ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_SAVE(value));
- }
- break;
case OPUS_SET_ENERGY_MASK_REQUEST:
{
opus_val16 *value = va_arg(ap, opus_val16*);
- st->energy_masking = (value!=NULL);
+ st->energy_masking = value;
ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value));
}
break;
diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c
index 4cddbffd..31447bf9 100644
--- a/src/opus_multistream_encoder.c
+++ b/src/opus_multistream_encoder.c
@@ -36,8 +36,11 @@
#include <stdarg.h>
#include "float_cast.h"
#include "os_support.h"
-#include "analysis.h"
#include "mathops.h"
+#include "mdct.h"
+#include "modes.h"
+#include "bands.h"
+#include "quant_bands.h"
typedef struct {
int nb_streams;
@@ -57,17 +60,65 @@ static const VorbisLayout vorbis_mappings[8] = {
{5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */
};
+typedef void (*opus_copy_channel_in_func)( /* callback that extracts one channel of the caller's PCM into an opus_val16 buffer */
+ opus_val16 *dst, /* destination buffer */
+ int dst_stride, /* distance between consecutive written samples in dst */
+ const void *src, /* caller's PCM; the sample type is known only to the callback */
+ int src_stride, /* distance between consecutive frames in src */
+ int src_channel, /* index of the channel to read from src */
+ int frame_size /* number of samples to copy */
+);
+
struct OpusMSEncoder {
- TonalityAnalysisState analysis;
ChannelLayout layout;
int lfe_stream;
+ int application; /* application value given to the init function */
int variable_duration;
int surround;
opus_int32 bitrate_bps;
opus_val32 subframe_mem[3];
/* Encoder states go here */
+ /* then opus_val32 window_mem[channels*120]; located by ms_get_window_mem() */
+ /* then opus_val32 preemph_mem[channels]; located by ms_get_preemph_mem() */
};
+/* Returns the address of the pre-emphasis memory inside the multistream
+   encoder allocation. It is found by stepping over the aligned OpusMSEncoder
+   header, every per-stream encoder state, and finally the window memory
+   (120 opus_val32 values per channel). */
+static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st)
+{
+   int stream;
+   char *cursor;
+   const int stereo_size = opus_encoder_get_size(2);
+   const int mono_size = opus_encoder_get_size(1);
+
+   cursor = (char*)st + align(sizeof(OpusMSEncoder));
+   /* The first nb_coupled_streams states are two-channel encoders,
+      the remaining ones are single-channel encoders. */
+   for (stream=0;stream<st->layout.nb_streams;stream++)
+      cursor += align((stream < st->layout.nb_coupled_streams) ? stereo_size : mono_size);
+   /* Skip the window memory that precedes the pre-emphasis memory. */
+   cursor += st->layout.nb_channels*120*sizeof(opus_val32);
+   return (opus_val32*)cursor;
+}
+
+/* Returns the address of the window memory inside the multistream encoder
+   allocation. It starts right after the aligned OpusMSEncoder header and
+   all per-stream encoder states. */
+static opus_val32 *ms_get_window_mem(OpusMSEncoder *st)
+{
+   int stream;
+   char *cursor;
+   const int stereo_size = opus_encoder_get_size(2);
+   const int mono_size = opus_encoder_get_size(1);
+
+   cursor = (char*)st + align(sizeof(OpusMSEncoder));
+   /* The first nb_coupled_streams states are two-channel encoders,
+      the remaining ones are single-channel encoders. */
+   for (stream=0;stream<st->layout.nb_streams;stream++)
+      cursor += align((stream < st->layout.nb_coupled_streams) ? stereo_size : mono_size);
+   return (opus_val32*)cursor;
+}
static int validate_encoder_layout(const ChannelLayout *layout)
{
@@ -88,6 +139,206 @@ static int validate_encoder_layout(const ChannelLayout *layout)
return 1;
}
+/* Fills pos[] with the place of each input channel in the mix:
+   0 = don't mix, 1 = left, 2 = center, 3 = right.
+   Only the entries listed for the matching channel count are written
+   (4 entries for 4 channels, 6 for 3/5/6 channels, 7 for 7, 8 for 8);
+   for any other channel count pos[] is left untouched. */
+static void channel_pos(int channels, int pos[8])
+{
+   static const int layout_4[4] = {1, 3, 1, 3};
+   static const int layout_3_5_6[6] = {1, 2, 3, 1, 3, 0};
+   static const int layout_7[7] = {1, 2, 3, 1, 3, 2, 0};
+   static const int layout_8[8] = {1, 2, 3, 1, 3, 1, 3, 0};
+   const int *layout;
+   int count;
+   int k;
+
+   switch (channels)
+   {
+   case 4:
+      layout = layout_4;
+      count = 4;
+      break;
+   case 3:
+   case 5:
+   case 6:
+      layout = layout_3_5_6;
+      count = 6;
+      break;
+   case 7:
+      layout = layout_7;
+      count = 7;
+      break;
+   case 8:
+      layout = layout_8;
+      count = 8;
+      break;
+   default:
+      return;
+   }
+   for (k=0;k<count;k++)
+      pos[k] = layout[k];
+}
+
+#if 1
+/* Computes a rough approximation of log2(2^a + 2^b).
+   The result is the larger of a and b plus a correction term read from
+   diff_table, indexed by twice the absolute difference between a and b and
+   linearly interpolated between neighbouring entries. When the difference
+   is 8 or more, the larger input is returned unchanged.
+   The disabled #else branch below gives a direct formula based on
+   log2() and pow(). */
+static opus_val16 logSum(opus_val16 a, opus_val16 b)
+{
+ opus_val16 max;
+ opus_val32 diff;
+ opus_val16 frac;
+ static const opus_val16 diff_table[17] = { /* 17 slots, 9 initialisers: the remaining entries are zero */
+ QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT),
+ QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT),
+ QCONST16(0.0028123f, DB_SHIFT)
+ };
+ int low;
+ if (a>b) /* max = larger input, diff = non-negative difference */
+ {
+ max = a;
+ diff = SUB32(EXTEND32(a),EXTEND32(b));
+ } else {
+ max = b;
+ diff = SUB32(EXTEND32(b),EXTEND32(a));
+ }
+ if (diff >= QCONST16(8.f, DB_SHIFT)) /* keeps low+1 within the 17-entry table */
+ return max;
+#ifdef FIXED_POINT
+ low = SHR32(diff, DB_SHIFT-1); /* integer part of 2*diff */
+ frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT); /* remainder, rescaled for MULT16_16_Q15 */
+#else
+ low = floor(2*diff); /* integer part of 2*diff */
+ frac = 2*diff - low; /* fractional part of 2*diff */
+#endif
+ return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low])); /* linear interpolation between entries low and low+1 */
+}
+#else
+opus_val16 logSum(opus_val16 a, opus_val16 b)
+{
+ return log2(pow(4, a)+ pow(4, b))/2;
+}
+#endif
+
+/* Computes, for every input channel, its log band energy relative to a
+   surround masking curve.
+
+   celt_mode       mode providing the MDCT, window, pre-emphasis and band layout
+   pcm             caller's interleaved PCM, read through copy_channel_in
+   bandLogE        output, 21 values per channel (channel c at bandLogE+21*c)
+   mem             per-channel history of `overlap` samples; read at the start
+                   of each channel and refreshed with the tail of this frame
+   preemph_mem     pre-emphasis filter memory, one value per channel
+   len             number of samples per channel in pcm
+   overlap         MDCT overlap length
+   channels        number of channels in pcm
+   rate            sampling rate, used to derive the upsampling factor
+   copy_channel_in callback that extracts one channel from pcm
+
+   For each channel the signal is pre-emphasised and transformed, its 21 band
+   energies are converted to the log domain and spread across neighbouring
+   bands. Channels are then accumulated into a left and a right mask according
+   to channel_pos() (centre channels contribute to both, lowered by 0.5); the
+   centre mask is the minimum of the two. Finally each channel's values are
+   replaced by their difference to the mask of its position, or by 0 for
+   channels that are not mixed. */
+void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem,
+      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in
+)
+{
+   int c;
+   int i;
+   int LM;
+   int pos[8] = {0};
+   int upsample;
+   int frame_size;
+   opus_val32 bandE[21];
+   opus_val16 maskLogE[3][21];
+   VARDECL(opus_val32, in);
+   VARDECL(opus_val16, x);
+   /* The name must match the ALLOC(freq, ...) below: in stack-allocation
+      modes where VARDECL emits the declaration, a mismatch leaves freq
+      undeclared. */
+   VARDECL(opus_val32, freq);
+   SAVE_STACK;
+
+   upsample = resampling_factor(rate);
+   frame_size = len*upsample;
+
+   /* Find the MDCT size index matching this frame size. */
+   for (LM=0;LM<=celt_mode->maxLM;LM++)
+      if (celt_mode->shortMdctSize<<LM==frame_size)
+         break;
+
+   ALLOC(in, frame_size+overlap, opus_val32);
+   ALLOC(x, len, opus_val16);
+   ALLOC(freq, frame_size, opus_val32);
+
+   channel_pos(channels, pos);
+
+   /* Start the three masks (left, centre, right) at a very low level. */
+   for (c=0;c<3;c++)
+      for (i=0;i<21;i++)
+         maskLogE[c][i] = -QCONST16(28.f, DB_SHIFT);
+
+   for (c=0;c<channels;c++)
+   {
+      OPUS_COPY(in, mem+c*overlap, overlap);
+      (*copy_channel_in)(x, 1, pcm, channels, c, len);
+      preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0);
+      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);
+      if (upsample != 1)
+      {
+         int bound = len;
+         for (i=0;i<bound;i++)
+            freq[i] *= upsample;
+         for (;i<frame_size;i++)
+            freq[i] = 0;
+      }
+
+      compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<<LM);
+      amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1);
+      /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */
+      for (i=1;i<21;i++)
+         bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i-1]-QCONST16(1.f, DB_SHIFT));
+      for (i=19;i>=0;i--)
+         bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i+1]-QCONST16(2.f, DB_SHIFT));
+      if (pos[c]==1)
+      {
+         for (i=0;i<21;i++)
+            maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]);
+      } else if (pos[c]==3)
+      {
+         for (i=0;i<21;i++)
+            maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]);
+      } else if (pos[c]==2)
+      {
+         /* A centre channel feeds both sides, lowered by 0.5. */
+         for (i=0;i<21;i++)
+         {
+            maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT));
+            maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT));
+         }
+      }
+#if 0
+      for (i=0;i<21;i++)
+         printf("%f ", bandLogE[21*c+i]);
+//#else
+      float sum=0;
+      for (i=0;i<21;i++)
+         sum += bandLogE[21*c+i];
+      printf("%f ", sum/21);
+#endif
+      /* Keep the tail of this frame as the next call's overlap history. */
+      OPUS_COPY(mem+c*overlap, in+frame_size, overlap);
+   }
+   for (i=0;i<21;i++)
+      maskLogE[1][i] = MIN32(maskLogE[0][i],maskLogE[2][i]);
+   /* NOTE(review): the offset divides by channels-1, so this presumably
+      expects more than one channel -- confirm against the callers. */
+   for (c=0;c<3;c++)
+      for (i=0;i<21;i++)
+         maskLogE[c][i] += QCONST16(.5f, DB_SHIFT)*log2(2.f/(channels-1));
+#if 0
+   for (c=0;c<3;c++)
+   {
+      for (i=0;i<21;i++)
+         printf("%f ", maskLogE[c][i]);
+   }
+#endif
+   for (c=0;c<channels;c++)
+   {
+      opus_val16 *mask;
+      if (pos[c]!=0)
+      {
+         mask = &maskLogE[pos[c]-1][0];
+         for (i=0;i<21;i++)
+            bandLogE[21*c+i] = bandLogE[21*c+i] - mask[i];
+      } else {
+         for (i=0;i<21;i++)
+            bandLogE[21*c+i] = 0;
+      }
+#if 0
+      for (i=0;i<21;i++)
+         printf("%f ", bandLogE[21*c+i]);
+      printf("\n");
+#endif
+#if 0
+      float sum=0;
+      for (i=0;i<21;i++)
+         sum += bandLogE[21*c+i];
+      printf("%f ", sum/21);
+      printf("\n");
+#endif
+   }
+   RESTORE_STACK;
+}
opus_int32 opus_multistream_encoder_get_size(int nb_streams, int nb_coupled_streams)
{
@@ -132,7 +383,9 @@ opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_
return 0;
size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams);
if (channels>2)
- size += align(opus_encoder_get_size(2));
+ {
+ size += channels*(120*sizeof(opus_val32) + sizeof(opus_val32));
+ }
return size;
}
@@ -161,10 +414,10 @@ static int opus_multistream_encoder_init_impl(
st->layout.nb_streams = streams;
st->layout.nb_coupled_streams = coupled_streams;
st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0;
- OPUS_CLEAR(&st->analysis,1);
if (!surround)
st->lfe_stream = -1;
st->bitrate_bps = OPUS_AUTO;
+ st->application = application;
st->variable_duration = OPUS_FRAMESIZE_ARG;
for (i=0;i<st->layout.nb_channels;i++)
st->layout.mapping[i] = mapping[i];
@@ -192,10 +445,8 @@ static int opus_multistream_encoder_init_impl(
}
if (surround)
{
- OpusEncoder *downmix_enc;
- downmix_enc = (OpusEncoder*)ptr;
- ret = opus_encoder_init(downmix_enc, Fs, 2, OPUS_APPLICATION_AUDIO);
- if(ret!=OPUS_OK)return ret;
+ OPUS_CLEAR(ms_get_preemph_mem(st), channels);
+ OPUS_CLEAR(ms_get_window_mem(st), channels*120);
}
st->surround = surround;
return OPUS_OK;
@@ -339,22 +590,6 @@ OpusMSEncoder *opus_multistream_surround_encoder_create(
return st;
}
-typedef void (*opus_copy_channel_in_func)(
- opus_val16 *dst,
- int dst_stride,
- const void *src,
- int src_stride,
- int src_channel,
- int frame_size
-);
-
-typedef void (*opus_surround_downmix_funct)(
- opus_val16 *dst,
- const void *src,
- int channels,
- int frame_size
-);
-
static void surround_rate_allocation(
OpusMSEncoder *st,
opus_int32 *rate,
@@ -433,15 +668,11 @@ static int opus_multistream_encode_native
OpusMSEncoder *st,
opus_copy_channel_in_func copy_channel_in,
const void *pcm,
- int frame_size,
+ int analysis_frame_size,
unsigned char *data,
opus_int32 max_data_bytes,
int lsb_depth,
- opus_surround_downmix_funct surround_downmix
-#ifndef FIXED_POINT
- , downmix_func downmix
- , const void *pcm_analysis
-#endif
+ downmix_func downmix
)
{
opus_int32 Fs;
@@ -451,31 +682,29 @@ static int opus_multistream_encode_native
char *ptr;
int tot_size;
VARDECL(opus_val16, buf);
+ VARDECL(opus_val16, bandSMR);
unsigned char tmp_data[MS_FRAME_TMP];
OpusRepacketizer rp;
opus_int32 complexity;
-#ifndef FIXED_POINT
- AnalysisInfo analysis_info;
-#endif
const CELTMode *celt_mode;
opus_int32 bitrates[256];
opus_val16 bandLogE[42];
- opus_val16 bandLogE_mono[21];
+ opus_val32 *mem = NULL;
+ opus_val32 *preemph_mem=NULL;
+ int frame_size;
ALLOC_STACK;
+ if (st->surround)
+ {
+ preemph_mem = ms_get_preemph_mem(st);
+ mem = ms_get_window_mem(st);
+ }
+
ptr = (char*)st + align(sizeof(OpusMSEncoder));
opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_COMPLEXITY(&complexity));
opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode));
- if (400*frame_size < Fs)
- {
- RESTORE_STACK;
- return OPUS_BAD_ARG;
- }
-#ifndef FIXED_POINT
- analysis_info.valid = 0;
- if (complexity >= 7 && Fs==48000)
{
opus_int32 delay_compensation;
int channels;
@@ -483,13 +712,15 @@ static int opus_multistream_encode_native
channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation));
delay_compensation -= Fs/400;
+ frame_size = compute_frame_size(pcm, analysis_frame_size,
+ st->variable_duration, channels, Fs, st->bitrate_bps,
+ delay_compensation, downmix, st->subframe_mem);
+ }
- frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm_analysis,
- frame_size, st->variable_duration, channels, Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix, &analysis_info);
- } else
-#endif
+ if (400*frame_size < Fs)
{
- frame_size = frame_size_select(frame_size, st->variable_duration, Fs);
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
}
/* Validate frame_size before using it to allocate stack space.
This mirrors the checks in opus_encode[_float](). */
@@ -504,42 +735,10 @@ static int opus_multistream_encode_native
coupled_size = opus_encoder_get_size(2);
mono_size = opus_encoder_get_size(1);
+ ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
if (st->surround)
{
- int i;
- unsigned char dummy[512];
- /* Temporary kludge -- remove */
- OpusEncoder *downmix_enc;
-
- ptr = (char*)st + align(sizeof(OpusMSEncoder));
- for (s=0;s<st->layout.nb_streams;s++)
- {
- if (s < st->layout.nb_coupled_streams)
- ptr += align(coupled_size);
- else
- ptr += align(mono_size);
- }
- downmix_enc = (OpusEncoder*)ptr;
- surround_downmix(buf, pcm, st->layout.nb_channels, frame_size);
- opus_encoder_ctl(downmix_enc, OPUS_SET_ENERGY_SAVE(bandLogE));
- opus_encoder_ctl(downmix_enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
- opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
- opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_CHANNELS(2));
- opus_encode_native(downmix_enc, buf, frame_size, dummy, 512, lsb_depth
-#ifndef FIXED_POINT
- , &analysis_info
-#endif
- );
- /* Combines the left and right mask into a centre mask. We
- use an approximation for the log of the sum of the energies. */
- for(i=0;i<21;i++)
- {
- opus_val16 diff;
- diff = ABS16(SUB16(bandLogE[i], bandLogE[21+i]));
- diff = diff + HALF16(diff);
- diff = SHR32(HALF32(celt_exp2(-diff)), 16-DB_SHIFT);
- bandLogE_mono[i] = MAX16(bandLogE[i], bandLogE[21+i]) + diff;
- }
+ surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in);
}
if (max_data_bytes < 4*st->layout.nb_streams-1)
@@ -563,10 +762,24 @@ static int opus_multistream_encode_native
opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s]));
if (st->surround)
{
- opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
- opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
+ opus_int32 equiv_rate;
+ equiv_rate = st->bitrate_bps;
+ if (frame_size*50 < Fs)
+ equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels;
+ if (equiv_rate > 112000)
+ opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
+ else if (equiv_rate > 76000)
+ opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND));
+ else if (equiv_rate > 48000)
+ opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND));
+ else
+ opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND));
if (s < st->layout.nb_coupled_streams)
+ {
+ /* To preserve the spatial image, force stereo CELT on coupled streams */
+ opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2));
+ }
}
}
@@ -578,11 +791,13 @@ static int opus_multistream_encode_native
OpusEncoder *enc;
int len;
int curr_max;
+ int c1, c2;
opus_repacketizer_init(&rp);
enc = (OpusEncoder*)ptr;
if (s < st->layout.nb_coupled_streams)
{
+ int i;
int left, right;
left = get_left_channel(&st->layout, s, -1);
right = get_right_channel(&st->layout, s, -1);
@@ -591,28 +806,39 @@ static int opus_multistream_encode_native
(*copy_channel_in)(buf+1, 2,
pcm, st->layout.nb_channels, right, frame_size);
ptr += align(coupled_size);
- /* FIXME: This isn't correct for the coupled center channels in
- 6.1 surround configuration */
if (st->surround)
- opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));
+ {
+ for (i=0;i<21;i++)
+ {
+ bandLogE[i] = bandSMR[21*left+i];
+ bandLogE[21+i] = bandSMR[21*right+i];
+ }
+ }
+ c1 = left;
+ c2 = right;
} else {
+ int i;
int chan = get_mono_channel(&st->layout, s, -1);
(*copy_channel_in)(buf, 1,
pcm, st->layout.nb_channels, chan, frame_size);
ptr += align(mono_size);
if (st->surround)
- opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE_mono));
+ {
+ for (i=0;i<21;i++)
+ bandLogE[i] = bandSMR[21*chan+i];
+ }
+ c1 = chan;
+ c2 = -1;
}
+ if (st->surround)
+ opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));
/* number of bytes left (+Toc) */
curr_max = max_data_bytes - tot_size;
/* Reserve three bytes for the last stream and four for the others */
curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1);
curr_max = IMIN(curr_max,MS_FRAME_TMP);
- len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth
-#ifndef FIXED_POINT
- , &analysis_info
-#endif
- );
+ len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth,
+ pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix);
if (len<0)
{
RESTORE_STACK;
@@ -626,50 +852,12 @@ static int opus_multistream_encode_native
data += len;
tot_size += len;
}
+ /*printf("\n");*/
RESTORE_STACK;
return tot_size;
}
-static void channel_pos(int channels, int pos[8])
-{
- /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */
- if (channels==4)
- {
- pos[0]=1;
- pos[1]=3;
- pos[2]=1;
- pos[3]=3;
- } else if (channels==3||channels==5||channels==6)
- {
- pos[0]=1;
- pos[1]=2;
- pos[2]=3;
- pos[3]=1;
- pos[4]=3;
- pos[5]=0;
- } else if (channels==7)
- {
- pos[0]=1;
- pos[1]=2;
- pos[2]=3;
- pos[3]=1;
- pos[4]=3;
- pos[5]=2;
- pos[6]=0;
- } else if (channels==8)
- {
- pos[0]=1;
- pos[1]=2;
- pos[2]=3;
- pos[3]=1;
- pos[4]=3;
- pos[5]=1;
- pos[6]=3;
- pos[7]=0;
- }
-}
-
#if !defined(DISABLE_FLOAT_API)
static void opus_copy_channel_in_float(
opus_val16 *dst,
@@ -690,57 +878,6 @@ static void opus_copy_channel_in_float(
dst[i*dst_stride] = float_src[i*src_stride+src_channel];
#endif
}
-
-static void opus_surround_downmix_float(
- opus_val16 *dst,
- const void *src,
- int channels,
- int frame_size
-)
-{
- const float *float_src;
- opus_int32 i;
- int pos[8] = {0};
- int c;
- float_src = (const float *)src;
-
- channel_pos(channels, pos);
- for (i=0;i<2*frame_size;i++)
- dst[i]=0;
-
- for (c=0;c<channels;c++)
- {
- if (pos[c]==1)
- {
- for (i=0;i<frame_size;i++)
-#if defined(FIXED_POINT)
- dst[2*i] += SHR16(FLOAT2INT16(float_src[i*channels+c]),3);
-#else
- dst[2*i] += float_src[i*channels+c];
-#endif
- } else if (pos[c]==3)
- {
- for (i=0;i<frame_size;i++)
-#if defined(FIXED_POINT)
- dst[2*i+1] += SHR16(FLOAT2INT16(float_src[i*channels+c]),3);
-#else
- dst[2*i+1] += float_src[i*channels+c];
-#endif
- } else if (pos[c]==2)
- {
- for (i=0;i<frame_size;i++)
- {
-#if defined(FIXED_POINT)
- dst[2*i] += SHR32(MULT16_16(QCONST16(.70711f,15), FLOAT2INT16(float_src[i*channels+c])),3+15);
- dst[2*i+1] += SHR32(MULT16_16(QCONST16(.70711f,15), FLOAT2INT16(float_src[i*channels+c])),3+15);
-#else
- dst[2*i] += .707f*float_src[i*channels+c];
- dst[2*i+1] += .707f*float_src[i*channels+c];
-#endif
- }
- }
- }
-}
#endif
static void opus_copy_channel_in_short(
@@ -763,57 +900,6 @@ static void opus_copy_channel_in_short(
#endif
}
-static void opus_surround_downmix_short(
- opus_val16 *dst,
- const void *src,
- int channels,
- int frame_size
-)
-{
- const opus_int16 *short_src;
- opus_int32 i;
- int pos[8] = {0};
- int c;
- short_src = (const opus_int16 *)src;
-
- channel_pos(channels, pos);
- for (i=0;i<2*frame_size;i++)
- dst[i]=0;
-
- for (c=0;c<channels;c++)
- {
- if (pos[c]==1)
- {
- for (i=0;i<frame_size;i++)
-#if defined(FIXED_POINT)
- dst[2*i] += SHR16(short_src[i*channels+c],3);
-#else
- dst[2*i] += (1/32768.f)*short_src[i*channels+c];
-#endif
- } else if (pos[c]==3)
- {
- for (i=0;i<frame_size;i++)
-#if defined(FIXED_POINT)
- dst[2*i+1] += SHR16(short_src[i*channels+c],3);
-#else
- dst[2*i+1] += (1/32768.f)*short_src[i*channels+c];
-#endif
- } else if (pos[c]==2)
- {
- for (i=0;i<frame_size;i++)
- {
-#if defined(FIXED_POINT)
- dst[2*i] += SHR32(MULT16_16(QCONST16(.70711f,15), short_src[i*channels+c]),3+15);
- dst[2*i+1] += SHR32(MULT16_16(QCONST16(.70711f,15), short_src[i*channels+c]),3+15);
-#else
- dst[2*i] += (.707f/32768.f)*short_src[i*channels+c];
- dst[2*i+1] += (.707f/32768.f)*short_src[i*channels+c];
-#endif
- }
- }
- }
-}
-
#ifdef FIXED_POINT
int opus_multistream_encode(
@@ -825,7 +911,7 @@ int opus_multistream_encode(
)
{
return opus_multistream_encode_native(st, opus_copy_channel_in_short,
- pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short);
+ pcm, frame_size, data, max_data_bytes, 16, downmix_int);
}
#ifndef DISABLE_FLOAT_API
@@ -838,7 +924,7 @@ int opus_multistream_encode_float(
)
{
return opus_multistream_encode_native(st, opus_copy_channel_in_float,
- pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_float);
+ pcm, frame_size, data, max_data_bytes, 16, downmix_float);
}
#endif
@@ -853,9 +939,8 @@ int opus_multistream_encode_float
opus_int32 max_data_bytes
)
{
- int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
return opus_multistream_encode_native(st, opus_copy_channel_in_float,
- pcm, frame_size, data, max_data_bytes, 24, opus_surround_downmix_float, downmix_float, pcm+channels*st->analysis.analysis_offset);
+ pcm, frame_size, data, max_data_bytes, 24, downmix_float);
}
int opus_multistream_encode(
@@ -866,9 +951,8 @@ int opus_multistream_encode(
opus_int32 max_data_bytes
)
{
- int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
return opus_multistream_encode_native(st, opus_copy_channel_in_short,
- pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short, downmix_int, pcm+channels*st->analysis.analysis_offset);
+ pcm, frame_size, data, max_data_bytes, 16, downmix_int);
}
#endif
diff --git a/src/opus_private.h b/src/opus_private.h
index 9d8210b5..0e739ebb 100644
--- a/src/opus_private.h
+++ b/src/opus_private.h
@@ -82,9 +82,9 @@ int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev);
#define OPUS_SET_FORCE_MODE_REQUEST 11002
#define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)
-typedef void (*downmix_func)(const void *, float *, int, int, int);
-void downmix_float(const void *_x, float *sub, int subframe, int offset, int C);
-void downmix_int(const void *_x, float *sub, int subframe, int offset, int C);
+typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);
+void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,
@@ -94,12 +94,13 @@ int encode_size(int size, unsigned char *data);
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs);
+opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
+ int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+ int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem);
+
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
- unsigned char *data, opus_int32 out_data_bytes, int lsb_depth
-#ifndef FIXED_POINT
- , AnalysisInfo *analysis_info
-#endif
- );
+ unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
+ const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix);
int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
diff --git a/src/tansig_table.h b/src/tansig_table.h
index 885ea3e8..c76f844a 100644
--- a/src/tansig_table.h
+++ b/src/tansig_table.h
@@ -1,6 +1,6 @@
/* This file is auto-generated by gen_tables */
-static const opus_val16 tansig_table[201] = {
+static const float tansig_table[201] = {
0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,