From fdceae89bff3bbdcb8cb720ef6f6c735da1aae7b Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Fri, 30 Aug 2013 21:58:02 -0400 Subject: Surround masking rewrite The channel energy is now computed using an MDCT in the multi-stream encoder rather than computing the energy of the stereo downmix. --- celt/celt.h | 8 +- celt/celt_encoder.c | 30 +-- src/opus_encoder.c | 10 - src/opus_multistream_encoder.c | 454 ++++++++++++++++++++++------------------- 4 files changed, 254 insertions(+), 248 deletions(-) diff --git a/celt/celt.h b/celt/celt.h index 0911c72f..1c504d19 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -109,10 +109,7 @@ typedef struct { #define OPUS_SET_LFE_REQUEST 10024 #define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x) -#define OPUS_SET_ENERGY_SAVE_REQUEST 10026 -#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x) - -#define OPUS_SET_ENERGY_MASK_REQUEST 10028 +#define OPUS_SET_ENERGY_MASK_REQUEST 10026 #define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x) /* Encoder stuff */ @@ -193,6 +190,9 @@ extern const signed char tf_select_table[4][8]; int resampling_factor(opus_int32 rate); +void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, + int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip); + void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, const opus_val16 *window, int overlap); diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index d4b84ffd..72ef4f36 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -111,7 +111,6 @@ struct OpusCustomEncoder { opus_val32 overlap_max; opus_val16 stereo_saving; int intensity; - opus_val16 *energy_save; opus_val16 *energy_mask; opus_val16 spec_avg; @@ -452,7 +451,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS } -static void preemphasis(const opus_val16 * 
OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, +void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip) { int i; @@ -1526,35 +1525,18 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, } } amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); - if (st->energy_save) - { - opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; -#ifdef FIXED_POINT - /* Compensate for the 1/8 gain we apply in the fixed-point downshift to avoid overflows. */ - offset -= QCONST16(3.0f, DB_SHIFT); -#endif - for(i=0;ienergy_save[i]=bandLogE[i]-offset; - st->energy_save=NULL; - } /* This computes how much masking takes place between surround channels */ if (st->energy_mask&&!st->lfe) { opus_val32 mask_avg=0; - opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; for (c=0;cend;i++) { - /* We use a simple follower to approximate the masking spreading function. */ - followE = MAX16(followE-QCONST16(1.f, DB_SHIFT), bandLogE[nbEBands*c+i]-offset); - followMask = MAX16(followMask-QCONST16(1.f, DB_SHIFT), st->energy_mask[nbEBands*c+i]); - mask_avg += followE-followMask; + mask_avg += st->energy_mask[nbEBands*c+i]; } } - surround_masking = DIV32_16(mask_avg,C*st->end) + QCONST16(.7f, DB_SHIFT); + surround_masking = DIV32_16(mask_avg,C*st->end); surround_masking = MIN16(MAX16(surround_masking, -QCONST16(2.f, DB_SHIFT)), QCONST16(.2f, DB_SHIFT)); surround_masking -= HALF16(HALF16(surround_masking)); } @@ -2261,12 +2243,6 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) 
st->lfe = value; } break; - case OPUS_SET_ENERGY_SAVE_REQUEST: - { - opus_val16 *value = va_arg(ap, opus_val16*); - st->energy_save=value; - } - break; case OPUS_SET_ENERGY_MASK_REQUEST: { opus_val16 *value = va_arg(ap, opus_val16*); diff --git a/src/opus_encoder.c b/src/opus_encoder.c index ac40edac..15333630 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -2349,16 +2349,6 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value)); } break; - case OPUS_SET_ENERGY_SAVE_REQUEST: - { - opus_val16 *value = va_arg(ap, opus_val16*); - if (!value) - { - goto bad_arg; - } - ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_SAVE(value)); - } - break; case OPUS_SET_ENERGY_MASK_REQUEST: { opus_val16 *value = va_arg(ap, opus_val16*); diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 4cddbffd..bc86489d 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -38,6 +38,10 @@ #include "os_support.h" #include "analysis.h" #include "mathops.h" +#include "mdct.h" +#include "modes.h" +#include "bands.h" +#include "quant_bands.h" typedef struct { int nb_streams; @@ -57,6 +61,15 @@ static const VorbisLayout vorbis_mappings[8] = { {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */ }; +typedef void (*opus_copy_channel_in_func)( + opus_val16 *dst, + int dst_stride, + const void *src, + int src_stride, + int src_channel, + int frame_size +); + struct OpusMSEncoder { TonalityAnalysisState analysis; ChannelLayout layout; @@ -66,8 +79,47 @@ struct OpusMSEncoder { opus_int32 bitrate_bps; opus_val32 subframe_mem[3]; /* Encoder states go here */ + /* then opus_val32 window_mem[channels*120]; */ + /* then opus_val32 preemph_mem[channels]; */ }; +static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st) +{ + int s; + char *ptr; + int coupled_size, mono_size; + + coupled_size = opus_encoder_get_size(2); + mono_size = opus_encoder_get_size(1); + ptr = (char*)st + 
align(sizeof(OpusMSEncoder)); + for (s=0;slayout.nb_streams;s++) + { + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + } + return (opus_val32*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32)); +} + +static opus_val32 *ms_get_window_mem(OpusMSEncoder *st) +{ + int s; + char *ptr; + int coupled_size, mono_size; + + coupled_size = opus_encoder_get_size(2); + mono_size = opus_encoder_get_size(1); + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + for (s=0;slayout.nb_streams;s++) + { + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + } + return (opus_val32*)ptr; +} static int validate_encoder_layout(const ChannelLayout *layout) { @@ -88,6 +140,164 @@ static int validate_encoder_layout(const ChannelLayout *layout) return 1; } +static void channel_pos(int channels, int pos[8]) +{ + /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */ + if (channels==4) + { + pos[0]=1; + pos[1]=3; + pos[2]=1; + pos[3]=3; + } else if (channels==3||channels==5||channels==6) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=0; + } else if (channels==7) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=2; + pos[6]=0; + } else if (channels==8) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=1; + pos[6]=3; + pos[7]=0; + } +} + +void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem, + int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in +) +{ + int c; + int i; + /* FIXME: pass LM properly */ + int LM=3; + int pos[8] = {0}; + int upsample; + opus_val32 bandE[21]; + opus_val32 maskE[3][21]; + opus_val16 maskLogE[3][21]; + VARDECL(opus_val32, in); + VARDECL(opus_val16, x); + VARDECL(opus_val32, out); + SAVE_STACK; + ALLOC(in, len+overlap, opus_val32); + ALLOC(x, len, opus_val16); + 
ALLOC(freq, len, opus_val32); + + channel_pos(channels, pos); + + for (c=0;c<2;c++) + for (i=0;i<21;i++) + maskE[c][i] = 0; + + upsample = resampling_factor(rate); + for (c=0;cpreemph, preemph_mem+c, 0); + clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1); + if (upsample != 1) + { + int bound = len/upsample; + for (i=0;i2) - size += align(opus_encoder_get_size(2)); + { + size += channels*(120*sizeof(opus_val32) + sizeof(opus_val32)); + } return size; } @@ -192,10 +404,8 @@ static int opus_multistream_encoder_init_impl( } if (surround) { - OpusEncoder *downmix_enc; - downmix_enc = (OpusEncoder*)ptr; - ret = opus_encoder_init(downmix_enc, Fs, 2, OPUS_APPLICATION_AUDIO); - if(ret!=OPUS_OK)return ret; + OPUS_CLEAR(ms_get_preemph_mem(st), channels); + OPUS_CLEAR(ms_get_window_mem(st), channels*120); } st->surround = surround; return OPUS_OK; @@ -339,22 +549,6 @@ OpusMSEncoder *opus_multistream_surround_encoder_create( return st; } -typedef void (*opus_copy_channel_in_func)( - opus_val16 *dst, - int dst_stride, - const void *src, - int src_stride, - int src_channel, - int frame_size -); - -typedef void (*opus_surround_downmix_funct)( - opus_val16 *dst, - const void *src, - int channels, - int frame_size -); - static void surround_rate_allocation( OpusMSEncoder *st, opus_int32 *rate, @@ -436,8 +630,7 @@ static int opus_multistream_encode_native int frame_size, unsigned char *data, opus_int32 max_data_bytes, - int lsb_depth, - opus_surround_downmix_funct surround_downmix + int lsb_depth #ifndef FIXED_POINT , downmix_func downmix , const void *pcm_analysis @@ -451,6 +644,7 @@ static int opus_multistream_encode_native char *ptr; int tot_size; VARDECL(opus_val16, buf); + VARDECL(opus_val16, bandSMR); unsigned char tmp_data[MS_FRAME_TMP]; OpusRepacketizer rp; opus_int32 complexity; @@ -460,9 +654,16 @@ static int opus_multistream_encode_native const CELTMode *celt_mode; opus_int32 bitrates[256]; opus_val16 bandLogE[42]; - 
opus_val16 bandLogE_mono[21]; + opus_val32 *mem = NULL; + opus_val32 *preemph_mem=NULL; ALLOC_STACK; + if (st->surround) + { + preemph_mem = ms_get_preemph_mem(st); + mem = ms_get_window_mem(st); + } + ptr = (char*)st + align(sizeof(OpusMSEncoder)); opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_COMPLEXITY(&complexity)); @@ -504,42 +705,10 @@ static int opus_multistream_encode_native coupled_size = opus_encoder_get_size(2); mono_size = opus_encoder_get_size(1); + ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16); if (st->surround) { - int i; - unsigned char dummy[512]; - /* Temporary kludge -- remove */ - OpusEncoder *downmix_enc; - - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - for (s=0;slayout.nb_streams;s++) - { - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - } - downmix_enc = (OpusEncoder*)ptr; - surround_downmix(buf, pcm, st->layout.nb_channels, frame_size); - opus_encoder_ctl(downmix_enc, OPUS_SET_ENERGY_SAVE(bandLogE)); - opus_encoder_ctl(downmix_enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); - opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); - opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_CHANNELS(2)); - opus_encode_native(downmix_enc, buf, frame_size, dummy, 512, lsb_depth -#ifndef FIXED_POINT - , &analysis_info -#endif - ); - /* Combines the left and right mask into a centre mask. We - use an approximation for the log of the sum of the energies. 
*/ - for(i=0;i<21;i++) - { - opus_val16 diff; - diff = ABS16(SUB16(bandLogE[i], bandLogE[21+i])); - diff = diff + HALF16(diff); - diff = SHR32(HALF32(celt_exp2(-diff)), 16-DB_SHIFT); - bandLogE_mono[i] = MAX16(bandLogE[i], bandLogE[21+i]) + diff; - } + surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in); } if (max_data_bytes < 4*st->layout.nb_streams-1) @@ -583,6 +752,7 @@ static int opus_multistream_encode_native enc = (OpusEncoder*)ptr; if (s < st->layout.nb_coupled_streams) { + int i; int left, right; left = get_left_channel(&st->layout, s, -1); right = get_right_channel(&st->layout, s, -1); @@ -591,18 +761,28 @@ static int opus_multistream_encode_native (*copy_channel_in)(buf+1, 2, pcm, st->layout.nb_channels, right, frame_size); ptr += align(coupled_size); - /* FIXME: This isn't correct for the coupled center channels in - 6.1 surround configuration */ if (st->surround) - opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE)); + { + for (i=0;i<21;i++) + { + bandLogE[i] = bandSMR[21*left+i]; + bandLogE[21+i] = bandSMR[21*right+i]; + } + } } else { + int i; int chan = get_mono_channel(&st->layout, s, -1); (*copy_channel_in)(buf, 1, pcm, st->layout.nb_channels, chan, frame_size); ptr += align(mono_size); if (st->surround) - opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE_mono)); + { + for (i=0;i<21;i++) + bandLogE[i] = bandSMR[21*chan+i]; + } } + if (st->surround) + opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE)); /* number of bytes left (+Toc) */ curr_max = max_data_bytes - tot_size; /* Reserve three bytes for the last stream and four for the others */ @@ -626,50 +806,12 @@ static int opus_multistream_encode_native data += len; tot_size += len; } + /*printf("\n");*/ RESTORE_STACK; return tot_size; } -static void channel_pos(int channels, int pos[8]) -{ - /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */ - if (channels==4) - { - pos[0]=1; - pos[1]=3; - pos[2]=1; - 
pos[3]=3; - } else if (channels==3||channels==5||channels==6) - { - pos[0]=1; - pos[1]=2; - pos[2]=3; - pos[3]=1; - pos[4]=3; - pos[5]=0; - } else if (channels==7) - { - pos[0]=1; - pos[1]=2; - pos[2]=3; - pos[3]=1; - pos[4]=3; - pos[5]=2; - pos[6]=0; - } else if (channels==8) - { - pos[0]=1; - pos[1]=2; - pos[2]=3; - pos[3]=1; - pos[4]=3; - pos[5]=1; - pos[6]=3; - pos[7]=0; - } -} - #if !defined(DISABLE_FLOAT_API) static void opus_copy_channel_in_float( opus_val16 *dst, @@ -690,57 +832,6 @@ static void opus_copy_channel_in_float( dst[i*dst_stride] = float_src[i*src_stride+src_channel]; #endif } - -static void opus_surround_downmix_float( - opus_val16 *dst, - const void *src, - int channels, - int frame_size -) -{ - const float *float_src; - opus_int32 i; - int pos[8] = {0}; - int c; - float_src = (const float *)src; - - channel_pos(channels, pos); - for (i=0;i<2*frame_size;i++) - dst[i]=0; - - for (c=0;clayout.nb_streams + st->layout.nb_coupled_streams; return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 24, opus_surround_downmix_float, downmix_float, pcm+channels*st->analysis.analysis_offset); + pcm, frame_size, data, max_data_bytes, 24, downmix_float, pcm+channels*st->analysis.analysis_offset); } int opus_multistream_encode( @@ -868,7 +908,7 @@ int opus_multistream_encode( { int channels = st->layout.nb_streams + st->layout.nb_coupled_streams; return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short, downmix_int, pcm+channels*st->analysis.analysis_offset); + pcm, frame_size, data, max_data_bytes, 16, downmix_int, pcm+channels*st->analysis.analysis_offset); } #endif -- cgit v1.2.3 From a714ae98a849d03ab9948720a65b9ec57f1c9523 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Sat, 31 Aug 2013 02:05:32 -0400 Subject: Makes surround_analysis() work in fixed-point --- src/opus_multistream_encoder.c | 65 
+++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index bc86489d..7cc3237e 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -179,6 +179,45 @@ static void channel_pos(int channels, int pos[8]) } } +#if 1 +/* Computes a rough approximation of log2(2^a + 2^b) */ +static opus_val16 logSum(opus_val16 a, opus_val16 b) +{ + opus_val16 max; + opus_val32 diff; + opus_val16 frac; + static const opus_val16 diff_table[17] = { + QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT), + QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT), + QCONST16(0.0028123f, DB_SHIFT) + }; + int low; + if (a>b) + { + max = a; + diff = SUB32(EXTEND32(a),EXTEND32(b)); + } else { + max = b; + diff = SUB32(EXTEND32(b),EXTEND32(a)); + } + if (diff >= QCONST16(8.f, DB_SHIFT)) + return max; +#ifdef FIXED_POINT + low = SHR32(diff, DB_SHIFT-1); + frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT); +#else + low = floor(2*diff); + frac = 2*diff - low; +#endif + return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low])); +} +#else +opus_val16 logSum(opus_val16 a, opus_val16 b) +{ + return log2(pow(4, a)+ pow(4, b))/2; +} +#endif + void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem, int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in ) @@ -190,7 +229,6 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b int pos[8] = {0}; int upsample; opus_val32 bandE[21]; - opus_val32 maskE[3][21]; opus_val16 maskLogE[3][21]; VARDECL(opus_val32, in); VARDECL(opus_val16, x); @@ -202,9 +240,9 @@ void surround_analysis(const CELTMode 
*celt_mode, const void *pcm, opus_val16 *b channel_pos(channels, pos); - for (c=0;c<2;c++) + for (c=0;c<3;c++) for (i=0;i<21;i++) - maskE[c][i] = 0; + maskLogE[c][i] = -QCONST16(28.f, DB_SHIFT); upsample = resampling_factor(rate); for (c=0;c Date: Thu, 5 Sep 2013 12:49:55 -0400 Subject: Adds trim and dynalloc control to surround masking --- celt/celt_encoder.c | 45 +++++++++++++++++++++++++++++++++++------- src/opus_multistream_encoder.c | 5 +++++ 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index 72ef4f36..2bc8fee6 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -743,7 +743,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, const opus_val16 *bandLogE, int end, int LM, int C, int N0, AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate, - int intensity) + int intensity, opus_val16 surround_trim) { int i; opus_val32 diff=0; @@ -817,6 +817,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, if (diff < -QCONST16(10.f, DB_SHIFT)) trim_index++; trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); + trim -= SHR16(surround_trim, DB_SHIFT-8); trim -= 2*SHR16(tf_estimate, 14-8); #ifndef FIXED_POINT if (analysis->valid) @@ -876,7 +877,7 @@ static int stereo_analysis(const CELTMode *m, const celt_norm *X, static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, - int effectiveBytes, opus_int32 *tot_boost_, int lfe) + int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc) { int i, c; opus_int32 tot_boost=0; @@ -939,6 +940,8 @@ static opus_val16 dynalloc_analysis(const 
opus_val16 *bandLogE, const opus_val16 follower[i] = MAX16(0, bandLogE[i]-follower[i]); } } + for (i=start;imode; @@ -1525,20 +1530,46 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, } } amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); + + ALLOC(surround_dynalloc, C*nbEBands, opus_val16); + for(i=0;iend;i++) + surround_dynalloc[i] = 0; /* This computes how much masking takes place between surround channels */ if (st->energy_mask&&!st->lfe) { opus_val32 mask_avg=0; + opus_val32 diff=0; for (c=0;cend;i++) { mask_avg += st->energy_mask[nbEBands*c+i]; + diff += st->energy_mask[i+c*nbEBands]*(opus_int32)(1+2*i-st->end); + } + } + mask_avg = DIV32_16(mask_avg,C*st->end); + mask_avg = MAX16(mask_avg, -QCONST16(2.f, DB_SHIFT)); + diff = diff*6/(C*(st->end-1)*(st->end+1)*st->end); + diff = MAX32(MIN32(diff, QCONST32(.05f, DB_SHIFT)), -QCONST32(.05f, DB_SHIFT)); + for(i=0;iend;i++) + { + opus_val32 lin; + opus_val16 unmask; + lin = mask_avg + HALF32(diff*(1+2*i-st->end)); + if (C==2) + unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]) - lin; + else + unmask = st->energy_mask[i] - lin; + if (unmask > QCONST16(.25f, DB_SHIFT)) + { + surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT); } } - surround_masking = DIV32_16(mask_avg,C*st->end); - surround_masking = MIN16(MAX16(surround_masking, -QCONST16(2.f, DB_SHIFT)), QCONST16(.2f, DB_SHIFT)); - surround_masking -= HALF16(HALF16(surround_masking)); + /* Convert to 1/64th units used for the trim */ + surround_trim = 64*diff; + /*printf("%d %d ", mask_avg, surround_trim);*/ + surround_masking = mask_avg; + surround_masking = MIN16(MAX16(surround_masking, -QCONST16(2.f, DB_SHIFT)), QCONST16(.0f, DB_SHIFT)); } /* Temporal VBR (but not for LFE) */ if (!st->lfe) @@ -1665,7 +1696,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets, st->lsb_depth, mode->logN, 
isTransient, st->vbr, st->constrained_vbr, - eBands, LM, effectiveBytes, &tot_boost, st->lfe); + eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc); /* For LFE, everything interesting is in the first band */ if (st->lfe) offsets[0] = IMIN(8, effectiveBytes/3); @@ -1738,7 +1769,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, alloc_trim = 5; else alloc_trim = alloc_trim_analysis(mode, X, bandLogE, - st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity); + st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim); ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); tell = ec_tell_frac(enc); } diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 7cc3237e..2e29869f 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -263,6 +263,11 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<=0;i--) + bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i+1]-QCONST16(2.f, DB_SHIFT)); if (pos[c]==1) { for (i=0;i<21;i++) -- cgit v1.2.3 From 91904a4c91eb19abcebe843045f4599ee67adbee Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Thu, 5 Sep 2013 21:34:43 -0400 Subject: Moves the analysis back to opus_encode_native() to it runs for all streams. I checked that it's not *completely* busted, but it probably needs more testing. 
--- src/analysis.c | 18 ++-- src/analysis.h | 6 +- src/opus_encoder.c | 192 ++++++++++++++++++----------------------- src/opus_multistream_encoder.c | 52 +++-------- src/opus_private.h | 13 ++- 5 files changed, 113 insertions(+), 168 deletions(-) diff --git a/src/analysis.c b/src/analysis.c index a9d2073f..f4373150 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -184,12 +184,12 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int for (;ipspeech[i]; psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence; - /*printf("%f %f\n", psum, info_out->music_prob);*/ + /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/ info_out->music_prob = psum; } -void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix) +void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) { int i, b; const kiss_fft_state *kfft; @@ -234,7 +234,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con kfft = celt_mode->mdct.kfft[0]; if (tonal->count==0) tonal->mem_fill = 240; - downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C); + downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C); if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE) { tonal->mem_fill += len; @@ -260,7 +260,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con } OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); - downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C); + downmix(x, &tonal->inmem[240], remaining, 
offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); tonal->mem_fill = 240 + remaining; opus_fft(kfft, in, out); @@ -611,8 +611,8 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con RESTORE_STACK; } -int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm, - const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps, +int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const opus_val16 *pcm, + const void *analysis_pcm, int frame_size, int variable_duration, int c1, int c2, int C, opus_int32 Fs, int bitrate_bps, int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) { int offset; @@ -622,9 +622,9 @@ int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, con frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size); pcm_len = frame_size - analysis->analysis_offset; - offset = 0; + offset = analysis->analysis_offset; do { - tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix); + tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); offset += 480; pcm_len -= 480; } while (pcm_len>0); @@ -633,7 +633,7 @@ int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, con if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) { int LM = 3; - LM = optimize_framesize((const opus_val16*)pcm, frame_size, C, Fs, bitrate_bps, + LM = optimize_framesize(pcm, frame_size, C, Fs, bitrate_bps, analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix); while ((Fs/400<frame_size) LM--; diff --git a/src/analysis.h b/src/analysis.h index 8cd78883..4371a577 100644 --- a/src/analysis.h +++ b/src/analysis.h @@ -79,12 +79,12 @@ typedef struct { } TonalityAnalysisState; void tonality_analysis(TonalityAnalysisState *tonal, 
AnalysisInfo *info, - const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix); + const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix); void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len); -int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm, - const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps, +int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const opus_val16 *pcm, + const void *analysis_pcm, int frame_size, int variable_duration, int c1, int c2, int C, opus_int32 Fs, int bitrate_bps, int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info); #endif diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 15333630..dad04082 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -685,32 +685,6 @@ static int transient_viterbi(const float *E, const float *E_1, int N, int frame_ return best_state; } -void downmix_float(const void *_x, float *sub, int subframe, int offset, int C) -{ - const float *x; - int c, j; - x = (const float *)_x; - for (j=0;j-1) + { + for (j=0;j-1) + { + for (j=0;jlsb_depth); + analysis_info.valid = 0; + celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); +#ifndef FIXED_POINT + if (st->silk_mode.complexity >= 7 && st->Fs==48000) + { + frame_size = run_analysis(&st->analysis, celt_mode, pcm, analysis_pcm, + frame_size, st->variable_duration, c1, c2, analysis_channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix, &analysis_info); + } else +#endif + { + frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); + } + if(frame_size<0) + { + return OPUS_BAD_ARG; + } + + st->voice_ratio = -1; #ifndef FIXED_POINT st->detected_bandwidth = 0; - if (analysis_info->valid) + if (analysis_info.valid) { int 
analysis_bandwidth; if (st->signal_type == OPUS_AUTO) - st->voice_ratio = (int)floor(.5+100*(1-analysis_info->music_prob)); + st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob)); - analysis_bandwidth = analysis_info->bandwidth; + analysis_bandwidth = analysis_info.bandwidth; if (analysis_bandwidth<=12) st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; else if (analysis_bandwidth<=14) @@ -1310,11 +1349,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */ if (to_celt && i==nb_frames-1) st->user_forced_mode = MODE_CELT_ONLY; - tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth -#ifndef FIXED_POINT - , analysis_info -#endif - ); + tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, + tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, + analysis_pcm, c1, c2, analysis_channels, downmix); if (tmp_len<0) { RESTORE_STACK; @@ -1476,9 +1513,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (prefill) { opus_int32 zero=0; - const CELTMode *celt_mode; int prefill_offset; - celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode a discontinuity. The exact location is what we need to avoid leaving any "gap" in the audio when mixing with the redundant CELT frame. 
Here we can afford to @@ -1593,8 +1628,8 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) { bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); - if (analysis_info->valid) - bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info->tonality)); + if (analysis_info.valid) + bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality)); } #endif celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1)); @@ -1625,9 +1660,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ /* gain_fade() and stereo_fade() need to be after the buffer copying because we don't want any of this to affect the SILK part */ if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) { - const CELTMode *celt_mode; - - celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); gain_fade(pcm_buf, pcm_buf, st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs); } @@ -1638,9 +1670,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ /* Apply stereo width reduction (at low bitrates) */ if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) { opus_val16 g1, g2; - const CELTMode *celt_mode; - - celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); g1 = st->hybrid_stereo_width_Q14; g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14); #ifdef FIXED_POINT @@ -1699,7 +1728,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ #ifndef FIXED_POINT if (redundancy || st->mode != MODE_SILK_ONLY) - celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info)); + celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); #endif /* 5 ms redundant frame for CELT->SILK */ @@ -1843,7 +1872,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size, for (i=0;ichannels;i++) in[i] = FLOAT2INT16(pcm[i]); - ret = 
opus_encode_native(st, in, frame_size, data, max_data_bytes, 16); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, 0, -2, st->channels, downmix_float); RESTORE_STACK; return ret; } @@ -1857,7 +1886,7 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size, { return OPUS_BAD_ARG; } - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16); + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, 0, -2, st->channels, downmix_int); } #else @@ -1865,74 +1894,21 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size, unsigned char *data, opus_int32 max_data_bytes) { int i, ret; - const CELTMode *celt_mode; - int delay_compensation; - int lsb_depth; VARDECL(float, in); - AnalysisInfo analysis_info; ALLOC_STACK; - opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode)); - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - - lsb_depth = IMIN(16, st->lsb_depth); - - analysis_info.valid = 0; - if (st->silk_mode.complexity >= 7 && st->Fs==48000) - { - frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset, - frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_int, &analysis_info); - } else { - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); - } - if(frame_size<0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - ALLOC(in, frame_size*st->channels, float); for (i=0;ichannels;i++) in[i] = (1.0f/32768)*pcm[i]; - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, &analysis_info); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, 0, -2, st->channels, downmix_int); RESTORE_STACK; return ret; } opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size, unsigned char *data, 
opus_int32 out_data_bytes) { - const CELTMode *celt_mode; - int delay_compensation; - int lsb_depth; - AnalysisInfo analysis_info; - - opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode)); - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - - lsb_depth = IMIN(24, st->lsb_depth); - - analysis_info.valid = 0; - if (st->silk_mode.complexity >= 7 && st->Fs==48000) - { - frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset, - frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_float, &analysis_info); - } else { - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); - } - if(frame_size<0) - { - return OPUS_BAD_ARG; - } - - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, &analysis_info); - + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, pcm, 0, -2, st->channels, downmix_float); } #endif diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 2e29869f..91ce2cdc 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -36,7 +36,6 @@ #include #include "float_cast.h" #include "os_support.h" -#include "analysis.h" #include "mathops.h" #include "mdct.h" #include "modes.h" @@ -71,7 +70,6 @@ typedef void (*opus_copy_channel_in_func)( ); struct OpusMSEncoder { - TonalityAnalysisState analysis; ChannelLayout layout; int lfe_stream; int variable_duration; @@ -409,7 +407,6 @@ static int opus_multistream_encoder_init_impl( st->layout.nb_streams = streams; st->layout.nb_coupled_streams = coupled_streams; st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0; - OPUS_CLEAR(&st->analysis,1); if (!surround) st->lfe_stream = -1; st->bitrate_bps = OPUS_AUTO; @@ -666,11 +663,8 @@ static int opus_multistream_encode_native int frame_size, unsigned char *data, 
opus_int32 max_data_bytes, - int lsb_depth -#ifndef FIXED_POINT - , downmix_func downmix - , const void *pcm_analysis -#endif + int lsb_depth, + downmix_func downmix ) { opus_int32 Fs; @@ -684,9 +678,6 @@ static int opus_multistream_encode_native unsigned char tmp_data[MS_FRAME_TMP]; OpusRepacketizer rp; opus_int32 complexity; -#ifndef FIXED_POINT - AnalysisInfo analysis_info; -#endif const CELTMode *celt_mode; opus_int32 bitrates[256]; opus_val16 bandLogE[42]; @@ -710,24 +701,6 @@ static int opus_multistream_encode_native RESTORE_STACK; return OPUS_BAD_ARG; } -#ifndef FIXED_POINT - analysis_info.valid = 0; - if (complexity >= 7 && Fs==48000) - { - opus_int32 delay_compensation; - int channels; - - channels = st->layout.nb_streams + st->layout.nb_coupled_streams; - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation)); - delay_compensation -= Fs/400; - - frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm_analysis, - frame_size, st->variable_duration, channels, Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix, &analysis_info); - } else -#endif - { - frame_size = frame_size_select(frame_size, st->variable_duration, Fs); - } /* Validate frame_size before using it to allocate stack space. This mirrors the checks in opus_encode[_float](). 
*/ if (400*frame_size != Fs && 200*frame_size != Fs && @@ -783,6 +756,7 @@ static int opus_multistream_encode_native OpusEncoder *enc; int len; int curr_max; + int c1, c2; opus_repacketizer_init(&rp); enc = (OpusEncoder*)ptr; @@ -805,6 +779,8 @@ static int opus_multistream_encode_native bandLogE[21+i] = bandSMR[21*right+i]; } } + c1 = left; + c2 = right; } else { int i; int chan = get_mono_channel(&st->layout, s, -1); @@ -816,6 +792,8 @@ static int opus_multistream_encode_native for (i=0;i<21;i++) bandLogE[i] = bandSMR[21*chan+i]; } + c1 = chan; + c2 = -1; } if (st->surround) opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE)); @@ -824,11 +802,7 @@ static int opus_multistream_encode_native /* Reserve three bytes for the last stream and four for the others */ curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1); curr_max = IMIN(curr_max,MS_FRAME_TMP); - len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth -#ifndef FIXED_POINT - , &analysis_info -#endif - ); + len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth, pcm, c1, c2, st->layout.nb_channels, downmix); if (len<0) { RESTORE_STACK; @@ -901,7 +875,7 @@ int opus_multistream_encode( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16); + pcm, frame_size, data, max_data_bytes, 16, downmix_int); } #ifndef DISABLE_FLOAT_API @@ -914,7 +888,7 @@ int opus_multistream_encode_float( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 16); + pcm, frame_size, data, max_data_bytes, 16, downmix_float); } #endif @@ -929,9 +903,8 @@ int opus_multistream_encode_float opus_int32 max_data_bytes ) { - int channels = st->layout.nb_streams + st->layout.nb_coupled_streams; return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 24, downmix_float, pcm+channels*st->analysis.analysis_offset); + pcm, 
frame_size, data, max_data_bytes, 24, downmix_float); } int opus_multistream_encode( @@ -942,9 +915,8 @@ int opus_multistream_encode( opus_int32 max_data_bytes ) { - int channels = st->layout.nb_streams + st->layout.nb_coupled_streams; return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int, pcm+channels*st->analysis.analysis_offset); + pcm, frame_size, data, max_data_bytes, 16, downmix_int); } #endif diff --git a/src/opus_private.h b/src/opus_private.h index 9d8210b5..cdb467be 100644 --- a/src/opus_private.h +++ b/src/opus_private.h @@ -82,9 +82,9 @@ int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev); #define OPUS_SET_FORCE_MODE_REQUEST 11002 #define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x) -typedef void (*downmix_func)(const void *, float *, int, int, int); -void downmix_float(const void *_x, float *sub, int subframe, int offset, int C); -void downmix_int(const void *_x, float *sub, int subframe, int offset, int C); +typedef void (*downmix_func)(const void *, float *, int, int, int, int, int); +void downmix_float(const void *_x, float *sub, int subframe, int offset, int c1, int c2, int C); +void downmix_int(const void *_x, float *sub, int subframe, int offset, int c1, int c2, int C); int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering, @@ -95,11 +95,8 @@ int encode_size(int size, unsigned char *data); opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, - unsigned char *data, opus_int32 out_data_bytes, int lsb_depth -#ifndef FIXED_POINT - , AnalysisInfo *analysis_info -#endif - ); + unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, + const void *analysis_pcm, int c1, int c2, int analysis_channels, downmix_func 
downmix); int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited, -- cgit v1.2.3 From 3ab03e05561f81f0eacac3ee724baac33b89683b Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Fri, 6 Sep 2013 16:00:39 -0400 Subject: First pass at making the analysis code run with FIXED_POINT Code is still float, but at least tonality estimation seems to work. Speech/music analysis is still disabled. --- Makefile.am | 3 ++- celt/arch.h | 1 + celt/celt.h | 10 +++++----- celt/celt_encoder.c | 8 ++++---- celt/fixed_generic.h | 1 + src/analysis.c | 27 +++++++++++++++------------ src/analysis.h | 2 +- src/mlp.c | 4 ++-- src/opus_encoder.c | 49 ++++++++++++++++++++++++++++++++++++------------- src/opus_private.h | 6 +++--- 10 files changed, 70 insertions(+), 41 deletions(-) diff --git a/Makefile.am b/Makefile.am index b090ca09..edbcc42e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -20,9 +20,10 @@ if FIXED_POINT SILK_SOURCES += $(SILK_SOURCES_FIXED) else SILK_SOURCES += $(SILK_SOURCES_FLOAT) -OPUS_SOURCES += $(OPUS_SOURCES_FLOAT) endif +OPUS_SOURCES += $(OPUS_SOURCES_FLOAT) + if CPU_ARM CELT_SOURCES += $(CELT_SOURCES_ARM) endif diff --git a/celt/arch.h b/celt/arch.h index 78e2635f..f9c98567 100644 --- a/celt/arch.h +++ b/celt/arch.h @@ -185,6 +185,7 @@ typedef float celt_ener; #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) #define MULT16_16_Q11_32(a,b) ((a)*(b)) +#define MULT16_16_Q11(a,b) ((a)*(b)) #define MULT16_16_Q13(a,b) ((a)*(b)) #define MULT16_16_Q14(a,b) ((a)*(b)) #define MULT16_16_Q15(a,b) ((a)*(b)) diff --git a/celt/celt.h b/celt/celt.h index 1c504d19..cdb76c8b 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -52,11 +52,11 @@ extern "C" { typedef struct { int valid; - opus_val16 tonality; - opus_val16 tonality_slope; - opus_val16 noisiness; - opus_val16 activity; - opus_val16 music_prob; + float tonality; + float tonality_slope; + float noisiness; + float activity; + float music_prob; int 
bandwidth; }AnalysisInfo; diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index 2bc8fee6..a63e9616 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -819,7 +819,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); trim -= SHR16(surround_trim, DB_SHIFT-8); trim -= 2*SHR16(tf_estimate, 14-8); -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (analysis->valid) { trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05f))); @@ -1142,7 +1142,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 target = base_target; /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/ -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (analysis->valid && analysis->activity<.4) target -= (opus_int32)((coded_bins<activity)); #endif @@ -1167,7 +1167,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 QCONST16(0.02f,14) : QCONST16(0.04f,14); target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1); -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API /* Apply tonality boost */ if (analysis->valid && !lfe) { @@ -1872,7 +1872,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<end-1; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (st->analysis.valid) { int min_bandwidth; diff --git a/celt/fixed_generic.h b/celt/fixed_generic.h index 0e77976e..657e67c8 100644 --- a/celt/fixed_generic.h +++ b/celt/fixed_generic.h @@ -116,6 +116,7 @@ #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) +#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) 
#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13)) #define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14)) #define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15)) diff --git a/src/analysis.c b/src/analysis.c index f4373150..547e5a4b 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -253,10 +253,10 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con for (i=0;iinmem[i]); - in[i].i = MULT16_16(w, tonal->inmem[N2+i]); - in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]); - in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]); + in[i].r = w*tonal->inmem[i]; + in[i].i = w*tonal->inmem[N2+i]; + in[N-i-1].r = w*tonal->inmem[N-i-1]; + in[N-i-1].i = w*tonal->inmem[N+N2-i-1]; } OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); @@ -325,8 +325,8 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con float stationarity; for (i=tbands[b];iE[tonal->E_count][b] = E; frame_noisiness += nE/(1e-15f+E); - frame_loudness += celt_sqrt(E+1e-10f); + frame_loudness += sqrt(E+1e-10f); logE[b] = (float)log(E+1e-10f); tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f); tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f); @@ -348,11 +348,11 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con L1=L2=0; for (i=0;iE[i][b]); + L1 += sqrt(tonal->E[i][b]); L2 += tonal->E[i][b]; } - stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2)); + stationarity = MIN16(0.99f,L1/sqrt(EPSILON+NB_FRAMES*L2)); stationarity *= stationarity; stationarity *= stationarity; frame_stationarity += stationarity; @@ -379,6 +379,9 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con bandwidth = 0; maxE = 0; noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8))); +#ifdef FIXED_POINT + noise_floor *= 1<<(15+SIG_SHIFT); +#endif noise_floor *= noise_floor; for (b=0;bmem[i] = BFCC[i]; } for (i=0;i<9;i++) - features[11+i] = 
celt_sqrt(tonal->std[i]); + features[11+i] = sqrt(tonal->std[i]); features[20] = info->tonality; features[21] = info->activity; features[22] = frame_stationarity; diff --git a/src/analysis.h b/src/analysis.h index 4371a577..ebd36c86 100644 --- a/src/analysis.h +++ b/src/analysis.h @@ -42,7 +42,7 @@ typedef struct { float angle[240]; float d_angle[240]; float d2_angle[240]; - float inmem[ANALYSIS_BUF_SIZE]; + opus_val32 inmem[ANALYSIS_BUF_SIZE]; int mem_fill; /* number of usable samples in the buffer */ float prev_band_tonality[NB_TBANDS]; float prev_tonality; diff --git a/src/mlp.c b/src/mlp.c index 90e94a5f..dd7d34fc 100644 --- a/src/mlp.c +++ b/src/mlp.c @@ -43,9 +43,9 @@ static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ /*double x, y;*/ opus_val16 dy, yy; /* Q14 */ /*x = 1.9073e-06*_x;*/ - if (_x>=QCONST32(10,19)) + if (_x>=QCONST32(8,19)) return QCONST32(1.,14); - if (_x<=-QCONST32(10,19)) + if (_x<=-QCONST32(8,19)) return -QCONST32(1.,14); xx = EXTRACT16(SHR32(_x, 8)); /*i = lrint(25*x);*/ diff --git a/src/opus_encoder.c b/src/opus_encoder.c index dad04082..27b7143b 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -98,7 +98,7 @@ struct OpusEncoder { int energy_masking; StereoWidthState width_mem; opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API TonalityAnalysisState analysis; int detected_bandwidth; int analysis_offset; @@ -551,7 +551,7 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m return st->user_bitrate_bps; } -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API /* Don't use more than 60 ms for the frame size analysis */ #define MAX_DYNAMIC_FRAMESIZE 24 /* Estimates how much the bitrate will be boosted based on the sub-frame energy */ @@ -697,10 +697,10 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int bestLM=0; int subframe; int pos; - VARDECL(opus_val16, sub); + VARDECL(opus_val32, sub); subframe = Fs/400; - ALLOC(sub, 
subframe, opus_val16); + ALLOC(sub, subframe, opus_val32); e[0]=mem[0]; e_1[0]=1.f/(EPSILON+mem[0]); if (buffering) @@ -759,30 +759,41 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, #endif #ifndef DISABLE_FLOAT_API -void downmix_float(const void *_x, float *sub, int subframe, int offset, int c1, int c2, int C) +void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) { const float *x; int j; x = (const float *)_x; for (j=0;j-1) { for (j=0;jsilk_mode.complexity >= 7 && st->Fs==48000) { frame_size = run_analysis(&st->analysis, celt_mode, pcm, analysis_pcm, @@ -982,7 +1005,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->voice_ratio = -1; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API st->detected_bandwidth = 0; if (analysis_info.valid) { @@ -1624,7 +1647,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->use_vbr) { opus_int32 bonus=0; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) { bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); @@ -1726,7 +1749,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ ec_enc_shrink(&enc, nb_compr_bytes); } -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (redundancy || st->mode != MODE_SILK_ONLY) celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); #endif diff --git a/src/opus_private.h b/src/opus_private.h index cdb467be..64c325ab 100644 --- a/src/opus_private.h +++ b/src/opus_private.h @@ -82,9 +82,9 @@ int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev); #define OPUS_SET_FORCE_MODE_REQUEST 11002 #define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x) -typedef void (*downmix_func)(const void *, float *, int, int, int, int, int); -void downmix_float(const void *_x, float *sub, int subframe, int 
offset, int c1, int c2, int C); -void downmix_int(const void *_x, float *sub, int subframe, int offset, int c1, int c2, int C); +typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int); +void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); +void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering, -- cgit v1.2.3 From fc1b1f9b441c2ffe090046c968e5d95f2eafb038 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Fri, 6 Sep 2013 16:32:50 -0400 Subject: Makes speech/music detection work with FIXED_POINT (code still float) --- src/analysis.c | 6 +++++- src/mlp.c | 35 ++++++++++++++++++++++++++++++----- src/mlp.h | 4 ++-- src/tansig_table.h | 2 +- 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/analysis.c b/src/analysis.c index 547e5a4b..6a1840b1 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -327,6 +327,10 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con { float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; +#ifdef FIXED_POINT + /* FIXME: It's probably best to change the BFCC filter initial state instead */ + binE *= 5.55e-17f; +#endif E += binE; tE += binE*tonality[i]; nE += binE*2.f*(.5f-noisiness[i]); @@ -479,7 +483,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con features[23] = info->tonality_slope; features[24] = tonal->lowECount; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API mlp_process(&net, features, frame_probs); frame_probs[0] = .5f*(frame_probs[0]+1); /* Curve fitting between the MLP probability and the actual probability */ diff --git a/src/mlp.c b/src/mlp.c index dd7d34fc..73b1d315 100644 --- a/src/mlp.c +++ b/src/mlp.c @@ -35,7 +35,7 
@@ #include "tansig_table.h" #define MAX_NEURONS 100 -#ifdef FIXED_POINT +#if 0 static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ { int i; @@ -62,11 +62,11 @@ static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ } #else /*extern const float tansig_table[501];*/ -static inline opus_val16 tansig_approx(opus_val16 x) +static inline float tansig_approx(float x) { int i; - opus_val16 y, dy; - opus_val16 sign=1; + float y, dy; + float sign=1; if (x>=8) return 1; if (x<=-8) @@ -85,6 +85,7 @@ static inline opus_val16 tansig_approx(opus_val16 x) } #endif +#if 0 void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out) { int j; @@ -108,4 +109,28 @@ void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out) out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17))); } } - +#else +void mlp_process(const MLP *m, const float *in, float *out) +{ + int j; + float hidden[MAX_NEURONS]; + const float *W = m->weights; + /* Copy to tmp_in */ + for (j=0;jtopo[1];j++) + { + int k; + float sum = *W++; + for (k=0;ktopo[0];k++) + sum = sum + in[k]**W++; + hidden[j] = tansig_approx(sum); + } + for (j=0;jtopo[2];j++) + { + int k; + float sum = *W++; + for (k=0;ktopo[1];k++) + sum = sum + hidden[k]**W++; + out[j] = tansig_approx(sum); + } +} +#endif diff --git a/src/mlp.h b/src/mlp.h index 68ff68d8..86c8e061 100644 --- a/src/mlp.h +++ b/src/mlp.h @@ -33,9 +33,9 @@ typedef struct { int layers; const int *topo; - const opus_val16 *weights; + const float *weights; } MLP; -void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out); +void mlp_process(const MLP *m, const float *in, float *out); #endif /* _MLP_H_ */ diff --git a/src/tansig_table.h b/src/tansig_table.h index 885ea3e8..c76f844a 100644 --- a/src/tansig_table.h +++ b/src/tansig_table.h @@ -1,6 +1,6 @@ /* This file is auto-generated by gen_tables */ -static const opus_val16 tansig_table[201] = { +static const float tansig_table[201] = { 0.000000f, 0.039979f, 0.079830f, 0.119427f, 
0.158649f, 0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f, 0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f, -- cgit v1.2.3 From ba7dbb365ebb1acff445a017d31e1bb859f0a9fb Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Mon, 9 Sep 2013 16:39:19 -0400 Subject: Analysis scaling fixes This should make the scaling the same for fixed and float. It changes the float scaling too by normalizing by the number of channels, which matters for bandwidth detection. --- celt/celt_encoder.c | 2 +- src/analysis.c | 6 +++--- src/opus_encoder.c | 39 +++++++++++++++++++-------------------- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index a63e9616..308f29f6 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -822,7 +822,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, #ifndef DISABLE_FLOAT_API if (analysis->valid) { - trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05f))); + trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), QCONST16(2.f, 8)*(analysis->tonality_slope+.05f))); } #endif diff --git a/src/analysis.c b/src/analysis.c index 6a1840b1..b79e869a 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -347,7 +347,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con tonal->highE[b]+=.5f; tonal->lowE[b]-=.5f; } - relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]); + relativeE += (logE[b]-tonal->lowE[b])/(1e-15+tonal->highE[b]-tonal->lowE[b]); L1=L2=0; for (i=0;iE[i][b]; } - stationarity = MIN16(0.99f,L1/sqrt(EPSILON+NB_FRAMES*L2)); + stationarity = MIN16(0.99f,L1/sqrt(1e-15+NB_FRAMES*L2)); stationarity *= stationarity; stationarity *= stationarity; frame_stationarity += stationarity; /*band_tonality[b] = tE/(1e-15+E)*/; - band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]); + band_tonality[b] = MAX16(tE/(1e-15+E), 
stationarity*tonal->prev_band_tonality[b]); #if 0 if (b>=NB_TONAL_SKIP_BANDS) { diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 27b7143b..1dfd9be5 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -762,6 +762,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) { const float *x; + opus_val32 scale; int j; x = (const float *)_x; for (j=0;j Date: Mon, 9 Sep 2013 17:11:25 -0400 Subject: Fixes delayed decision for fixed-point --- src/opus_encoder.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 1dfd9be5..5eca57a3 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -1883,13 +1883,6 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size, VARDECL(opus_int16, in); ALLOC_STACK; - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); - if(frame_size<0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - ALLOC(in, frame_size*st->channels, opus_int16); for (i=0;ichannels;i++) @@ -1903,11 +1896,6 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size, opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size, unsigned char *data, opus_int32 out_data_bytes) { - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); - if(frame_size<0) - { - return OPUS_BAD_ARG; - } return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, 0, -2, st->channels, downmix_int); } -- cgit v1.2.3 From 8824fdb1c242b450f981a8ba1f23c4dadeaf6558 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Tue, 10 Sep 2013 01:15:19 -0400 Subject: Adds surround masking to SILK --- src/opus_encoder.c | 36 ++++++++++++++++++++++++++++++++++-- src/opus_multistream_encoder.c | 5 ++++- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/opus_encoder.c 
b/src/opus_encoder.c index 5eca57a3..5599d203 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -95,7 +95,7 @@ struct OpusEncoder { int silk_bw_switch; /* Sampling rate (at the API level) */ int first; - int energy_masking; + opus_val16 * energy_masking; StereoWidthState width_mem; opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; #ifndef DISABLE_FLOAT_API @@ -1478,6 +1478,38 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->silk_mode.bitRate = total_bitRate; } + /* Surround masking for SILK */ + if (st->energy_masking && st->use_vbr && !st->lfe) + { + opus_val32 mask_sum=0; + opus_val16 masking_depth; + opus_int32 rate_offset; + int c; + int end = 17; + opus_int16 srate = 16000; + if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND) + { + end = 13; + srate = 8000; + } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) + { + end = 15; + srate = 12000; + } + for (c=0;cchannels;c++) + { + for(i=0;ienergy_masking[21*c+i]; + } + /* Conservative rate reduction, we cut the masking in half */ + masking_depth = HALF16(mask_sum / end*st->channels); + masking_depth = MAX16(QCONST16(-2.f, DB_SHIFT), MIN16(masking_depth, QCONST16(0.f, DB_SHIFT))); + rate_offset = PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT); + rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3); + st->silk_mode.bitRate += rate_offset; + bytes_target += rate_offset * frame_size / (8 * st->Fs); + } + st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs; st->silk_mode.nChannelsAPI = st->channels; st->silk_mode.nChannelsInternal = st->stream_channels; @@ -2338,7 +2370,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) 
case OPUS_SET_ENERGY_MASK_REQUEST: { opus_val16 *value = va_arg(ap, opus_val16*); - st->energy_masking = (value!=NULL); + st->energy_masking = value; ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); } break; diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 91ce2cdc..8bf97ed0 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -741,10 +741,13 @@ static int opus_multistream_encode_native opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s])); if (st->surround) { - opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); if (s < st->layout.nb_coupled_streams) + { + /* To preserve the spatial image, force stereo CELT on coupled streams */ + opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2)); + } } } -- cgit v1.2.3 From 978e4cb507b1fdb90d62e2b15537ebd56087b542 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Wed, 11 Sep 2013 00:51:22 -0400 Subject: Code for handling upsampling in surround_analysis() Still untested. 
--- src/opus_multistream_encoder.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 8bf97ed0..985a8413 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -226,15 +226,20 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b int LM=3; int pos[8] = {0}; int upsample; + int frame_size; opus_val32 bandE[21]; opus_val16 maskLogE[3][21]; VARDECL(opus_val32, in); VARDECL(opus_val16, x); VARDECL(opus_val32, out); SAVE_STACK; - ALLOC(in, len+overlap, opus_val32); + + upsample = resampling_factor(rate); + frame_size = len*upsample; + + ALLOC(in, frame_size+overlap, opus_val32); ALLOC(x, len, opus_val16); - ALLOC(freq, len, opus_val32); + ALLOC(freq, frame_size, opus_val32); channel_pos(channels, pos); @@ -242,20 +247,18 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b for (i=0;i<21;i++) maskLogE[c][i] = -QCONST16(28.f, DB_SHIFT); - upsample = resampling_factor(rate); for (c=0;cpreemph, preemph_mem+c, 0); + preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0); clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1); if (upsample != 1) { - int bound = len/upsample; + int bound = len; for (i=0;i Date: Wed, 11 Sep 2013 23:34:51 -0400 Subject: Surround analysis should now work for non-20ms frame sizes. Not well tested. 
--- src/opus_multistream_encoder.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 985a8413..3065b6f7 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -222,8 +222,7 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b { int c; int i; - /* FIXME: pass LM properly */ - int LM=3; + int LM; int pos[8] = {0}; int upsample; int frame_size; @@ -237,6 +236,10 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b upsample = resampling_factor(rate); frame_size = len*upsample; + for (LM=0;LM<=celt_mode->maxLM;LM++) + if (celt_mode->shortMdctSize< Date: Thu, 12 Sep 2013 03:05:43 -0400 Subject: Tuning the surround masking We're now a bit more conservative and only take into account the bands that are being coded. Also weighting the rate saving by the band width. --- celt/celt_encoder.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index 308f29f6..41c28ab6 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -1535,27 +1535,35 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, for(i=0;iend;i++) surround_dynalloc[i] = 0; /* This computes how much masking takes place between surround channels */ - if (st->energy_mask&&!st->lfe) + if (st->start==0&&st->energy_mask&&!st->lfe) { + int mask_end; opus_val32 mask_avg=0; opus_val32 diff=0; + int count=0; + mask_end = st->lastCodedBands; for (c=0;cend;i++) + for(i=0;ienergy_mask[nbEBands*c+i]; - diff += st->energy_mask[i+c*nbEBands]*(opus_int32)(1+2*i-st->end); + mask_avg += st->energy_mask[nbEBands*c+i]*(eBands[i+1]-eBands[i]); + count += (eBands[i+1]-eBands[i]); + diff += st->energy_mask[i+c*nbEBands]*(opus_int32)(1+2*i-mask_end); } } - mask_avg = DIV32_16(mask_avg,C*st->end); + mask_avg = DIV32_16(mask_avg,count); + /* 
Just being conservative here */ + mask_avg -= HALF32(HALF32(mask_avg)); mask_avg = MAX16(mask_avg, -QCONST16(2.f, DB_SHIFT)); - diff = diff*6/(C*(st->end-1)*(st->end+1)*st->end); - diff = MAX32(MIN32(diff, QCONST32(.05f, DB_SHIFT)), -QCONST32(.05f, DB_SHIFT)); - for(i=0;iend;i++) + diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end); + /* Again, being conservative */ + diff = HALF32(diff); + diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT)); + for(i=0;iend)); + lin = mask_avg + HALF32(diff*(1+2*i-mask_end)); if (C==2) unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]) - lin; else -- cgit v1.2.3 From ae7dc8a13fb61a50204c5b28aac3c1c4c2e1c0dc Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Fri, 13 Sep 2013 15:05:50 -0400 Subject: More surround masking tuning --- celt/celt_encoder.c | 47 +++++++++++++++++++++++++++++++++++++---------- src/opus_encoder.c | 9 ++++++++- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index 41c28ab6..12b5581a 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -1538,6 +1538,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (st->start==0&&st->energy_mask&&!st->lfe) { int mask_end; + int midband; + int count_dynalloc; opus_val32 mask_avg=0; opus_val32 diff=0; int count=0; @@ -1546,38 +1548,63 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { for(i=0;ienergy_mask[nbEBands*c+i]*(eBands[i+1]-eBands[i]); - count += (eBands[i+1]-eBands[i]); - diff += st->energy_mask[i+c*nbEBands]*(opus_int32)(1+2*i-mask_end); + opus_val16 mask; + mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i], + QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); + if (mask > 0) + mask = HALF16(mask); + mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]); + count += eBands[i+1]-eBands[i]; + diff += MULT16_16(mask, 1+2*i-mask_end); } } mask_avg = DIV32_16(mask_avg,count); - /* Just being 
conservative here */ - mask_avg -= HALF32(HALF32(mask_avg)); - mask_avg = MAX16(mask_avg, -QCONST16(2.f, DB_SHIFT)); diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end); /* Again, being conservative */ diff = HALF32(diff); diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT)); + /* Find the band that's in the middle of the coded spectrum */ + for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++); + count_dynalloc=0; for(i=0;ienergy_mask[i], st->energy_mask[nbEBands+i]) - lin; + unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]); else - unmask = st->energy_mask[i] - lin; + unmask = st->energy_mask[i]; + unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT)); + unmask -= lin; if (unmask > QCONST16(.25f, DB_SHIFT)) { surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT); + count_dynalloc++; + } + } + if (count_dynalloc>=3) + { + /* If we need dynalloc in many bands, it's probably because our + initial masking rate was too low. */ + mask_avg += QCONST16(.25f, DB_SHIFT); + if (mask_avg>0) + { + /* Something went really wrong in the original calculations, + disabling masking. 
*/ + mask_avg = 0; + diff = 0; + for(i=0;ilfe) diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 5599d203..07115677 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -1499,7 +1499,14 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ for (c=0;cchannels;c++) { for(i=0;ienergy_masking[21*c+i]; + { + opus_val16 mask; + mask = MAX16(MIN16(st->energy_masking[21*c+i], + QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); + if (mask > 0) + mask = HALF16(mask); + mask_sum += mask; + } } /* Conservative rate reduction, we cut the masking in half */ masking_depth = HALF16(mask_sum / end*st->channels); -- cgit v1.2.3 From 6fbfed6a0b927f0b54340081675c116955dd3705 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Fri, 13 Sep 2013 17:16:38 -0400 Subject: Allowing surround to use lower bandwidth than fullband --- src/opus_multistream_encoder.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 3065b6f7..32c6f0aa 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -747,7 +747,18 @@ static int opus_multistream_encode_native opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s])); if (st->surround) { - opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + opus_int32 equiv_rate; + equiv_rate = st->bitrate_bps; + if (frame_size*50 < Fs) + equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels; + if (equiv_rate > 112000) + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + else if (equiv_rate > 76000) + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND)); + else if (equiv_rate > 48000) + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND)); + else + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND)); if (s < st->layout.nb_coupled_streams) { /* To preserve the spatial image, force stereo CELT on coupled streams 
*/ -- cgit v1.2.3 From 36a21ed53636ed4471aa62a8d06dcf175d6ff395 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Sat, 14 Sep 2013 15:46:09 -0400 Subject: Very basic surround rate calibration --- celt/celt_encoder.c | 2 ++ src/opus_encoder.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index 12b5581a..189b4129 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -1559,6 +1559,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, } } mask_avg = DIV32_16(mask_avg,count); + mask_avg += QCONST16(.2f, DB_SHIFT); diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end); /* Again, being conservative */ diff = HALF32(diff); @@ -1601,6 +1602,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT)); } } + mask_avg += QCONST16(.2f, DB_SHIFT); /* Convert to 1/64th units used for the trim */ surround_trim = 64*diff; /*printf("%d %d ", mask_avg, surround_trim);*/ diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 07115677..3823272c 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -1510,9 +1510,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ } /* Conservative rate reduction, we cut the masking in half */ masking_depth = HALF16(mask_sum / end*st->channels); - masking_depth = MAX16(QCONST16(-2.f, DB_SHIFT), MIN16(masking_depth, QCONST16(0.f, DB_SHIFT))); rate_offset = PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT); rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3); + rate_offset += QCONST16(.4f, DB_SHIFT); st->silk_mode.bitRate += rate_offset; bytes_target += rate_offset * frame_size / (8 * st->Fs); } -- cgit v1.2.3 From b90e63b5bab727bc33dce854b6a4d7e325fef3ab Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Mon, 16 Sep 2013 13:08:52 -0400 Subject: Moves frame size selection back out of 
opus_encode_native() Should fix delayed decision on surround, but not well tested. --- src/analysis.c | 25 +++--------- src/analysis.h | 6 +-- src/opus_encoder.c | 90 +++++++++++++++++++++++++++++++++++------- src/opus_multistream_encoder.c | 20 +++++++++- src/opus_private.h | 6 ++- 5 files changed, 107 insertions(+), 40 deletions(-) diff --git a/src/analysis.c b/src/analysis.c index b79e869a..e078b4cd 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -618,38 +618,25 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con RESTORE_STACK; } -int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const opus_val16 *pcm, - const void *analysis_pcm, int frame_size, int variable_duration, int c1, int c2, int C, opus_int32 Fs, int bitrate_bps, - int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) +int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, + int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, + int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) { int offset; int pcm_len; /* Avoid overflow/wrap-around of the analysis buffer */ - frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size); + analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size); - pcm_len = frame_size - analysis->analysis_offset; + pcm_len = analysis_frame_size - analysis->analysis_offset; offset = analysis->analysis_offset; do { tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); offset += 480; pcm_len -= 480; } while (pcm_len>0); - analysis->analysis_offset = frame_size; + analysis->analysis_offset = analysis_frame_size; - if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) - { - int LM = 3; - LM = optimize_framesize(pcm, frame_size, C, Fs, bitrate_bps, - analysis->prev_tonality, analysis->subframe_mem, 
delay_compensation, downmix); - while ((Fs/400<frame_size) - LM--; - frame_size = (Fs/400<analysis_offset -= frame_size; /* Only perform analysis up to 20-ms frames. Longer ones will be split if diff --git a/src/analysis.h b/src/analysis.h index ebd36c86..a4a22cae 100644 --- a/src/analysis.h +++ b/src/analysis.h @@ -83,8 +83,8 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len); -int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const opus_val16 *pcm, - const void *analysis_pcm, int frame_size, int variable_duration, int c1, int c2, int C, opus_int32 Fs, int bitrate_bps, - int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info); +int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, + int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, + int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info); #endif diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 3823272c..b0ec383a 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -849,6 +849,26 @@ opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_ return new_size; } +opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, + int variable_duration, int C, opus_int32 Fs, int bitrate_bps, + int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem) +{ + if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) + { + int LM = 3; + LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps, + 0, subframe_mem, delay_compensation, downmix); + while ((Fs/400<frame_size) + LM--; + frame_size = (Fs/400<silk_mode.complexity >= 7 && st->Fs==48000) + if (analysis_pcm != NULL && st->silk_mode.complexity >= 7 && st->Fs==48000) { - frame_size = run_analysis(&st->analysis, celt_mode, pcm, 
analysis_pcm, - frame_size, st->variable_duration, c1, c2, analysis_channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix, &analysis_info); + frame_size = run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, + c1, c2, analysis_channels, st->Fs, + lsb_depth, downmix, &analysis_info); } else #endif { @@ -1373,7 +1394,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->user_forced_mode = MODE_CELT_ONLY; tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, - analysis_pcm, c1, c2, analysis_channels, downmix); + NULL, 0, c1, c2, analysis_channels, downmix); if (tmp_len<0) { RESTORE_STACK; @@ -1915,49 +1936,88 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ #ifdef FIXED_POINT #ifndef DISABLE_FLOAT_API -opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size, +opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, unsigned char *data, opus_int32 max_data_bytes) { int i, ret; + int frame_size; + int delay_compensation; VARDECL(opus_int16, in); ALLOC_STACK; + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); + ALLOC(in, frame_size*st->channels, opus_int16); for (i=0;ichannels;i++) in[i] = FLOAT2INT16(pcm[i]); - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, 0, -2, st->channels, downmix_float); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); RESTORE_STACK; return ret; } #endif -opus_int32 opus_encode(OpusEncoder *st, const opus_int16 
*pcm, int frame_size, +opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, unsigned char *data, opus_int32 out_data_bytes) { - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, 0, -2, st->channels, downmix_int); + int frame_size; + int delay_compensation; + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); } #else -opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size, +opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, unsigned char *data, opus_int32 max_data_bytes) { int i, ret; + int frame_size; + int delay_compensation; VARDECL(float, in); ALLOC_STACK; - ALLOC(in, frame_size*st->channels, float); + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); - for (i=0;ichannels;i++) + ALLOC(in, analysis_frame_size*st->channels, float); + + for (i=0;ichannels;i++) in[i] = (1.0f/32768)*pcm[i]; - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, 0, -2, st->channels, downmix_int); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); RESTORE_STACK; return ret; } -opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size, +opus_int32 
opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, unsigned char *data, opus_int32 out_data_bytes) { - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, pcm, 0, -2, st->channels, downmix_float); + int frame_size; + int delay_compensation; + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); } #endif diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 32c6f0aa..31447bf9 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -72,6 +72,7 @@ typedef void (*opus_copy_channel_in_func)( struct OpusMSEncoder { ChannelLayout layout; int lfe_stream; + int application; int variable_duration; int surround; opus_int32 bitrate_bps; @@ -416,6 +417,7 @@ static int opus_multistream_encoder_init_impl( if (!surround) st->lfe_stream = -1; st->bitrate_bps = OPUS_AUTO; + st->application = application; st->variable_duration = OPUS_FRAMESIZE_ARG; for (i=0;ilayout.nb_channels;i++) st->layout.mapping[i] = mapping[i]; @@ -666,7 +668,7 @@ static int opus_multistream_encode_native OpusMSEncoder *st, opus_copy_channel_in_func copy_channel_in, const void *pcm, - int frame_size, + int analysis_frame_size, unsigned char *data, opus_int32 max_data_bytes, int lsb_depth, @@ -689,6 +691,7 @@ static int opus_multistream_encode_native opus_val16 bandLogE[42]; opus_val32 *mem = NULL; opus_val32 *preemph_mem=NULL; + int frame_size; ALLOC_STACK; if (st->surround) @@ -702,6 +705,18 @@ static int opus_multistream_encode_native opus_encoder_ctl((OpusEncoder*)ptr, 
OPUS_GET_COMPLEXITY(&complexity)); opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode)); + { + opus_int32 delay_compensation; + int channels; + + channels = st->layout.nb_streams + st->layout.nb_coupled_streams; + opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation)); + delay_compensation -= Fs/400; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, channels, Fs, st->bitrate_bps, + delay_compensation, downmix, st->subframe_mem); + } + if (400*frame_size < Fs) { RESTORE_STACK; @@ -822,7 +837,8 @@ static int opus_multistream_encode_native /* Reserve three bytes for the last stream and four for the others */ curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1); curr_max = IMIN(curr_max,MS_FRAME_TMP); - len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth, pcm, c1, c2, st->layout.nb_channels, downmix); + len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth, + pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix); if (len<0) { RESTORE_STACK; diff --git a/src/opus_private.h b/src/opus_private.h index 64c325ab..0e739ebb 100644 --- a/src/opus_private.h +++ b/src/opus_private.h @@ -94,9 +94,13 @@ int encode_size(int size, unsigned char *data); opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); +opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, + int variable_duration, int C, opus_int32 Fs, int bitrate_bps, + int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem); + opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, - const void *analysis_pcm, int c1, int c2, int analysis_channels, downmix_func downmix); + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix); int opus_decode_native(OpusDecoder *st, const unsigned char 
*data, opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited, -- cgit v1.2.3 From f5645f5c72fc214d60c297881d771d27af56c4bf Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Mon, 16 Sep 2013 14:36:53 -0400 Subject: Makes the fixed-point build only run the float analysis at complexity 10 Default complexity is now 9. --- src/opus_encoder.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/opus_encoder.c b/src/opus_encoder.c index b0ec383a..6db6d5de 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -201,7 +201,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat st->silk_mode.payloadSize_ms = 20; st->silk_mode.bitRate = 25000; st->silk_mode.packetLossPercentage = 0; - st->silk_mode.complexity = 10; + st->silk_mode.complexity = 9; st->silk_mode.useInBandFEC = 0; st->silk_mode.useDTX = 0; st->silk_mode.useCBR = 0; @@ -212,7 +212,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR; celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0)); - celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(10)); + celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity)); st->use_vbr = 1; /* Makes constrained VBR the default (safer for real-time use) */ @@ -853,6 +853,7 @@ opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps, int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem) { +#ifndef DISABLE_FLOAT_API if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) { int LM = 3; @@ -861,7 +862,9 @@ opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, while ((Fs/400<frame_size) LM--; frame_size = (Fs/400<silk_mode.complexity >= 10 && st->Fs==48000) +#else if (analysis_pcm != NULL && st->silk_mode.complexity >= 7 && st->Fs==48000) +#endif { frame_size = 
run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, c1, c2, analysis_channels, st->Fs, -- cgit v1.2.3 From a4c2512aa4fbd49142e2026ab5cdeacaa1e1e394 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Sat, 28 Sep 2013 17:22:41 -0400 Subject: run_analysis() doesn't need to return the frame size anymore --- src/analysis.c | 4 +--- src/analysis.h | 2 +- src/opus_encoder.c | 12 ++---------- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/analysis.c b/src/analysis.c index e078b4cd..c130e3a4 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -618,7 +618,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con RESTORE_STACK; } -int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, +void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) { @@ -643,6 +643,4 @@ int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, con they're in CELT-only mode. 
*/ analysis_info->valid = 0; tonality_get_info(analysis, analysis_info, frame_size); - - return frame_size; } diff --git a/src/analysis.h b/src/analysis.h index a4a22cae..cf37792d 100644 --- a/src/analysis.h +++ b/src/analysis.h @@ -83,7 +83,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len); -int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, +void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info); diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 6db6d5de..906ca376 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -1016,19 +1016,11 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (analysis_pcm != NULL && st->silk_mode.complexity >= 7 && st->Fs==48000) #endif { - frame_size = run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, + run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, c1, c2, analysis_channels, st->Fs, lsb_depth, downmix, &analysis_info); - } else -#endif - { - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); } - if(frame_size<0) - { - return OPUS_BAD_ARG; - } - +#endif st->voice_ratio = -1; -- cgit v1.2.3 From 292544453fc89a9a2d14781328f8b1a517e756bc Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Sat, 28 Sep 2013 19:29:23 -0400 Subject: Makes analysis work when encoding more than 20 ms at a time --- src/analysis.c | 33 +++++++++++++++++---------------- src/opus_encoder.c | 13 +++++++++++-- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/analysis.c b/src/analysis.c index c130e3a4..53247df9 100644 --- a/src/analysis.c +++ b/src/analysis.c 
@@ -625,22 +625,23 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co int offset; int pcm_len; - /* Avoid overflow/wrap-around of the analysis buffer */ - analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size); - - pcm_len = analysis_frame_size - analysis->analysis_offset; - offset = analysis->analysis_offset; - do { - tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); - offset += 480; - pcm_len -= 480; - } while (pcm_len>0); - analysis->analysis_offset = analysis_frame_size; - - analysis->analysis_offset -= frame_size; - - /* Only perform analysis up to 20-ms frames. Longer ones will be split if - they're in CELT-only mode. */ + if (analysis_pcm != NULL) + { + /* Avoid overflow/wrap-around of the analysis buffer */ + analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size); + + pcm_len = analysis_frame_size - analysis->analysis_offset; + offset = analysis->analysis_offset; + do { + tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); + offset += 480; + pcm_len -= 480; + } while (pcm_len>0); + analysis->analysis_offset = analysis_frame_size; + + analysis->analysis_offset -= frame_size; + } + analysis_info->valid = 0; tonality_get_info(analysis, analysis_info, frame_size); } diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 906ca376..3dcd35ae 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -982,6 +982,8 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ opus_val16 stereo_width; const CELTMode *celt_mode; AnalysisInfo analysis_info; + int analysis_read_pos_bak=-1; + int analysis_read_subframe_bak=-1; VARDECL(opus_val16, tmp_prefill); ALLOC_STACK; @@ -1011,11 +1013,13 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); #ifndef 
DISABLE_FLOAT_API #ifdef FIXED_POINT - if (analysis_pcm != NULL && st->silk_mode.complexity >= 10 && st->Fs==48000) + if (st->silk_mode.complexity >= 10 && st->Fs==48000) #else - if (analysis_pcm != NULL && st->silk_mode.complexity >= 7 && st->Fs==48000) + if (st->silk_mode.complexity >= 7 && st->Fs==48000) #endif { + analysis_read_pos_bak = st->analysis.read_pos; + analysis_read_subframe_bak = st->analysis.read_subframe; run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, c1, c2, analysis_channels, st->Fs, lsb_depth, downmix, &analysis_info); @@ -1362,6 +1366,11 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ VARDECL(OpusRepacketizer, rp); opus_int32 bytes_per_frame; + if (analysis_read_pos_bak!= -1) + { + st->analysis.read_pos = analysis_read_pos_bak; + st->analysis.read_subframe = analysis_read_subframe_bak; + } nb_frames = frame_size > st->Fs/25 ? 3 : 2; bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames); -- cgit v1.2.3 From 6b9087aa6efbb98731216d2b03cc977826fd6062 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Sat, 28 Sep 2013 23:51:11 -0400 Subject: Avoids unnecessary copying in opus_encode for delayed decision --- src/opus_encoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 3dcd35ae..c151fd07 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -2004,9 +2004,9 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, downmix_float, st->analysis.subframe_mem); - ALLOC(in, analysis_frame_size*st->channels, float); + ALLOC(in, frame_size*st->channels, float); - for (i=0;ichannels;i++) + for (i=0;ichannels;i++) in[i] = (1.0f/32768)*pcm[i]; ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); RESTORE_STACK; -- cgit v1.2.3