diff options
Diffstat (limited to 'src/opus_encoder.c')
-rw-r--r-- | src/opus_encoder.c | 349 |
1 files changed, 216 insertions, 133 deletions
diff --git a/src/opus_encoder.c b/src/opus_encoder.c index ac40edac..c151fd07 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -95,10 +95,10 @@ struct OpusEncoder { int silk_bw_switch; /* Sampling rate (at the API level) */ int first; - int energy_masking; + opus_val16 * energy_masking; StereoWidthState width_mem; opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API TonalityAnalysisState analysis; int detected_bandwidth; int analysis_offset; @@ -201,7 +201,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat st->silk_mode.payloadSize_ms = 20; st->silk_mode.bitRate = 25000; st->silk_mode.packetLossPercentage = 0; - st->silk_mode.complexity = 10; + st->silk_mode.complexity = 9; st->silk_mode.useInBandFEC = 0; st->silk_mode.useDTX = 0; st->silk_mode.useCBR = 0; @@ -212,7 +212,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR; celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0)); - celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(10)); + celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity)); st->use_vbr = 1; /* Makes constrained VBR the default (safer for real-time use) */ @@ -551,7 +551,7 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m return st->user_bitrate_bps; } -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API /* Don't use more than 60 ms for the frame size analysis */ #define MAX_DYNAMIC_FRAMESIZE 24 /* Estimates how much the bitrate will be boosted based on the sub-frame energy */ @@ -685,32 +685,6 @@ static int transient_viterbi(const float *E, const float *E_1, int N, int frame_ return best_state; } -void downmix_float(const void *_x, float *sub, int subframe, int offset, int C) -{ - const float *x; - int c, j; - x = (const float *)_x; - for (j=0;j<subframe;j++) - sub[j] = x[(j+offset)*C]; - for (c=1;c<C;c++) - for (j=0;j<subframe;j++) - sub[j] += x[(j+offset)*C+c]; -} - -void downmix_int(const void *_x, float *sub, int subframe, int offset, int C) -{ - const opus_int16 *x; - int c, j; - x = (const opus_int16 *)_x; - for (j=0;j<subframe;j++) - sub[j] = x[(j+offset)*C]; - for (c=1;c<C;c++) - for (j=0;j<subframe;j++) - sub[j] += x[(j+offset)*C+c]; - for (j=0;j<subframe;j++) - sub[j] *= (1.f/32768); -} - int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering, downmix_func downmix) @@ -723,10 +697,10 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int bestLM=0; int subframe; int pos; - VARDECL(opus_val16, sub); + VARDECL(opus_val32, sub); subframe = Fs/400; - ALLOC(sub, subframe, opus_val16); + ALLOC(sub, subframe, opus_val32); e[0]=mem[0]; e_1[0]=1.f/(EPSILON+mem[0]); if (buffering) @@ -754,7 +728,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int j; tmp=EPSILON; - downmix(x, sub, subframe, i*subframe, C); + downmix(x, sub, subframe, i*subframe, 0, -2, C); if (i==0) memx = sub[0]; for (j=0;j<subframe;j++) @@ -784,6 +758,76 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, #endif +#ifndef DISABLE_FLOAT_API +void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) +{ + const float *x; + opus_val32 scale; + int j; + x = (const float *)_x; + for (j=0;j<subframe;j++) + sub[j] = SCALEIN(x[(j+offset)*C+c1]); + if (c2>-1) + { + for (j=0;j<subframe;j++) + sub[j] += SCALEIN(x[(j+offset)*C+c2]); + } else if (c2==-2) + { + int c; + for (c=1;c<C;c++) + { + for (j=0;j<subframe;j++) + sub[j] += SCALEIN(x[(j+offset)*C+c]); + } + } +#ifdef FIXED_POINT + scale = (1<<SIG_SHIFT); +#else + scale = 1.f; +#endif + if (C==-2) + scale /= C; + else + scale /= 2; + for (j=0;j<subframe;j++) + sub[j] *= scale; +} +#endif + +void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) +{ + const opus_int16 *x; + opus_val32 scale; + int j; + x = (const opus_int16 *)_x; + for (j=0;j<subframe;j++) + sub[j] = x[(j+offset)*C+c1]; + if (c2>-1) + { + for (j=0;j<subframe;j++) + sub[j] += x[(j+offset)*C+c2]; + } else if (c2==-2) + { + int c; + for (c=1;c<C;c++) + { + for (j=0;j<subframe;j++) + sub[j] += x[(j+offset)*C+c]; + } + } +#ifdef FIXED_POINT + scale = (1<<SIG_SHIFT); +#else + scale = 1.f/32768; +#endif + if (C==-2) + scale /= C; + else + scale /= 2; + for (j=0;j<subframe;j++) + sub[j] *= scale; +} + opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs) { int new_size; @@ -805,6 +849,29 @@ opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_ return new_size; } +opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, + int variable_duration, int C, opus_int32 Fs, int bitrate_bps, + int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem) +{ +#ifndef DISABLE_FLOAT_API + if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) + { + int LM = 3; + LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps, + 0, subframe_mem, delay_compensation, downmix); + while ((Fs/400<<LM)>frame_size) + LM--; + frame_size = (Fs/400<<LM); + } else +#endif + { + frame_size = frame_size_select(frame_size, variable_duration, Fs); + } + if (frame_size<0) + return -1; + return frame_size; +} + opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem) { opus_val16 corr; @@ -883,11 +950,8 @@ opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int3 } opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, - unsigned char *data, opus_int32 out_data_bytes, int lsb_depth -#ifndef FIXED_POINT - , AnalysisInfo *analysis_info -#endif - ) + unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix) { void *silk_enc; CELTEncoder *celt_enc; @@ -916,6 +980,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */ int total_buffer; opus_val16 stereo_width; + const CELTMode *celt_mode; + AnalysisInfo analysis_info; + int analysis_read_pos_bak=-1; + int analysis_read_subframe_bak=-1; VARDECL(opus_val16, tmp_prefill); ALLOC_STACK; @@ -941,17 +1009,34 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ lsb_depth = IMIN(lsb_depth, st->lsb_depth); + analysis_info.valid = 0; + celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); +#ifndef DISABLE_FLOAT_API +#ifdef FIXED_POINT + if (st->silk_mode.complexity >= 10 && st->Fs==48000) +#else + if (st->silk_mode.complexity >= 7 && st->Fs==48000) +#endif + { + analysis_read_pos_bak = st->analysis.read_pos; + analysis_read_subframe_bak = st->analysis.read_subframe; + run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, + c1, c2, analysis_channels, st->Fs, + lsb_depth, downmix, &analysis_info); + } +#endif + st->voice_ratio = -1; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API st->detected_bandwidth = 0; - if (analysis_info->valid) + if (analysis_info.valid) { int analysis_bandwidth; if (st->signal_type == OPUS_AUTO) - st->voice_ratio = (int)floor(.5+100*(1-analysis_info->music_prob)); + st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob)); - analysis_bandwidth = analysis_info->bandwidth; + analysis_bandwidth = analysis_info.bandwidth; if (analysis_bandwidth<=12) st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; else if (analysis_bandwidth<=14) @@ -1281,6 +1366,11 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ VARDECL(OpusRepacketizer, rp); opus_int32 bytes_per_frame; + if (analysis_read_pos_bak!= -1) + { + st->analysis.read_pos = analysis_read_pos_bak; + st->analysis.read_subframe = analysis_read_subframe_bak; + } nb_frames = frame_size > st->Fs/25 ? 3 : 2; bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames); @@ -1310,11 +1400,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */ if (to_celt && i==nb_frames-1) st->user_forced_mode = MODE_CELT_ONLY; - tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth -#ifndef FIXED_POINT - , analysis_info -#endif - ); + tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, + tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, + NULL, 0, c1, c2, analysis_channels, downmix); if (tmp_len<0) { RESTORE_STACK; @@ -1419,6 +1507,45 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->silk_mode.bitRate = total_bitRate; } + /* Surround masking for SILK */ + if (st->energy_masking && st->use_vbr && !st->lfe) + { + opus_val32 mask_sum=0; + opus_val16 masking_depth; + opus_int32 rate_offset; + int c; + int end = 17; + opus_int16 srate = 16000; + if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND) + { + end = 13; + srate = 8000; + } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) + { + end = 15; + srate = 12000; + } + for (c=0;c<st->channels;c++) + { + for(i=0;i<end;i++) + { + opus_val16 mask; + mask = MAX16(MIN16(st->energy_masking[21*c+i], + QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); + if (mask > 0) + mask = HALF16(mask); + mask_sum += mask; + } + } + /* Conservative rate reduction, we cut the masking in half */ + masking_depth = HALF16(mask_sum / end*st->channels); + rate_offset = PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT); + rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3); + rate_offset += QCONST16(.4f, DB_SHIFT); + st->silk_mode.bitRate += rate_offset; + bytes_target += rate_offset * frame_size / (8 * st->Fs); + } + st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs; st->silk_mode.nChannelsAPI = st->channels; st->silk_mode.nChannelsInternal = st->stream_channels; @@ -1476,9 +1603,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (prefill) { opus_int32 zero=0; - const CELTMode *celt_mode; int prefill_offset; - celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode a discontinuity. The exact location is what we need to avoid leaving any "gap" in the audio when mixing with the redundant CELT frame. Here we can afford to @@ -1589,12 +1714,12 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->use_vbr) { opus_int32 bonus=0; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) { bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); - if (analysis_info->valid) - bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info->tonality)); + if (analysis_info.valid) + bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality)); } #endif celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1)); @@ -1625,9 +1750,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ /* gain_fade() and stereo_fade() need to be after the buffer copying because we don't want any of this to affect the SILK part */ if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) { - const CELTMode *celt_mode; - - celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); gain_fade(pcm_buf, pcm_buf, st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs); } @@ -1638,9 +1760,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ /* Apply stereo width reduction (at low bitrates) */ if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) { opus_val16 g1, g2; - const CELTMode *celt_mode; - - celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); g1 = st->hybrid_stereo_width_Q14; g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14); #ifdef FIXED_POINT @@ -1697,9 +1816,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ ec_enc_shrink(&enc, nb_compr_bytes); } -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (redundancy || st->mode != MODE_SILK_ONLY) - celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info)); + celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); #endif /* 5 ms redundant frame for CELT->SILK */ @@ -1825,114 +1944,88 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ #ifdef FIXED_POINT #ifndef DISABLE_FLOAT_API -opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size, +opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, unsigned char *data, opus_int32 max_data_bytes) { int i, ret; + int frame_size; + int delay_compensation; VARDECL(opus_int16, in); ALLOC_STACK; - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); - if(frame_size<0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); ALLOC(in, frame_size*st->channels, opus_int16); for (i=0;i<frame_size*st->channels;i++) in[i] = FLOAT2INT16(pcm[i]); - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); RESTORE_STACK; return ret; } #endif -opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size, +opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, unsigned char *data, opus_int32 out_data_bytes) { - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); - if(frame_size<0) - { - return OPUS_BAD_ARG; - } - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16); + int frame_size; + int delay_compensation; + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); } #else -opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size, +opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, unsigned char *data, opus_int32 max_data_bytes) { int i, ret; - const CELTMode *celt_mode; + int frame_size; int delay_compensation; - int lsb_depth; VARDECL(float, in); - AnalysisInfo analysis_info; ALLOC_STACK; - opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode)); if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) delay_compensation = 0; else delay_compensation = st->delay_compensation; - - lsb_depth = IMIN(16, st->lsb_depth); - - analysis_info.valid = 0; - if (st->silk_mode.complexity >= 7 && st->Fs==48000) - { - frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset, - frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_int, &analysis_info); - } else { - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); - } - if(frame_size<0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); ALLOC(in, frame_size*st->channels, float); for (i=0;i<frame_size*st->channels;i++) in[i] = (1.0f/32768)*pcm[i]; - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, &analysis_info); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); RESTORE_STACK; return ret; } -opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size, +opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, unsigned char *data, opus_int32 out_data_bytes) { - const CELTMode *celt_mode; + int frame_size; int delay_compensation; - int lsb_depth; - AnalysisInfo analysis_info; - - opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode)); if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) delay_compensation = 0; else delay_compensation = st->delay_compensation; - - lsb_depth = IMIN(24, st->lsb_depth); - - analysis_info.valid = 0; - if (st->silk_mode.complexity >= 7 && st->Fs==48000) - { - frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset, - frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_float, &analysis_info); - } else { - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); - } - if(frame_size<0) - { - return OPUS_BAD_ARG; - } - - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, &analysis_info); - + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); } #endif @@ -2349,20 +2442,10 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value)); } break; - case OPUS_SET_ENERGY_SAVE_REQUEST: - { - opus_val16 *value = va_arg(ap, opus_val16*); - if (!value) - { - goto bad_arg; - } - ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_SAVE(value)); - } - break; case OPUS_SET_ENERGY_MASK_REQUEST: { opus_val16 *value = va_arg(ap, opus_val16*); - st->energy_masking = (value!=NULL); + st->energy_masking = value; ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); } break; |