diff options
author | flim <flim@google.com> | 2016-01-26 14:33:44 +0100 |
---|---|---|
committer | flim <flim@google.com> | 2016-03-04 17:40:32 +0100 |
commit | c91ee5b5642fcc4969150f73d5f6848f88bf1638 (patch) | |
tree | 900cb39b975dfed729e37e5810fef61fd92d7a70 /src | |
parent | 1391dbf0ccd121ce7a49d30e2142d36c8d404990 (diff) | |
download | libopus-c91ee5b5642fcc4969150f73d5f6848f88bf1638.tar.gz |
Upgrade Opus to v1.1.2android-wear-n-preview-3android-wear-n-preview-1android-n-preview-3
Change-Id: I8211751bab026ab236a612c6e0873f8bdbcd6c98
Diffstat (limited to 'src')
-rw-r--r-- | src/analysis.c | 44 | ||||
-rw-r--r-- | src/analysis.h | 17 | ||||
-rw-r--r-- | src/mlp.c | 117 | ||||
-rw-r--r-- | src/mlp.h | 8 | ||||
-rw-r--r-- | src/mlp_data.c | 4 | ||||
-rw-r--r-- | src/opus.c | 21 | ||||
-rw-r--r-- | src/opus_decoder.c | 91 | ||||
-rw-r--r-- | src/opus_demo.c | 43 | ||||
-rw-r--r-- | src/opus_encoder.c | 101 | ||||
-rw-r--r-- | src/opus_multistream_decoder.c | 4 | ||||
-rw-r--r-- | src/opus_multistream_encoder.c | 102 | ||||
-rw-r--r-- | src/opus_private.h | 19 | ||||
-rw-r--r-- | src/repacketizer.c | 5 |
13 files changed, 377 insertions, 199 deletions
diff --git a/src/analysis.c b/src/analysis.c index 778a62aa..360ebcc8 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -39,8 +39,6 @@ #include "mlp.h" #include "stack_alloc.h" -extern const MLP net; - #ifndef M_PI #define M_PI 3.141592653 #endif @@ -140,6 +138,21 @@ static OPUS_INLINE float fast_atan2f(float y, float x) { } } +void tonality_analysis_init(TonalityAnalysisState *tonal) +{ + /* Initialize reusable fields. */ + tonal->arch = opus_select_arch(); + /* Clear remaining fields. */ + tonality_analysis_reset(tonal); +} + +void tonality_analysis_reset(TonalityAnalysisState *tonal) +{ + /* Clear non-reusable fields. */ + char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START; + OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal)); +} + void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len) { int pos; @@ -189,7 +202,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int info_out->music_prob = psum; } -void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) +static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) { int i, b; const kiss_fft_state *kfft; @@ -262,7 +275,16 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); tonal->mem_fill = 240 + remaining; - opus_fft(kfft, in, out); + opus_fft(kfft, in, out, tonal->arch); +#ifndef FIXED_POINT + /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */ + if (celt_isnan(out[0].r)) + { + info->valid = 0; + RESTORE_STACK; + return; + } +#endif for (i=1;i<N2;i++) { @@ -334,6 +356,16 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con tE += binE*tonality[i]; nE += binE*2.f*(.5f-noisiness[i]); } +#ifndef FIXED_POINT + /* Check for extreme band energies that could cause NaNs later. */ + if (!(E<1e9f) || celt_isnan(E)) + { + info->valid = 0; + RESTORE_STACK; + return; + } +#endif + tonal->E[tonal->E_count][b] = E; frame_noisiness += nE/(1e-15f+E); @@ -611,8 +643,6 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/ info->noisiness = frame_noisiness; info->valid = 1; - if (info_out!=NULL) - OPUS_COPY(info_out, info, 1); RESTORE_STACK; } @@ -631,7 +661,7 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co pcm_len = analysis_frame_size - analysis->analysis_offset; offset = analysis->analysis_offset; do { - tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); + tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); offset += 480; pcm_len -= 480; } while (pcm_len>0); diff --git a/src/analysis.h b/src/analysis.h index be0388fa..9eae56a5 100644 --- a/src/analysis.h +++ b/src/analysis.h @@ -39,6 +39,8 @@ #define DETECT_SIZE 200 typedef struct { + int arch; +#define TONALITY_ANALYSIS_RESET_START angle float angle[240]; float d_angle[240]; float d2_angle[240]; @@ -78,8 +80,19 @@ typedef struct { AnalysisInfo info[DETECT_SIZE]; } TonalityAnalysisState; -void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, - const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix); +/** Initialize a TonalityAnalysisState struct. + * + * This performs some possibly slow initialization steps which should + * not be repeated every analysis step. No allocated memory is retained + * by the state struct, so no cleanup call is required. + */ +void tonality_analysis_init(TonalityAnalysisState *analysis); + +/** Reset a TonalityAnalysisState stuct. + * + * Call this when there's a discontinuity in the data. + */ +void tonality_analysis_reset(TonalityAnalysisState *analysis); void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len); @@ -41,77 +41,82 @@ #if 0 static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ { - int i; - opus_val16 xx; /* Q11 */ - /*double x, y;*/ - opus_val16 dy, yy; /* Q14 */ - /*x = 1.9073e-06*_x;*/ - if (_x>=QCONST32(8,19)) - return QCONST32(1.,14); - if (_x<=-QCONST32(8,19)) - return -QCONST32(1.,14); - xx = EXTRACT16(SHR32(_x, 8)); - /*i = lrint(25*x);*/ - i = SHR32(ADD32(1024,MULT16_16(25, xx)),11); - /*x -= .04*i;*/ - xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8)); - /*x = xx*(1./2048);*/ - /*y = tansig_table[250+i];*/ - yy = tansig_table[250+i]; - /*y = yy*(1./16384);*/ - dy = 16384-MULT16_16_Q14(yy,yy); - yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx))); - return yy; + int i; + opus_val16 xx; /* Q11 */ + /*double x, y;*/ + opus_val16 dy, yy; /* Q14 */ + /*x = 1.9073e-06*_x;*/ + if (_x>=QCONST32(8,19)) + return QCONST32(1.,14); + if (_x<=-QCONST32(8,19)) + return -QCONST32(1.,14); + xx = EXTRACT16(SHR32(_x, 8)); + /*i = lrint(25*x);*/ + i = SHR32(ADD32(1024,MULT16_16(25, xx)),11); + /*x -= .04*i;*/ + xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8)); + /*x = xx*(1./2048);*/ + /*y = tansig_table[250+i];*/ + yy = tansig_table[250+i]; + /*y = yy*(1./16384);*/ + dy = 16384-MULT16_16_Q14(yy,yy); + yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx))); + return yy; } #else /*extern const float tansig_table[501];*/ static OPUS_INLINE float tansig_approx(float x) { - int i; - float y, dy; - float sign=1; - /* Tests are reversed to catch NaNs */ + int i; + float y, dy; + float sign=1; + /* Tests are reversed to catch NaNs */ if (!(x<8)) return 1; if (!(x>-8)) return -1; - if (x<0) - { - x=-x; - sign=-1; - } - i = (int)floor(.5f+25*x); - x -= .04f*i; - y = tansig_table[i]; - dy = 1-y*y; - y = y + x*dy*(1 - y*x); - return sign*y; +#ifndef FIXED_POINT + /* Another check in case of -ffast-math */ + if (celt_isnan(x)) + return 0; +#endif + if (x<0) + { + x=-x; + sign=-1; + } + i = (int)floor(.5f+25*x); + x -= .04f*i; + y = tansig_table[i]; + dy = 1-y*y; + y = y + x*dy*(1 - y*x); + return sign*y; } #endif #if 0 void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out) { - int j; - opus_val16 hidden[MAX_NEURONS]; - const opus_val16 *W = m->weights; - /* Copy to tmp_in */ - for (j=0;j<m->topo[1];j++) - { - int k; - opus_val32 sum = SHL32(EXTEND32(*W++),8); - for (k=0;k<m->topo[0];k++) - sum = MAC16_16(sum, in[k],*W++); - hidden[j] = tansig_approx(sum); - } - for (j=0;j<m->topo[2];j++) - { - int k; - opus_val32 sum = SHL32(EXTEND32(*W++),14); - for (k=0;k<m->topo[1];k++) - sum = MAC16_16(sum, hidden[k], *W++); - out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17))); - } + int j; + opus_val16 hidden[MAX_NEURONS]; + const opus_val16 *W = m->weights; + /* Copy to tmp_in */ + for (j=0;j<m->topo[1];j++) + { + int k; + opus_val32 sum = SHL32(EXTEND32(*W++),8); + for (k=0;k<m->topo[0];k++) + sum = MAC16_16(sum, in[k],*W++); + hidden[j] = tansig_approx(sum); + } + for (j=0;j<m->topo[2];j++) + { + int k; + opus_val32 sum = SHL32(EXTEND32(*W++),14); + for (k=0;k<m->topo[1];k++) + sum = MAC16_16(sum, hidden[k], *W++); + out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17))); + } } #else void mlp_process(const MLP *m, const float *in, float *out) @@ -31,11 +31,13 @@ #include "arch.h" typedef struct { - int layers; - const int *topo; - const float *weights; + int layers; + const int *topo; + const float *weights; } MLP; +extern const MLP net; + void mlp_process(const MLP *m, const float *in, float *out); #endif /* _MLP_H_ */ diff --git a/src/mlp_data.c b/src/mlp_data.c index 401c4c02..c2fda4e2 100644 --- a/src/mlp_data.c +++ b/src/mlp_data.c @@ -1,6 +1,10 @@ /* The contents of this file was automatically generated by mlp_train.c It contains multi-layer perceptron (MLP) weights. */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + #include "mlp.h" /* RMS error was 0.138320, seed was 1361535663 */ @@ -166,6 +166,27 @@ static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *siz } } +int opus_packet_get_samples_per_frame(const unsigned char *data, + opus_int32 Fs) +{ + int audiosize; + if (data[0]&0x80) + { + audiosize = ((data[0]>>3)&0x3); + audiosize = (Fs<<audiosize)/400; + } else if ((data[0]&0x60) == 0x60) + { + audiosize = (data[0]&0x08) ? Fs/50 : Fs/100; + } else { + audiosize = ((data[0]>>3)&0x3); + if (audiosize == 3) + audiosize = Fs*60/1000; + else + audiosize = (Fs<<audiosize)/100; + } + return audiosize; +} + int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, int self_delimited, unsigned char *out_toc, const unsigned char *frames[48], opus_int16 size[48], diff --git a/src/opus_decoder.c b/src/opus_decoder.c index 919ba521..080bec50 100644 --- a/src/opus_decoder.c +++ b/src/opus_decoder.c @@ -33,7 +33,7 @@ # error "OPUS_BUILD _MUST_ be defined to build Opus. This probably means you need other defines as well, as in a config.h. See the included build files for details." #endif -#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__) +#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__) && !defined(OPUS_WILL_BE_SLOW) # pragma message "You appear to be compiling without optimization, if so opus will be very slow." #endif @@ -59,6 +59,7 @@ struct OpusDecoder { opus_int32 Fs; /** Sampling rate (at the API level) */ silk_DecControlStruct DecControl; int decode_gain; + int arch; /* Everything beyond this point gets cleared on a reset */ #define OPUS_DECODER_RESET_START stream_channels @@ -77,12 +78,6 @@ struct OpusDecoder { opus_uint32 rangeFinal; }; -#ifdef FIXED_POINT -static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { - return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; -} -#endif - int opus_decoder_get_size(int channels) { @@ -137,6 +132,7 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels) st->prev_mode = 0; st->frame_size = Fs/400; + st->arch = opus_select_arch(); return OPUS_OK; } @@ -215,7 +211,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, VARDECL(opus_val16, pcm_transition_silk); int pcm_transition_celt_size; VARDECL(opus_val16, pcm_transition_celt); - opus_val16 *pcm_transition; + opus_val16 *pcm_transition=NULL; int redundant_audio_size; VARDECL(opus_val16, redundant_audio); @@ -230,6 +226,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, int F2_5, F5, F10, F20; const opus_val16 *window; opus_uint32 redundant_rng = 0; + int celt_accum; ALLOC_STACK; silk_dec = (char*)st+st->silk_dec_offset; @@ -295,6 +292,14 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } } + /* In fixed-point, we can tell CELT to do the accumulation on top of the + SILK PCM buffer. This saves some stack space. */ +#ifdef FIXED_POINT + celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10); +#else + celt_accum = 0; +#endif + pcm_transition_silk_size = ALLOC_NONE; pcm_transition_celt_size = ALLOC_NONE; if (data!=NULL && st->prev_mode > 0 && ( @@ -325,14 +330,20 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } /* Don't allocate any memory when in CELT-only mode */ - pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; + pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; ALLOC(pcm_silk, pcm_silk_size, opus_int16); /* SILK processing */ if (mode != MODE_CELT_ONLY) { int lost_flag, decoded_samples; - opus_int16 *pcm_ptr = pcm_silk; + opus_int16 *pcm_ptr; +#ifdef FIXED_POINT + if (celt_accum) + pcm_ptr = pcm; + else +#endif + pcm_ptr = pcm_silk; if (st->prev_mode==MODE_CELT_ONLY) silk_InitDecoder( silk_dec ); @@ -366,7 +377,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, /* Call SILK decoder */ int first_frame = decoded_samples == 0; silk_ret = silk_Decode( silk_dec, &st->DecControl, - lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size ); + lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size, st->arch ); if( silk_ret ) { if (lost_flag) { /* PLC failure should not be fatal */ @@ -462,7 +473,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, { celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, - redundant_audio, F5, NULL); + redundant_audio, F5, NULL, 0); celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); } @@ -477,25 +488,28 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); /* Decode CELT */ celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data, - len, pcm, celt_frame_size, &dec); + len, pcm, celt_frame_size, &dec, celt_accum); } else { unsigned char silence[2] = {0xFF, 0xFF}; - for (i=0;i<frame_size*st->channels;i++) - pcm[i] = 0; + if (!celt_accum) + { + for (i=0;i<frame_size*st->channels;i++) + pcm[i] = 0; + } /* For hybrid -> SILK transitions, we let the CELT MDCT do a fade-out by decoding a silence frame */ if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) ) { celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL); + celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum); } } - if (mode != MODE_CELT_ONLY) + if (mode != MODE_CELT_ONLY && !celt_accum) { #ifdef FIXED_POINT for (i=0;i<frame_size*st->channels;i++) - pcm[i] = SAT16(pcm[i] + pcm_silk[i]); + pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i])); #else for (i=0;i<frame_size*st->channels;i++) pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]); @@ -514,7 +528,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL); + celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5, pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs); @@ -710,6 +724,7 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data, { VARDECL(opus_int16, out); int ret, i; + int nb_samples; ALLOC_STACK; if(frame_size<=0) @@ -717,6 +732,14 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data, RESTORE_STACK; return OPUS_BAD_ARG; } + if (data != NULL && len > 0 && !decode_fec) + { + nb_samples = opus_decoder_get_nb_samples(st, data, len); + if (nb_samples>0) + frame_size = IMIN(frame_size, nb_samples); + else + return OPUS_INVALID_PACKET; + } ALLOC(out, frame_size*st->channels, opus_int16); ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0); @@ -737,6 +760,7 @@ int opus_decode(OpusDecoder *st, const unsigned char *data, { VARDECL(float, out); int ret, i; + int nb_samples; ALLOC_STACK; if(frame_size<=0) @@ -745,6 +769,14 @@ int opus_decode(OpusDecoder *st, const unsigned char *data, return OPUS_BAD_ARG; } + if (data != NULL && len > 0 && !decode_fec) + { + nb_samples = opus_decoder_get_nb_samples(st, data, len); + if (nb_samples>0) + frame_size = IMIN(frame_size, nb_samples); + else + return OPUS_INVALID_PACKET; + } ALLOC(out, frame_size*st->channels, float); ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1); @@ -904,27 +936,6 @@ int opus_packet_get_bandwidth(const unsigned char *data) return bandwidth; } -int opus_packet_get_samples_per_frame(const unsigned char *data, - opus_int32 Fs) -{ - int audiosize; - if (data[0]&0x80) - { - audiosize = ((data[0]>>3)&0x3); - audiosize = (Fs<<audiosize)/400; - } else if ((data[0]&0x60) == 0x60) - { - audiosize = (data[0]&0x08) ? Fs/50 : Fs/100; - } else { - audiosize = ((data[0]>>3)&0x3); - if (audiosize == 3) - audiosize = Fs*60/1000; - else - audiosize = (Fs<<audiosize)/100; - } - return audiosize; -} - int opus_packet_get_nb_channels(const unsigned char *data) { return (data[0]&0x4) ? 2 : 1; diff --git a/src/opus_demo.c b/src/opus_demo.c index f8cdf031..9e99a3b2 100644 --- a/src/opus_demo.c +++ b/src/opus_demo.c @@ -48,7 +48,7 @@ void print_usage( char* argv[] ) "<bits per second> [options] <input> <output>\n", argv[0]); fprintf(stderr, " %s -d <sampling rate (Hz)> <channels (1/2)> " "[options] <input> <output>\n\n", argv[0]); - fprintf(stderr, "mode: voip | audio | restricted-lowdelay\n" ); + fprintf(stderr, "application: voip | audio | restricted-lowdelay\n" ); fprintf(stderr, "options:\n" ); fprintf(stderr, "-e : only runs the encoder (output the bit-stream)\n" ); fprintf(stderr, "-d : only runs the decoder (reads the bit-stream as input)\n" ); @@ -245,14 +245,14 @@ int main(int argc, char *argv[]) double bits=0.0, bits_max=0.0, bits_act=0.0, bits2=0.0, nrg; double tot_samples=0; opus_uint64 tot_in, tot_out; - int bandwidth=-1; + int bandwidth=OPUS_AUTO; const char *bandwidth_string; int lost = 0, lost_prev = 1; int toggle = 0; opus_uint32 enc_final_range[2]; opus_uint32 dec_final_range; int encode_only=0, decode_only=0; - int max_frame_size = 960*6; + int max_frame_size = 48000*2; int curr_read=0; int sweep_bps = 0; int random_framesize=0, newsize=0, delayed_celt=0; @@ -336,15 +336,12 @@ int main(int argc, char *argv[]) /* defaults: */ use_vbr = 1; - bandwidth = OPUS_AUTO; max_payload_bytes = MAX_PACKET; complexity = 10; use_inbandfec = 0; forcechannels = OPUS_AUTO; use_dtx = 0; packet_loss_perc = 0; - max_frame_size = 2*48000; - curr_read=0; while( args < argc - 2 ) { /* process command line options */ @@ -576,7 +573,7 @@ int main(int argc, char *argv[]) bandwidth_string = "fullband"; break; case OPUS_AUTO: - bandwidth_string = "auto"; + bandwidth_string = "auto bandwidth"; break; default: bandwidth_string = "unknown"; @@ -588,7 +585,7 @@ int main(int argc, char *argv[]) (long)sampling_rate, channels); else fprintf(stderr, "Encoding %ld Hz input at %.3f kb/s " - "in %s mode with %d-sample frames.\n", + "in %s with %d-sample frames.\n", (long)sampling_rate, bitrate_bps*0.001, bandwidth_string, frame_size); @@ -637,7 +634,7 @@ int main(int argc, char *argv[]) case 4: newsize=sampling_rate/25; break; case 5: newsize=3*sampling_rate/50; break; } - while (newsize < sampling_rate/25 && bitrate_bps-fabs(sweep_bps) <= 3*12*sampling_rate/newsize) + while (newsize < sampling_rate/25 && bitrate_bps-abs(sweep_bps) <= 3*12*sampling_rate/newsize) newsize*=2; if (newsize < sampling_rate/100 && frame_size >= sampling_rate/100) { @@ -837,35 +834,39 @@ int main(int argc, char *argv[]) } lost_prev = lost; - - /* count bits */ - bits += len[toggle]*8; - bits_max = ( len[toggle]*8 > bits_max ) ? len[toggle]*8 : bits_max; if( count >= use_inbandfec ) { - nrg = 0.0; + /* count bits */ + bits += len[toggle]*8; + bits_max = ( len[toggle]*8 > bits_max ) ? len[toggle]*8 : bits_max; + bits2 += len[toggle]*len[toggle]*64; if (!decode_only) { + nrg = 0.0; for ( k = 0; k < frame_size * channels; k++ ) { nrg += in[ k ] * (double)in[ k ]; } + nrg /= frame_size * channels; + if( nrg > 1e5 ) { + bits_act += len[toggle]*8; + count_act++; + } } - if ( ( nrg / ( frame_size * channels ) ) > 1e5 ) { - bits_act += len[toggle]*8; - count_act++; - } - /* Variance */ - bits2 += len[toggle]*len[toggle]*64; } count++; toggle = (toggle + use_inbandfec) & 1; } + + /* Print out bitrate statistics */ + if(decode_only) + frame_size = (int)(tot_samples / count); + count -= use_inbandfec; fprintf (stderr, "average bitrate: %7.3f kb/s\n", 1e-3*bits*sampling_rate/tot_samples); fprintf (stderr, "maximum bitrate: %7.3f kb/s\n", 1e-3*bits_max*sampling_rate/frame_size); if (!decode_only) fprintf (stderr, "active bitrate: %7.3f kb/s\n", - 1e-3*bits_act*sampling_rate/(frame_size*(double)count_act)); + 1e-3*bits_act*sampling_rate/(1e-15+frame_size*(double)count_act)); fprintf (stderr, "bitrate standard deviation: %7.3f kb/s\n", 1e-3*sqrt(bits2/count - bits*bits/(count*(double)count))*sampling_rate/frame_size); /* Close any files to which intermediate results were stored */ diff --git a/src/opus_encoder.c b/src/opus_encoder.c index fbd3de63..a7e19127 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -38,6 +38,7 @@ #include "float_cast.h" #include "opus.h" #include "arch.h" +#include "pitch.h" #include "opus_private.h" #include "os_support.h" #include "cpu_support.h" @@ -80,6 +81,10 @@ struct OpusEncoder { int lsb_depth; int encoder_buffer; int lfe; + int arch; +#ifndef DISABLE_FLOAT_API + TonalityAnalysisState analysis; +#endif #define OPUS_ENCODER_RESET_START stream_channels int stream_channels; @@ -99,12 +104,9 @@ struct OpusEncoder { StereoWidthState width_mem; opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; #ifndef DISABLE_FLOAT_API - TonalityAnalysisState analysis; int detected_bandwidth; - int analysis_offset; #endif opus_uint32 rangeFinal; - int arch; }; /* Transition tables for the voice and music. First column is the @@ -231,7 +233,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat st->lsb_depth = 24; st->variable_duration = OPUS_FRAMESIZE_ARG; - /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead + /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead + 1.5 ms for SILK resamplers and stereo prediction) */ st->delay_compensation = st->Fs/250; @@ -242,6 +244,10 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat st->mode = MODE_HYBRID; st->bandwidth = OPUS_BANDWIDTH_FULLBAND; +#ifndef DISABLE_FLOAT_API + tonality_analysis_init(&st->analysis); +#endif + return OPUS_OK; } @@ -648,7 +654,7 @@ static int transient_viterbi(const float *E, const float *E_1, int N, int frame_ return best_state; } -int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, +static int optimize_framesize(const void *x, int len, int C, opus_int32 Fs, int bitrate, opus_val16 tonality, float *mem, int buffering, downmix_func downmix) { @@ -660,6 +666,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int bestLM=0; int subframe; int pos; + int offset; VARDECL(opus_val32, sub); subframe = Fs/400; @@ -670,9 +677,8 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, { /* Consider the CELT delay when not in restricted-lowdelay */ /* We assume the buffering is between 2.5 and 5 ms */ - int offset = 2*subframe - buffering; + offset = 2*subframe - buffering; celt_assert(offset>=0 && offset <= subframe); - x += C*offset; len -= offset; e[1]=mem[1]; e_1[1]=1.f/(EPSILON+mem[1]); @@ -681,6 +687,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, pos = 3; } else { pos=1; + offset=0; } N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE); /* Just silencing a warning, it's really initialized later */ @@ -692,7 +699,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int j; tmp=EPSILON; - downmix(x, sub, subframe, i*subframe, 0, -2, C); + downmix(x, sub, subframe, i*subframe+offset, 0, -2, C); if (i==0) memx = sub[0]; for (j=0;j<subframe;j++) @@ -836,6 +843,12 @@ opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, LM--; frame_size = (Fs/400<<LM); } else +#else + (void)analysis_pcm; + (void)C; + (void)bitrate_bps; + (void)delay_compensation; + (void)downmix; #endif { frame_size = frame_size_select(frame_size, variable_duration, Fs); @@ -924,7 +937,8 @@ opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int3 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, - const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix) + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, + int analysis_channels, downmix_func downmix, int float_api) { void *silk_enc; CELTEncoder *celt_enc; @@ -954,9 +968,11 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ int total_buffer; opus_val16 stereo_width; const CELTMode *celt_mode; +#ifndef DISABLE_FLOAT_API AnalysisInfo analysis_info; int analysis_read_pos_bak=-1; int analysis_read_subframe_bak=-1; +#endif VARDECL(opus_val16, tmp_prefill); ALLOC_STACK; @@ -982,9 +998,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ lsb_depth = IMIN(lsb_depth, st->lsb_depth); - analysis_info.valid = 0; celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); #ifndef DISABLE_FLOAT_API + analysis_info.valid = 0; #ifdef FIXED_POINT if (st->silk_mode.complexity >= 10 && st->Fs==48000) #else @@ -997,6 +1013,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ c1, c2, analysis_channels, st->Fs, lsb_depth, downmix, &analysis_info); } +#else + (void)analysis_pcm; + (void)analysis_size; #endif st->voice_ratio = -1; @@ -1377,7 +1396,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->user_forced_mode = MODE_CELT_ONLY; tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, - NULL, 0, c1, c2, analysis_channels, downmix); + NULL, 0, c1, c2, analysis_channels, downmix, float_api); if (tmp_len<0) { RESTORE_STACK; @@ -1424,8 +1443,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ ec_enc_init(&enc, data, max_data_bytes-1); ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16); - for (i=0;i<total_buffer*st->channels;i++) - pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i]; + OPUS_COPY(pcm_buf, &st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels], total_buffer*st->channels); if (st->mode == MODE_CELT_ONLY) hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); @@ -1444,7 +1462,20 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ } else { dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); } - +#ifndef FIXED_POINT + if (float_api) + { + opus_val32 sum; + sum = celt_inner_prod(&pcm_buf[total_buffer*st->channels], &pcm_buf[total_buffer*st->channels], frame_size*st->channels, st->arch); + /* This should filter out both NaNs and ridiculous signals that could + cause NaNs further down. */ + if (!(sum < 1e9f) || celt_isnan(sum)) + { + OPUS_CLEAR(&pcm_buf[total_buffer*st->channels], frame_size*st->channels); + st->hp_mem[0] = st->hp_mem[1] = st->hp_mem[2] = st->hp_mem[3] = 0; + } + } +#endif /* SILK processing */ @@ -1599,8 +1630,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400); gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset, 0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs); - for(i=0;i<prefill_offset;i++) - st->delay_buffer[i]=0; + OPUS_CLEAR(st->delay_buffer, prefill_offset); #ifdef FIXED_POINT pcm_silk = st->delay_buffer; #else @@ -1727,15 +1757,18 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16); if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0) { - for (i=0;i<st->channels*st->Fs/400;i++) - tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i]; + OPUS_COPY(tmp_prefill, &st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels], st->channels*st->Fs/400); } - for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++) - st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size]; - for (;i<st->encoder_buffer*st->channels;i++) - st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i]; - + if (st->channels*(st->encoder_buffer-(frame_size+total_buffer)) > 0) + { + OPUS_MOVE(st->delay_buffer, &st->delay_buffer[st->channels*frame_size], st->channels*(st->encoder_buffer-frame_size-total_buffer)); + OPUS_COPY(&st->delay_buffer[st->channels*(st->encoder_buffer-frame_size-total_buffer)], + &pcm_buf[0], + (frame_size+total_buffer)*st->channels); + } else { + OPUS_COPY(st->delay_buffer, &pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels], st->encoder_buffer*st->channels); + } /* gain_fade() and stereo_fade() need to be after the buffer copying because we don't want any of this to affect the SILK part */ if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) { @@ -1955,7 +1988,8 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra for (i=0;i<frame_size*st->channels;i++) in[i] = FLOAT2INT16(pcm[i]); - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); RESTORE_STACK; return ret; } @@ -1977,7 +2011,8 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram , st->analysis.subframe_mem #endif ); - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); } #else @@ -2002,7 +2037,8 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram for (i=0;i<frame_size*st->channels;i++) in[i] = (1.0f/32768)*pcm[i]; - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); RESTORE_STACK; return ret; } @@ -2019,7 +2055,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_fra st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, downmix_float, st->analysis.subframe_mem); return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, - pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); + pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); } #endif @@ -2108,7 +2144,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) case OPUS_SET_MAX_BANDWIDTH_REQUEST: { opus_int32 value = va_arg(ap, opus_int32); - if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) + if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) { goto bad_arg; } @@ -2418,11 +2454,14 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) { void *silk_enc; silk_EncControlStruct dummy; + char *start; silk_enc = (char*)st+st->silk_enc_offset; +#ifndef DISABLE_FLOAT_API + tonality_analysis_reset(&st->analysis); +#endif - OPUS_CLEAR((char*)&st->OPUS_ENCODER_RESET_START, - sizeof(OpusEncoder)- - ((char*)&st->OPUS_ENCODER_RESET_START - (char*)st)); + start = (char*)&st->OPUS_ENCODER_RESET_START; + OPUS_CLEAR(start, sizeof(OpusEncoder) - (start - (char*)st)); celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); silk_InitEncoder( silk_enc, st->arch, &dummy ); diff --git a/src/opus_multistream_decoder.c b/src/opus_multistream_decoder.c index a05fa1e7..b95eaa6e 100644 --- a/src/opus_multistream_decoder.c +++ b/src/opus_multistream_decoder.c @@ -75,7 +75,7 @@ int opus_multistream_decoder_init( char *ptr; if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0)) + (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) return OPUS_BAD_ARG; st->layout.nb_channels = channels; @@ -119,7 +119,7 @@ OpusMSDecoder *opus_multistream_decoder_create( int ret; OpusMSDecoder *st; if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0)) + (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) { if (error) *error = OPUS_BAD_ARG; diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 49e27913..9e857735 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -41,6 +41,7 @@ #include "modes.h" #include "bands.h" #include "quant_bands.h" +#include "pitch.h" typedef struct { int nb_streams; @@ -71,6 +72,7 @@ typedef void (*opus_copy_channel_in_func)( struct OpusMSEncoder { ChannelLayout layout; + int arch; int lfe_stream; int application; int variable_duration; @@ -98,7 +100,8 @@ static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st) else ptr += align(mono_size); } - return (opus_val32*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32)); + /* void* cast avoids clang -Wcast-align warning */ + return (opus_val32*)(void*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32)); } static opus_val32 *ms_get_window_mem(OpusMSEncoder *st) @@ -117,7 +120,8 @@ static opus_val32 *ms_get_window_mem(OpusMSEncoder *st) else ptr += align(mono_size); } - return (opus_val32*)ptr; + /* void* cast avoids clang -Wcast-align warning */ + return (opus_val32*)(void*)ptr; } static int validate_encoder_layout(const ChannelLayout *layout) @@ -199,7 +203,7 @@ static opus_val16 logSum(opus_val16 a, opus_val16 b) max = b; diff = SUB32(EXTEND32(b),EXTEND32(a)); } - if (diff >= QCONST16(8.f, DB_SHIFT)) + if (!(diff < QCONST16(8.f, DB_SHIFT))) /* inverted to catch NaNs */ return max; #ifdef FIXED_POINT low = SHR32(diff, DB_SHIFT-1); @@ -218,7 +222,7 @@ opus_val16 logSum(opus_val16 a, opus_val16 b) #endif void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem, - int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in + int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in, int arch ) { int c; @@ -257,7 +261,21 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b OPUS_COPY(in, mem+c*overlap, overlap); (*copy_channel_in)(x, 1, pcm, channels, c, len); celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0); - clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1); +#ifndef FIXED_POINT + { + opus_val32 sum; + sum = celt_inner_prod(in, in, frame_size+overlap, 0); + /* This should filter out both NaNs and ridiculous signals that could + cause NaNs further down. */ + if (!(sum < 1e9f) || celt_isnan(sum)) + { + OPUS_CLEAR(in, frame_size+overlap); + preemph_mem[c] = 0; + } + } +#endif + clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, + overlap, celt_mode->maxLM-LM, 1, arch); if (upsample != 1) { int bound = len; @@ -267,7 +285,7 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b freq[i] = 0; } - compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<<LM); + compute_band_energies(celt_mode, freq, bandE, 21, 1, LM); amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1); /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */ for (i=1;i<21;i++) @@ -408,9 +426,10 @@ static int opus_multistream_encoder_init_impl( char *ptr; if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0)) + (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) return OPUS_BAD_ARG; + st->arch = opus_select_arch(); st->layout.nb_channels = channels; st->layout.nb_streams = streams; st->layout.nb_coupled_streams = coupled_streams; @@ -530,7 +549,7 @@ OpusMSEncoder *opus_multistream_encoder_create( int ret; OpusMSEncoder *st; if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0)) + (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) { if (error) *error = OPUS_BAD_ARG; @@ -566,6 +585,7 @@ OpusMSEncoder *opus_multistream_surround_encoder_create( ) { int ret; + opus_int32 size; OpusMSEncoder *st; if ((channels>255) || (channels<1)) { @@ -573,7 +593,14 @@ OpusMSEncoder *opus_multistream_surround_encoder_create( *error = OPUS_BAD_ARG; return NULL; } - st = (OpusMSEncoder *)opus_alloc(opus_multistream_surround_encoder_get_size(channels, mapping_family)); + size = opus_multistream_surround_encoder_get_size(channels, mapping_family); + if (!size) + { + if (error) + *error = OPUS_UNIMPLEMENTED; + return NULL; + } + st = (OpusMSEncoder *)opus_alloc(size); if (st==NULL) { if (error) @@ -591,7 +618,7 @@ OpusMSEncoder *opus_multistream_surround_encoder_create( return st; } -static void surround_rate_allocation( +static opus_int32 surround_rate_allocation( OpusMSEncoder *st, opus_int32 *rate, int frame_size @@ -605,6 +632,7 @@ static void surround_rate_allocation( int lfe_offset; int coupled_ratio; /* Q8 */ int lfe_ratio; /* Q8 */ + opus_int32 rate_sum=0; ptr = (char*)st + align(sizeof(OpusMSEncoder)); opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); @@ -660,7 +688,10 @@ static void surround_rate_allocation( rate[i] = stream_offset+channel_rate; else rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8); + rate[i] = IMAX(rate[i], 500); + rate_sum += rate[i]; } + return rate_sum; } /* Max size in case the encoder decides to return three frames */ @@ -674,7 +705,8 @@ static int opus_multistream_encode_native unsigned char *data, opus_int32 max_data_bytes, int lsb_depth, - downmix_func downmix + downmix_func downmix, + int float_api ) { opus_int32 Fs; @@ -694,6 +726,8 @@ static int opus_multistream_encode_native opus_val32 *mem = NULL; opus_val32 *preemph_mem=NULL; int frame_size; + opus_int32 rate_sum; + opus_int32 smallest_packet; ALLOC_STACK; if (st->surround) @@ -737,6 +771,14 @@ static int opus_multistream_encode_native RESTORE_STACK; return OPUS_BAD_ARG; } + + /* Smallest packet the encoder can produce. */ + smallest_packet = st->layout.nb_streams*2-1; + if (max_data_bytes < smallest_packet) + { + RESTORE_STACK; + return OPUS_BUFFER_TOO_SMALL; + } ALLOC(buf, 2*frame_size, opus_val16); coupled_size = opus_encoder_get_size(2); mono_size = opus_encoder_get_size(1); @@ -744,21 +786,23 @@ static int opus_multistream_encode_native ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16); if (st->surround) { - surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in); - } - - if (max_data_bytes < 4*st->layout.nb_streams-1) - { - RESTORE_STACK; - return OPUS_BUFFER_TOO_SMALL; + surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch); } /* Compute bitrate allocation between streams (this could be a lot better) */ - surround_rate_allocation(st, bitrates, frame_size); + rate_sum = surround_rate_allocation(st, bitrates, frame_size); if (!vbr) - max_data_bytes = IMIN(max_data_bytes, 3*st->bitrate_bps/(3*8*Fs/frame_size)); - + { + if (st->bitrate_bps == OPUS_AUTO) + { + max_data_bytes = IMIN(max_data_bytes, 3*rate_sum/(3*8*Fs/frame_size)); + } else if (st->bitrate_bps != OPUS_BITRATE_MAX) + { + max_data_bytes = IMIN(max_data_bytes, IMAX(smallest_packet, + 3*st->bitrate_bps/(3*8*Fs/frame_size))); + } + } ptr = (char*)st + align(sizeof(OpusMSEncoder)); for (s=0;s<st->layout.nb_streams;s++) { @@ -843,13 +887,15 @@ static int opus_multistream_encode_native opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE)); /* number of bytes left (+Toc) */ curr_max = max_data_bytes - tot_size; - /* Reserve three bytes for the last stream and four for the others */ - curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1); + /* Reserve one byte for the last stream and two for the others */ + curr_max -= IMAX(0,2*(st->layout.nb_streams-s-1)-1); curr_max = IMIN(curr_max,MS_FRAME_TMP); + /* Repacketizer will add one or two bytes for self-delimited frames */ + if (s != st->layout.nb_streams-1) curr_max -= curr_max>253 ? 2 : 1; if (!vbr && s == st->layout.nb_streams-1) opus_encoder_ctl(enc, OPUS_SET_BITRATE(curr_max*(8*Fs/frame_size))); len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth, - pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix); + pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix, float_api); if (len<0) { RESTORE_STACK; @@ -922,7 +968,7 @@ int opus_multistream_encode( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int); + pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0); } #ifndef DISABLE_FLOAT_API @@ -935,7 +981,7 @@ int opus_multistream_encode_float( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 16, downmix_float); + pcm, frame_size, data, max_data_bytes, 16, downmix_float, 1); } #endif @@ -951,7 +997,7 @@ int opus_multistream_encode_float ) { return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 24, downmix_float); + pcm, frame_size, data, max_data_bytes, 24, downmix_float, 1); } int opus_multistream_encode( @@ -963,7 +1009,7 @@ int opus_multistream_encode( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int); + pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0); } #endif diff --git a/src/opus_private.h b/src/opus_private.h index 83225f2b..3b62eed0 100644 --- a/src/opus_private.h +++ b/src/opus_private.h @@ -33,6 +33,8 @@ #include "opus.h" #include "celt.h" +#include <stddef.h> /* offsetof */ + struct OpusRepacketizer { unsigned char toc; int nb_frames; @@ -86,10 +88,6 @@ typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); -int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, - int bitrate, opus_val16 tonality, float *mem, int buffering, - downmix_func downmix); - int encode_size(int size, unsigned char *data); opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); @@ -104,16 +102,23 @@ opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, - const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix); + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, + int analysis_channels, downmix_func downmix, int float_api); int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited, opus_int32 *packet_offset, int soft_clip); -/* Make sure everything's aligned to sizeof(void *) bytes */ +/* Make sure everything is properly aligned. */ static OPUS_INLINE int align(int i) { - return (i+(int)sizeof(void *)-1)&-(int)sizeof(void *); + struct foo {char c; union { void* p; opus_int32 i; opus_val32 v; } u;}; + + unsigned int alignment = offsetof(struct foo, u); + + /* Optimizing compilers should optimize div and multiply into and + for all sensible alignment values. */ + return ((i + alignment - 1) / alignment) * alignment; } int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, diff --git a/src/repacketizer.c b/src/repacketizer.c index a62675ce..f27e9ab9 100644 --- a/src/repacketizer.c +++ b/src/repacketizer.c @@ -219,8 +219,9 @@ opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int } if (pad) { - for (i=ptr-data;i<maxlen;i++) - data[i] = 0; + /* Fill padding with zeros. */ + while (ptr<data+maxlen) + *ptr++=0; } return tot_size; } |