diff options
author | Linfeng Zhang <linfengz@google.com> | 2016-06-16 16:21:02 -0700 |
---|---|---|
committer | Felicia Lim <flim@google.com> | 2017-01-17 14:04:37 -0800 |
commit | 783ad76766e1f6b6aaca5d6eb415ac8a8269e1f2 (patch) | |
tree | 120458893d45586c9c1f3ee07135a71b4d22fc01 /celt/x86 | |
parent | c9ba55208c842a1681d82e7d7ff44fafedd2a853 (diff) | |
download | libopus-783ad76766e1f6b6aaca5d6eb415ac8a8269e1f2.tar.gz |
Revise celt_fir_c() to not pass in argument "mem"
The "mem" in celt_fir_c() either is contained in the head of input "x"
in reverse order already, or can be easily attached to the head of "x"
before calling the function. Removing argument "mem" can eliminate the
redundant buffer copies inside.
Update celt_fir_sse4_1() accordingly.
Diffstat (limited to 'celt/x86')
-rw-r--r-- | celt/x86/celt_lpc_sse.c | 51 | ||||
-rw-r--r-- | celt/x86/celt_lpc_sse.h | 10 | ||||
-rw-r--r-- | celt/x86/x86_celt_map.c | 1 |
3 files changed, 14 insertions, 48 deletions
diff --git a/celt/x86/celt_lpc_sse.c b/celt/x86/celt_lpc_sse.c index 67e5592a..12a9b0e5 100644 --- a/celt/x86/celt_lpc_sse.c +++ b/celt/x86/celt_lpc_sse.c @@ -40,63 +40,32 @@ #if defined(FIXED_POINT) -void celt_fir_sse4_1(const opus_val16 *_x, +void celt_fir_sse4_1(const opus_val16 *x, const opus_val16 *num, - opus_val16 *_y, + opus_val16 *y, int N, int ord, - opus_val16 *mem, int arch) { int i,j; VARDECL(opus_val16, rnum); - VARDECL(opus_val16, x); __m128i vecNoA; opus_int32 noA ; SAVE_STACK; ALLOC(rnum, ord, opus_val16); - ALLOC(x, N+ord, opus_val16); for(i=0;i<ord;i++) rnum[i] = num[ord-i-1]; - for(i=0;i<ord;i++) - x[i] = mem[ord-i-1]; - - for (i=0;i<N-7;i+=8) - { - x[i+ord ]=_x[i ]; - x[i+ord+1]=_x[i+1]; - x[i+ord+2]=_x[i+2]; - x[i+ord+3]=_x[i+3]; - x[i+ord+4]=_x[i+4]; - x[i+ord+5]=_x[i+5]; - x[i+ord+6]=_x[i+6]; - x[i+ord+7]=_x[i+7]; - } - - for (;i<N-3;i+=4) - { - x[i+ord ]=_x[i ]; - x[i+ord+1]=_x[i+1]; - x[i+ord+2]=_x[i+2]; - x[i+ord+3]=_x[i+3]; - } - - for (;i<N;i++) - x[i+ord]=_x[i]; - - for(i=0;i<ord;i++) - mem[i] = _x[N-i-1]; #ifdef SMALL_FOOTPRINT for (i=0;i<N;i++) { - opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); + opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); for (j=0;j<ord;j++) { - sum = MAC16_16(sum,rnum[j],x[i+j]); + sum = MAC16_16(sum,rnum[j],x[i+j-ord]); } - _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); + y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); } #else noA = EXTEND32(1) << SIG_SHIFT >> 1; @@ -107,22 +76,22 @@ void celt_fir_sse4_1(const opus_val16 *_x, opus_val32 sums[4] = {0}; __m128i vecSum, vecX; - xcorr_kernel(rnum, x+i, sums, ord, arch); + xcorr_kernel(rnum, x+i-ord, sums, ord, arch); vecSum = _mm_loadu_si128((__m128i *)sums); vecSum = _mm_add_epi32(vecSum, vecNoA); vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT); - vecX = OP_CVTEPI16_EPI32_M64(_x + i); + vecX = OP_CVTEPI16_EPI32_M64(x + i); vecSum = _mm_add_epi32(vecSum, vecX); vecSum = _mm_packs_epi32(vecSum, vecSum); - _mm_storel_epi64((__m128i *)(_y + i), vecSum); + _mm_storel_epi64((__m128i *)(y + i), vecSum); } for (;i<N;i++) { opus_val32 sum = 0; for (j=0;j<ord;j++) - sum = MAC16_16(sum, rnum[j], x[i + j]); - _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); + sum = MAC16_16(sum, rnum[j], x[i+j-ord]); + y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, SIG_SHIFT))); } #endif diff --git a/celt/x86/celt_lpc_sse.h b/celt/x86/celt_lpc_sse.h index c5ec796e..7d1ecf75 100644 --- a/celt/x86/celt_lpc_sse.h +++ b/celt/x86/celt_lpc_sse.h @@ -41,12 +41,11 @@ void celt_fir_sse4_1( opus_val16 *y, int N, int ord, - opus_val16 *mem, int arch); #if defined(OPUS_X86_PRESUME_SSE4_1) -#define celt_fir(x, num, y, N, ord, mem, arch) \ - ((void)arch, celt_fir_sse4_1(x, num, y, N, ord, mem, arch)) +#define celt_fir(x, num, y, N, ord, arch) \ + ((void)arch, celt_fir_sse4_1(x, num, y, N, ord, arch)) #else @@ -56,11 +55,10 @@ extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])( opus_val16 *y, int N, int ord, - opus_val16 *mem, int arch); -# define celt_fir(x, num, y, N, ord, mem, arch) \ - ((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, mem, arch)) +# define celt_fir(x, num, y, N, ord, arch) \ + ((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, arch)) #endif #endif diff --git a/celt/x86/x86_celt_map.c b/celt/x86/x86_celt_map.c index 80559a28..d39d88ed 100644 --- a/celt/x86/x86_celt_map.c +++ b/celt/x86/x86_celt_map.c @@ -47,7 +47,6 @@ void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])( opus_val16 *y, int N, int ord, - opus_val16 *mem, int arch ) = { celt_fir_c, /* non-sse */ |