From ba8713ccb5fd1ee893630d2d1bcd86aacfc3aeea Mon Sep 17 00:00:00 2001 From: Viswanath Puttagunta Date: Fri, 15 May 2015 12:42:25 -0500 Subject: armv7: Optimize fixed point FFT using NE10 library Uses NEON optimized fixed point FFT routines in NE10 library. Signed-off-by: Timothy B. Terriberry --- celt/arm/arm_celt_map.c | 24 +++++++----- celt/arm/celt_ne10_fft.c | 95 +++++++++++++++++++++++++++++++----------------- celt/arm/fft_arm.h | 26 ++++++------- 3 files changed, 87 insertions(+), 58 deletions(-) (limited to 'celt/arm') diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c index 4c2d28ce..b5bd44dc 100644 --- a/celt/arm/arm_celt_map.c +++ b/celt/arm/arm_celt_map.c @@ -52,23 +52,26 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, celt_pitch_xcorr_c, /* Media */ celt_pitch_xcorr_float_neon /* Neon */ }; +# endif +# endif /* FIXED_POINT */ -# if defined(HAVE_ARM_NE10) -# if defined(CUSTOM_MODES) +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# if defined(HAVE_ARM_NE10) +# if defined(CUSTOM_MODES) int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { opus_fft_alloc_arch_c, /* ARMv4 */ opus_fft_alloc_arch_c, /* EDSP */ opus_fft_alloc_arch_c, /* Media */ - opus_fft_alloc_arm_float_neon /* Neon with NE10 library support */ + opus_fft_alloc_arm_neon /* Neon with NE10 library support */ }; void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { opus_fft_free_arch_c, /* ARMv4 */ opus_fft_free_arch_c, /* EDSP */ opus_fft_free_arch_c, /* Media */ - opus_fft_free_arm_float_neon /* Neon with NE10 */ + opus_fft_free_arm_neon /* Neon with NE10 */ }; -# endif /* CUSTOM_MODES */ +# endif /* CUSTOM_MODES */ void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, const kiss_fft_cpx *fin, @@ -76,7 +79,7 @@ void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, opus_fft_c, /* ARMv4 */ opus_fft_c, /* EDSP */ opus_fft_c, /* Media */ - opus_fft_float_neon /* Neon with NE10 */ + opus_fft_neon /* Neon with NE10 */ }; void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, @@ -85,9 +88,10 @@ void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, opus_ifft_c, /* ARMv4 */ opus_ifft_c, /* EDSP */ opus_ifft_c, /* Media */ - opus_ifft_float_neon /* Neon with NE10 */ + opus_ifft_neon /* Neon with NE10 */ }; +# if !defined(FIXED_POINT) void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, @@ -112,8 +116,8 @@ void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, clt_mdct_backward_float_neon /* Neon with NE10 */ }; -# endif /* HAVE_ARM_NE10 */ -# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */ -# endif /* FIXED_POINT */ +# endif /* !FIXED_POINT */ +# endif /* HAVE_ARM_NE10 */ +# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */ #endif /* OPUS_HAVE_RTCD */ diff --git a/celt/arm/celt_ne10_fft.c b/celt/arm/celt_ne10_fft.c index 2ba8c559..42d96a71 100644 --- a/celt/arm/celt_ne10_fft.c +++ b/celt/arm/celt_ne10_fft.c @@ -43,15 +43,31 @@ #include "stack_alloc.h" #if !defined(FIXED_POINT) -# if defined(CUSTOM_MODES) +# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon +# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t +# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t +# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32 +# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t +# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon +#else +# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft) +# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t +# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t +# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32 +# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32 +# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t +# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon +#endif + +#if defined(CUSTOM_MODES) /* nfft lengths in NE10 that support scaled fft */ -#define NE10_FFTSCALED_SUPPORT_MAX 4 +# define NE10_FFTSCALED_SUPPORT_MAX 4 static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = { 480, 240, 120, 60 }; -int opus_fft_alloc_arm_float_neon(kiss_fft_state *st) +int opus_fft_alloc_arm_neon(kiss_fft_state *st) { int i; size_t memneeded = sizeof(struct arch_fft_state); @@ -71,7 +87,7 @@ int opus_fft_alloc_arm_float_neon(kiss_fft_state *st) } else { st->arch_fft->is_supported = 1; - st->arch_fft->priv = (void *)ne10_fft_alloc_c2c_float32_neon(st->nfft); + st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft); if (st->arch_fft->priv == NULL) { return -1; } @@ -79,69 +95,80 @@ int opus_fft_alloc_arm_float_neon(kiss_fft_state *st) return 0; } -void opus_fft_free_arm_float_neon(kiss_fft_state *st) +void opus_fft_free_arm_neon(kiss_fft_state *st) { - ne10_fft_cfg_float32_t cfg; + NE10_FFT_CFG_TYPE_T cfg; if (!st->arch_fft) return; - cfg = (ne10_fft_cfg_float32_t)st->arch_fft->priv; + cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv; if (cfg) - ne10_fft_destroy_c2c_float32(cfg); + NE10_FFT_DESTROY_C2C_TYPE(cfg); opus_free(st->arch_fft); } -# endif +#endif -void opus_fft_float_neon(const kiss_fft_state *st, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout) +void opus_fft_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout) { - ne10_fft_state_float32_t state; - ne10_fft_cfg_float32_t cfg = &state; - VARDECL(ne10_fft_cpx_float32_t, buffer); + NE10_FFT_STATE_TYPE_T state; + NE10_FFT_CFG_TYPE_T cfg = &state; + VARDECL(NE10_FFT_CPX_TYPE_T, buffer); SAVE_STACK; - ALLOC(buffer, st->nfft, ne10_fft_cpx_float32_t); + ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T); if (!st->arch_fft->is_supported) { /* This nfft length (scaled fft) not supported in NE10 */ opus_fft_c(st, fin, fout); } else { - memcpy((void *)cfg, st->arch_fft->priv, sizeof(ne10_fft_state_float32_t)); - state.buffer = (ne10_fft_cpx_float32_t *)&buffer[0]; + memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T)); + state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0]; +#if !defined(FIXED_POINT) state.is_forward_scaled = 1; - ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout, - (ne10_fft_cpx_float32_t *)fin, - cfg, 0); + NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, + (NE10_FFT_CPX_TYPE_T *)fin, + cfg, 0); +#else + NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, + (NE10_FFT_CPX_TYPE_T *)fin, + cfg, 0, 1); +#endif } RESTORE_STACK; } -void opus_ifft_float_neon(const kiss_fft_state *st, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout) +void opus_ifft_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout) { - ne10_fft_state_float32_t state; - ne10_fft_cfg_float32_t cfg = &state; - VARDECL(ne10_fft_cpx_float32_t, buffer); + NE10_FFT_STATE_TYPE_T state; + NE10_FFT_CFG_TYPE_T cfg = &state; + VARDECL(NE10_FFT_CPX_TYPE_T, buffer); SAVE_STACK; - ALLOC(buffer, st->nfft, ne10_fft_cpx_float32_t); + ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T); if (!st->arch_fft->is_supported) { /* This nfft length (scaled fft) not supported in NE10 */ opus_ifft_c(st, fin, fout); } else { - memcpy((void *)cfg, st->arch_fft->priv, sizeof(ne10_fft_state_float32_t)); - state.buffer = (ne10_fft_cpx_float32_t *)&buffer[0]; + memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T)); + state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0]; +#if !defined(FIXED_POINT) state.is_backward_scaled = 0; - ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout, - (ne10_fft_cpx_float32_t *)fin, - cfg, 1); + NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, + (NE10_FFT_CPX_TYPE_T *)fin, + cfg, 1); +#else + NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, + (NE10_FFT_CPX_TYPE_T *)fin, + cfg, 1, 0); +#endif } RESTORE_STACK; } -#endif /* !defined(FIXED_POINT) */ diff --git a/celt/arm/fft_arm.h b/celt/arm/fft_arm.h index e57b0aa6..0cb55d8e 100644 --- a/celt/arm/fft_arm.h +++ b/celt/arm/fft_arm.h @@ -37,38 +37,36 @@ #include "config.h" #include "kiss_fft.h" -#if !defined(FIXED_POINT) #if defined(HAVE_ARM_NE10) -int opus_fft_alloc_arm_float_neon(kiss_fft_state *st); -void opus_fft_free_arm_float_neon(kiss_fft_state *st); +int opus_fft_alloc_arm_neon(kiss_fft_state *st); +void opus_fft_free_arm_neon(kiss_fft_state *st); -void opus_fft_float_neon(const kiss_fft_state *st, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout); +void opus_fft_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout); -void opus_ifft_float_neon(const kiss_fft_state *st, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout); +void opus_ifft_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout); #if !defined(OPUS_HAVE_RTCD) #define OVERRIDE_OPUS_FFT (1) #define opus_fft_alloc_arch(_st, arch) \ - ((void)(arch), opus_fft_alloc_arm_float_neon(_st)) + ((void)(arch), opus_fft_alloc_arm_neon(_st)) #define opus_fft_free_arch(_st, arch) \ - ((void)(arch), opus_fft_free_arm_float_neon(_st)) + ((void)(arch), opus_fft_free_arm_neon(_st)) #define opus_fft(_st, _fin, _fout, arch) \ - ((void)(arch), opus_fft_float_neon(_st, _fin, _fout)) + ((void)(arch), opus_fft_neon(_st, _fin, _fout)) #define opus_ifft(_st, _fin, _fout, arch) \ - ((void)(arch), opus_ifft_float_neon(_st, _fin, _fout)) + ((void)(arch), opus_ifft_neon(_st, _fin, _fout)) #endif /* OPUS_HAVE_RTCD */ #endif /* HAVE_ARM_NE10 */ -#endif /* FIXED_POINT */ #endif -- cgit v1.2.3