diff options
31 files changed, 1492 insertions, 143 deletions
diff --git a/Makefile.am b/Makefile.am index 4094e33e..4d3a8880 100644 --- a/Makefile.am +++ b/Makefile.am @@ -10,7 +10,7 @@ lib_LTLIBRARIES = libopus.la DIST_SUBDIRS = doc AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk \ - -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed + -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed $(NE10_CFLAGS) include celt_sources.mk include silk_sources.mk @@ -51,6 +51,10 @@ if OPUS_ARM_NEON_INTR CELT_SOURCES += $(CELT_SOURCES_ARM_NEON_INTR) endif +if HAVE_ARM_NE10 +CELT_SOURCES += $(CELT_SOURCES_ARM_NE10) +endif + if OPUS_ARM_EXTERNAL_ASM noinst_LTLIBRARIES = libarmasm.la libarmasm_la_SOURCES = $(CELT_SOURCES_ARM_ASM:.s=-gnu.S) @@ -69,7 +73,7 @@ include opus_headers.mk libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(OPUS_SOURCES) libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@ -libopus_la_LIBADD = $(LIBM) +libopus_la_LIBADD = $(NE10_LIBS) $(LIBM) if OPUS_ARM_EXTERNAL_ASM libopus_la_LIBADD += libarmasm.la endif @@ -85,32 +89,35 @@ TESTS = celt/tests/test_unit_types celt/tests/test_unit_mathops celt/tests/test_ opus_demo_SOURCES = src/opus_demo.c -opus_demo_LDADD = libopus.la $(LIBM) +opus_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM) repacketizer_demo_SOURCES = src/repacketizer_demo.c -repacketizer_demo_LDADD = libopus.la $(LIBM) +repacketizer_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM) opus_compare_SOURCES = src/opus_compare.c opus_compare_LDADD = $(LIBM) tests_test_opus_api_SOURCES = tests/test_opus_api.c tests/test_opus_common.h -tests_test_opus_api_LDADD = libopus.la $(LIBM) +tests_test_opus_api_LDADD = libopus.la $(NE10_LIBS) $(LIBM) tests_test_opus_encode_SOURCES = tests/test_opus_encode.c tests/test_opus_common.h -tests_test_opus_encode_LDADD = libopus.la $(LIBM) +tests_test_opus_encode_LDADD = libopus.la $(NE10_LIBS) $(LIBM) tests_test_opus_decode_SOURCES = tests/test_opus_decode.c tests/test_opus_common.h -tests_test_opus_decode_LDADD = libopus.la $(LIBM) +tests_test_opus_decode_LDADD = libopus.la $(NE10_LIBS) $(LIBM) tests_test_opus_padding_SOURCES = tests/test_opus_padding.c tests/test_opus_common.h -tests_test_opus_padding_LDADD = libopus.la $(LIBM) +tests_test_opus_padding_LDADD = libopus.la $(NE10_LIBS) $(LIBM) celt_tests_test_unit_cwrs32_SOURCES = celt/tests/test_unit_cwrs32.c celt_tests_test_unit_cwrs32_LDADD = $(LIBM) celt_tests_test_unit_dft_SOURCES = celt/tests/test_unit_dft.c -celt_tests_test_unit_dft_LDADD = $(LIBM) +celt_tests_test_unit_dft_LDADD = $(NE10_LIBS) $(LIBM) +if OPUS_ARM_EXTERNAL_ASM +celt_tests_test_unit_dft_LDADD += libarmasm.la +endif celt_tests_test_unit_entropy_SOURCES = celt/tests/test_unit_entropy.c celt_tests_test_unit_entropy_LDADD = $(LIBM) @@ -119,16 +126,19 @@ celt_tests_test_unit_laplace_SOURCES = celt/tests/test_unit_laplace.c celt_tests_test_unit_laplace_LDADD = $(LIBM) celt_tests_test_unit_mathops_SOURCES = celt/tests/test_unit_mathops.c -celt_tests_test_unit_mathops_LDADD = $(LIBM) +celt_tests_test_unit_mathops_LDADD = $(NE10_LIBS) $(LIBM) if OPUS_ARM_EXTERNAL_ASM celt_tests_test_unit_mathops_LDADD += libarmasm.la endif celt_tests_test_unit_mdct_SOURCES = celt/tests/test_unit_mdct.c -celt_tests_test_unit_mdct_LDADD = $(LIBM) +celt_tests_test_unit_mdct_LDADD = $(NE10_LIBS) $(LIBM) +if OPUS_ARM_EXTERNAL_ASM +celt_tests_test_unit_mdct_LDADD += libarmasm.la +endif celt_tests_test_unit_rotation_SOURCES = celt/tests/test_unit_rotation.c -celt_tests_test_unit_rotation_LDADD = $(LIBM) +celt_tests_test_unit_rotation_LDADD = $(NE10_LIBS) $(LIBM) if OPUS_ARM_EXTERNAL_ASM celt_tests_test_unit_rotation_LDADD += libarmasm.la endif @@ -286,5 +296,6 @@ endif if OPUS_ARM_NEON_INTR CELT_ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) -$(CELT_ARM_NEON_INTR_OBJ) $(OPT_UNIT_TEST_OBJ): CFLAGS += $(OPUS_ARM_NEON_INTR_CFLAGS) +$(CELT_ARM_NEON_INTR_OBJ) $(OPT_UNIT_TEST_OBJ): CFLAGS += \ + $(OPUS_ARM_NEON_INTR_CFLAGS) $(NE10_CFLAGS) endif diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c index 68c224df..87ba3b3e 100644 --- a/celt/arm/arm_celt_map.c +++ b/celt/arm/arm_celt_map.c @@ -30,6 +30,8 @@ #endif #include "pitch.h" +#include "kiss_fft.h" +#include "mdct.h" #if defined(OPUS_HAVE_RTCD) @@ -42,7 +44,7 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ }; # else /* !FIXED_POINT */ -# if defined(OPUS_ARM_NEON_INTR) +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *, opus_val32 *, int, int) = { celt_pitch_xcorr_c, /* ARMv4 */ @@ -50,7 +52,46 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, celt_pitch_xcorr_c, /* Media */ celt_pitch_xcorr_float_neon /* Neon */ }; -# endif -# endif -#endif +# if defined(HAVE_ARM_NE10) +# if defined(CUSTOM_MODES) +int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { + opus_fft_alloc_arch_c, /* ARMv4 */ + opus_fft_alloc_arch_c, /* EDSP */ + opus_fft_alloc_arch_c, /* Media */ + opus_fft_alloc_arm_float_neon /* Neon with NE10 library support */ +}; + +void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { + opus_fft_free_arch_c, /* ARMv4 */ + opus_fft_free_arch_c, /* EDSP */ + opus_fft_free_arch_c, /* Media */ + opus_fft_free_arm_float_neon /* Neon with NE10 */ +}; +# endif /* CUSTOM_MODES */ + +void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout) = { + opus_fft_c, /* ARMv4 */ + opus_fft_c, /* EDSP */ + opus_fft_c, /* Media */ + opus_fft_float_neon /* Neon with NE10 */ +}; + +void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, + kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, + int overlap, int shift, + int stride, int arch) = { + clt_mdct_forward_c, /* ARMv4 */ + clt_mdct_forward_c, /* EDSP */ + clt_mdct_forward_c, /* Media */ + clt_mdct_forward_float_neon /* Neon with NE10 */ +}; +# endif /* HAVE_ARM_NE10 */ +# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */ +# endif /* FIXED_POINT */ + +#endif /* OPUS_HAVE_RTCD */ diff --git a/celt/arm/celt_ne10_fft.c b/celt/arm/celt_ne10_fft.c new file mode 100644 index 00000000..fc4b0da0 --- /dev/null +++ b/celt/arm/celt_ne10_fft.c @@ -0,0 +1,121 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file celt_ne10_fft.c + @brief ARM Neon optimizations for fft using NE10 library + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SKIP_CONFIG_H +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#endif + +#include <NE10_init.h> +#include <NE10_dsp.h> +#include "os_support.h" +#include "kiss_fft.h" +#include "stack_alloc.h" + +#if !defined(FIXED_POINT) +# if defined(CUSTOM_MODES) + +/* nfft lengths in NE10 that support scaled fft */ +#define NE10_FFTSCALED_SUPPORT_MAX 4 +static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = { + 480, 240, 120, 60 +}; + +int opus_fft_alloc_arm_float_neon(kiss_fft_state *st) +{ + int i; + size_t memneeded = sizeof(struct arch_fft_state); + + st->arch_fft = (arch_fft_state *)opus_alloc(memneeded); + if (!st->arch_fft) + return -1; + + for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) { + if(st->nfft == ne10_fft_scaled_support[i]) + break; + } + if (i == NE10_FFTSCALED_SUPPORT_MAX) { + /* This nfft length (scaled fft) is not supported in NE10 */ + st->arch_fft->is_supported = 0; + st->arch_fft->priv = NULL; + } + else { + st->arch_fft->is_supported = 1; + st->arch_fft->priv = (void *)ne10_fft_alloc_c2c_float32_neon(st->nfft); + if (st->arch_fft->priv == NULL) { + return -1; + } + } + return 0; +} + +void opus_fft_free_arm_float_neon(kiss_fft_state *st) +{ + ne10_fft_cfg_float32_t cfg; + + if (!st->arch_fft) + return; + + cfg = (ne10_fft_cfg_float32_t)st->arch_fft->priv; + if (cfg) + ne10_fft_destroy_c2c_float32(cfg); + opus_free(st->arch_fft); +} +# endif + +void opus_fft_float_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout) +{ + ne10_fft_state_float32_t state; + ne10_fft_cfg_float32_t cfg = &state; + VARDECL(ne10_fft_cpx_float32_t, buffer); + SAVE_STACK; + ALLOC(buffer, st->nfft, ne10_fft_cpx_float32_t); + + if (!st->arch_fft->is_supported) { + /* This nfft length (scaled fft) not supported in NE10 */ + opus_fft_c(st, fin, fout); + } + else { + memcpy((void *)cfg, st->arch_fft->priv, sizeof(ne10_fft_state_float32_t)); + state.buffer = (ne10_fft_cpx_float32_t *)&buffer[0]; + state.is_forward_scaled = 1; + + ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout, + (ne10_fft_cpx_float32_t *)fin, + cfg, 0); + } + RESTORE_STACK; +} +#endif /* !defined(FIXED_POINT) */ diff --git a/celt/arm/celt_ne10_mdct.c b/celt/arm/celt_ne10_mdct.c new file mode 100644 index 00000000..1c6e9158 --- /dev/null +++ b/celt/arm/celt_ne10_mdct.c @@ -0,0 +1,159 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file celt_ne10_mdct.c + @brief ARM Neon optimizations for mdct using NE10 library + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SKIP_CONFIG_H +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#endif + +#include "kiss_fft.h" +#include "_kiss_fft_guts.h" +#include "mdct.h" +#include "stack_alloc.h" + +#if !defined(FIXED_POINT) + +void clt_mdct_forward_float_neon(const mdct_lookup *l, + kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, + int overlap, int shift, int stride, int arch) +{ + int i; + int N, N2, N4; + VARDECL(kiss_fft_scalar, f); + VARDECL(kiss_fft_cpx, f2); + const kiss_fft_state *st = l->kfft[shift]; + const kiss_twiddle_scalar *trig; + + SAVE_STACK; + + N = l->n; + trig = l->trig; + for (i=0;i<shift;i++) + { + N >>= 1; + trig += N; + } + N2 = N>>1; + N4 = N>>2; + + ALLOC(f, N2, kiss_fft_scalar); + ALLOC(f2, N4, kiss_fft_cpx); + + /* Consider the input to be composed of four blocks: [a, b, c, d] */ + /* Window, shuffle, fold */ + { + /* Temp pointers to make it really clear to the compiler what we're doing */ + const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); + const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); + kiss_fft_scalar * OPUS_RESTRICT yp = f; + const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); + const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; + for(i=0;i<((overlap+3)>>2);i++) + { + /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ + *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); + *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); + xp1+=2; + xp2-=2; + wp1+=2; + wp2-=2; + } + wp1 = window; + wp2 = window+overlap-1; + for(;i<N4-((overlap+3)>>2);i++) + { + /* Real part arranged as a-bR, Imag part arranged as -c-dR */ + *yp++ = *xp2; + *yp++ = *xp1; + xp1+=2; + xp2-=2; + } + for(;i<N4;i++) + { + /* Real part arranged as a-bR, Imag part arranged as -c-dR */ + *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2); + *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]); + xp1+=2; + xp2-=2; + wp1+=2; + wp2-=2; + } + } + /* Pre-rotation */ + { + kiss_fft_scalar * OPUS_RESTRICT yp = f; + const kiss_twiddle_scalar *t = &trig[0]; + for(i=0;i<N4;i++) + { + kiss_fft_cpx yc; + kiss_twiddle_scalar t0, t1; + kiss_fft_scalar re, im, yr, yi; + t0 = t[i]; + t1 = t[N4+i]; + re = *yp++; + im = *yp++; + yr = S_MUL(re,t0) - S_MUL(im,t1); + yi = S_MUL(im,t0) + S_MUL(re,t1); + yc.r = yr; + yc.i = yi; + f2[i] = yc; + } + } + + opus_fft(st, f2, (kiss_fft_cpx *)f, arch); + + /* Post-rotate */ + { + /* Temp pointers to make it really clear to the compiler what we're doing */ + const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f; + kiss_fft_scalar * OPUS_RESTRICT yp1 = out; + kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); + const kiss_twiddle_scalar *t = &trig[0]; + /* Temp pointers to make it really clear to the compiler what we're doing */ + for(i=0;i<N4;i++) + { + kiss_fft_scalar yr, yi; + yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]); + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); + *yp1 = yr; + *yp2 = yi; + fp++; + yp1 += 2*stride; + yp2 -= 2*stride; + } + } + RESTORE_STACK; +} +#endif /* !defined(FIXED_POINT) */ diff --git a/celt/arm/celt_neon_intr.c b/celt/arm/celt_neon_intr.c index 4a674133..47dce15b 100644 --- a/celt/arm/celt_neon_intr.c +++ b/celt/arm/celt_neon_intr.c @@ -29,9 +29,15 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + #include <arm_neon.h> #include "../pitch.h" +#if !defined(FIXED_POINT) /* * Function: xcorr_kernel_neon_float * --------------------------------- @@ -243,3 +249,4 @@ void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, (const float32_t *)_y+i, (float32_t *)xcorr+i, len); } } +#endif diff --git a/celt/arm/fft_arm.h b/celt/arm/fft_arm.h new file mode 100644 index 00000000..e7a30d69 --- /dev/null +++ b/celt/arm/fft_arm.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file fft_arm.h + @brief ARM Neon Intrinsic optimizations for fft using NE10 library + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#if !defined(FFT_ARM_H) +#define FFT_ARM_H + +#include "config.h" +#include "kiss_fft.h" + +#if !defined(FIXED_POINT) +#if defined(HAVE_ARM_NE10) + +int opus_fft_alloc_arm_float_neon(kiss_fft_state *st); +void opus_fft_free_arm_float_neon(kiss_fft_state *st); + +void opus_fft_float_neon(const kiss_fft_state *st, + const kiss_fft_cpx *fin, + kiss_fft_cpx *fout); +#if !defined(OPUS_HAVE_RTCD) +#define OVERRIDE_OPUS_FFT (1) + +#define opus_fft_alloc_arch(_st, arch) \ + ((void)(arch), opus_fft_alloc_arm_float_neon(_st)) + +#define opus_fft_free_arch(_st, arch) \ + ((void)(arch), opus_fft_free_arm_float_neon(_st)) + +#define opus_fft(_st, _fin, _fout, arch) \ + ((void)(arch), opus_fft_float_neon(_st, _fin, _fout)) + +#endif /* OPUS_HAVE_RTCD */ + +#endif /* HAVE_ARM_NE10 */ +#endif /* FIXED_POINT */ + +#endif diff --git a/celt/arm/mdct_arm.h b/celt/arm/mdct_arm.h new file mode 100644 index 00000000..7d60fedc --- /dev/null +++ b/celt/arm/mdct_arm.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/** + @file arm_mdct.h + @brief ARM Neon Intrinsic optimizations for mdct using NE10 library + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if !defined(MDCT_ARM_H) +#define MDCT_ARM_H + +#include "config.h" +#include "mdct.h" + +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) +/** Compute a forward MDCT and scale by 4/N, trashes the input array */ +void clt_mdct_forward_float_neon(const mdct_lookup *l, kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, int overlap, + int shift, int stride, int arch); + +#if !defined(OPUS_HAVE_RTCD) +#define OVERRIDE_OPUS_MDCT (1) +#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \ + clt_mdct_forward_float_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch) +#endif /* OPUS_HAVE_RTCD */ +#endif /* !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) */ + +#endif diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index 07e70711..af7d18b3 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -414,7 +414,8 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, /** Apply window and compute the MDCT for all sub-frames and all channels in a frame */ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in, - celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample) + celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample, + int arch) { const int overlap = mode->overlap; int N; @@ -435,7 +436,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS for (b=0;b<B;b++) { /* Interleaving the sub-frames while doing the MDCTs */ - clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B); + clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, + &out[b+c*N*B], mode->window, overlap, shift, B, + arch); } } while (++c<CC); if (CC==2&&C==1) @@ -1603,14 +1606,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(bandLogE2, C*nbEBands, opus_val16); if (secondMdct) { - compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample); + compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch); compute_band_energies(mode, freq, bandE, effEnd, C, LM); amp2Log2(mode, effEnd, end, bandE, bandLogE2, C); for (i=0;i<C*nbEBands;i++) bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT)); } - compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample); + compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); if (CC==2&&C==1) tf_chan = 0; compute_band_energies(mode, freq, bandE, effEnd, C, LM); @@ -1736,7 +1739,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { isTransient = 1; shortBlocks = M; - compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample); + compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); compute_band_energies(mode, freq, bandE, effEnd, C, LM); amp2Log2(mode, effEnd, end, bandE, bandLogE, C); /* Compensate for the scaling of short vs long mdcts */ diff --git a/celt/dump_modes/Makefile b/celt/dump_modes/Makefile index 763cb303..93f599fb 100644 --- a/celt/dump_modes/Makefile +++ b/celt/dump_modes/Makefile @@ -1,10 +1,32 @@ + CFLAGS=-O2 -Wall -Wextra -DHAVE_CONFIG_H INCLUDES=-I. -I../ -I../.. -I../../include +SOURCES = dump_modes.c \ + ../modes.c \ + ../cwrs.c \ + ../rate.c \ + ../entcode.c \ + ../entenc.c \ + ../entdec.c \ + ../mathops.c \ + ../mdct.c \ + ../kiss_fft.c + +ifdef HAVE_ARM_NE10 +CC = gcc +CFLAGS += -mfpu=neon +INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_PRESUME_NEON_INTR +LIBS = -L$(NE10_LIBDIR) -lNE10 +SOURCES += ../arm/celt_ne10_fft.c \ + dump_modes_arm_ne10.c \ + ../arm/armcpu.c +endif + all: dump_modes dump_modes: - $(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES dump_modes.c ../modes.c ../cwrs.c ../rate.c ../entcode.c ../entenc.c ../entdec.c ../mathops.c ../mdct.c ../kiss_fft.c -o dump_modes -lm + $(PREFIX)$(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES $(SOURCES) -o $@ $(LIBS) -lm clean: rm -f dump_modes diff --git a/celt/dump_modes/dump_modes.c b/celt/dump_modes/dump_modes.c index ae6a8c15..9105a534 100644 --- a/celt/dump_modes/dump_modes.c +++ b/celt/dump_modes/dump_modes.c @@ -35,6 +35,7 @@ #include "modes.h" #include "celt.h" #include "rate.h" +#include "dump_modes_arch.h" #define INT16 "%d" #define INT32 "%d" @@ -62,6 +63,10 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes) fprintf(file, "\n It contains static definitions for some pre-defined modes. */\n"); fprintf(file, "#include \"modes.h\"\n"); fprintf(file, "#include \"rate.h\"\n"); + fprintf(file, "\n#ifdef HAVE_ARM_NE10\n"); + fprintf(file, "#define OVERRIDE_FFT 1\n"); + fprintf(file, "#include \"%s\"\n", ARM_NE10_ARCH_FILE_NAME); + fprintf(file, "#endif\n"); fprintf(file, "\n"); @@ -149,6 +154,9 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes) fprintf (file, "{" WORD16 ", " WORD16 "},%c", mode->mdct.kfft[0]->twiddles[j].r, mode->mdct.kfft[0]->twiddles[j].i,(j+3)%2==0?'\n':' '); fprintf (file, "};\n"); +#ifdef OVERRIDE_FFT + dump_mode_arch(mode); +#endif /* FFT Bitrev tables */ for (k=0;k<=mode->mdct.maxshift;k++) { @@ -183,6 +191,13 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes) fprintf (file, "}, /* factors */\n"); fprintf (file, "fft_bitrev%d, /* bitrev */\n", mode->mdct.kfft[k]->nfft); fprintf (file, "fft_twiddles%d_%d, /* bitrev */\n", mode->Fs, mdctSize); + + fprintf (file, "#ifdef OVERRIDE_FFT\n"); + fprintf (file, "(arch_fft_state *)&cfg_arch_%d,\n", mode->mdct.kfft[k]->nfft); + fprintf (file, "#else\n"); + fprintf (file, "NULL,\n"); + fprintf(file, "#endif\n"); + fprintf (file, "};\n"); fprintf(file, "#endif\n"); @@ -323,8 +338,14 @@ int main(int argc, char **argv) } } file = fopen(BASENAME ".h", "w"); +#ifdef OVERRIDE_FFT + dump_modes_arch_init(m, nb); +#endif dump_modes(file, m, nb); fclose(file); +#ifdef OVERRIDE_FFT + dump_modes_arch_finalize(); +#endif for (i=0;i<nb;i++) opus_custom_mode_destroy(m[i]); free(m); diff --git a/celt/dump_modes/dump_modes_arch.h b/celt/dump_modes/dump_modes_arch.h new file mode 100644 index 00000000..1436926e --- /dev/null +++ b/celt/dump_modes/dump_modes_arch.h @@ -0,0 +1,41 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef DUMP_MODE_ARCH_H +#define DUMP_MODE_ARCH_H + +void dump_modes_arch_init(); +void dump_mode_arch(CELTMode *mode); +void dump_modes_arch_finalize(); + +#define ARM_NE10_ARCH_FILE_NAME "static_modes_float_arm_ne10.h" + +#if defined(HAVE_ARM_NE10) +#define OVERRIDE_FFT (1) +#endif + +#endif diff --git a/celt/dump_modes/dump_modes_arm_ne10.c b/celt/dump_modes/dump_modes_arm_ne10.c new file mode 100644 index 00000000..d37e7ada --- /dev/null +++ b/celt/dump_modes/dump_modes_arm_ne10.c @@ -0,0 +1,131 @@ +/* Copyright (c) 2015 Xiph.Org Foundation + Written by Viswanath Puttagunta */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if defined(HAVE_CONFIG_H) +# include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include "modes.h" +#include "dump_modes_arch.h" +#include <NE10_dsp.h> + +static FILE *file; + +void dump_modes_arch_init(CELTMode **modes, int nb_modes) +{ + int i; + + file = fopen(ARM_NE10_ARCH_FILE_NAME, "w"); + fprintf(file, "/* The contents of this file was automatically generated by\n"); + fprintf(file, " * dump_mode_arm_ne10.c with arguments:"); + for (i=0;i<nb_modes;i++) + { + CELTMode *mode = modes[i]; + fprintf(file, " %d %d",mode->Fs,mode->shortMdctSize*mode->nbShortMdcts); + } + fprintf(file, "\n * It contains static definitions for some pre-defined modes. */\n"); + fprintf(file, "#include <NE10_init.h>\n\n"); +} + +void dump_modes_arch_finalize() +{ + fclose(file); +} + +void dump_mode_arch(CELTMode *mode) +{ + int k, j; + int mdctSize; + + mdctSize = mode->shortMdctSize*mode->nbShortMdcts; + + fprintf(file, "#ifndef NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize); + fprintf(file, "#define NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize); + /* cfg->factors */ + for(k=0;k<=mode->mdct.maxshift;k++) { + ne10_fft_cfg_float32_t cfg; + cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->arch_fft->priv; + if (!cfg) + continue; + fprintf(file, "static const ne10_int32_t ne10_factors_%d[%d] = {\n", + mode->mdct.kfft[k]->nfft, (NE10_MAXFACTORS * 2)); + for(j=0;j<(NE10_MAXFACTORS * 2);j++) { + fprintf(file, "%d,%c", cfg->factors[j],(j+16)%15==0?'\n':' '); + } + fprintf (file, "};\n"); + } + + /* cfg->twiddles */ + for(k=0;k<=mode->mdct.maxshift;k++) { + ne10_fft_cfg_float32_t cfg; + cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->arch_fft->priv; + if (!cfg) + continue; + fprintf(file, "static const ne10_fft_cpx_float32_t ne10_twiddles_%d[%d] = {\n", + mode->mdct.kfft[k]->nfft, mode->mdct.kfft[k]->nfft); + for(j=0;j<mode->mdct.kfft[k]->nfft;j++) { + fprintf(file, "{%#0.8gf,%#0.8gf},%c", cfg->twiddles[j].r, cfg->twiddles[j].i,(j+4)%3==0?'\n':' '); + } + fprintf (file, "};\n"); + } + + for(k=0;k<=mode->mdct.maxshift;k++) { + ne10_fft_cfg_float32_t cfg; + cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->arch_fft->priv; + if (!cfg) { + fprintf(file, "/* Ne10 does not support scaled FFT for length = %d */\n", + mode->mdct.kfft[k]->nfft); + fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n", mode->mdct.kfft[k]->nfft); + fprintf(file, "0,\n"); + fprintf(file, "NULL\n"); + fprintf(file, "};\n"); + continue; + } + fprintf(file, "static const ne10_fft_state_float32_t ne10_fft_state_float32_%d = {\n", + mode->mdct.kfft[k]->nfft); + fprintf(file, "%d,\n", cfg->nfft); + fprintf(file, "(ne10_int32_t *)ne10_factors_%d,\n", mode->mdct.kfft[k]->nfft); + fprintf(file, "(ne10_fft_cpx_float32_t *)ne10_twiddles_%d,\n", mode->mdct.kfft[k]->nfft); + fprintf(file, "NULL,\n"); /* buffer */ + fprintf(file, "(ne10_fft_cpx_float32_t *)&ne10_twiddles_%d[%d],\n", + mode->mdct.kfft[k]->nfft, cfg->nfft); + fprintf(file, "/* is_forward_scaled = true */\n"); + fprintf(file, "(ne10_int32_t) 1,\n"); + fprintf(file, "/* is_backward_scaled = false */\n"); + fprintf(file, "(ne10_int32_t) 0,\n"); + fprintf(file, "};\n"); + + fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n", + mode->mdct.kfft[k]->nfft); + fprintf(file, "1,\n"); + fprintf(file, "(void *)&ne10_fft_state_float32_%d,\n", mode->mdct.kfft[k]->nfft); + fprintf(file, "};\n\n"); + } + fprintf(file, "#endif /* end NE10_FFT_PARAMS%d_%d */\n", mode->Fs, mdctSize); +} diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c index cc487fcf..38fd4fb6 100644 --- a/celt/kiss_fft.c +++ b/celt/kiss_fft.c @@ -423,13 +423,19 @@ static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft) #endif } +int opus_fft_alloc_arch_c(kiss_fft_state *st) { + (void)st; + return 0; +} + /* * * Allocates all necessary storage space for the fft and ifft. * The return value is a contiguous block of memory. As such, * It can be freed with free(). * */ -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base) +kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, + const kiss_fft_state *base, int arch) { kiss_fft_state *st=NULL; size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/ @@ -478,22 +484,31 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co if (st->bitrev==NULL) goto fail; compute_bitrev_table(0, bitrev, 1,1, st->factors,st); + + /* Initialize architecture specific fft parameters */ + if (opus_fft_alloc_arch(st, arch)) + goto fail; } return st; fail: - opus_fft_free(st); + opus_fft_free(st, arch); return NULL; } -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem ) +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch) { - return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL); + return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch); +} + +void opus_fft_free_arch_c(kiss_fft_state *st) { + (void)st; } -void opus_fft_free(const kiss_fft_state *cfg) +void opus_fft_free(const kiss_fft_state *cfg, int arch) { if (cfg) { + opus_fft_free_arch((kiss_fft_state *)cfg, arch); opus_free((opus_int16*)cfg->bitrev); if (cfg->shift < 0) opus_free((kiss_twiddle_cpx*)cfg->twiddles); @@ -551,7 +566,7 @@ void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) } } -void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) { int i; opus_val16 scale; diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h index 390b54d9..99f0e500 100644 --- a/celt/kiss_fft.h +++ b/celt/kiss_fft.h @@ -32,6 +32,7 @@ #include <stdlib.h> #include <math.h> #include "arch.h" +#include "cpu_support.h" #ifdef __cplusplus extern "C" { @@ -77,6 +78,11 @@ typedef struct { 4*4*4*2 */ +typedef struct arch_fft_state{ + int is_supported; + void *priv; +} arch_fft_state; + typedef struct kiss_fft_state{ int nfft; opus_val16 scale; @@ -87,8 +93,15 @@ typedef struct kiss_fft_state{ opus_int16 factors[2*MAXFACTORS]; const opus_int16 *bitrev; const kiss_twiddle_cpx *twiddles; +#ifndef FIXED_POINT + arch_fft_state *arch_fft; +#endif } kiss_fft_state; +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) +#include "arm/fft_arm.h" +#endif + /*typedef struct kiss_fft_state* kiss_fft_cfg;*/ /** @@ -114,9 +127,9 @@ typedef struct kiss_fft_state{ * buffer size in *lenmem. * */ -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base); +kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch); -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch); /** * opus_fft(cfg,in_out_buf) @@ -128,13 +141,49 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); * Note that each element is complex and can be accessed like f[k].r and f[k].i * */ -void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); -void opus_fft_free(const kiss_fft_state *cfg); +void opus_fft_free(const kiss_fft_state *cfg, int arch); + + +void opus_fft_free_arch_c(kiss_fft_state *st); +int opus_fft_alloc_arch_c(kiss_fft_state *st); + +#if !defined(OVERRIDE_OPUS_FFT) +/* Is run-time CPU detection enabled on this platform? */ +#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) + +extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])( + kiss_fft_state *st); + +#define opus_fft_alloc_arch(_st, arch) \ + ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) + +extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])( + kiss_fft_state *st); +#define opus_fft_free_arch(_st, arch) \ + ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) + +extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, + const kiss_fft_cpx *fin, kiss_fft_cpx *fout); +#define opus_fft(_cfg, _fin, _fout, arch) \ + ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) +#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ + +#define opus_fft_alloc_arch(_st, arch) \ + ((void)(arch), opus_fft_alloc_arch_c(_st)) + +#define opus_fft_free_arch(_st, arch) \ + ((void)(arch), opus_fft_free_arch_c(_st)) + +#define opus_fft(_cfg, _fin, _fout, arch) \ + ((void)(arch), opus_fft_c(_cfg, _fin, _fout)) +#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ +#endif /* end if !defined(OVERRIDE_OPUS_FFT) */ #ifdef __cplusplus } diff --git a/celt/mdct.c b/celt/mdct.c index 2795d90d..ee6d80ec 100644 --- a/celt/mdct.c +++ b/celt/mdct.c @@ -60,7 +60,7 @@ #ifdef CUSTOM_MODES -int clt_mdct_init(mdct_lookup *l,int N, int maxshift) +int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch) { int i; kiss_twiddle_scalar *trig; @@ -71,9 +71,9 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift) for (i=0;i<=maxshift;i++) { if (i==0) - l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0); + l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch); else - l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]); + l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch); #ifndef ENABLE_TI_DSPLIB55 if (l->kfft[i]==NULL) return 0; @@ -104,11 +104,11 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift) return 1; } -void clt_mdct_clear(mdct_lookup *l) +void clt_mdct_clear(mdct_lookup *l, int arch) { int i; for (i=0;i<=l->maxshift;i++) - opus_fft_free(l->kfft[i]); + opus_fft_free(l->kfft[i], arch); opus_free((kiss_twiddle_scalar*)l->trig); } @@ -116,8 +116,8 @@ void clt_mdct_clear(mdct_lookup *l) /* Forward MDCT trashes the input array */ #ifndef OVERRIDE_clt_mdct_forward -void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, int shift, int stride) +void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, int overlap, int shift, int stride, int arch) { int i; int N, N2, N4; @@ -132,6 +132,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar int scale_shift = st->scale_shift-1; #endif SAVE_STACK; + (void)arch; scale = st->scale; N = l->n; diff --git a/celt/mdct.h b/celt/mdct.h index d7218213..8aef9087 100644 --- a/celt/mdct.h +++ b/celt/mdct.h @@ -53,13 +53,19 @@ typedef struct { const kiss_twiddle_scalar * OPUS_RESTRICT trig; } mdct_lookup; -int clt_mdct_init(mdct_lookup *l,int N, int maxshift); -void clt_mdct_clear(mdct_lookup *l); +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) +#include "arm/mdct_arm.h" +#endif + + +int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch); +void clt_mdct_clear(mdct_lookup *l, int arch); /** Compute a forward MDCT and scale by 4/N, trashes the input array */ -void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, int shift, int stride); +void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, int overlap, + int shift, int stride, int arch); /** Compute a backward MDCT (no scaling) and performs weighted overlap-add (scales implicitly by 1/2) */ @@ -67,4 +73,25 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride); +#if !defined(OVERRIDE_OPUS_MDCT) +/* Is run-time CPU detection enabled on this platform? */ +#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) + +extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])( + const mdct_lookup *l, kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window, + int overlap, int shift, int stride, int arch); + +#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \ + ((*CLT_MDCT_FORWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \ + _window, _overlap, _shift, \ + _stride, _arch)) +#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ + +#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \ + clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) + +#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ +#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */ + #endif diff --git a/celt/modes.c b/celt/modes.c index 42e68e1c..911686e9 100644 --- a/celt/modes.c +++ b/celt/modes.c @@ -37,6 +37,7 @@ #include "os_support.h" #include "stack_alloc.h" #include "quant_bands.h" +#include "cpu_support.h" static const opus_int16 eband5ms[] = { /*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */ @@ -229,6 +230,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error) opus_val16 *window; opus_int16 *logN; int LM; + int arch = opus_select_arch(); ALLOC_STACK; #if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA) if (global_stack==NULL) @@ -389,7 +391,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error) compute_pulse_cache(mode, mode->maxLM); if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts, - mode->maxLM) == 0) + mode->maxLM, arch) == 0) goto failure; if (error) @@ -408,6 +410,8 @@ failure: #ifdef CUSTOM_MODES void opus_custom_mode_destroy(CELTMode *mode) { + int arch = opus_select_arch(); + if (mode == NULL) return; #ifndef CUSTOM_MODES_ONLY @@ -431,7 +435,7 @@ void opus_custom_mode_destroy(CELTMode *mode) opus_free((opus_int16*)mode->cache.index); opus_free((unsigned char*)mode->cache.bits); opus_free((unsigned char*)mode->cache.caps); - clt_mdct_clear(&mode->mdct); + clt_mdct_clear(&mode->mdct, arch); opus_free((CELTMode *)mode); } diff --git a/celt/pitch.h b/celt/pitch.h index af745eba..65a77a6e 100644 --- a/celt/pitch.h +++ b/celt/pitch.h @@ -47,7 +47,7 @@ #endif #if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \ - || defined(OPUS_ARM_NEON_INTR)) + || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) # include "arm/pitch_arm.h" #endif @@ -188,8 +188,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, #if !defined(OVERRIDE_PITCH_XCORR) /*Is run-time CPU detection enabled on this platform?*/ -# if defined(OPUS_HAVE_RTCD) && \ - (defined(OPUS_ARM_ASM) || (defined(OPUS_ARM_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))) +# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \ + || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \ + && !defined(OPUS_ARM_PRESUME_NEON_INTR))) extern # if defined(FIXED_POINT) opus_val32 diff --git a/celt/static_modes_float.h b/celt/static_modes_float.h index 2fadb625..e102a383 100644 --- a/celt/static_modes_float.h +++ b/celt/static_modes_float.h @@ -4,6 +4,11 @@ #include "modes.h" #include "rate.h" +#ifdef HAVE_ARM_NE10 +#define OVERRIDE_FFT 1 +#include "static_modes_float_arm_ne10.h" +#endif + #ifndef DEF_WINDOW120 #define DEF_WINDOW120 static const opus_val16 window120[120] = { @@ -431,6 +436,11 @@ static const kiss_fft_state fft_state48000_960_0 = { {5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev480, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_480, +#else +NULL, +#endif }; #endif @@ -443,6 +453,11 @@ static const kiss_fft_state fft_state48000_960_1 = { {5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev240, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_240, +#else +NULL, +#endif }; #endif @@ -455,6 +470,11 @@ static const kiss_fft_state fft_state48000_960_2 = { {5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev120, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_120, +#else +NULL, +#endif }; #endif @@ -467,6 +487,11 @@ static const kiss_fft_state fft_state48000_960_3 = { {5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev60, /* bitrev */ fft_twiddles48000_960, /* bitrev */ +#ifdef OVERRIDE_FFT +(arch_fft_state *)&cfg_arch_60, +#else +NULL, +#endif }; #endif diff --git a/celt/static_modes_float_arm_ne10.h b/celt/static_modes_float_arm_ne10.h new file mode 100644 index 00000000..5bcec707 --- /dev/null +++ b/celt/static_modes_float_arm_ne10.h @@ -0,0 +1,404 @@ +/* The contents of this file was automatically generated by + * dump_mode_arm_ne10.c with arguments: 48000 960 + * It contains static definitions for some pre-defined modes. */ +#include <NE10_init.h> + +#ifndef NE10_FFT_PARAMS48000_960 +#define NE10_FFT_PARAMS48000_960 +static const ne10_int32_t ne10_factors_480[64] = { +4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_240[64] = { +3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_120[64] = { +3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_int32_t ne10_factors_60[64] = { +2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, }; +static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = { +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, +{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f}, +{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f}, +{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f}, +{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f}, +{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f}, +{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f}, +{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f}, +{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f}, +{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f}, +{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f}, +{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f}, +{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f}, +{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f}, +{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f}, +{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f}, +{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f}, +{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f}, +{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f}, +{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f}, +{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f}, +{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f}, +{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f}, +{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f}, +{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f}, +{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f}, +{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f}, +{0.90814316f,-0.41865975f}, {0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f}, +{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f}, +{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f}, +{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f}, +{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f}, +{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f}, +{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f}, +{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f}, +{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f}, +{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f}, +{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f}, +{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f}, +{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f}, +{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f}, +{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f}, +{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f}, +{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f}, +{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f}, +{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f}, +{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f}, +{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f}, +{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f}, +{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f}, +{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, {0.20791166f,-0.97814763f}, +{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f}, +{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f}, +{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f}, +{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f}, +{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f}, +{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f}, +{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f}, +{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f}, +{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f}, +{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f}, +{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f}, +{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f}, +{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f}, +{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f}, +{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f}, +{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f}, +{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f}, +{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f}, +{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f}, +{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f}, +{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f}, +{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f}, +{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f}, +{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f}, +{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f}, +{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f}, +{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f}, +{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f}, +{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f}, +{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f}, +{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f}, +{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f}, +{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f}, +{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f}, +{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f}, +{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f}, +{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f}, +{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f}, +{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f}, +{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f}, +{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f}, +{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f}, +{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f}, +{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f}, +{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f}, +{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, {0.99691731f,-0.078459099f}, +{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f}, +{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f}, +{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f}, +{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f}, +{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f}, +{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f}, +{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f}, +{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f}, +{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f}, +{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f}, +{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f}, +{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f}, +{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f}, +{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f}, +{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f}, +{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f}, +{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f}, +{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f}, +{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f}, +{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f}, +{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f}, +{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f}, +{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, {-0.93819135f,-0.34611690f}, +{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f}, +{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f}, +{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f}, +{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f}, +{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f}, +{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f}, +{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f}, +{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f}, +{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f}, +{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f}, +{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f}, +{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f}, +{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f}, +{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f}, +{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f}, +{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f}, +}; +static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = { +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, +{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f}, +{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f}, +{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f}, +{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f}, +{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f}, +{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f}, +{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f}, +{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f}, +{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f}, +{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f}, +{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f}, +{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f}, +{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f}, +{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f}, +{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f}, +{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f}, +{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f}, +{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f}, +{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f}, +{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f}, +{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f}, +{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f}, +{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f}, +{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f}, +{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f}, +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, +{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f}, +{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f}, +{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f}, +{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f}, +{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f}, +{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f}, +{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f}, +{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f}, +{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f}, +{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f}, +{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f}, +{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f}, +{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f}, +{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f}, +{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f}, +{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f}, +{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f}, +{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f}, +{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f}, +{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f}, +{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f}, +{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f}, +{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f}, +{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f}, +{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f}, +{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, {-0.85264015f,0.52249861f}, +{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f}, +{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f}, +{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f}, +{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f}, +}; +static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = { +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, +{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f}, +{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f}, +{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f}, +{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f}, +{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f}, +{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f}, +{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f}, +{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f}, +{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f}, +{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f}, +{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f}, +{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f}, +{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f}, +{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f}, +{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f}, +}; +static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = { +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, +{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f}, +{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f}, +{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f}, +{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f}, +{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f}, +}; +static const ne10_fft_state_float32_t ne10_fft_state_float32_480 = { +120, +(ne10_int32_t *)ne10_factors_480, +(ne10_fft_cpx_float32_t *)ne10_twiddles_480, +NULL, +(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120], +/* is_forward_scaled = true */ +(ne10_int32_t) 1, +/* is_backward_scaled = false */ +(ne10_int32_t) 0, +}; +static const arch_fft_state cfg_arch_480 = { +1, +(void *)&ne10_fft_state_float32_480, +}; + +static const ne10_fft_state_float32_t ne10_fft_state_float32_240 = { +60, +(ne10_int32_t *)ne10_factors_240, +(ne10_fft_cpx_float32_t *)ne10_twiddles_240, +NULL, +(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60], +/* is_forward_scaled = true */ +(ne10_int32_t) 1, +/* is_backward_scaled = false */ +(ne10_int32_t) 0, +}; +static const arch_fft_state cfg_arch_240 = { +1, +(void *)&ne10_fft_state_float32_240, +}; + +static const ne10_fft_state_float32_t ne10_fft_state_float32_120 = { +30, +(ne10_int32_t *)ne10_factors_120, +(ne10_fft_cpx_float32_t *)ne10_twiddles_120, +NULL, +(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30], +/* is_forward_scaled = true */ +(ne10_int32_t) 1, +/* is_backward_scaled = false */ +(ne10_int32_t) 0, +}; +static const arch_fft_state cfg_arch_120 = { +1, +(void *)&ne10_fft_state_float32_120, +}; + +static const ne10_fft_state_float32_t ne10_fft_state_float32_60 = { +15, +(ne10_int32_t *)ne10_factors_60, +(ne10_fft_cpx_float32_t *)ne10_twiddles_60, +NULL, +(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15], +/* is_forward_scaled = true */ +(ne10_int32_t) 1, +/* is_backward_scaled = false */ +(ne10_int32_t) 0, +}; +static const arch_fft_state cfg_arch_60 = { +1, +(void *)&ne10_fft_state_float32_60, +}; + +#endif /* end NE10_FFT_PARAMS48000_960 */ diff --git a/celt/tests/test_unit_dft.c b/celt/tests/test_unit_dft.c index 57db0e3a..5fb8bcb4 100644 --- a/celt/tests/test_unit_dft.c +++ b/celt/tests/test_unit_dft.c @@ -45,6 +45,23 @@ #include "mathops.c" #include "entcode.c" +#if defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) +# include "x86/x86cpu.c" +#elif defined(OPUS_HAVE_RTCD) && \ + (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) +# include "arm/armcpu.c" +# include "celt_lpc.c" +# include "pitch.c" +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# include "arm/celt_neon_intr.c" +# if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) +# include "mdct.c" +# include "arm/celt_ne10_fft.c" +# include "arm/celt_ne10_mdct.c" +# endif +# endif +# include "arm/arm_celt_map.c" +#endif #ifndef M_PI #define M_PI 3.141592653 @@ -93,13 +110,13 @@ void check(kiss_fft_cpx * in,kiss_fft_cpx * out,int nfft,int isinverse) } } -void test1d(int nfft,int isinverse) +void test1d(int nfft,int isinverse,int arch) { size_t buflen = sizeof(kiss_fft_cpx)*nfft; kiss_fft_cpx * in = (kiss_fft_cpx*)malloc(buflen); kiss_fft_cpx * out= (kiss_fft_cpx*)malloc(buflen); - kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0); + kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0,arch); int k; for (k=0;k<nfft;++k) { @@ -125,7 +142,7 @@ void test1d(int nfft,int isinverse) if (isinverse) opus_ifft(cfg,in,out); else - opus_fft(cfg,in,out); + opus_fft(cfg,in,out, arch); /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/ @@ -139,26 +156,28 @@ void test1d(int nfft,int isinverse) int main(int argc,char ** argv) { ALLOC_STACK; + int arch = opus_select_arch(); + if (argc>1) { int k; for (k=1;k<argc;++k) { - test1d(atoi(argv[k]),0); - test1d(atoi(argv[k]),1); + test1d(atoi(argv[k]),0,arch); + test1d(atoi(argv[k]),1,arch); } }else{ - test1d(32,0); - test1d(32,1); - test1d(128,0); - test1d(128,1); - test1d(256,0); - test1d(256,1); + test1d(32,0,arch); + test1d(32,1,arch); + test1d(128,0,arch); + test1d(128,1,arch); + test1d(256,0,arch); + test1d(256,1,arch); #ifndef RADIX_TWO_ONLY - test1d(36,0); - test1d(36,1); - test1d(50,0); - test1d(50,1); - test1d(120,0); - test1d(120,1); + test1d(36,0,arch); + test1d(36,1,arch); + test1d(50,0,arch); + test1d(50,1,arch); + test1d(120,0,arch); + test1d(120,1,arch); #endif } return ret; diff --git a/celt/tests/test_unit_mathops.c b/celt/tests/test_unit_mathops.c index 2de39baf..2f43704f 100644 --- a/celt/tests/test_unit_mathops.c +++ b/celt/tests/test_unit_mathops.c @@ -52,23 +52,30 @@ #include "celt.c" #if defined(OPUS_X86_MAY_HAVE_SSE) || defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) -#if defined(OPUS_X86_MAY_HAVE_SSE) -#include "x86/pitch_sse.c" -#endif -#if defined(OPUS_X86_MAY_HAVE_SSE2) -#include "x86/pitch_sse2.c" -#endif -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) -#include "x86/pitch_sse4_1.c" -#include "x86/celt_lpc_sse.c" -#endif -#include "x86/x86_celt_map.c" -#elif ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \ - || defined(OPUS_ARM_NEON_INTR)) -#if defined(OPUS_ARM_NEON_INTR) -#include "arm/celt_neon_intr.c" -#endif -#include "arm/arm_celt_map.c" +# if defined(OPUS_X86_MAY_HAVE_SSE) +# include "x86/pitch_sse.c" +# endif +# if defined(OPUS_X86_MAY_HAVE_SSE2) +# include "x86/pitch_sse2.c" +# endif +# if defined(OPUS_X86_MAY_HAVE_SSE4_1) +# include "x86/pitch_sse4_1.c" +# include "x86/celt_lpc_sse.c" +# endif +# include "x86/x86_celt_map.c" +#elif defined(OPUS_HAVE_RTCD) && \ + (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) +# include "arm/armcpu.c" +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# include "arm/celt_neon_intr.c" +# if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) +# include "kiss_fft.c" +# include "mdct.c" +# include "arm/celt_ne10_fft.c" +# include "arm/celt_ne10_mdct.c" +# endif +# endif +# include "arm/arm_celt_map.c" #endif #ifdef FIXED_POINT diff --git a/celt/tests/test_unit_mdct.c b/celt/tests/test_unit_mdct.c index ac8957fd..4a524e64 100644 --- a/celt/tests/test_unit_mdct.c +++ b/celt/tests/test_unit_mdct.c @@ -46,6 +46,23 @@ #include "mathops.c" #include "entcode.c" +#if defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) +# include "x86/x86cpu.c" +#elif defined(OPUS_HAVE_RTCD) && \ + (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) +# include "arm/armcpu.c" +# include "pitch.c" +# include "celt_lpc.c" +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# include "arm/celt_neon_intr.c" +# if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) +# include "arm/celt_ne10_fft.c" +# include "arm/celt_ne10_mdct.c" +# endif +# endif +# include "arm/arm_celt_map.c" +#endif + #ifndef M_PI #define M_PI 3.141592653 #endif @@ -112,7 +129,7 @@ void check_inv(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinver } -void test1d(int nfft,int isinverse) +void test1d(int nfft,int isinverse,int arch) { mdct_lookup cfg; size_t buflen = sizeof(kiss_fft_scalar)*nfft; @@ -123,7 +140,7 @@ void test1d(int nfft,int isinverse) opus_val16 * window= (opus_val16*)malloc(sizeof(opus_val16)*nfft/2); int k; - clt_mdct_init(&cfg, nfft, 0); + clt_mdct_init(&cfg, nfft, 0, arch); for (k=0;k<nfft;++k) { in[k] = (rand() % 32768) - 16384; } @@ -156,7 +173,7 @@ void test1d(int nfft,int isinverse) out[nfft-k-1] = out[nfft/2+k]; check_inv(in,out,nfft,isinverse); } else { - clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1); + clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1, arch); check(in_copy,out,nfft,isinverse); } /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/ @@ -164,46 +181,48 @@ void test1d(int nfft,int isinverse) free(in); free(out); - clt_mdct_clear(&cfg); + clt_mdct_clear(&cfg, arch); } int main(int argc,char ** argv) { ALLOC_STACK; + int arch = opus_select_arch(); + if (argc>1) { int k; for (k=1;k<argc;++k) { - test1d(atoi(argv[k]),0); - test1d(atoi(argv[k]),1); + test1d(atoi(argv[k]),0,arch); + test1d(atoi(argv[k]),1,arch); } }else{ - test1d(32,0); - test1d(32,1); - test1d(256,0); - test1d(256,1); - test1d(512,0); - test1d(512,1); - test1d(1024,0); - test1d(1024,1); - test1d(2048,0); - test1d(2048,1); + test1d(32,0,arch); + test1d(32,1,arch); + test1d(256,0,arch); + test1d(256,1,arch); + test1d(512,0,arch); + test1d(512,1,arch); + test1d(1024,0,arch); + test1d(1024,1,arch); + test1d(2048,0,arch); + test1d(2048,1,arch); #ifndef RADIX_TWO_ONLY - test1d(36,0); - test1d(36,1); - test1d(40,0); - test1d(40,1); - test1d(60,0); - test1d(60,1); - test1d(120,0); - test1d(120,1); - test1d(240,0); - test1d(240,1); - test1d(480,0); - test1d(480,1); - test1d(960,0); - test1d(960,1); - test1d(1920,0); - test1d(1920,1); + test1d(36,0,arch); + test1d(36,1,arch); + test1d(40,0,arch); + test1d(40,1,arch); + test1d(60,0,arch); + test1d(60,1,arch); + test1d(120,0,arch); + test1d(120,1,arch); + test1d(240,0,arch); + test1d(240,1,arch); + test1d(480,0,arch); + test1d(480,1,arch); + test1d(960,0,arch); + test1d(960,1,arch); + test1d(1920,0,arch); + test1d(1920,1,arch); #endif } return ret; diff --git a/celt/tests/test_unit_rotation.c b/celt/tests/test_unit_rotation.c index 4780005f..932cd247 100644 --- a/celt/tests/test_unit_rotation.c +++ b/celt/tests/test_unit_rotation.c @@ -50,23 +50,30 @@ #include <math.h> #if defined(OPUS_X86_MAY_HAVE_SSE) || defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) -#if defined(OPUS_X86_MAY_HAVE_SSE) -#include "x86/pitch_sse.c" -#endif -#if defined(OPUS_X86_MAY_HAVE_SSE2) -#include "x86/pitch_sse2.c" -#endif -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) -#include "x86/pitch_sse4_1.c" -#include "x86/celt_lpc_sse.c" -#endif -#include "x86/x86_celt_map.c" -#elif ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \ - || defined(OPUS_ARM_NEON_INTR)) -#if defined(OPUS_ARM_NEON_INTR) -#include "arm/celt_neon_intr.c" -#endif -#include "arm/arm_celt_map.c" +# if defined(OPUS_X86_MAY_HAVE_SSE) +# include "x86/pitch_sse.c" +# endif +# if defined(OPUS_X86_MAY_HAVE_SSE2) +# include "x86/pitch_sse2.c" +# endif +# if defined(OPUS_X86_MAY_HAVE_SSE4_1) +# include "x86/pitch_sse4_1.c" +# include "x86/celt_lpc_sse.c" +# endif +# include "x86/x86_celt_map.c" +#elif defined(OPUS_HAVE_RTCD) && \ + (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) +# include "arm/armcpu.c" +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# include "arm/celt_neon_intr.c" +# if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) +# include "kiss_fft.c" +# include "mdct.c" +# include "arm/celt_ne10_fft.c" +# include "arm/celt_ne10_mdct.c" +# endif +# endif +# include "arm/arm_celt_map.c" #endif #define MAX_SIZE 100 diff --git a/celt_headers.mk b/celt_headers.mk index d422e090..5dc9e1e1 100644 --- a/celt_headers.mk +++ b/celt_headers.mk @@ -31,12 +31,15 @@ celt/stack_alloc.h \ celt/vq.h \ celt/static_modes_float.h \ celt/static_modes_fixed.h \ +celt/static_modes_float_arm_ne10.h \ celt/arm/armcpu.h \ celt/arm/fixed_armv4.h \ celt/arm/fixed_armv5e.h \ celt/arm/kiss_fft_armv4.h \ celt/arm/kiss_fft_armv5e.h \ celt/arm/pitch_arm.h \ +celt/arm/fft_arm.h \ +celt/arm/mdct_arm.h \ celt/mips/celt_mipsr1.h \ celt/mips/fixed_generic_mipsr1.h \ celt/mips/kiss_fft_mipsr1.h \ diff --git a/celt_sources.mk b/celt_sources.mk index c92693fe..2ffe99a3 100644 --- a/celt_sources.mk +++ b/celt_sources.mk @@ -38,3 +38,7 @@ celt/arm/armopts.s.in CELT_SOURCES_ARM_NEON_INTR = \ celt/arm/celt_neon_intr.c + +CELT_SOURCES_ARM_NE10= \ +celt/arm/celt_ne10_fft.c \ +celt/arm/celt_ne10_mdct.c diff --git a/configure.ac b/configure.ac index de75c094..d94fc7e2 100644 --- a/configure.ac +++ b/configure.ac @@ -378,6 +378,80 @@ AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86 AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")]) AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])]) +AC_DEFUN([OPUS_PATH_NE10], + [ + AC_ARG_WITH(NE10, + AC_HELP_STRING([--with-NE10=PFX],[Prefix where libNE10 is installed (optional)]), + NE10_prefix="$withval", NE10_prefix="") + AC_ARG_WITH(NE10-libraries, + AC_HELP_STRING([--with-NE10-libraries=DIR], + [Directory where libNE10 library is installed (optional)]), + NE10_libraries="$withval", NE10_libraries="") + AC_ARG_WITH(NE10-includes, + AC_HELP_STRING([--with-NE10-includes=DIR], + [Directory where libNE10 header files are installed (optional)]), + NE10_includes="$withval", NE10_includes="") + + if test "x$NE10_libraries" != "x" ; then + NE10_LIBS="-L$NE10_libraries" + elif test "x$NE10_prefix" = "xno" || test "x$NE10_prefix" = "xyes" ; then + NE10_LIBS="" + elif test "x$NE10_prefix" != "x" ; then + NE10_LIBS="-L$NE10_prefix/lib" + elif test "x$prefix" != "xNONE" ; then + NE10_LIBS="-L$prefix/lib" + fi + + if test "x$NE10_prefix" != "xno" ; then + NE10_LIBS="$NE10_LIBS -lNE10" + fi + + if test "x$NE10_includes" != "x" ; then + NE10_CFLAGS="-I$NE10_includes" + elif test "x$NE10_prefix" = "xno" || test "x$NE10_prefix" = "xyes" ; then + NE10_CFLAGS="" + elif test "x$ogg_prefix" != "x" ; then + NE10_CFLAGS="-I$NE10_prefix/include" + elif test "x$prefix" != "xNONE"; then + NE10_CFLAGS="-I$prefix/include" + fi + + AC_MSG_CHECKING(for NE10) + save_CFLAGS="$CFLAGS"; CFLAGS="$NE10_CFLAGS" + save_LIBS="$LIBS"; LIBS="$NE10_LIBS $LIBM" + AC_LINK_IFELSE( + [ + AC_LANG_PROGRAM( + [[#include <NE10_init.h> + ]], + [[ + ne10_fft_cfg_float32_t cfg; + cfg = ne10_fft_alloc_c2c_float32_neon(480); + ]] + ) + ],[ + HAVE_ARM_NE10=1 + AC_MSG_RESULT([yes]) + ],[ + HAVE_ARM_NE10=0 + AC_MSG_RESULT([no]) + NE10_CFLAGS="" + NE10_LIBS="" + ] + ) + CFLAGS="$save_CFLAGS"; LIBS="$save_LIBS" + #Now we know if libNE10 is installed or not + AS_IF([test x"$HAVE_ARM_NE10" = x"1"], + [ + AC_DEFINE([HAVE_ARM_NE10], 1, [NE10 library is installed on host. Make sure it is on target!]) + AC_SUBST(HAVE_ARM_NE10) + AC_SUBST(NE10_CFLAGS) + AC_SUBST(NE10_LIBS) + ],[] + ) + ] +) + AS_IF([test x"$enable_intrinsics" = x"yes"],[ intrinsics_support="" AS_CASE([$host_cpu], @@ -417,7 +491,16 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[ AS_IF([test x"$OPUS_ARM_PRESUME_NEON_INTR" = x"1"], [AC_DEFINE([OPUS_ARM_PRESUME_NEON_INTR], 1, [Define if binary requires NEON intrinsics support])]) - AS_IF([test x"$rtcd_support" = x""], + OPUS_PATH_NE10() + AS_IF([test x"$NE10_LIBS" != x""], + [ + intrinsics_support="$intrinsics_support (NE10)" + AS_IF([test x"enable_rtcd" != x"" \ + && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"], + [rtcd_support="$rtcd_support (NE10)"],[]) + ]) + + AS_IF([test x"$rtcd_support" = x""], [rtcd_support=no]) AS_IF([test x"$intrinsics_support" = x""], @@ -588,6 +671,8 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[ AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"]) AM_CONDITIONAL([OPUS_ARM_NEON_INTR], [test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"]) +AM_CONDITIONAL([HAVE_ARM_NE10], + [test x"$HAVE_ARM_NE10" = x"1"]) AM_CONDITIONAL([HAVE_SSE], [test x"$OPUS_X86_MAY_HAVE_SSE" = x"1"]) AM_CONDITIONAL([HAVE_SSE2], diff --git a/src/analysis.c b/src/analysis.c index 401a43e4..322e53c4 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -187,7 +187,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int info_out->music_prob = psum; } -static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) +static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix, int arch) { int i, b; const kiss_fft_state *kfft; @@ -260,7 +260,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); tonal->mem_fill = 240 + remaining; - opus_fft(kfft, in, out); + opus_fft(kfft, in, out, arch); #ifndef FIXED_POINT /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */ if (celt_isnan(out[0].r)) @@ -633,7 +633,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, - int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) + int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info, int arch) { int offset; int pcm_len; @@ -646,7 +646,7 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co pcm_len = analysis_frame_size - analysis->analysis_offset; offset = analysis->analysis_offset; do { - tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); + tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix, arch); offset += 480; pcm_len -= 480; } while (pcm_len>0); diff --git a/src/analysis.h b/src/analysis.h index 85a73d75..9c328e8b 100644 --- a/src/analysis.h +++ b/src/analysis.h @@ -82,6 +82,6 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, - int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info); + int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info, int arch); #endif diff --git a/src/opus_encoder.c b/src/opus_encoder.c index d11e972f..9dbe4bf5 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -1006,7 +1006,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ analysis_read_subframe_bak = st->analysis.read_subframe; run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, c1, c2, analysis_channels, st->Fs, - lsb_depth, downmix, &analysis_info); + lsb_depth, downmix, &analysis_info, st->arch); } #else (void)analysis_pcm; diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index aa6a2672..9e857735 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -72,6 +72,7 @@ typedef void (*opus_copy_channel_in_func)( struct OpusMSEncoder { ChannelLayout layout; + int arch; int lfe_stream; int application; int variable_duration; @@ -221,7 +222,7 @@ opus_val16 logSum(opus_val16 a, opus_val16 b) #endif void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem, - int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in + int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in, int arch ) { int c; @@ -273,7 +274,8 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b } } #endif - clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1); + clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, + overlap, celt_mode->maxLM-LM, 1, arch); if (upsample != 1) { int bound = len; @@ -427,6 +429,7 @@ static int opus_multistream_encoder_init_impl( (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) return OPUS_BAD_ARG; + st->arch = opus_select_arch(); st->layout.nb_channels = channels; st->layout.nb_streams = streams; st->layout.nb_coupled_streams = coupled_streams; @@ -783,7 +786,7 @@ static int opus_multistream_encode_native ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16); if (st->surround) { - surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in); + surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch); } /* Compute bitrate allocation between streams (this could be a lot better) */ |