aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Makefile.am 37
-rw-r--r-- celt/arm/arm_celt_map.c 49
-rw-r--r-- celt/arm/celt_ne10_fft.c 121
-rw-r--r-- celt/arm/celt_ne10_mdct.c 159
-rw-r--r-- celt/arm/celt_neon_intr.c 7
-rw-r--r-- celt/arm/fft_arm.h 66
-rw-r--r-- celt/arm/mdct_arm.h 53
-rw-r--r-- celt/celt_encoder.c 13
-rw-r--r-- celt/dump_modes/Makefile 24
-rw-r--r-- celt/dump_modes/dump_modes.c 21
-rw-r--r-- celt/dump_modes/dump_modes_arch.h 41
-rw-r--r-- celt/dump_modes/dump_modes_arm_ne10.c 131
-rw-r--r-- celt/kiss_fft.c 27
-rw-r--r-- celt/kiss_fft.h 57
-rw-r--r-- celt/mdct.c 15
-rw-r--r-- celt/mdct.h 37
-rw-r--r-- celt/modes.c 8
-rw-r--r-- celt/pitch.h 7
-rw-r--r-- celt/static_modes_float.h 25
-rw-r--r-- celt/static_modes_float_arm_ne10.h 404
-rw-r--r-- celt/tests/test_unit_dft.c 53
-rw-r--r-- celt/tests/test_unit_mathops.c 41
-rw-r--r-- celt/tests/test_unit_mdct.c 83
-rw-r--r-- celt/tests/test_unit_rotation.c 41
-rw-r--r-- celt_headers.mk 3
-rw-r--r-- celt_sources.mk 4
-rw-r--r-- configure.ac 87
-rw-r--r-- src/analysis.c 8
-rw-r--r-- src/analysis.h 2
-rw-r--r-- src/opus_encoder.c 2
-rw-r--r-- src/opus_multistream_encoder.c 9
31 files changed, 1492 insertions, 143 deletions
diff --git a/Makefile.am b/Makefile.am
index 4094e33e..4d3a8880 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -10,7 +10,7 @@ lib_LTLIBRARIES = libopus.la
DIST_SUBDIRS = doc
AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk \
- -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed
+ -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed $(NE10_CFLAGS)
include celt_sources.mk
include silk_sources.mk
@@ -51,6 +51,10 @@ if OPUS_ARM_NEON_INTR
CELT_SOURCES += $(CELT_SOURCES_ARM_NEON_INTR)
endif
+if HAVE_ARM_NE10
+CELT_SOURCES += $(CELT_SOURCES_ARM_NE10)
+endif
+
if OPUS_ARM_EXTERNAL_ASM
noinst_LTLIBRARIES = libarmasm.la
libarmasm_la_SOURCES = $(CELT_SOURCES_ARM_ASM:.s=-gnu.S)
@@ -69,7 +73,7 @@ include opus_headers.mk
libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(OPUS_SOURCES)
libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@
-libopus_la_LIBADD = $(LIBM)
+libopus_la_LIBADD = $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
libopus_la_LIBADD += libarmasm.la
endif
@@ -85,32 +89,35 @@ TESTS = celt/tests/test_unit_types celt/tests/test_unit_mathops celt/tests/test_
opus_demo_SOURCES = src/opus_demo.c
-opus_demo_LDADD = libopus.la $(LIBM)
+opus_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
repacketizer_demo_SOURCES = src/repacketizer_demo.c
-repacketizer_demo_LDADD = libopus.la $(LIBM)
+repacketizer_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
opus_compare_SOURCES = src/opus_compare.c
opus_compare_LDADD = $(LIBM)
tests_test_opus_api_SOURCES = tests/test_opus_api.c tests/test_opus_common.h
-tests_test_opus_api_LDADD = libopus.la $(LIBM)
+tests_test_opus_api_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
tests_test_opus_encode_SOURCES = tests/test_opus_encode.c tests/test_opus_common.h
-tests_test_opus_encode_LDADD = libopus.la $(LIBM)
+tests_test_opus_encode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
tests_test_opus_decode_SOURCES = tests/test_opus_decode.c tests/test_opus_common.h
-tests_test_opus_decode_LDADD = libopus.la $(LIBM)
+tests_test_opus_decode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
tests_test_opus_padding_SOURCES = tests/test_opus_padding.c tests/test_opus_common.h
-tests_test_opus_padding_LDADD = libopus.la $(LIBM)
+tests_test_opus_padding_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
celt_tests_test_unit_cwrs32_SOURCES = celt/tests/test_unit_cwrs32.c
celt_tests_test_unit_cwrs32_LDADD = $(LIBM)
celt_tests_test_unit_dft_SOURCES = celt/tests/test_unit_dft.c
-celt_tests_test_unit_dft_LDADD = $(LIBM)
+celt_tests_test_unit_dft_LDADD = $(NE10_LIBS) $(LIBM)
+if OPUS_ARM_EXTERNAL_ASM
+celt_tests_test_unit_dft_LDADD += libarmasm.la
+endif
celt_tests_test_unit_entropy_SOURCES = celt/tests/test_unit_entropy.c
celt_tests_test_unit_entropy_LDADD = $(LIBM)
@@ -119,16 +126,19 @@ celt_tests_test_unit_laplace_SOURCES = celt/tests/test_unit_laplace.c
celt_tests_test_unit_laplace_LDADD = $(LIBM)
celt_tests_test_unit_mathops_SOURCES = celt/tests/test_unit_mathops.c
-celt_tests_test_unit_mathops_LDADD = $(LIBM)
+celt_tests_test_unit_mathops_LDADD = $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
celt_tests_test_unit_mathops_LDADD += libarmasm.la
endif
celt_tests_test_unit_mdct_SOURCES = celt/tests/test_unit_mdct.c
-celt_tests_test_unit_mdct_LDADD = $(LIBM)
+celt_tests_test_unit_mdct_LDADD = $(NE10_LIBS) $(LIBM)
+if OPUS_ARM_EXTERNAL_ASM
+celt_tests_test_unit_mdct_LDADD += libarmasm.la
+endif
celt_tests_test_unit_rotation_SOURCES = celt/tests/test_unit_rotation.c
-celt_tests_test_unit_rotation_LDADD = $(LIBM)
+celt_tests_test_unit_rotation_LDADD = $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
celt_tests_test_unit_rotation_LDADD += libarmasm.la
endif
@@ -286,5 +296,6 @@ endif
if OPUS_ARM_NEON_INTR
CELT_ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo)
-$(CELT_ARM_NEON_INTR_OBJ) $(OPT_UNIT_TEST_OBJ): CFLAGS += $(OPUS_ARM_NEON_INTR_CFLAGS)
+$(CELT_ARM_NEON_INTR_OBJ) $(OPT_UNIT_TEST_OBJ): CFLAGS += \
+ $(OPUS_ARM_NEON_INTR_CFLAGS) $(NE10_CFLAGS)
endif
diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c
index 68c224df..87ba3b3e 100644
--- a/celt/arm/arm_celt_map.c
+++ b/celt/arm/arm_celt_map.c
@@ -30,6 +30,8 @@
#endif
#include "pitch.h"
+#include "kiss_fft.h"
+#include "mdct.h"
#if defined(OPUS_HAVE_RTCD)
@@ -42,7 +44,7 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
};
# else /* !FIXED_POINT */
-# if defined(OPUS_ARM_NEON_INTR)
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int) = {
celt_pitch_xcorr_c, /* ARMv4 */
@@ -50,7 +52,46 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* Media */
celt_pitch_xcorr_float_neon /* Neon */
};
-# endif
-# endif
-#endif
+/* Run-time dispatch tables for the NE10-backed FFT/MDCT (float builds only).
+ * Each table has OPUS_ARCHMASK+1 slots; only the last (NEON) slot selects the
+ * NE10 implementation, every other slot keeps the generic C routine.
+ * NOTE(review): presumably indexed by (arch & OPUS_ARCHMASK) from the
+ * dispatch macros in kiss_fft.h / mdct.h - confirm there. */
+# if defined(HAVE_ARM_NE10)
+# if defined(CUSTOM_MODES)
+/* Allocation of the per-FFT architecture state (custom modes only). */
+int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
+ opus_fft_alloc_arch_c, /* ARMv4 */
+ opus_fft_alloc_arch_c, /* EDSP */
+ opus_fft_alloc_arch_c, /* Media */
+ opus_fft_alloc_arm_float_neon /* Neon with NE10 library support */
+};
+
+/* Release of the per-FFT architecture state (custom modes only). */
+void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
+ opus_fft_free_arch_c, /* ARMv4 */
+ opus_fft_free_arch_c, /* EDSP */
+ opus_fft_free_arch_c, /* Media */
+ opus_fft_free_arm_float_neon /* Neon with NE10 */
+};
+# endif /* CUSTOM_MODES */
+
+/* Forward complex FFT. */
+void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+ const kiss_fft_cpx *fin,
+ kiss_fft_cpx *fout) = {
+ opus_fft_c, /* ARMv4 */
+ opus_fft_c, /* EDSP */
+ opus_fft_c, /* Media */
+ opus_fft_float_neon /* Neon with NE10 */
+};
+
+/* Forward MDCT. */
+void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
+ kiss_fft_scalar *in,
+ kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 *window,
+ int overlap, int shift,
+ int stride, int arch) = {
+ clt_mdct_forward_c, /* ARMv4 */
+ clt_mdct_forward_c, /* EDSP */
+ clt_mdct_forward_c, /* Media */
+ clt_mdct_forward_float_neon /* Neon with NE10 */
+};
+# endif /* HAVE_ARM_NE10 */
+# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
+# endif /* FIXED_POINT */
+
+#endif /* OPUS_HAVE_RTCD */
diff --git a/celt/arm/celt_ne10_fft.c b/celt/arm/celt_ne10_fft.c
new file mode 100644
index 00000000..fc4b0da0
--- /dev/null
+++ b/celt/arm/celt_ne10_fft.c
@@ -0,0 +1,121 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+ Written by Viswanath Puttagunta */
+/**
+ @file celt_ne10_fft.c
+ @brief ARM Neon optimizations for fft using NE10 library
+ */
+
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include <NE10_init.h>
+#include <NE10_dsp.h>
+#include "os_support.h"
+#include "kiss_fft.h"
+#include "stack_alloc.h"
+
+#if !defined(FIXED_POINT)
+# if defined(CUSTOM_MODES)
+
+/* nfft lengths for which NE10 supports the scaled forward fft */
+#define NE10_FFTSCALED_SUPPORT_MAX 4
+static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
+   480, 240, 120, 60
+};
+
+/** Allocate the NE10-specific part of an FFT state.
+ *
+ * Allocates st->arch_fft and, when st->nfft is one of the lengths listed in
+ * ne10_fft_scaled_support, an NE10 c2c float configuration stored in
+ * st->arch_fft->priv. For any other length the state is marked unsupported
+ * (is_supported = 0, priv = NULL) so opus_fft_float_neon() falls back to
+ * opus_fft_c().
+ *
+ * @param st FFT state whose nfft field is already set.
+ * @return 0 on success, -1 on allocation failure. On failure st->arch_fft is
+ *         NULL, so the state never appears "supported" without a valid NE10
+ *         configuration and a later opus_fft_free_arm_float_neon() is a no-op.
+ */
+int opus_fft_alloc_arm_float_neon(kiss_fft_state *st)
+{
+   int i;
+   size_t memneeded = sizeof(struct arch_fft_state);
+
+   st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
+   if (!st->arch_fft)
+      return -1;
+
+   for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
+      if(st->nfft == ne10_fft_scaled_support[i])
+         break;
+   }
+   if (i == NE10_FFTSCALED_SUPPORT_MAX) {
+      /* This nfft length (scaled fft) is not supported in NE10 */
+      st->arch_fft->is_supported = 0;
+      st->arch_fft->priv = NULL;
+   }
+   else {
+      st->arch_fft->is_supported = 1;
+      st->arch_fft->priv = (void *)ne10_fft_alloc_c2c_float32_neon(st->nfft);
+      if (st->arch_fft->priv == NULL) {
+         /* Do not leave a half-initialised state behind: release the
+            wrapper and clear the pointer before reporting the failure. */
+         opus_free(st->arch_fft);
+         st->arch_fft = NULL;
+         return -1;
+      }
+   }
+   return 0;
+}
+
+/** Release the NE10-specific part of an FFT state.
+ *
+ * Destroys the NE10 configuration held in st->arch_fft->priv (if any), frees
+ * st->arch_fft and clears the pointer so that calling this function again on
+ * the same state is harmless.
+ *
+ * @param st FFT state; a state whose arch_fft is NULL is left untouched.
+ */
+void opus_fft_free_arm_float_neon(kiss_fft_state *st)
+{
+   ne10_fft_cfg_float32_t cfg;
+
+   if (!st->arch_fft)
+      return;
+
+   cfg = (ne10_fft_cfg_float32_t)st->arch_fft->priv;
+   if (cfg)
+      ne10_fft_destroy_c2c_float32(cfg);
+   opus_free(st->arch_fft);
+   /* Avoid a dangling pointer / double free on a repeated call. */
+   st->arch_fft = NULL;
+}
+# endif
+
+/* Forward complex FFT using NE10 when the length is supported.
+ * A private copy of the stored NE10 state is made so that a per-call scratch
+ * buffer can be attached to it; unsupported lengths are handed to
+ * opus_fft_c(). */
+void opus_fft_float_neon(const kiss_fft_state *st,
+                         const kiss_fft_cpx *fin,
+                         kiss_fft_cpx *fout)
+{
+   ne10_fft_state_float32_t state;
+   VARDECL(ne10_fft_cpx_float32_t, buffer);
+   SAVE_STACK;
+   ALLOC(buffer, st->nfft, ne10_fft_cpx_float32_t);
+
+   if (st->arch_fft->is_supported) {
+      /* Work on a local copy: the stored configuration stays untouched. */
+      memcpy((void *)&state, st->arch_fft->priv,
+             sizeof(ne10_fft_state_float32_t));
+      state.buffer = (ne10_fft_cpx_float32_t *)&buffer[0];
+      state.is_forward_scaled = 1;
+
+      ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout,
+                                   (ne10_fft_cpx_float32_t *)fin,
+                                   &state, 0);
+   }
+   else {
+      /* This nfft length (scaled fft) not supported in NE10 */
+      opus_fft_c(st, fin, fout);
+   }
+   RESTORE_STACK;
+}
+#endif /* !defined(FIXED_POINT) */
diff --git a/celt/arm/celt_ne10_mdct.c b/celt/arm/celt_ne10_mdct.c
new file mode 100644
index 00000000..1c6e9158
--- /dev/null
+++ b/celt/arm/celt_ne10_mdct.c
@@ -0,0 +1,159 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+ Written by Viswanath Puttagunta */
+/**
+ @file celt_ne10_mdct.c
+ @brief ARM Neon optimizations for mdct using NE10 library
+ */
+
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include "mdct.h"
+#include "stack_alloc.h"
+
+#if !defined(FIXED_POINT)
+
+/* Forward MDCT (float builds) that routes its FFT through opus_fft() with the
+ * caller's arch.
+ *
+ * Steps: pick the FFT state l->kfft[shift]; halve N `shift` times (advancing
+ * the twiddle pointer by the halved N each time); window and fold `in` into
+ * N/2 scalars (f); pre-rotate them into N/4 complex values (f2); run the FFT
+ * from f2 back into f; post-rotate into `out`, writing N/2 values `stride`
+ * apart from both ends of the output.
+ *
+ * NOTE(review): no explicit scaling is applied in this function; presumably
+ * the FFT performs it (the NE10 path sets is_forward_scaled) - confirm.
+ */
+void clt_mdct_forward_float_neon(const mdct_lookup *l,
+ kiss_fft_scalar *in,
+ kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 *window,
+ int overlap, int shift, int stride, int arch)
+{
+ int i;
+ int N, N2, N4;
+ VARDECL(kiss_fft_scalar, f);
+ VARDECL(kiss_fft_cpx, f2);
+ const kiss_fft_state *st = l->kfft[shift];
+ const kiss_twiddle_scalar *trig;
+
+ SAVE_STACK;
+
+ /* Derive the transform size and matching twiddle table for this shift */
+ N = l->n;
+ trig = l->trig;
+ for (i=0;i<shift;i++)
+ {
+ N >>= 1;
+ trig += N;
+ }
+ N2 = N>>1;
+ N4 = N>>2;
+
+ ALLOC(f, N2, kiss_fft_scalar);
+ ALLOC(f2, N4, kiss_fft_cpx);
+
+ /* Consider the input to be composed of four blocks: [a, b, c, d] */
+ /* Window, shuffle, fold */
+ {
+ /* Temp pointers to make it really clear to the compiler what we're doing */
+ const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+ const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+ kiss_fft_scalar * OPUS_RESTRICT yp = f;
+ const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+ const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+ /* First (overlap+3)/4 pairs: windowed samples */
+ for(i=0;i<((overlap+3)>>2);i++)
+ {
+ /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+ *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
+ *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
+ xp1+=2;
+ xp2-=2;
+ wp1+=2;
+ wp2-=2;
+ }
+ wp1 = window;
+ wp2 = window+overlap-1;
+ /* Middle pairs: copied without windowing */
+ for(;i<N4-((overlap+3)>>2);i++)
+ {
+ /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+ *yp++ = *xp2;
+ *yp++ = *xp1;
+ xp1+=2;
+ xp2-=2;
+ }
+ /* Remaining pairs: windowed samples, window walked from both ends */
+ for(;i<N4;i++)
+ {
+ /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+ *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
+ *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
+ xp1+=2;
+ xp2-=2;
+ wp1+=2;
+ wp2-=2;
+ }
+ }
+ /* Pre-rotation */
+ {
+ kiss_fft_scalar * OPUS_RESTRICT yp = f;
+ const kiss_twiddle_scalar *t = &trig[0];
+ for(i=0;i<N4;i++)
+ {
+ kiss_fft_cpx yc;
+ kiss_twiddle_scalar t0, t1;
+ kiss_fft_scalar re, im, yr, yi;
+ t0 = t[i];
+ t1 = t[N4+i];
+ re = *yp++;
+ im = *yp++;
+ yr = S_MUL(re,t0) - S_MUL(im,t1);
+ yi = S_MUL(im,t0) + S_MUL(re,t1);
+ yc.r = yr;
+ yc.i = yi;
+ f2[i] = yc;
+ }
+ }
+
+ /* N/4-point complex FFT from f2 into f (reused as complex storage) */
+ opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
+
+ /* Post-rotate */
+ {
+ /* Temp pointers to make it really clear to the compiler what we're doing */
+ const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
+ kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+ kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+ const kiss_twiddle_scalar *t = &trig[0];
+ /* yp1 walks forward from the first output, yp2 backward from the last */
+ for(i=0;i<N4;i++)
+ {
+ kiss_fft_scalar yr, yi;
+ yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
+ yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
+ *yp1 = yr;
+ *yp2 = yi;
+ fp++;
+ yp1 += 2*stride;
+ yp2 -= 2*stride;
+ }
+ }
+ RESTORE_STACK;
+}
+#endif /* !defined(FIXED_POINT) */
diff --git a/celt/arm/celt_neon_intr.c b/celt/arm/celt_neon_intr.c
index 4a674133..47dce15b 100644
--- a/celt/arm/celt_neon_intr.c
+++ b/celt/arm/celt_neon_intr.c
@@ -29,9 +29,15 @@
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <arm_neon.h>
#include "../pitch.h"
+#if !defined(FIXED_POINT)
/*
* Function: xcorr_kernel_neon_float
* ---------------------------------
@@ -243,3 +249,4 @@ void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
(const float32_t *)_y+i, (float32_t *)xcorr+i, len);
}
}
+#endif
diff --git a/celt/arm/fft_arm.h b/celt/arm/fft_arm.h
new file mode 100644
index 00000000..e7a30d69
--- /dev/null
+++ b/celt/arm/fft_arm.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+ Written by Viswanath Puttagunta */
+/**
+ @file fft_arm.h
+ @brief ARM Neon Intrinsic optimizations for fft using NE10 library
+ */
+
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#if !defined(FFT_ARM_H)
+#define FFT_ARM_H
+
+#include "config.h"
+#include "kiss_fft.h"
+
+#if !defined(FIXED_POINT)
+#if defined(HAVE_ARM_NE10)
+
+/* Allocate / release the NE10 part of an FFT state (st->arch_fft).
+ * The allocator returns 0 on success and -1 on failure. */
+int opus_fft_alloc_arm_float_neon(kiss_fft_state *st);
+void opus_fft_free_arm_float_neon(kiss_fft_state *st);
+
+/* Forward complex FFT; uses NE10 when the state is marked as supported and
+ * opus_fft_c() otherwise. */
+void opus_fft_float_neon(const kiss_fft_state *st,
+ const kiss_fft_cpx *fin,
+ kiss_fft_cpx *fout);
+/* Without run-time CPU detection the generic entry points are mapped straight
+ * onto the NE10 versions; the arch argument is evaluated and discarded. */
+#if !defined(OPUS_HAVE_RTCD)
+#define OVERRIDE_OPUS_FFT (1)
+
+#define opus_fft_alloc_arch(_st, arch) \
+ ((void)(arch), opus_fft_alloc_arm_float_neon(_st))
+
+#define opus_fft_free_arch(_st, arch) \
+ ((void)(arch), opus_fft_free_arm_float_neon(_st))
+
+#define opus_fft(_st, _fin, _fout, arch) \
+ ((void)(arch), opus_fft_float_neon(_st, _fin, _fout))
+
+#endif /* !defined(OPUS_HAVE_RTCD) */
+
+#endif /* defined(HAVE_ARM_NE10) */
+#endif /* !defined(FIXED_POINT) */
+
+#endif
diff --git a/celt/arm/mdct_arm.h b/celt/arm/mdct_arm.h
new file mode 100644
index 00000000..7d60fedc
--- /dev/null
+++ b/celt/arm/mdct_arm.h
@@ -0,0 +1,53 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+ Written by Viswanath Puttagunta */
+/**
+ @file mdct_arm.h
+ @brief ARM Neon Intrinsic optimizations for mdct using NE10 library
+ */
+
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(MDCT_ARM_H)
+#define MDCT_ARM_H
+
+#include "config.h"
+#include "mdct.h"
+
+#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
+/** Compute a forward MDCT and scale by 4/N, trashes the input array */
+void clt_mdct_forward_float_neon(const mdct_lookup *l, kiss_fft_scalar *in,
+ kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 *window, int overlap,
+ int shift, int stride, int arch);
+
+/* Without run-time CPU detection clt_mdct_forward() maps directly onto the
+ * NEON/NE10 version; the macro's _int argument is the overlap length. */
+#if !defined(OPUS_HAVE_RTCD)
+#define OVERRIDE_OPUS_MDCT (1)
+#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
+ clt_mdct_forward_float_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
+#endif /* !defined(OPUS_HAVE_RTCD) */
+#endif /* !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) */
+
+#endif
diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c
index 07e70711..af7d18b3 100644
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -414,7 +414,8 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
/** Apply window and compute the MDCT for all sub-frames and
all channels in a frame */
static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in,
- celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample)
+ celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample,
+ int arch)
{
const int overlap = mode->overlap;
int N;
@@ -435,7 +436,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
for (b=0;b<B;b++)
{
/* Interleaving the sub-frames while doing the MDCTs */
- clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B);
+ clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N,
+ &out[b+c*N*B], mode->window, overlap, shift, B,
+ arch);
}
} while (++c<CC);
if (CC==2&&C==1)
@@ -1603,14 +1606,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ALLOC(bandLogE2, C*nbEBands, opus_val16);
if (secondMdct)
{
- compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample);
+ compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch);
compute_band_energies(mode, freq, bandE, effEnd, C, LM);
amp2Log2(mode, effEnd, end, bandE, bandLogE2, C);
for (i=0;i<C*nbEBands;i++)
bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
}
- compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
+ compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
if (CC==2&&C==1)
tf_chan = 0;
compute_band_energies(mode, freq, bandE, effEnd, C, LM);
@@ -1736,7 +1739,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
{
isTransient = 1;
shortBlocks = M;
- compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
+ compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
compute_band_energies(mode, freq, bandE, effEnd, C, LM);
amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
/* Compensate for the scaling of short vs long mdcts */
diff --git a/celt/dump_modes/Makefile b/celt/dump_modes/Makefile
index 763cb303..93f599fb 100644
--- a/celt/dump_modes/Makefile
+++ b/celt/dump_modes/Makefile
@@ -1,10 +1,32 @@
+
CFLAGS=-O2 -Wall -Wextra -DHAVE_CONFIG_H
INCLUDES=-I. -I../ -I../.. -I../../include
+SOURCES = dump_modes.c \
+ ../modes.c \
+ ../cwrs.c \
+ ../rate.c \
+ ../entcode.c \
+ ../entenc.c \
+ ../entdec.c \
+ ../mathops.c \
+ ../mdct.c \
+ ../kiss_fft.c
+
+ifdef HAVE_ARM_NE10
+CC = gcc
+CFLAGS += -mfpu=neon
+INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_PRESUME_NEON_INTR
+LIBS = -L$(NE10_LIBDIR) -lNE10
+SOURCES += ../arm/celt_ne10_fft.c \
+ dump_modes_arm_ne10.c \
+ ../arm/armcpu.c
+endif
+
all: dump_modes
dump_modes:
- $(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES dump_modes.c ../modes.c ../cwrs.c ../rate.c ../entcode.c ../entenc.c ../entdec.c ../mathops.c ../mdct.c ../kiss_fft.c -o dump_modes -lm
+ $(PREFIX)$(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES $(SOURCES) -o $@ $(LIBS) -lm
clean:
rm -f dump_modes
diff --git a/celt/dump_modes/dump_modes.c b/celt/dump_modes/dump_modes.c
index ae6a8c15..9105a534 100644
--- a/celt/dump_modes/dump_modes.c
+++ b/celt/dump_modes/dump_modes.c
@@ -35,6 +35,7 @@
#include "modes.h"
#include "celt.h"
#include "rate.h"
+#include "dump_modes_arch.h"
#define INT16 "%d"
#define INT32 "%d"
@@ -62,6 +63,10 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
fprintf(file, "\n It contains static definitions for some pre-defined modes. */\n");
fprintf(file, "#include \"modes.h\"\n");
fprintf(file, "#include \"rate.h\"\n");
+ fprintf(file, "\n#ifdef HAVE_ARM_NE10\n");
+ fprintf(file, "#define OVERRIDE_FFT 1\n");
+ fprintf(file, "#include \"%s\"\n", ARM_NE10_ARCH_FILE_NAME);
+ fprintf(file, "#endif\n");
fprintf(file, "\n");
@@ -149,6 +154,9 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
fprintf (file, "{" WORD16 ", " WORD16 "},%c", mode->mdct.kfft[0]->twiddles[j].r, mode->mdct.kfft[0]->twiddles[j].i,(j+3)%2==0?'\n':' ');
fprintf (file, "};\n");
+#ifdef OVERRIDE_FFT
+ dump_mode_arch(mode);
+#endif
/* FFT Bitrev tables */
for (k=0;k<=mode->mdct.maxshift;k++)
{
@@ -183,6 +191,13 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
fprintf (file, "}, /* factors */\n");
fprintf (file, "fft_bitrev%d, /* bitrev */\n", mode->mdct.kfft[k]->nfft);
fprintf (file, "fft_twiddles%d_%d, /* bitrev */\n", mode->Fs, mdctSize);
+
+ fprintf (file, "#ifdef OVERRIDE_FFT\n");
+ fprintf (file, "(arch_fft_state *)&cfg_arch_%d,\n", mode->mdct.kfft[k]->nfft);
+ fprintf (file, "#else\n");
+ fprintf (file, "NULL,\n");
+ fprintf(file, "#endif\n");
+
fprintf (file, "};\n");
fprintf(file, "#endif\n");
@@ -323,8 +338,14 @@ int main(int argc, char **argv)
}
}
file = fopen(BASENAME ".h", "w");
+#ifdef OVERRIDE_FFT
+ dump_modes_arch_init(m, nb);
+#endif
dump_modes(file, m, nb);
fclose(file);
+#ifdef OVERRIDE_FFT
+ dump_modes_arch_finalize();
+#endif
for (i=0;i<nb;i++)
opus_custom_mode_destroy(m[i]);
free(m);
diff --git a/celt/dump_modes/dump_modes_arch.h b/celt/dump_modes/dump_modes_arch.h
new file mode 100644
index 00000000..1436926e
--- /dev/null
+++ b/celt/dump_modes/dump_modes_arch.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+ Written by Viswanath Puttagunta */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DUMP_MODE_ARCH_H
+#define DUMP_MODE_ARCH_H
+
+/* Hooks used by dump_modes.c to emit the architecture-specific header.
+ * Declared as full prototypes so the compiler checks every call. */
+void dump_modes_arch_init(CELTMode **modes, int nb_modes);
+void dump_mode_arch(CELTMode *mode);
+void dump_modes_arch_finalize(void);
+
+#define ARM_NE10_ARCH_FILE_NAME "static_modes_float_arm_ne10.h"
+
+#if defined(HAVE_ARM_NE10)
+#define OVERRIDE_FFT (1)
+#endif
+
+#endif
diff --git a/celt/dump_modes/dump_modes_arm_ne10.c b/celt/dump_modes/dump_modes_arm_ne10.c
new file mode 100644
index 00000000..d37e7ada
--- /dev/null
+++ b/celt/dump_modes/dump_modes_arm_ne10.c
@@ -0,0 +1,131 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+ Written by Viswanath Puttagunta */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if defined(HAVE_CONFIG_H)
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "modes.h"
+#include "dump_modes_arch.h"
+#include <NE10_dsp.h>
+
+static FILE *file;
+
+/* Open the NE10 static-mode header (ARM_NE10_ARCH_FILE_NAME) for writing and
+ * emit its banner comment, which lists the sample rate and frame size of each
+ * of the nb_modes modes, followed by the NE10 include.
+ * The tool exits with an error message if the file cannot be created, since
+ * every later dump_mode_arch() call writes to it. */
+void dump_modes_arch_init(CELTMode **modes, int nb_modes)
+{
+   int i;
+
+   file = fopen(ARM_NE10_ARCH_FILE_NAME, "w");
+   if (file == NULL)
+   {
+      perror(ARM_NE10_ARCH_FILE_NAME);
+      exit(EXIT_FAILURE);
+   }
+   fprintf(file, "/* The contents of this file was automatically generated by\n");
+   fprintf(file, " * dump_mode_arm_ne10.c with arguments:");
+   for (i=0;i<nb_modes;i++)
+   {
+      CELTMode *mode = modes[i];
+      fprintf(file, " %d %d",mode->Fs,mode->shortMdctSize*mode->nbShortMdcts);
+   }
+   fprintf(file, "\n * It contains static definitions for some pre-defined modes. */\n");
+   fprintf(file, "#include <NE10_init.h>\n\n");
+}
+
+/* Close the generated NE10 header. A failing fclose() on a write stream means
+ * buffered data may not have reached the file, so it is reported. Safe to
+ * call when no file is open. */
+void dump_modes_arch_finalize(void)
+{
+   if (file == NULL)
+      return;
+   if (fclose(file) != 0)
+      perror(ARM_NE10_ARCH_FILE_NAME);
+   file = NULL;
+}
+
+/* Emit the NE10 static tables for one mode into the file opened by
+ * dump_modes_arch_init().
+ *
+ * The output is wrapped in an NE10_FFT_PARAMS<Fs>_<mdctSize> include guard.
+ * For every FFT of the mode (k = 0..maxshift) that has an NE10 configuration
+ * it writes the factors array, the twiddles array, an ne10_fft_state_float32_t
+ * initialiser referencing both, and a cfg_arch_<nfft> wrapper marked as
+ * supported. FFTs without a configuration only get a cfg_arch_<nfft> wrapper
+ * marked as unsupported.
+ */
+void dump_mode_arch(CELTMode *mode)
+{
+ int k, j;
+ int mdctSize;
+
+ mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
+
+ fprintf(file, "#ifndef NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
+ fprintf(file, "#define NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
+ /* cfg->factors */
+ for(k=0;k<=mode->mdct.maxshift;k++) {
+ ne10_fft_cfg_float32_t cfg;
+ cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->arch_fft->priv;
+ /* No NE10 configuration for this length: nothing to dump */
+ if (!cfg)
+ continue;
+ fprintf(file, "static const ne10_int32_t ne10_factors_%d[%d] = {\n",
+ mode->mdct.kfft[k]->nfft, (NE10_MAXFACTORS * 2));
+ for(j=0;j<(NE10_MAXFACTORS * 2);j++) {
+ fprintf(file, "%d,%c", cfg->factors[j],(j+16)%15==0?'\n':' ');
+ }
+ fprintf (file, "};\n");
+ }
+
+ /* cfg->twiddles */
+ for(k=0;k<=mode->mdct.maxshift;k++) {
+ ne10_fft_cfg_float32_t cfg;
+ cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->arch_fft->priv;
+ if (!cfg)
+ continue;
+ fprintf(file, "static const ne10_fft_cpx_float32_t ne10_twiddles_%d[%d] = {\n",
+ mode->mdct.kfft[k]->nfft, mode->mdct.kfft[k]->nfft);
+ for(j=0;j<mode->mdct.kfft[k]->nfft;j++) {
+ fprintf(file, "{%#0.8gf,%#0.8gf},%c", cfg->twiddles[j].r, cfg->twiddles[j].i,(j+4)%3==0?'\n':' ');
+ }
+ fprintf (file, "};\n");
+ }
+
+ /* NE10 state initialisers and the arch_fft_state wrappers */
+ for(k=0;k<=mode->mdct.maxshift;k++) {
+ ne10_fft_cfg_float32_t cfg;
+ cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->arch_fft->priv;
+ if (!cfg) {
+ /* Unsupported length: wrapper with is_supported = 0 and no state */
+ fprintf(file, "/* Ne10 does not support scaled FFT for length = %d */\n",
+ mode->mdct.kfft[k]->nfft);
+ fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n", mode->mdct.kfft[k]->nfft);
+ fprintf(file, "0,\n");
+ fprintf(file, "NULL\n");
+ fprintf(file, "};\n");
+ continue;
+ }
+ fprintf(file, "static const ne10_fft_state_float32_t ne10_fft_state_float32_%d = {\n",
+ mode->mdct.kfft[k]->nfft);
+ fprintf(file, "%d,\n", cfg->nfft);
+ fprintf(file, "(ne10_int32_t *)ne10_factors_%d,\n", mode->mdct.kfft[k]->nfft);
+ fprintf(file, "(ne10_fft_cpx_float32_t *)ne10_twiddles_%d,\n", mode->mdct.kfft[k]->nfft);
+ fprintf(file, "NULL,\n"); /* buffer */
+ /* NOTE(review): this emits &ne10_twiddles_N[nfft], i.e. one past the
+ nfft entries dumped above - confirm the field is never dereferenced */
+ fprintf(file, "(ne10_fft_cpx_float32_t *)&ne10_twiddles_%d[%d],\n",
+ mode->mdct.kfft[k]->nfft, cfg->nfft);
+ fprintf(file, "/* is_forward_scaled = true */\n");
+ fprintf(file, "(ne10_int32_t) 1,\n");
+ fprintf(file, "/* is_backward_scaled = false */\n");
+ fprintf(file, "(ne10_int32_t) 0,\n");
+ fprintf(file, "};\n");
+
+ fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n",
+ mode->mdct.kfft[k]->nfft);
+ fprintf(file, "1,\n");
+ fprintf(file, "(void *)&ne10_fft_state_float32_%d,\n", mode->mdct.kfft[k]->nfft);
+ fprintf(file, "};\n\n");
+ }
+ fprintf(file, "#endif /* end NE10_FFT_PARAMS%d_%d */\n", mode->Fs, mdctSize);
+}
diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c
index cc487fcf..38fd4fb6 100644
--- a/celt/kiss_fft.c
+++ b/celt/kiss_fft.c
@@ -423,13 +423,19 @@ static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
#endif
}
+int opus_fft_alloc_arch_c(kiss_fft_state *st) { /* generic C fallback: sets up no arch-specific FFT state */
+ (void)st; /* unused here; silences the unused-parameter warning */
+ return 0; /* zero; opus_fft_alloc_twiddles() jumps to fail on a non-zero result from opus_fft_alloc_arch() */
+}
+
/*
*
* Allocates all necessary storage space for the fft and ifft.
* The return value is a contiguous block of memory. As such,
* It can be freed with free().
* */
-kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base)
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
+ const kiss_fft_state *base, int arch)
{
kiss_fft_state *st=NULL;
size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
@@ -478,22 +484,31 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co
if (st->bitrev==NULL)
goto fail;
compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
+
+ /* Initialize architecture specific fft parameters */
+ if (opus_fft_alloc_arch(st, arch))
+ goto fail;
}
return st;
fail:
- opus_fft_free(st);
+ opus_fft_free(st, arch);
return NULL;
}
-kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem )
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
{
- return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
+ return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
+}
+
+void opus_fft_free_arch_c(kiss_fft_state *st) { /* generic C fallback: nothing arch-specific to release */
+ (void)st; /* unused here; silences the unused-parameter warning */
}
-void opus_fft_free(const kiss_fft_state *cfg)
+void opus_fft_free(const kiss_fft_state *cfg, int arch)
{
if (cfg)
{
+ opus_fft_free_arch((kiss_fft_state *)cfg, arch);
opus_free((opus_int16*)cfg->bitrev);
if (cfg->shift < 0)
opus_free((kiss_twiddle_cpx*)cfg->twiddles);
@@ -551,7 +566,7 @@ void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
}
}
-void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
int i;
opus_val16 scale;
diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h
index 390b54d9..99f0e500 100644
--- a/celt/kiss_fft.h
+++ b/celt/kiss_fft.h
@@ -32,6 +32,7 @@
#include <stdlib.h>
#include <math.h>
#include "arch.h"
+#include "cpu_support.h"
#ifdef __cplusplus
extern "C" {
@@ -77,6 +78,11 @@ typedef struct {
4*4*4*2
*/
+typedef struct arch_fft_state{ /* per-FFT hook for an architecture-specific implementation */
+ int is_supported; /* the Ne10 mode dumper writes 1 when it emits a backend state, 0 otherwise */
+ void *priv; /* opaque backend state (an ne10 FFT config in the Ne10 dumper), or NULL */
+} arch_fft_state;
+
typedef struct kiss_fft_state{
int nfft;
opus_val16 scale;
@@ -87,8 +93,15 @@ typedef struct kiss_fft_state{
opus_int16 factors[2*MAXFACTORS];
const opus_int16 *bitrev;
const kiss_twiddle_cpx *twiddles;
+#ifndef FIXED_POINT
+ arch_fft_state *arch_fft;
+#endif
} kiss_fft_state;
+#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
+#include "arm/fft_arm.h"
+#endif
+
/*typedef struct kiss_fft_state* kiss_fft_cfg;*/
/**
@@ -114,9 +127,9 @@ typedef struct kiss_fft_state{
* buffer size in *lenmem.
* */
-kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base);
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch);
-kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch);
/**
* opus_fft(cfg,in_out_buf)
@@ -128,13 +141,49 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
* Note that each element is complex and can be accessed like
f[k].r and f[k].i
* */
-void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
-void opus_fft_free(const kiss_fft_state *cfg);
+void opus_fft_free(const kiss_fft_state *cfg, int arch);
+
+
+void opus_fft_free_arch_c(kiss_fft_state *st);
+int opus_fft_alloc_arch_c(kiss_fft_state *st);
+
+#if !defined(OVERRIDE_OPUS_FFT)
+/* Is run-time CPU detection enabled on this platform? */
+#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
+/* Function tables indexed by (arch & OPUS_ARCHMASK); defined outside this header. */
+extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(
+ kiss_fft_state *st);
+
+#define opus_fft_alloc_arch(_st, arch) \
+ ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
+
+extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(
+ kiss_fft_state *st);
+#define opus_fft_free_arch(_st, arch) \
+ ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
+
+extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+ const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
+#define opus_fft(_cfg, _fin, _fout, arch) \
+ ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
+#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
+/* No table dispatch: evaluate and discard arch, then call the generic C routine. */
+#define opus_fft_alloc_arch(_st, arch) \
+ ((void)(arch), opus_fft_alloc_arch_c(_st))
+
+#define opus_fft_free_arch(_st, arch) \
+ ((void)(arch), opus_fft_free_arch_c(_st))
+
+#define opus_fft(_cfg, _fin, _fout, arch) \
+ ((void)(arch), opus_fft_c(_cfg, _fin, _fout))
+#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
+#endif /* end if !defined(OVERRIDE_OPUS_FFT) */
#ifdef __cplusplus
}
diff --git a/celt/mdct.c b/celt/mdct.c
index 2795d90d..ee6d80ec 100644
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -60,7 +60,7 @@
#ifdef CUSTOM_MODES
-int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
+int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch)
{
int i;
kiss_twiddle_scalar *trig;
@@ -71,9 +71,9 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
for (i=0;i<=maxshift;i++)
{
if (i==0)
- l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
+ l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch);
else
- l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
+ l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch);
#ifndef ENABLE_TI_DSPLIB55
if (l->kfft[i]==NULL)
return 0;
@@ -104,11 +104,11 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
return 1;
}
-void clt_mdct_clear(mdct_lookup *l)
+void clt_mdct_clear(mdct_lookup *l, int arch)
{
int i;
for (i=0;i<=l->maxshift;i++)
- opus_fft_free(l->kfft[i]);
+ opus_fft_free(l->kfft[i], arch);
opus_free((kiss_twiddle_scalar*)l->trig);
}
@@ -116,8 +116,8 @@ void clt_mdct_clear(mdct_lookup *l)
/* Forward MDCT trashes the input array */
#ifndef OVERRIDE_clt_mdct_forward
-void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
- const opus_val16 *window, int overlap, int shift, int stride)
+void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 *window, int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
@@ -132,6 +132,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
int scale_shift = st->scale_shift-1;
#endif
SAVE_STACK;
+ (void)arch;
scale = st->scale;
N = l->n;
diff --git a/celt/mdct.h b/celt/mdct.h
index d7218213..8aef9087 100644
--- a/celt/mdct.h
+++ b/celt/mdct.h
@@ -53,13 +53,19 @@ typedef struct {
const kiss_twiddle_scalar * OPUS_RESTRICT trig;
} mdct_lookup;
-int clt_mdct_init(mdct_lookup *l,int N, int maxshift);
-void clt_mdct_clear(mdct_lookup *l);
+#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
+#include "arm/mdct_arm.h"
+#endif
+
+
+int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch);
+void clt_mdct_clear(mdct_lookup *l, int arch);
/** Compute a forward MDCT and scale by 4/N, trashes the input array */
-void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in,
- kiss_fft_scalar * OPUS_RESTRICT out,
- const opus_val16 *window, int overlap, int shift, int stride);
+void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in,
+ kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 *window, int overlap,
+ int shift, int stride, int arch);
/** Compute a backward MDCT (no scaling) and performs weighted overlap-add
(scales implicitly by 1/2) */
@@ -67,4 +73,25 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride);
+#if !defined(OVERRIDE_OPUS_MDCT)
+/* Is run-time CPU detection enabled on this platform? */
+#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
+
+extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(
+ const mdct_lookup *l, kiss_fft_scalar *in,
+ kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
+ int overlap, int shift, int stride, int arch);
+/* Dispatch on the macro's own _arch argument, never on a caller-scope `arch`. */
+#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
+ ((*CLT_MDCT_FORWARD_IMPL[(_arch)&OPUS_ARCHMASK])(_l, _in, _out, \
+ _window, _overlap, _shift, \
+ _stride, _arch))
+#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
+
+#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
+ clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
+
+#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
+#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */
+
#endif
diff --git a/celt/modes.c b/celt/modes.c
index 42e68e1c..911686e9 100644
--- a/celt/modes.c
+++ b/celt/modes.c
@@ -37,6 +37,7 @@
#include "os_support.h"
#include "stack_alloc.h"
#include "quant_bands.h"
+#include "cpu_support.h"
static const opus_int16 eband5ms[] = {
/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */
@@ -229,6 +230,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
opus_val16 *window;
opus_int16 *logN;
int LM;
+ int arch = opus_select_arch();
ALLOC_STACK;
#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA)
if (global_stack==NULL)
@@ -389,7 +391,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
compute_pulse_cache(mode, mode->maxLM);
if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts,
- mode->maxLM) == 0)
+ mode->maxLM, arch) == 0)
goto failure;
if (error)
@@ -408,6 +410,8 @@ failure:
#ifdef CUSTOM_MODES
void opus_custom_mode_destroy(CELTMode *mode)
{
+ int arch = opus_select_arch();
+
if (mode == NULL)
return;
#ifndef CUSTOM_MODES_ONLY
@@ -431,7 +435,7 @@ void opus_custom_mode_destroy(CELTMode *mode)
opus_free((opus_int16*)mode->cache.index);
opus_free((unsigned char*)mode->cache.bits);
opus_free((unsigned char*)mode->cache.caps);
- clt_mdct_clear(&mode->mdct);
+ clt_mdct_clear(&mode->mdct, arch);
opus_free((CELTMode *)mode);
}
diff --git a/celt/pitch.h b/celt/pitch.h
index af745eba..65a77a6e 100644
--- a/celt/pitch.h
+++ b/celt/pitch.h
@@ -47,7 +47,7 @@
#endif
#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
- || defined(OPUS_ARM_NEON_INTR))
+ || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
# include "arm/pitch_arm.h"
#endif
@@ -188,8 +188,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
#if !defined(OVERRIDE_PITCH_XCORR)
/*Is run-time CPU detection enabled on this platform?*/
-# if defined(OPUS_HAVE_RTCD) && \
- (defined(OPUS_ARM_ASM) || (defined(OPUS_ARM_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)))
+# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \
+ || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \
+ && !defined(OPUS_ARM_PRESUME_NEON_INTR)))
extern
# if defined(FIXED_POINT)
opus_val32
diff --git a/celt/static_modes_float.h b/celt/static_modes_float.h
index 2fadb625..e102a383 100644
--- a/celt/static_modes_float.h
+++ b/celt/static_modes_float.h
@@ -4,6 +4,11 @@
#include "modes.h"
#include "rate.h"
+#ifdef HAVE_ARM_NE10
+#define OVERRIDE_FFT 1
+#include "static_modes_float_arm_ne10.h"
+#endif
+
#ifndef DEF_WINDOW120
#define DEF_WINDOW120
static const opus_val16 window120[120] = {
@@ -431,6 +436,11 @@ static const kiss_fft_state fft_state48000_960_0 = {
{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev480, /* bitrev */
fft_twiddles48000_960, /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_480,
+#else
+NULL,
+#endif
};
#endif
@@ -443,6 +453,11 @@ static const kiss_fft_state fft_state48000_960_1 = {
{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev240, /* bitrev */
fft_twiddles48000_960, /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_240,
+#else
+NULL,
+#endif
};
#endif
@@ -455,6 +470,11 @@ static const kiss_fft_state fft_state48000_960_2 = {
{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev120, /* bitrev */
fft_twiddles48000_960, /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_120,
+#else
+NULL,
+#endif
};
#endif
@@ -467,6 +487,11 @@ static const kiss_fft_state fft_state48000_960_3 = {
{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev60, /* bitrev */
fft_twiddles48000_960, /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_60,
+#else
+NULL,
+#endif
};
#endif
diff --git a/celt/static_modes_float_arm_ne10.h b/celt/static_modes_float_arm_ne10.h
new file mode 100644
index 00000000..5bcec707
--- /dev/null
+++ b/celt/static_modes_float_arm_ne10.h
@@ -0,0 +1,404 @@
+/* The contents of this file were automatically generated by
+ * dump_modes_arm_ne10.c with arguments: 48000 960
+ * It contains static definitions for some pre-defined modes. */
+#include <NE10_init.h>
+
+#ifndef NE10_FFT_PARAMS48000_960
+#define NE10_FFT_PARAMS48000_960
+static const ne10_int32_t ne10_factors_480[64] = {
+4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_240[64] = {
+3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_120[64] = {
+3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_60[64] = {
+2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = {
+{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
+{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
+{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
+{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
+{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
+{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
+{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
+{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
+{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
+{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
+{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
+{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
+{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
+{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
+{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
+{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
+{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
+{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
+{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
+{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
+{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
+{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
+{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
+{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
+{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
+{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
+{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
+{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
+{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
+{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
+{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
+{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
+{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
+{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
+{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
+{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
+{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
+{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
+{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
+{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f},
+{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f},
+{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f},
+{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f},
+{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f},
+{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f},
+{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f},
+{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f},
+{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f},
+{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f},
+{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f},
+{0.90814316f,-0.41865975f}, {0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f},
+{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f},
+{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f},
+{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f},
+{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f},
+{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f},
+{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f},
+{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f},
+{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f},
+{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f},
+{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f},
+{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f},
+{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f},
+{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f},
+{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f},
+{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f},
+{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f},
+{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f},
+{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f},
+{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f},
+{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f},
+{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f},
+{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f},
+{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, {0.20791166f,-0.97814763f},
+{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f},
+{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f},
+{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f},
+{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f},
+{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f},
+{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
+{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
+{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
+{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
+{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
+{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
+{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
+{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
+{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
+{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
+{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
+{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
+{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
+{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
+{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
+{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
+{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
+{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
+{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
+{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
+{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f},
+{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f},
+{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f},
+{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f},
+{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f},
+{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f},
+{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f},
+{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f},
+{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f},
+{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f},
+{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f},
+{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f},
+{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f},
+{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f},
+{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f},
+{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f},
+{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f},
+{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f},
+{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f},
+{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f},
+{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, {0.99691731f,-0.078459099f},
+{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f},
+{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f},
+{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f},
+{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f},
+{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f},
+{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f},
+{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f},
+{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f},
+{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f},
+{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f},
+{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f},
+{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f},
+{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f},
+{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f},
+{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f},
+{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f},
+{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f},
+{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f},
+{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f},
+{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f},
+{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f},
+{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f},
+{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, {-0.93819135f,-0.34611690f},
+{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f},
+{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f},
+{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f},
+{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f},
+{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f},
+{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f},
+{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f},
+{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f},
+{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f},
+{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f},
+{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f},
+{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f},
+{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f},
+{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f},
+{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f},
+{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f},
+};
+static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = {
+{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
+{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
+{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
+{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
+{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
+{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
+{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
+{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
+{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
+{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
+{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
+{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
+{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
+{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
+{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
+{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
+{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
+{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
+{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
+{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
+{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
+{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
+{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
+{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
+{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
+{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
+{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
+{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
+{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
+{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
+{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
+{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
+{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
+{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
+{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
+{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
+{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
+{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
+{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
+{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
+{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
+{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
+{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
+{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
+{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
+{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
+{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
+{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
+{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f},
+{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f},
+{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f},
+{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f},
+{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f},
+{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f},
+{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f},
+{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f},
+{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f},
+{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f},
+{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f},
+{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f},
+{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f},
+{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f},
+{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f},
+{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f},
+{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f},
+{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f},
+{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f},
+{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f},
+{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f},
+{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f},
+{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f},
+{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f},
+{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f},
+{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, {-0.85264015f,0.52249861f},
+{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f},
+{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f},
+{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f},
+{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f},
+};
+/* Twiddle table used by ne10_fft_state_float32_120: 120 entries, each a
+   pair of floats (presumably {real, imag} -- confirm against the
+   ne10_fft_cpx_float32_t declaration in the NE10 headers). The state
+   object stores two pointers into this table, one to entry 0 and one to
+   entry 30. */
+static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = {
+{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
+{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
+{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
+{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
+{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
+{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
+{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
+{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
+{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
+{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
+{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
+{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
+{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
+{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
+{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
+{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
+{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
+{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
+{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
+{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
+{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
+{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
+{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
+{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
+{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
+{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
+{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
+{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
+{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
+{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
+{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
+{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
+{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
+{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
+{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
+{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
+{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
+{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
+{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
+};
+/* Twiddle table used by ne10_fft_state_float32_60: 60 entries, each a
+   pair of floats (presumably {real, imag} -- confirm against the
+   ne10_fft_cpx_float32_t declaration in the NE10 headers). The state
+   object stores two pointers into this table, one to entry 0 and one to
+   entry 15. */
+static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = {
+{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
+{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
+{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
+{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
+{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
+{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
+{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
+{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
+{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
+{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
+{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
+{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
+{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
+{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
+{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
+{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
+{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
+{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
+};
+/* Statically initialized NE10 FFT state for the 480 configuration.
+   The initializer is positional: the leading value is 120, followed by
+   the factor table, a pointer to the start of ne10_twiddles_480, a NULL
+   slot, and a second twiddle pointer 120 entries into the same table.
+   NOTE(review): the field names (and in particular what the NULL slot
+   is) are not visible here -- confirm against ne10_fft_state_float32_t
+   in the NE10 headers. The casts only adjust pointer types to match the
+   struct fields. */
+static const ne10_fft_state_float32_t ne10_fft_state_float32_480 = {
+120,
+(ne10_int32_t *)ne10_factors_480,
+(ne10_fft_cpx_float32_t *)ne10_twiddles_480,
+NULL,
+(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120],
+/* is_forward_scaled = true */
+(ne10_int32_t) 1,
+/* is_backward_scaled = false */
+(ne10_int32_t) 0,
+};
+/* Arch-specific FFT configuration wrapping the state above as an opaque
+   pointer. The leading 1 is presumably a "supported" flag -- TODO confirm
+   against the arch_fft_state declaration. */
+static const arch_fft_state cfg_arch_480 = {
+1,
+(void *)&ne10_fft_state_float32_480,
+};
+
+/* Statically initialized NE10 FFT state for the 240 configuration.
+   The initializer is positional: the leading value is 60, followed by
+   the factor table, a pointer to the start of ne10_twiddles_240, a NULL
+   slot, and a second twiddle pointer 60 entries into the same table.
+   NOTE(review): the field names (and in particular what the NULL slot
+   is) are not visible here -- confirm against ne10_fft_state_float32_t
+   in the NE10 headers. The casts only adjust pointer types to match the
+   struct fields. */
+static const ne10_fft_state_float32_t ne10_fft_state_float32_240 = {
+60,
+(ne10_int32_t *)ne10_factors_240,
+(ne10_fft_cpx_float32_t *)ne10_twiddles_240,
+NULL,
+(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60],
+/* is_forward_scaled = true */
+(ne10_int32_t) 1,
+/* is_backward_scaled = false */
+(ne10_int32_t) 0,
+};
+/* Arch-specific FFT configuration wrapping the state above as an opaque
+   pointer. The leading 1 is presumably a "supported" flag -- TODO confirm
+   against the arch_fft_state declaration. */
+static const arch_fft_state cfg_arch_240 = {
+1,
+(void *)&ne10_fft_state_float32_240,
+};
+
+/* Statically initialized NE10 FFT state for the 120 configuration.
+   The initializer is positional: the leading value is 30, followed by
+   the factor table, a pointer to the start of ne10_twiddles_120, a NULL
+   slot, and a second twiddle pointer 30 entries into the same table.
+   NOTE(review): the field names (and in particular what the NULL slot
+   is) are not visible here -- confirm against ne10_fft_state_float32_t
+   in the NE10 headers. The casts drop the const qualifier of the static
+   tables to match the struct field types. */
+static const ne10_fft_state_float32_t ne10_fft_state_float32_120 = {
+30,
+(ne10_int32_t *)ne10_factors_120,
+(ne10_fft_cpx_float32_t *)ne10_twiddles_120,
+NULL,
+(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30],
+/* is_forward_scaled = true */
+(ne10_int32_t) 1,
+/* is_backward_scaled = false */
+(ne10_int32_t) 0,
+};
+/* Arch-specific FFT configuration wrapping the state above as an opaque
+   pointer. The leading 1 is presumably a "supported" flag -- TODO confirm
+   against the arch_fft_state declaration. */
+static const arch_fft_state cfg_arch_120 = {
+1,
+(void *)&ne10_fft_state_float32_120,
+};
+
+/* Statically initialized NE10 FFT state for the 60 configuration.
+   The initializer is positional: the leading value is 15, followed by
+   the factor table, a pointer to the start of ne10_twiddles_60, a NULL
+   slot, and a second twiddle pointer 15 entries into the same table.
+   NOTE(review): the field names (and in particular what the NULL slot
+   is) are not visible here -- confirm against ne10_fft_state_float32_t
+   in the NE10 headers. The casts drop the const qualifier of the static
+   tables to match the struct field types. */
+static const ne10_fft_state_float32_t ne10_fft_state_float32_60 = {
+15,
+(ne10_int32_t *)ne10_factors_60,
+(ne10_fft_cpx_float32_t *)ne10_twiddles_60,
+NULL,
+(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15],
+/* is_forward_scaled = true */
+(ne10_int32_t) 1,
+/* is_backward_scaled = false */
+(ne10_int32_t) 0,
+};
+/* Arch-specific FFT configuration wrapping the state above as an opaque
+   pointer. The leading 1 is presumably a "supported" flag -- TODO confirm
+   against the arch_fft_state declaration. */
+static const arch_fft_state cfg_arch_60 = {
+1,
+(void *)&ne10_fft_state_float32_60,
+};
+
+#endif /* end NE10_FFT_PARAMS48000_960 */
diff --git a/celt/tests/test_unit_dft.c b/celt/tests/test_unit_dft.c
index 57db0e3a..5fb8bcb4 100644
--- a/celt/tests/test_unit_dft.c
+++ b/celt/tests/test_unit_dft.c
@@ -45,6 +45,23 @@
#include "mathops.c"
#include "entcode.c"
+#if defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1)
+# include "x86/x86cpu.c"
+#elif defined(OPUS_HAVE_RTCD) && \
+ (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+# include "arm/armcpu.c"
+# include "celt_lpc.c"
+# include "pitch.c"
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+# include "arm/celt_neon_intr.c"
+# if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
+# include "mdct.c"
+# include "arm/celt_ne10_fft.c"
+# include "arm/celt_ne10_mdct.c"
+# endif
+# endif
+# include "arm/arm_celt_map.c"
+#endif
#ifndef M_PI
#define M_PI 3.141592653
@@ -93,13 +110,13 @@ void check(kiss_fft_cpx * in,kiss_fft_cpx * out,int nfft,int isinverse)
}
}
-void test1d(int nfft,int isinverse)
+void test1d(int nfft,int isinverse,int arch)
{
size_t buflen = sizeof(kiss_fft_cpx)*nfft;
kiss_fft_cpx * in = (kiss_fft_cpx*)malloc(buflen);
kiss_fft_cpx * out= (kiss_fft_cpx*)malloc(buflen);
- kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0);
+ kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0,arch);
int k;
for (k=0;k<nfft;++k) {
@@ -125,7 +142,7 @@ void test1d(int nfft,int isinverse)
if (isinverse)
opus_ifft(cfg,in,out);
else
- opus_fft(cfg,in,out);
+ opus_fft(cfg,in,out, arch);
/*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
@@ -139,26 +156,28 @@ void test1d(int nfft,int isinverse)
int main(int argc,char ** argv)
{
ALLOC_STACK;
+ int arch = opus_select_arch();
+
if (argc>1) {
int k;
for (k=1;k<argc;++k) {
- test1d(atoi(argv[k]),0);
- test1d(atoi(argv[k]),1);
+ test1d(atoi(argv[k]),0,arch);
+ test1d(atoi(argv[k]),1,arch);
}
}else{
- test1d(32,0);
- test1d(32,1);
- test1d(128,0);
- test1d(128,1);
- test1d(256,0);
- test1d(256,1);
+ test1d(32,0,arch);
+ test1d(32,1,arch);
+ test1d(128,0,arch);
+ test1d(128,1,arch);
+ test1d(256,0,arch);
+ test1d(256,1,arch);
#ifndef RADIX_TWO_ONLY
- test1d(36,0);
- test1d(36,1);
- test1d(50,0);
- test1d(50,1);
- test1d(120,0);
- test1d(120,1);
+ test1d(36,0,arch);
+ test1d(36,1,arch);
+ test1d(50,0,arch);
+ test1d(50,1,arch);
+ test1d(120,0,arch);
+ test1d(120,1,arch);
#endif
}
return ret;
diff --git a/celt/tests/test_unit_mathops.c b/celt/tests/test_unit_mathops.c
index 2de39baf..2f43704f 100644
--- a/celt/tests/test_unit_mathops.c
+++ b/celt/tests/test_unit_mathops.c
@@ -52,23 +52,30 @@
#include "celt.c"
#if defined(OPUS_X86_MAY_HAVE_SSE) || defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1)
-#if defined(OPUS_X86_MAY_HAVE_SSE)
-#include "x86/pitch_sse.c"
-#endif
-#if defined(OPUS_X86_MAY_HAVE_SSE2)
-#include "x86/pitch_sse2.c"
-#endif
-#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
-#include "x86/pitch_sse4_1.c"
-#include "x86/celt_lpc_sse.c"
-#endif
-#include "x86/x86_celt_map.c"
-#elif ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
- || defined(OPUS_ARM_NEON_INTR))
-#if defined(OPUS_ARM_NEON_INTR)
-#include "arm/celt_neon_intr.c"
-#endif
-#include "arm/arm_celt_map.c"
+# if defined(OPUS_X86_MAY_HAVE_SSE)
+# include "x86/pitch_sse.c"
+# endif
+# if defined(OPUS_X86_MAY_HAVE_SSE2)
+# include "x86/pitch_sse2.c"
+# endif
+# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+# include "x86/pitch_sse4_1.c"
+# include "x86/celt_lpc_sse.c"
+# endif
+# include "x86/x86_celt_map.c"
+#elif defined(OPUS_HAVE_RTCD) && \
+ (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+# include "arm/armcpu.c"
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+# include "arm/celt_neon_intr.c"
+# if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
+# include "kiss_fft.c"
+# include "mdct.c"
+# include "arm/celt_ne10_fft.c"
+# include "arm/celt_ne10_mdct.c"
+# endif
+# endif
+# include "arm/arm_celt_map.c"
#endif
#ifdef FIXED_POINT
diff --git a/celt/tests/test_unit_mdct.c b/celt/tests/test_unit_mdct.c
index ac8957fd..4a524e64 100644
--- a/celt/tests/test_unit_mdct.c
+++ b/celt/tests/test_unit_mdct.c
@@ -46,6 +46,23 @@
#include "mathops.c"
#include "entcode.c"
+#if defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1)
+# include "x86/x86cpu.c"
+#elif defined(OPUS_HAVE_RTCD) && \
+ (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+# include "arm/armcpu.c"
+# include "pitch.c"
+# include "celt_lpc.c"
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+# include "arm/celt_neon_intr.c"
+# if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
+# include "arm/celt_ne10_fft.c"
+# include "arm/celt_ne10_mdct.c"
+# endif
+# endif
+# include "arm/arm_celt_map.c"
+#endif
+
#ifndef M_PI
#define M_PI 3.141592653
#endif
@@ -112,7 +129,7 @@ void check_inv(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinver
}
-void test1d(int nfft,int isinverse)
+void test1d(int nfft,int isinverse,int arch)
{
mdct_lookup cfg;
size_t buflen = sizeof(kiss_fft_scalar)*nfft;
@@ -123,7 +140,7 @@ void test1d(int nfft,int isinverse)
opus_val16 * window= (opus_val16*)malloc(sizeof(opus_val16)*nfft/2);
int k;
- clt_mdct_init(&cfg, nfft, 0);
+ clt_mdct_init(&cfg, nfft, 0, arch);
for (k=0;k<nfft;++k) {
in[k] = (rand() % 32768) - 16384;
}
@@ -156,7 +173,7 @@ void test1d(int nfft,int isinverse)
out[nfft-k-1] = out[nfft/2+k];
check_inv(in,out,nfft,isinverse);
} else {
- clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1);
+ clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1, arch);
check(in_copy,out,nfft,isinverse);
}
/*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
@@ -164,46 +181,48 @@ void test1d(int nfft,int isinverse)
free(in);
free(out);
- clt_mdct_clear(&cfg);
+ clt_mdct_clear(&cfg, arch);
}
int main(int argc,char ** argv)
{
ALLOC_STACK;
+ int arch = opus_select_arch();
+
if (argc>1) {
int k;
for (k=1;k<argc;++k) {
- test1d(atoi(argv[k]),0);
- test1d(atoi(argv[k]),1);
+ test1d(atoi(argv[k]),0,arch);
+ test1d(atoi(argv[k]),1,arch);
}
}else{
- test1d(32,0);
- test1d(32,1);
- test1d(256,0);
- test1d(256,1);
- test1d(512,0);
- test1d(512,1);
- test1d(1024,0);
- test1d(1024,1);
- test1d(2048,0);
- test1d(2048,1);
+ test1d(32,0,arch);
+ test1d(32,1,arch);
+ test1d(256,0,arch);
+ test1d(256,1,arch);
+ test1d(512,0,arch);
+ test1d(512,1,arch);
+ test1d(1024,0,arch);
+ test1d(1024,1,arch);
+ test1d(2048,0,arch);
+ test1d(2048,1,arch);
#ifndef RADIX_TWO_ONLY
- test1d(36,0);
- test1d(36,1);
- test1d(40,0);
- test1d(40,1);
- test1d(60,0);
- test1d(60,1);
- test1d(120,0);
- test1d(120,1);
- test1d(240,0);
- test1d(240,1);
- test1d(480,0);
- test1d(480,1);
- test1d(960,0);
- test1d(960,1);
- test1d(1920,0);
- test1d(1920,1);
+ test1d(36,0,arch);
+ test1d(36,1,arch);
+ test1d(40,0,arch);
+ test1d(40,1,arch);
+ test1d(60,0,arch);
+ test1d(60,1,arch);
+ test1d(120,0,arch);
+ test1d(120,1,arch);
+ test1d(240,0,arch);
+ test1d(240,1,arch);
+ test1d(480,0,arch);
+ test1d(480,1,arch);
+ test1d(960,0,arch);
+ test1d(960,1,arch);
+ test1d(1920,0,arch);
+ test1d(1920,1,arch);
#endif
}
return ret;
diff --git a/celt/tests/test_unit_rotation.c b/celt/tests/test_unit_rotation.c
index 4780005f..932cd247 100644
--- a/celt/tests/test_unit_rotation.c
+++ b/celt/tests/test_unit_rotation.c
@@ -50,23 +50,30 @@
#include <math.h>
#if defined(OPUS_X86_MAY_HAVE_SSE) || defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1)
-#if defined(OPUS_X86_MAY_HAVE_SSE)
-#include "x86/pitch_sse.c"
-#endif
-#if defined(OPUS_X86_MAY_HAVE_SSE2)
-#include "x86/pitch_sse2.c"
-#endif
-#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
-#include "x86/pitch_sse4_1.c"
-#include "x86/celt_lpc_sse.c"
-#endif
-#include "x86/x86_celt_map.c"
-#elif ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
- || defined(OPUS_ARM_NEON_INTR))
-#if defined(OPUS_ARM_NEON_INTR)
-#include "arm/celt_neon_intr.c"
-#endif
-#include "arm/arm_celt_map.c"
+# if defined(OPUS_X86_MAY_HAVE_SSE)
+# include "x86/pitch_sse.c"
+# endif
+# if defined(OPUS_X86_MAY_HAVE_SSE2)
+# include "x86/pitch_sse2.c"
+# endif
+# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+# include "x86/pitch_sse4_1.c"
+# include "x86/celt_lpc_sse.c"
+# endif
+# include "x86/x86_celt_map.c"
+#elif defined(OPUS_HAVE_RTCD) && \
+ (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+# include "arm/armcpu.c"
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+# include "arm/celt_neon_intr.c"
+# if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
+# include "kiss_fft.c"
+# include "mdct.c"
+# include "arm/celt_ne10_fft.c"
+# include "arm/celt_ne10_mdct.c"
+# endif
+# endif
+# include "arm/arm_celt_map.c"
#endif
#define MAX_SIZE 100
diff --git a/celt_headers.mk b/celt_headers.mk
index d422e090..5dc9e1e1 100644
--- a/celt_headers.mk
+++ b/celt_headers.mk
@@ -31,12 +31,15 @@ celt/stack_alloc.h \
celt/vq.h \
celt/static_modes_float.h \
celt/static_modes_fixed.h \
+celt/static_modes_float_arm_ne10.h \
celt/arm/armcpu.h \
celt/arm/fixed_armv4.h \
celt/arm/fixed_armv5e.h \
celt/arm/kiss_fft_armv4.h \
celt/arm/kiss_fft_armv5e.h \
celt/arm/pitch_arm.h \
+celt/arm/fft_arm.h \
+celt/arm/mdct_arm.h \
celt/mips/celt_mipsr1.h \
celt/mips/fixed_generic_mipsr1.h \
celt/mips/kiss_fft_mipsr1.h \
diff --git a/celt_sources.mk b/celt_sources.mk
index c92693fe..2ffe99a3 100644
--- a/celt_sources.mk
+++ b/celt_sources.mk
@@ -38,3 +38,7 @@ celt/arm/armopts.s.in
CELT_SOURCES_ARM_NEON_INTR = \
celt/arm/celt_neon_intr.c
+
+CELT_SOURCES_ARM_NE10= \
+celt/arm/celt_ne10_fft.c \
+celt/arm/celt_ne10_mdct.c
diff --git a/configure.ac b/configure.ac
index de75c094..d94fc7e2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -378,6 +378,80 @@ AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86
AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")])
AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])])
+dnl OPUS_PATH_NE10
+dnl Locate libNE10 from --with-NE10 / --with-NE10-libraries /
+dnl --with-NE10-includes and try to link a small program against it.
+dnl On success HAVE_ARM_NE10 is set to 1 and AC_DEFINEd and NE10_CFLAGS
+dnl and NE10_LIBS are substituted. On failure HAVE_ARM_NE10 is set to 0
+dnl and NE10_CFLAGS and NE10_LIBS are cleared.
+AC_DEFUN([OPUS_PATH_NE10],
+ [
+ AC_ARG_WITH(NE10,
+ AC_HELP_STRING([--with-NE10=PFX],[Prefix where libNE10 is installed (optional)]),
+ NE10_prefix="$withval", NE10_prefix="")
+ AC_ARG_WITH(NE10-libraries,
+ AC_HELP_STRING([--with-NE10-libraries=DIR],
+ [Directory where libNE10 library is installed (optional)]),
+ NE10_libraries="$withval", NE10_libraries="")
+ AC_ARG_WITH(NE10-includes,
+ AC_HELP_STRING([--with-NE10-includes=DIR],
+ [Directory where libNE10 header files are installed (optional)]),
+ NE10_includes="$withval", NE10_includes="")
+
+ # Library search path. An explicit library directory wins over the
+ # NE10 prefix and the NE10 prefix wins over the configure prefix.
+ if test "x$NE10_libraries" != "x" ; then
+ NE10_LIBS="-L$NE10_libraries"
+ elif test "x$NE10_prefix" = "xno" || test "x$NE10_prefix" = "xyes" ; then
+ NE10_LIBS=""
+ elif test "x$NE10_prefix" != "x" ; then
+ NE10_LIBS="-L$NE10_prefix/lib"
+ elif test "x$prefix" != "xNONE" ; then
+ NE10_LIBS="-L$prefix/lib"
+ fi
+
+ if test "x$NE10_prefix" != "xno" ; then
+ NE10_LIBS="$NE10_LIBS -lNE10"
+ fi
+
+ # Header search path. Same precedence as the library path above.
+ # The prefix branch must test NE10_prefix so that --with-NE10=PFX
+ # also contributes PFX/include.
+ if test "x$NE10_includes" != "x" ; then
+ NE10_CFLAGS="-I$NE10_includes"
+ elif test "x$NE10_prefix" = "xno" || test "x$NE10_prefix" = "xyes" ; then
+ NE10_CFLAGS=""
+ elif test "x$NE10_prefix" != "x" ; then
+ NE10_CFLAGS="-I$NE10_prefix/include"
+ elif test "x$prefix" != "xNONE"; then
+ NE10_CFLAGS="-I$prefix/include"
+ fi
+
+ # Link probe. CFLAGS and LIBS are swapped for the NE10 values during
+ # the test and restored afterwards.
+ AC_MSG_CHECKING(for NE10)
+ save_CFLAGS="$CFLAGS"; CFLAGS="$NE10_CFLAGS"
+ save_LIBS="$LIBS"; LIBS="$NE10_LIBS $LIBM"
+ AC_LINK_IFELSE(
+ [
+ AC_LANG_PROGRAM(
+ [[#include <NE10_init.h>
+ ]],
+ [[
+ ne10_fft_cfg_float32_t cfg;
+ cfg = ne10_fft_alloc_c2c_float32_neon(480);
+ ]]
+ )
+ ],[
+ HAVE_ARM_NE10=1
+ AC_MSG_RESULT([yes])
+ ],[
+ HAVE_ARM_NE10=0
+ AC_MSG_RESULT([no])
+ NE10_CFLAGS=""
+ NE10_LIBS=""
+ ]
+ )
+ CFLAGS="$save_CFLAGS"; LIBS="$save_LIBS"
+ #Now we know if libNE10 is installed or not
+ AS_IF([test x"$HAVE_ARM_NE10" = x"1"],
+ [
+ AC_DEFINE([HAVE_ARM_NE10], 1, [NE10 library is installed on host. Make sure it is on target!])
+ AC_SUBST(HAVE_ARM_NE10)
+ AC_SUBST(NE10_CFLAGS)
+ AC_SUBST(NE10_LIBS)
+ ],[]
+ )
+ ]
+)
+
AS_IF([test x"$enable_intrinsics" = x"yes"],[
intrinsics_support=""
AS_CASE([$host_cpu],
@@ -417,7 +491,16 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
AS_IF([test x"$OPUS_ARM_PRESUME_NEON_INTR" = x"1"],
[AC_DEFINE([OPUS_ARM_PRESUME_NEON_INTR], 1, [Define if binary requires NEON intrinsics support])])
- AS_IF([test x"$rtcd_support" = x""],
+ OPUS_PATH_NE10()
+ # NE10_LIBS is cleared by the probe when libNE10 cannot be linked so a
+ # non-empty value means the library was found.
+ AS_IF([test x"$NE10_LIBS" != x""],
+ [
+ intrinsics_support="$intrinsics_support (NE10)"
+ # Report NE10 under run-time detection only when RTCD is enabled and
+ # NEON is not already presumed at build time.
+ # NOTE(review): assumes enable_rtcd is "no" when RTCD is disabled.
+ # Confirm against the rtcd option handling earlier in this file.
+ AS_IF([test x"$enable_rtcd" != x"no" \
+ && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
+ [rtcd_support="$rtcd_support (NE10)"],[])
+ ])
+
+ AS_IF([test x"$rtcd_support" = x""],
[rtcd_support=no])
AS_IF([test x"$intrinsics_support" = x""],
@@ -588,6 +671,8 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"])
AM_CONDITIONAL([OPUS_ARM_NEON_INTR],
[test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"])
+AM_CONDITIONAL([HAVE_ARM_NE10],
+ [test x"$HAVE_ARM_NE10" = x"1"])
AM_CONDITIONAL([HAVE_SSE],
[test x"$OPUS_X86_MAY_HAVE_SSE" = x"1"])
AM_CONDITIONAL([HAVE_SSE2],
diff --git a/src/analysis.c b/src/analysis.c
index 401a43e4..322e53c4 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -187,7 +187,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
info_out->music_prob = psum;
}
-static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
+static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix, int arch)
{
int i, b;
const kiss_fft_state *kfft;
@@ -260,7 +260,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
tonal->mem_fill = 240 + remaining;
- opus_fft(kfft, in, out);
+ opus_fft(kfft, in, out, arch);
#ifndef FIXED_POINT
/* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */
if (celt_isnan(out[0].r))
@@ -633,7 +633,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
- int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+ int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info, int arch)
{
int offset;
int pcm_len;
@@ -646,7 +646,7 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co
pcm_len = analysis_frame_size - analysis->analysis_offset;
offset = analysis->analysis_offset;
do {
- tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+ tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix, arch);
offset += 480;
pcm_len -= 480;
} while (pcm_len>0);
diff --git a/src/analysis.h b/src/analysis.h
index 85a73d75..9c328e8b 100644
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -82,6 +82,6 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
- int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
+ int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info, int arch);
#endif
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index d11e972f..9dbe4bf5 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -1006,7 +1006,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
analysis_read_subframe_bak = st->analysis.read_subframe;
run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size,
c1, c2, analysis_channels, st->Fs,
- lsb_depth, downmix, &analysis_info);
+ lsb_depth, downmix, &analysis_info, st->arch);
}
#else
(void)analysis_pcm;
diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c
index aa6a2672..9e857735 100644
--- a/src/opus_multistream_encoder.c
+++ b/src/opus_multistream_encoder.c
@@ -72,6 +72,7 @@ typedef void (*opus_copy_channel_in_func)(
struct OpusMSEncoder {
ChannelLayout layout;
+ int arch;
int lfe_stream;
int application;
int variable_duration;
@@ -221,7 +222,7 @@ opus_val16 logSum(opus_val16 a, opus_val16 b)
#endif
void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem,
- int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in
+ int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in, int arch
)
{
int c;
@@ -273,7 +274,8 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b
}
}
#endif
- clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);
+ clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window,
+ overlap, celt_mode->maxLM-LM, 1, arch);
if (upsample != 1)
{
int bound = len;
@@ -427,6 +429,7 @@ static int opus_multistream_encoder_init_impl(
(streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
return OPUS_BAD_ARG;
+ st->arch = opus_select_arch();
st->layout.nb_channels = channels;
st->layout.nb_streams = streams;
st->layout.nb_coupled_streams = coupled_streams;
@@ -783,7 +786,7 @@ static int opus_multistream_encode_native
ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
if (st->surround)
{
- surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in);
+ surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch);
}
/* Compute bitrate allocation between streams (this could be a lot better) */