Upgrade Opus to v1.1.4 [android-o-preview-1, android-n-mr2-preview-2, o-preview]

Test:
- verified build for arm*/mips*/x86*
- checked functionality using an emulator and stagefright

Change-Id: Iab6e7315c51e020dc986d57c89934a1205ec7a61
author: Felicia Lim <flim@google.com> 2017-01-25 08:49:31 -0800
committer: Felicia Lim <flim@google.com> 2017-01-25 08:57:38 -0800
commit: 0a1406acbe87c63044e9da7e0ab41bcbfa704f3d (patch)
tree: 6dfda4da354e420a9ac2e256f168609e7d97571a /celt
parent: e65278181df6dea0ac1dde71f2534d66816119d2 (diff)
download: libopus-0a1406acbe87c63044e9da7e0ab41bcbfa704f3d.tar.gz
7 files changed, 731 insertions, 552 deletions
diff --git a/celt/arm/celt_pitch_xcorr_arm-gnu.S_gnu.s b/celt/arm/celt_pitch_xcorr_arm-gnu.S_gnu.s
deleted file mode 100644
index b62c5207..00000000
--- a/celt/arm/celt_pitch_xcorr_arm-gnu.S_gnu.s
+++ /dev/null
@@ -1,552 +0,0 @@
-    .syntax unified
-    .syntax unified
-,: Copyright (c) 2007-2008 CSIRO
-,: Copyright (c) 2007-2009 Xiph.Org Foundation
-,: Copyright (c) 2013      Parrot
-,: Written by Aurélien Zanelli
-,:
-,: Redistribution and use in source and binary forms, with or without
-,: modification, are permitted provided that the following conditions
-,: are met:
-,:
-,: - Redistributions of source code must retain the above copyright
-,: notice, this list of conditions and the following disclaimer.
-,:
-,: - Redistributions in binary form must reproduce the above copyright
-,: notice, this list of conditions and the following disclaimer in the
-,: documentation and/or other materials provided with the distribution.
-,:
-,: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-,: ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-,: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-,: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-,: OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-,: EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-,: PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES@ LOSS OF USE, @ DATA, OR
-,: PROFITS@ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-,: LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-,: NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-,: SOFTWARE, EVEN  .if ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-    .text@   .p2align 2;   .arch armv7-a
-   .fpu neon
-   .object_arch armv4t
-
-  .include "celt/arm/armopts_gnu.s"
-
- .if OPUS_ARM_MAY_HAVE_EDSP
-  .global celt_pitch_xcorr_edsp
- .endif
-
- .if OPUS_ARM_MAY_HAVE_NEON
-  .global celt_pitch_xcorr_neon
- .endif
-
- .if OPUS_ARM_MAY_HAVE_NEON
-
-,: Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
-@ xcorr_kernel_neon: @ PROC
-xcorr_kernel_neon_start::
-  ,: input:
-  ,:   r3     = int         len
-  ,:   r4     = opus_val16 *x
-  ,:   r5     = opus_val16 *y
-  ,:   q0     = opus_val32  sum[4]
-  ,: output:
-  ,:   q0     = opus_val32  sum[4]
-  ,: preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
-  ,: internal usage:
-  ,:   r12 = int j
-  ,:   d3  = y_3|y_2|y_1|y_0
-  ,:   q2  = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
-  ,:   q3  = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
-  ,:   q8  = scratch
-  ,:
-  ,: Load y[0...3]
-  ,: This requires len>0 to always be valid (which we assert in the C code).
-  VLD1.16      {d5}, [r5]!
-  SUBS         r12, r3, #8
-  BLE xcorr_kernel_neon_process4
-,: Process 8 samples at a time.
-,: This loop loads one y value more than we actually need. Therefore we have to
-,: stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
-,: reading past the end of the array.
-xcorr_kernel_neon_process8::
-  ,: This loop has 19 total instructions (10 cycles to issue, minimum), with
-  ,: - 2 cycles of ARM insrtuctions,
-  ,: - 10 cycles of load/store/byte permute instructions, and
-  ,: - 9 cycles of data processing instructions.
-  ,: On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
-  ,: latter two categories, meaning the whole loop should run in 10 cycles per
-  ,: iteration, barring cache misses.
-  ,:
-  ,: Load x[0...7]
-  VLD1.16      {d6, d7}, [r4]!
-  ,: Unlike VMOV, VAND is a data processsing instruction (and doesn't get
-  ,: assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
-  VAND         d3, d5, d5
-  SUBS         r12, r12, #8
-  ,: Load y[4...11]
-  VLD1.16      {d4, d5}, [r5]!
-  VMLAL.S16    q0, d3, d6[0]
-  VEXT.16      d16, d3, d4, #1
-  VMLAL.S16    q0, d4, d7[0]
-  VEXT.16      d17, d4, d5, #1
-  VMLAL.S16    q0, d16, d6[1]
-  VEXT.16      d16, d3, d4, #2
-  VMLAL.S16    q0, d17, d7[1]
-  VEXT.16      d17, d4, d5, #2
-  VMLAL.S16    q0, d16, d6[2]
-  VEXT.16      d16, d3, d4, #3
-  VMLAL.S16    q0, d17, d7[2]
-  VEXT.16      d17, d4, d5, #3
-  VMLAL.S16    q0, d16, d6[3]
-  VMLAL.S16    q0, d17, d7[3]
-  BGT xcorr_kernel_neon_process8
-,: Process 4 samples here if we have > 4 left (still reading one extra y value).
-xcorr_kernel_neon_process4::
-  ADDS         r12, r12, #4
-  BLE xcorr_kernel_neon_process2
-  ,: Load x[0...3]
-  VLD1.16      d6, [r4]!
-  ,: Use VAND since it's a data processing instruction again.
-  VAND         d4, d5, d5
-  SUB          r12, r12, #4
-  ,: Load y[4...7]
-  VLD1.16      d5, [r5]!
-  VMLAL.S16    q0, d4, d6[0]
-  VEXT.16      d16, d4, d5, #1
-  VMLAL.S16    q0, d16, d6[1]
-  VEXT.16      d16, d4, d5, #2
-  VMLAL.S16    q0, d16, d6[2]
-  VEXT.16      d16, d4, d5, #3
-  VMLAL.S16    q0, d16, d6[3]
-,: Process 2 samples here if we have > 2 left (still reading one extra y value).
-xcorr_kernel_neon_process2::
-  ADDS         r12, r12, #2
-  BLE xcorr_kernel_neon_process1
-  ,: Load x[0...1]
-  VLD2.16      {d6[],d7[]}, [r4]!
-  ,: Use VAND since it's a data processing instruction again.
-  VAND         d4, d5, d5
-  SUB          r12, r12, #2
-  ,: Load y[4...5]
-  VLD1.32      {d5[]}, [r5]!
-  VMLAL.S16    q0, d4, d6
-  VEXT.16      d16, d4, d5, #1
-  ,: Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
-  ,: instead of VEXT, since it's a data-processing instruction.
-  VSRI.64      d5, d4, #32
-  VMLAL.S16    q0, d16, d7
-,: Process 1 sample using the extra y value we loaded above.
-xcorr_kernel_neon_process1::
-  ,: Load next *x
-  VLD1.16      {d6[]}, [r4]!
-  ADDS         r12, r12, #1
-  ,: y[0...3] are left in d5 from prior iteration(s) (if any)
-  VMLAL.S16    q0, d5, d6
-  MOVLE        pc, lr
-,: Now process 1 last sample, not reading ahead.
-  ,: Load last *y
-  VLD1.16      {d4[]}, [r5]!
-  VSRI.64      d4, d5, #16
-  ,: Load last *x
-  VLD1.16      {d6[]}, [r4]!
-  VMLAL.S16    q0, d4, d6
-  MOV          pc, lr
-	.size xcorr_kernel_neon, .-xcorr_kernel_neon  ,: @ ENDP
-
-,: opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
-,:  opus_val32 *xcorr, int len, int max_pitch)
-@ celt_pitch_xcorr_neon: @ PROC
-  ,: input:
-  ,:   r0  = opus_val16 *_x
-  ,:   r1  = opus_val16 *_y
-  ,:   r2  = opus_val32 *xcorr
-  ,:   r3  = int         len
-  ,: output:
-  ,:   r0  = int         maxcorr
-  ,: internal usage:
-  ,:   r4  = opus_val16 *x (for xcorr_kernel_neon())
-  ,:   r5  = opus_val16 *y (for xcorr_kernel_neon())
-  ,:   r6  = int         max_pitch
-  ,:   r12 = int         j
-  ,:   q15 = int         maxcorr[4] (q15 is not used by xcorr_kernel_neon())
-  STMFD        sp!, {r4-r6, lr}
-  LDR          r6, [sp, #16]
-  VMOV.S32     q15, #1
-  ,: if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
-  SUBS         r6, r6, #4
-  BLT celt_pitch_xcorr_neon_process4_done
-celt_pitch_xcorr_neon_process4::
-  ,: xcorr_kernel_neon parameters:
-  ,: r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
-  MOV          r4, r0
-  MOV          r5, r1
-  VEOR         q0, q0, q0
-  ,: xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
-  ,: So we don't save/restore any other registers.
-  BL xcorr_kernel_neon_start
-  SUBS         r6, r6, #4
-  VST1.32      {q0}, [r2]!
-  ,: _y += 4
-  ADD          r1, r1, #8
-  VMAX.S32     q15, q15, q0
-  ,: if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
-  BGE celt_pitch_xcorr_neon_process4
-,: We have less than 4 sums left to compute.
-celt_pitch_xcorr_neon_process4_done::
-  ADDS         r6, r6, #4
-  ,: Reduce maxcorr to a single value
-  VMAX.S32     d30, d30, d31
-  VPMAX.S32    d30, d30, d30
-  ,: if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
-  BLE celt_pitch_xcorr_neon_done
-,: Now compute each remaining sum one at a time.
-celt_pitch_xcorr_neon_process_remaining::
-  MOV          r4, r0
-  MOV          r5, r1
-  VMOV.I32     q0, #0
-  SUBS         r12, r3, #8
-  BLT celt_pitch_xcorr_neon_process_remaining4
-,: Sum terms 8 at a time.
-celt_pitch_xcorr_neon_process_remaining_loop8::
-  ,: Load x[0...7]
-  VLD1.16      {q1}, [r4]!
-  ,: Load y[0...7]
-  VLD1.16      {q2}, [r5]!
-  SUBS         r12, r12, #8
-  VMLAL.S16    q0, d4, d2
-  VMLAL.S16    q0, d5, d3
-  BGE celt_pitch_xcorr_neon_process_remaining_loop8
-,: Sum terms 4 at a time.
-celt_pitch_xcorr_neon_process_remaining4::
-  ADDS         r12, r12, #4
-  BLT celt_pitch_xcorr_neon_process_remaining4_done
-  ,: Load x[0...3]
-  VLD1.16      {d2}, [r4]!
-  ,: Load y[0...3]
-  VLD1.16      {d3}, [r5]!
-  SUB          r12, r12, #4
-  VMLAL.S16    q0, d3, d2
-celt_pitch_xcorr_neon_process_remaining4_done::
-  ,: Reduce the sum to a single value.
-  VADD.S32     d0, d0, d1
-  VPADDL.S32   d0, d0
-  ADDS         r12, r12, #4
-  BLE celt_pitch_xcorr_neon_process_remaining_loop_done
-,: Sum terms 1 at a time.
-celt_pitch_xcorr_neon_process_remaining_loop1::
-  VLD1.16      {d2[]}, [r4]!
-  VLD1.16      {d3[]}, [r5]!
-  SUBS         r12, r12, #1
-  VMLAL.S16    q0, d2, d3
-  BGT celt_pitch_xcorr_neon_process_remaining_loop1
-celt_pitch_xcorr_neon_process_remaining_loop_done::
-  VST1.32      {d0[0]}, [r2]!
-  VMAX.S32     d30, d30, d0
-  SUBS         r6, r6, #1
-  ,: _y++
-  ADD          r1, r1, #2
-  ,: if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
-  BGT celt_pitch_xcorr_neon_process_remaining
-celt_pitch_xcorr_neon_done::
-  VMOV.32      r0, d30[0]
-  LDMFD        sp!, {r4-r6, pc}
-	.size celt_pitch_xcorr_neon, .-celt_pitch_xcorr_neon  ,: @ ENDP
-
- .endif
-
- .if OPUS_ARM_MAY_HAVE_EDSP
-
-,: This will get used on ARMv7 devices without NEON, so it has been optimized
-,: to take advantage of dual-issuing where possible.
-@ xcorr_kernel_edsp: @ PROC
-xcorr_kernel_edsp_start::
-  ,: input:
-  ,:   r3      = int         len
-  ,:   r4      = opus_val16 *_x (must be 32-bit aligned)
-  ,:   r5      = opus_val16 *_y (must be 32-bit aligned)
-  ,:   r6...r9 = opus_val32  sum[4]
-  ,: output:
-  ,:   r6...r9 = opus_val32  sum[4]
-  ,: preserved: r0-r5
-  ,: internal usage
-  ,:   r2      = int         j
-  ,:   r12,r14 = opus_val16  x[4]
-  ,:   r10,r11 = opus_val16  y[4]
-  STMFD        sp!, {r2,r4,r5,lr}
-  LDR          r10, [r5], #4      ,: Load y[0...1]
-  SUBS         r2, r3, #4         ,: j = len-4
-  LDR          r11, [r5], #4      ,: Load y[2...3]
-  BLE xcorr_kernel_edsp_process4_done
-  LDR          r12, [r4], #4      ,: Load x[0...1]
-  ,: Stall
-xcorr_kernel_edsp_process4::
-  ,: The multiplies must issue from pipeline 0, and can't dual-issue with each
-  ,: other. Every other instruction here dual-issues with a multiply, and is
-  ,: thus "free". There should be no stalls in the body of the loop.
-  SMLABB       r6, r12, r10, r6   ,: sum[0] = MAC16_16(sum[0],x_0,y_0)
-  LDR          r14, [r4], #4      ,: Load x[2...3]
-  SMLABT       r7, r12, r10, r7   ,: sum[1] = MAC16_16(sum[1],x_0,y_1)
-  SUBS         r2, r2, #4         ,: j-=4
-  SMLABB       r8, r12, r11, r8   ,: sum[2] = MAC16_16(sum[2],x_0,y_2)
-  SMLABT       r9, r12, r11, r9   ,: sum[3] = MAC16_16(sum[3],x_0,y_3)
-  SMLATT       r6, r12, r10, r6   ,: sum[0] = MAC16_16(sum[0],x_1,y_1)
-  LDR          r10, [r5], #4      ,: Load y[4...5]
-  SMLATB       r7, r12, r11, r7   ,: sum[1] = MAC16_16(sum[1],x_1,y_2)
-  SMLATT       r8, r12, r11, r8   ,: sum[2] = MAC16_16(sum[2],x_1,y_3)
-  SMLATB       r9, r12, r10, r9   ,: sum[3] = MAC16_16(sum[3],x_1,y_4)
-  LDRGT        r12, [r4], #4      ,: Load x[0...1]
-  SMLABB       r6, r14, r11, r6   ,: sum[0] = MAC16_16(sum[0],x_2,y_2)
-  SMLABT       r7, r14, r11, r7   ,: sum[1] = MAC16_16(sum[1],x_2,y_3)
-  SMLABB       r8, r14, r10, r8   ,: sum[2] = MAC16_16(sum[2],x_2,y_4)
-  SMLABT       r9, r14, r10, r9   ,: sum[3] = MAC16_16(sum[3],x_2,y_5)
-  SMLATT       r6, r14, r11, r6   ,: sum[0] = MAC16_16(sum[0],x_3,y_3)
-  LDR          r11, [r5], #4      ,: Load y[6...7]
-  SMLATB       r7, r14, r10, r7   ,: sum[1] = MAC16_16(sum[1],x_3,y_4)
-  SMLATT       r8, r14, r10, r8   ,: sum[2] = MAC16_16(sum[2],x_3,y_5)
-  SMLATB       r9, r14, r11, r9   ,: sum[3] = MAC16_16(sum[3],x_3,y_6)
-  BGT xcorr_kernel_edsp_process4
-xcorr_kernel_edsp_process4_done::
-  ADDS         r2, r2, #4
-  BLE xcorr_kernel_edsp_done
-  LDRH         r12, [r4], #2      ,: r12 = *x++
-  SUBS         r2, r2, #1         ,: j--
-  ,: Stall
-  SMLABB       r6, r12, r10, r6   ,: sum[0] = MAC16_16(sum[0],x,y_0)
-  LDRHGT       r14, [r4], #2      ,: r14 = *x++
-  SMLABT       r7, r12, r10, r7   ,: sum[1] = MAC16_16(sum[1],x,y_1)
-  SMLABB       r8, r12, r11, r8   ,: sum[2] = MAC16_16(sum[2],x,y_2)
-  SMLABT       r9, r12, r11, r9   ,: sum[3] = MAC16_16(sum[3],x,y_3)
-  BLE xcorr_kernel_edsp_done
-  SMLABT       r6, r14, r10, r6   ,: sum[0] = MAC16_16(sum[0],x,y_1)
-  SUBS         r2, r2, #1         ,: j--
-  SMLABB       r7, r14, r11, r7   ,: sum[1] = MAC16_16(sum[1],x,y_2)
-  LDRH         r10, [r5], #2      ,: r10 = y_4 = *y++
-  SMLABT       r8, r14, r11, r8   ,: sum[2] = MAC16_16(sum[2],x,y_3)
-  LDRHGT       r12, [r4], #2      ,: r12 = *x++
-  SMLABB       r9, r14, r10, r9   ,: sum[3] = MAC16_16(sum[3],x,y_4)
-  BLE xcorr_kernel_edsp_done
-  SMLABB       r6, r12, r11, r6   ,: sum[0] = MAC16_16(sum[0],tmp,y_2)
-  CMP          r2, #1             ,: j--
-  SMLABT       r7, r12, r11, r7   ,: sum[1] = MAC16_16(sum[1],tmp,y_3)
-  LDRH         r2, [r5], #2       ,: r2 = y_5 = *y++
-  SMLABB       r8, r12, r10, r8   ,: sum[2] = MAC16_16(sum[2],tmp,y_4)
-  LDRHGT       r14, [r4]          ,: r14 = *x
-  SMLABB       r9, r12, r2, r9    ,: sum[3] = MAC16_16(sum[3],tmp,y_5)
-  BLE xcorr_kernel_edsp_done
-  SMLABT       r6, r14, r11, r6   ,: sum[0] = MAC16_16(sum[0],tmp,y_3)
-  LDRH         r11, [r5]          ,: r11 = y_6 = *y
-  SMLABB       r7, r14, r10, r7   ,: sum[1] = MAC16_16(sum[1],tmp,y_4)
-  SMLABB       r8, r14, r2, r8    ,: sum[2] = MAC16_16(sum[2],tmp,y_5)
-  SMLABB       r9, r14, r11, r9   ,: sum[3] = MAC16_16(sum[3],tmp,y_6)
-xcorr_kernel_edsp_done::
-  LDMFD        sp!, {r2,r4,r5,pc}
-	.size xcorr_kernel_edsp, .-xcorr_kernel_edsp  ,: @ ENDP
-
-@ celt_pitch_xcorr_edsp: @ PROC
-  ,: input:
-  ,:   r0  = opus_val16 *_x (must be 32-bit aligned)
-  ,:   r1  = opus_val16 *_y (only needs to be 16-bit aligned)
-  ,:   r2  = opus_val32 *xcorr
-  ,:   r3  = int         len
-  ,: output:
-  ,:   r0  = maxcorr
-  ,: internal usage
-  ,:   r4  = opus_val16 *x
-  ,:   r5  = opus_val16 *y
-  ,:   r6  = opus_val32  sum0
-  ,:   r7  = opus_val32  sum1
-  ,:   r8  = opus_val32  sum2
-  ,:   r9  = opus_val32  sum3
-  ,:   r1  = int         max_pitch
-  ,:   r12 = int         j
-  STMFD        sp!, {r4-r11, lr}
-  MOV          r5, r1
-  LDR          r1, [sp, #36]
-  MOV          r4, r0
-  TST          r5, #3
-  ,: maxcorr = 1
-  MOV          r0, #1
-  BEQ          celt_pitch_xcorr_edsp_process1u_done
-,: Compute one sum at the start to make y 32-bit aligned.
-  SUBS         r12, r3, #4
-  ,: r14 = sum = 0
-  MOV          r14, #0
-  LDRH         r8, [r5], #2
-  BLE celt_pitch_xcorr_edsp_process1u_loop4_done
-  LDR          r6, [r4], #4
-  MOV          r8, r8, LSL #16
-celt_pitch_xcorr_edsp_process1u_loop4::
-  LDR          r9, [r5], #4
-  SMLABT       r14, r6, r8, r14     ,: sum = MAC16_16(sum, x_0, y_0)
-  LDR          r7, [r4], #4
-  SMLATB       r14, r6, r9, r14     ,: sum = MAC16_16(sum, x_1, y_1)
-  LDR          r8, [r5], #4
-  SMLABT       r14, r7, r9, r14     ,: sum = MAC16_16(sum, x_2, y_2)
-  SUBS         r12, r12, #4         ,: j-=4
-  SMLATB       r14, r7, r8, r14     ,: sum = MAC16_16(sum, x_3, y_3)
-  LDRGT        r6, [r4], #4
-  BGT celt_pitch_xcorr_edsp_process1u_loop4
-  MOV          r8, r8, LSR #16
-celt_pitch_xcorr_edsp_process1u_loop4_done::
-  ADDS         r12, r12, #4
-celt_pitch_xcorr_edsp_process1u_loop1::
-  LDRHGE       r6, [r4], #2
-  ,: Stall
-  SMLABBGE     r14, r6, r8, r14    ,: sum = MAC16_16(sum, *x, *y)
-  SUBSGE       r12, r12, #1
-  LDRHGT       r8, [r5], #2
-  BGT celt_pitch_xcorr_edsp_process1u_loop1
-  ,: Restore _x
-  SUB          r4, r4, r3, LSL #1
-  ,: Restore and advance _y
-  SUB          r5, r5, r3, LSL #1
-  ,: maxcorr = max(maxcorr, sum)
-  CMP          r0, r14
-  ADD          r5, r5, #2
-  MOVLT        r0, r14
-  SUBS         r1, r1, #1
-  ,: xcorr[i] = sum
-  STR          r14, [r2], #4
-  BLE celt_pitch_xcorr_edsp_done
-celt_pitch_xcorr_edsp_process1u_done::
-  ,: if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
-  SUBS         r1, r1, #4
-  BLT celt_pitch_xcorr_edsp_process2
-celt_pitch_xcorr_edsp_process4::
-  ,: xcorr_kernel_edsp parameters:
-  ,: r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
-  MOV          r6, #0
-  MOV          r7, #0
-  MOV          r8, #0
-  MOV          r9, #0
-  BL xcorr_kernel_edsp_start  ,: xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
-  ,: maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
-  CMP          r0, r6
-  ,: _y+=4
-  ADD          r5, r5, #8
-  MOVLT        r0, r6
-  CMP          r0, r7
-  MOVLT        r0, r7
-  CMP          r0, r8
-  MOVLT        r0, r8
-  CMP          r0, r9
-  MOVLT        r0, r9
-  STMIA        r2!, {r6-r9}
-  SUBS         r1, r1, #4
-  BGE celt_pitch_xcorr_edsp_process4
-celt_pitch_xcorr_edsp_process2::
-  ADDS         r1, r1, #2
-  BLT celt_pitch_xcorr_edsp_process1a
-  SUBS         r12, r3, #4
-  ,: {r10, r11} = {sum0, sum1} = {0, 0}
-  MOV          r10, #0
-  MOV          r11, #0
-  LDR          r8, [r5], #4
-  BLE celt_pitch_xcorr_edsp_process2_loop_done
-  LDR          r6, [r4], #4
-  LDR          r9, [r5], #4
-celt_pitch_xcorr_edsp_process2_loop4::
-  SMLABB       r10, r6, r8, r10     ,: sum0 = MAC16_16(sum0, x_0, y_0)
-  LDR          r7, [r4], #4
-  SMLABT       r11, r6, r8, r11     ,: sum1 = MAC16_16(sum1, x_0, y_1)
-  SUBS         r12, r12, #4         ,: j-=4
-  SMLATT       r10, r6, r8, r10     ,: sum0 = MAC16_16(sum0, x_1, y_1)
-  LDR          r8, [r5], #4
-  SMLATB       r11, r6, r9, r11     ,: sum1 = MAC16_16(sum1, x_1, y_2)
-  LDRGT        r6, [r4], #4
-  SMLABB       r10, r7, r9, r10     ,: sum0 = MAC16_16(sum0, x_2, y_2)
-  SMLABT       r11, r7, r9, r11     ,: sum1 = MAC16_16(sum1, x_2, y_3)
-  SMLATT       r10, r7, r9, r10     ,: sum0 = MAC16_16(sum0, x_3, y_3)
-  LDRGT        r9, [r5], #4
-  SMLATB       r11, r7, r8, r11     ,: sum1 = MAC16_16(sum1, x_3, y_4)
-  BGT celt_pitch_xcorr_edsp_process2_loop4
-celt_pitch_xcorr_edsp_process2_loop_done::
-  ADDS         r12, r12, #2
-  BLE  celt_pitch_xcorr_edsp_process2_1
-  LDR          r6, [r4], #4
-  ,: Stall
-  SMLABB       r10, r6, r8, r10     ,: sum0 = MAC16_16(sum0, x_0, y_0)
-  LDR          r9, [r5], #4
-  SMLABT       r11, r6, r8, r11     ,: sum1 = MAC16_16(sum1, x_0, y_1)
-  SUB          r12, r12, #2
-  SMLATT       r10, r6, r8, r10     ,: sum0 = MAC16_16(sum0, x_1, y_1)
-  MOV          r8, r9
-  SMLATB       r11, r6, r9, r11     ,: sum1 = MAC16_16(sum1, x_1, y_2)
-celt_pitch_xcorr_edsp_process2_1::
-  LDRH         r6, [r4], #2
-  ADDS         r12, r12, #1
-  ,: Stall
-  SMLABB       r10, r6, r8, r10     ,: sum0 = MAC16_16(sum0, x_0, y_0)
-  LDRHGT       r7, [r4], #2
-  SMLABT       r11, r6, r8, r11     ,: sum1 = MAC16_16(sum1, x_0, y_1)
-  BLE celt_pitch_xcorr_edsp_process2_done
-  LDRH         r9, [r5], #2
-  SMLABT       r10, r7, r8, r10     ,: sum0 = MAC16_16(sum0, x_0, y_1)
-  SMLABB       r11, r7, r9, r11     ,: sum1 = MAC16_16(sum1, x_0, y_2)
-celt_pitch_xcorr_edsp_process2_done::
-  ,: Restore _x
-  SUB          r4, r4, r3, LSL #1
-  ,: Restore and advance _y
-  SUB          r5, r5, r3, LSL #1
-  ,: maxcorr = max(maxcorr, sum0)
-  CMP          r0, r10
-  ADD          r5, r5, #2
-  MOVLT        r0, r10
-  SUB          r1, r1, #2
-  ,: maxcorr = max(maxcorr, sum1)
-  CMP          r0, r11
-  ,: xcorr[i] = sum
-  STR          r10, [r2], #4
-  MOVLT        r0, r11
-  STR          r11, [r2], #4
-celt_pitch_xcorr_edsp_process1a::
-  ADDS         r1, r1, #1
-  BLT celt_pitch_xcorr_edsp_done
-  SUBS         r12, r3, #4
-  ,: r14 = sum = 0
-  MOV          r14, #0
-  BLT celt_pitch_xcorr_edsp_process1a_loop_done
-  LDR          r6, [r4], #4
-  LDR          r8, [r5], #4
-  LDR          r7, [r4], #4
-  LDR          r9, [r5], #4
-celt_pitch_xcorr_edsp_process1a_loop4::
-  SMLABB       r14, r6, r8, r14     ,: sum = MAC16_16(sum, x_0, y_0)
-  SUBS         r12, r12, #4         ,: j-=4
-  SMLATT       r14, r6, r8, r14     ,: sum = MAC16_16(sum, x_1, y_1)
-  LDRGE        r6, [r4], #4
-  SMLABB       r14, r7, r9, r14     ,: sum = MAC16_16(sum, x_2, y_2)
-  LDRGE        r8, [r5], #4
-  SMLATT       r14, r7, r9, r14     ,: sum = MAC16_16(sum, x_3, y_3)
-  LDRGE        r7, [r4], #4
-  LDRGE        r9, [r5], #4
-  BGE celt_pitch_xcorr_edsp_process1a_loop4
-celt_pitch_xcorr_edsp_process1a_loop_done::
-  ADDS         r12, r12, #2
-  LDRGE        r6, [r4], #4
-  LDRGE        r8, [r5], #4
-  ,: Stall
-  SMLABBGE     r14, r6, r8, r14     ,: sum = MAC16_16(sum, x_0, y_0)
-  SUBGE        r12, r12, #2
-  SMLATTGE     r14, r6, r8, r14     ,: sum = MAC16_16(sum, x_1, y_1)
-  ADDS         r12, r12, #1
-  LDRHGE       r6, [r4], #2
-  LDRHGE       r8, [r5], #2
-  ,: Stall
-  SMLABBGE     r14, r6, r8, r14     ,: sum = MAC16_16(sum, *x, *y)
-  ,: maxcorr = max(maxcorr, sum)
-  CMP          r0, r14
-  ,: xcorr[i] = sum
-  STR          r14, [r2], #4
-  MOVLT        r0, r14
-celt_pitch_xcorr_edsp_done::
-  LDMFD        sp!, {r4-r11, pc}
-	.size celt_pitch_xcorr_edsp, .-celt_pitch_xcorr_edsp  ,: @ ENDP
-
- .endif
-
-,: @ END:
-    .section	.note.GNU-stack,"",%progbits
diff --git a/celt/dump_modes/Makefile b/celt/dump_modes/Makefile
new file mode 100644
index 00000000..93f599fb
--- /dev/null
+++ b/celt/dump_modes/Makefile
@@ -0,0 +1,32 @@
+# Builds dump_modes, which prints static definitions for pre-defined CELT modes.
+CFLAGS=-O2 -Wall -Wextra -DHAVE_CONFIG_H
+INCLUDES=-I. -I../ -I../.. -I../../include
+
+SOURCES = dump_modes.c \
+          ../modes.c \
+          ../cwrs.c \
+          ../rate.c \
+          ../entcode.c \
+          ../entenc.c \
+          ../entdec.c \
+          ../mathops.c \
+          ../mdct.c \
+          ../kiss_fft.c
+
+ifdef HAVE_ARM_NE10
+CC = gcc
+CFLAGS += -mfpu=neon
+INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_PRESUME_NEON_INTR
+LIBS = -L$(NE10_LIBDIR) -lNE10
+SOURCES += ../arm/celt_ne10_fft.c \
+           dump_modes_arm_ne10.c \
+           ../arm/armcpu.c
+endif
+.PHONY: all clean
+all: dump_modes
+# Listing the sources as prerequisites makes the binary rebuild when any of them changes.
+dump_modes: $(SOURCES)
+	$(PREFIX)$(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES $(SOURCES) -o $@ $(LIBS) -lm
+
+clean:
+	rm -f dump_modes
diff --git a/celt/dump_modes/dump_modes.c b/celt/dump_modes/dump_modes.c
new file mode 100644
index 00000000..9105a534
--- /dev/null
+++ b/celt/dump_modes/dump_modes.c
@@ -0,0 +1,353 @@
+/* Copyright (c) 2008 CSIRO
+   Copyright (c) 2008-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "modes.h"
+#include "celt.h"
+#include "rate.h"
+#include "dump_modes_arch.h"
+
+#define INT16 "%d"
+#define INT32 "%d"
+#define FLOAT "%#0.8gf"
+
+#ifdef FIXED_POINT
+#define WORD16 INT16
+#define WORD32 INT32
+#else
+#define WORD16 FLOAT
+#define WORD32 FLOAT
+#endif
+
+void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
+{
+   int i, j, k;
+   int mdct_twiddles_size;
+   fprintf(file, "/* The contents of this file was automatically generated by dump_modes.c\n");
+   fprintf(file, "   with arguments:");
+   for (i=0;i<nb_modes;i++)
+   {
+      CELTMode *mode = modes[i];
+      fprintf(file, " %d %d",mode->Fs,mode->shortMdctSize*mode->nbShortMdcts);
+   }
+   fprintf(file, "\n   It contains static definitions for some pre-defined modes. */\n");
+   fprintf(file, "#include \"modes.h\"\n");
+   fprintf(file, "#include \"rate.h\"\n");
+   fprintf(file, "\n#ifdef HAVE_ARM_NE10\n");
+   fprintf(file, "#define OVERRIDE_FFT 1\n");
+   fprintf(file, "#include \"%s\"\n", ARM_NE10_ARCH_FILE_NAME);
+   fprintf(file, "#endif\n");
+
+   fprintf(file, "\n");
+
+   for (i=0;i<nb_modes;i++)
+   {
+      CELTMode *mode = modes[i];
+      int mdctSize;
+      int standard, framerate;
+
+      mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
+      standard = (mode->Fs == 400*(opus_int32)mode->shortMdctSize);
+      framerate = mode->Fs/mode->shortMdctSize;
+
+      if (!standard)
+      {
+         fprintf(file, "#ifndef DEF_EBANDS%d_%d\n", mode->Fs, mdctSize);
+         fprintf(file, "#define DEF_EBANDS%d_%d\n", mode->Fs, mdctSize);
+         fprintf (file, "static const opus_int16 eBands%d_%d[%d] = {\n", mode->Fs, mdctSize, mode->nbEBands+2);
+         for (j=0;j<mode->nbEBands+2;j++)
+            fprintf (file, "%d, ", mode->eBands[j]);
+         fprintf (file, "};\n");
+         fprintf(file, "#endif\n");
+         fprintf(file, "\n");
+      }
+
+      fprintf(file, "#ifndef DEF_WINDOW%d\n", mode->overlap);
+      fprintf(file, "#define DEF_WINDOW%d\n", mode->overlap);
+      fprintf (file, "static const opus_val16 window%d[%d] = {\n", mode->overlap, mode->overlap);
+      for (j=0;j<mode->overlap;j++)
+         fprintf (file, WORD16 ",%c", mode->window[j],(j+6)%5==0?'\n':' ');
+      fprintf (file, "};\n");
+      fprintf(file, "#endif\n");
+      fprintf(file, "\n");
+
+      if (!standard)
+      {
+         fprintf(file, "#ifndef DEF_ALLOC_VECTORS%d_%d\n", mode->Fs, mdctSize);
+         fprintf(file, "#define DEF_ALLOC_VECTORS%d_%d\n", mode->Fs, mdctSize);
+         fprintf (file, "static const unsigned char allocVectors%d_%d[%d] = {\n", mode->Fs, mdctSize, mode->nbEBands*mode->nbAllocVectors);
+         for (j=0;j<mode->nbAllocVectors;j++)
+         {
+            for (k=0;k<mode->nbEBands;k++)
+               fprintf (file, "%2d, ", mode->allocVectors[j*mode->nbEBands+k]);
+            fprintf (file, "\n");
+         }
+         fprintf (file, "};\n");
+         fprintf(file, "#endif\n");
+         fprintf(file, "\n");
+      }
+
+      fprintf(file, "#ifndef DEF_LOGN%d\n", framerate);
+      fprintf(file, "#define DEF_LOGN%d\n", framerate);
+      fprintf (file, "static const opus_int16 logN%d[%d] = {\n", framerate, mode->nbEBands);
+      for (j=0;j<mode->nbEBands;j++)
+         fprintf (file, "%d, ", mode->logN[j]);
+      fprintf (file, "};\n");
+      fprintf(file, "#endif\n");
+      fprintf(file, "\n");
+
+      /* Pulse cache */
+      fprintf(file, "#ifndef DEF_PULSE_CACHE%d\n", mode->Fs/mdctSize);
+      fprintf(file, "#define DEF_PULSE_CACHE%d\n", mode->Fs/mdctSize);
+      fprintf (file, "static const opus_int16 cache_index%d[%d] = {\n", mode->Fs/mdctSize, (mode->maxLM+2)*mode->nbEBands);
+      for (j=0;j<mode->nbEBands*(mode->maxLM+2);j++)
+         fprintf (file, "%d,%c", mode->cache.index[j],(j+16)%15==0?'\n':' ');
+      fprintf (file, "};\n");
+      fprintf (file, "static const unsigned char cache_bits%d[%d] = {\n", mode->Fs/mdctSize, mode->cache.size);
+      for (j=0;j<mode->cache.size;j++)
+         fprintf (file, "%d,%c", mode->cache.bits[j],(j+16)%15==0?'\n':' ');
+      fprintf (file, "};\n");
+      fprintf (file, "static const unsigned char cache_caps%d[%d] = {\n", mode->Fs/mdctSize, (mode->maxLM+1)*2*mode->nbEBands);
+      for (j=0;j<(mode->maxLM+1)*2*mode->nbEBands;j++)
+         fprintf (file, "%d,%c", mode->cache.caps[j],(j+16)%15==0?'\n':' ');
+      fprintf (file, "};\n");
+
+      fprintf(file, "#endif\n");
+      fprintf(file, "\n");
+
+      /* FFT twiddles */
+      fprintf(file, "#ifndef FFT_TWIDDLES%d_%d\n", mode->Fs, mdctSize);
+      fprintf(file, "#define FFT_TWIDDLES%d_%d\n", mode->Fs, mdctSize);
+      fprintf (file, "static const kiss_twiddle_cpx fft_twiddles%d_%d[%d] = {\n",
+            mode->Fs, mdctSize, mode->mdct.kfft[0]->nfft);
+      for (j=0;j<mode->mdct.kfft[0]->nfft;j++)
+         fprintf (file, "{" WORD16 ", " WORD16 "},%c", mode->mdct.kfft[0]->twiddles[j].r, mode->mdct.kfft[0]->twiddles[j].i,(j+3)%2==0?'\n':' ');
+      fprintf (file, "};\n");
+
+#ifdef OVERRIDE_FFT
+      dump_mode_arch(mode);
+#endif
+      /* FFT Bitrev tables */
+      for (k=0;k<=mode->mdct.maxshift;k++)
+      {
+         fprintf(file, "#ifndef FFT_BITREV%d\n", mode->mdct.kfft[k]->nfft);
+         fprintf(file, "#define FFT_BITREV%d\n", mode->mdct.kfft[k]->nfft);
+         fprintf (file, "static const opus_int16 fft_bitrev%d[%d] = {\n",
+               mode->mdct.kfft[k]->nfft, mode->mdct.kfft[k]->nfft);
+         for (j=0;j<mode->mdct.kfft[k]->nfft;j++)
+            fprintf (file, "%d,%c", mode->mdct.kfft[k]->bitrev[j],(j+16)%15==0?'\n':' ');
+         fprintf (file, "};\n");
+
+         fprintf(file, "#endif\n");
+         fprintf(file, "\n");
+      }
+
+      /* FFT States */
+      for (k=0;k<=mode->mdct.maxshift;k++)
+      {
+         fprintf(file, "#ifndef FFT_STATE%d_%d_%d\n", mode->Fs, mdctSize, k);
+         fprintf(file, "#define FFT_STATE%d_%d_%d\n", mode->Fs, mdctSize, k);
+         fprintf (file, "static const kiss_fft_state fft_state%d_%d_%d = {\n",
+               mode->Fs, mdctSize, k);
+         fprintf (file, "%d,    /* nfft */\n", mode->mdct.kfft[k]->nfft);
+         fprintf (file, WORD16 ",    /* scale */\n", mode->mdct.kfft[k]->scale);
+#ifdef FIXED_POINT
+         fprintf (file, "%d,    /* scale_shift */\n", mode->mdct.kfft[k]->scale_shift);
+#endif
+         fprintf (file, "%d,    /* shift */\n", mode->mdct.kfft[k]->shift);
+         fprintf (file, "{");
+         for (j=0;j<2*MAXFACTORS;j++)
+            fprintf (file, "%d, ", mode->mdct.kfft[k]->factors[j]);
+         fprintf (file, "},    /* factors */\n");
+         fprintf (file, "fft_bitrev%d,    /* bitrev */\n", mode->mdct.kfft[k]->nfft);
+         fprintf (file, "fft_twiddles%d_%d,    /* bitrev */\n", mode->Fs, mdctSize);
+
+         fprintf (file, "#ifdef OVERRIDE_FFT\n");
+         fprintf (file, "(arch_fft_state *)&cfg_arch_%d,\n", mode->mdct.kfft[k]->nfft);
+         fprintf (file, "#else\n");
+         fprintf (file, "NULL,\n");
+         fprintf(file, "#endif\n");
+
+         fprintf (file, "};\n");
+
+         fprintf(file, "#endif\n");
+         fprintf(file, "\n");
+      }
+
+      fprintf(file, "#endif\n");
+      fprintf(file, "\n");
+
+      /* MDCT twiddles */
+      mdct_twiddles_size = mode->mdct.n-(mode->mdct.n/2>>mode->mdct.maxshift);
+      fprintf(file, "#ifndef MDCT_TWIDDLES%d\n", mdctSize);
+      fprintf(file, "#define MDCT_TWIDDLES%d\n", mdctSize);
+      fprintf (file, "static const opus_val16 mdct_twiddles%d[%d] = {\n",
+            mdctSize, mdct_twiddles_size);
+      for (j=0;j<mdct_twiddles_size;j++)
+         fprintf (file, WORD16 ",%c", mode->mdct.trig[j],(j+6)%5==0?'\n':' ');
+      fprintf (file, "};\n");
+
+      fprintf(file, "#endif\n");
+      fprintf(file, "\n");
+
+
+      /* Print the actual mode data */
+      fprintf(file, "static const CELTMode mode%d_%d_%d = {\n", mode->Fs, mdctSize, mode->overlap);
+      fprintf(file, INT32 ",    /* Fs */\n", mode->Fs);
+      fprintf(file, "%d,    /* overlap */\n", mode->overlap);
+      fprintf(file, "%d,    /* nbEBands */\n", mode->nbEBands);
+      fprintf(file, "%d,    /* effEBands */\n", mode->effEBands);
+      fprintf(file, "{");
+      for (j=0;j<4;j++)
+         fprintf(file, WORD16 ", ", mode->preemph[j]);
+      fprintf(file, "},    /* preemph */\n");
+      if (standard)
+         fprintf(file, "eband5ms,    /* eBands */\n");
+      else
+         fprintf(file, "eBands%d_%d,    /* eBands */\n", mode->Fs, mdctSize);
+
+      fprintf(file, "%d,    /* maxLM */\n", mode->maxLM);
+      fprintf(file, "%d,    /* nbShortMdcts */\n", mode->nbShortMdcts);
+      fprintf(file, "%d,    /* shortMdctSize */\n", mode->shortMdctSize);
+
+      fprintf(file, "%d,    /* nbAllocVectors */\n", mode->nbAllocVectors);
+      if (standard)
+         fprintf(file, "band_allocation,    /* allocVectors */\n");
+      else
+         fprintf(file, "allocVectors%d_%d,    /* allocVectors */\n", mode->Fs, mdctSize);
+
+      fprintf(file, "logN%d,    /* logN */\n", framerate);
+      fprintf(file, "window%d,    /* window */\n", mode->overlap);
+      fprintf(file, "{%d, %d, {", mode->mdct.n, mode->mdct.maxshift);
+      for (k=0;k<=mode->mdct.maxshift;k++)
+         fprintf(file, "&fft_state%d_%d_%d, ", mode->Fs, mdctSize, k);
+      fprintf (file, "}, mdct_twiddles%d},    /* mdct */\n", mdctSize);
+
+      fprintf(file, "{%d, cache_index%d, cache_bits%d, cache_caps%d},    /* cache */\n",
+            mode->cache.size, mode->Fs/mdctSize, mode->Fs/mdctSize, mode->Fs/mdctSize);
+      fprintf(file, "};\n");
+   }
+   fprintf(file, "\n");
+   fprintf(file, "/* List of all the available modes */\n");
+   fprintf(file, "#define TOTAL_MODES %d\n", nb_modes);
+   fprintf(file, "static const CELTMode * const static_mode_list[TOTAL_MODES] = {\n");
+   for (i=0;i<nb_modes;i++)
+   {
+      CELTMode *mode = modes[i];
+      int mdctSize;
+      mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
+      fprintf(file, "&mode%d_%d_%d,\n", mode->Fs, mdctSize, mode->overlap);
+   }
+   fprintf(file, "};\n");
+}
+
+/* Write a small generated header to `file`.
+ *
+ * All nb_modes entries of `modes` are scanned. A FRAMESIZE(mode) macro is
+ * emitted when every mode has the same frame size
+ * (shortMdctSize*nbShortMdcts), and an OVERLAP(mode) macro when every mode has
+ * the same overlap. A quantity that differs between modes is marked -1 and its
+ * macro is left out. */
+void dump_header(FILE *file, CELTMode **modes, int nb_modes)
+{
+   int idx;
+   /* Never assigned after this point, so the CHANNELS section below is
+      currently never written. */
+   int channels = 0;
+   int common_frame = 0;    /* 0: nothing seen yet, -1: modes disagree */
+   int common_overlap = 0;  /* 0: nothing seen yet, -1: modes disagree */
+
+   fprintf (file, "/* This header file is generated automatically*/\n");
+
+   for (idx=0;idx<nb_modes;idx++)
+   {
+      CELTMode *cur = modes[idx];
+      int cur_frame = cur->shortMdctSize*cur->nbShortMdcts;
+
+      if (common_frame==0)
+         common_frame = cur_frame;
+      else if (common_frame != cur_frame)
+         common_frame = -1;
+
+      if (common_overlap==0)
+         common_overlap = cur->overlap;
+      else if (common_overlap != cur->overlap)
+         common_overlap = -1;
+   }
+
+   if (channels>0)
+   {
+      fprintf (file, "#define CHANNELS(mode) %d\n", channels);
+      if (channels==1)
+         fprintf (file, "#define DISABLE_STEREO\n");
+   }
+   if (common_frame>0)
+      fprintf (file, "#define FRAMESIZE(mode) %d\n", common_frame);
+   if (common_overlap>0)
+      fprintf (file, "#define OVERLAP(mode) %d\n", common_overlap);
+}
+
+/* Base name of the generated header; main() appends ".h" when it opens the
+   output file. The name depends on whether FIXED_POINT is defined. */
+#ifdef FIXED_POINT
+#define BASENAME "static_modes_fixed"
+#else
+#define BASENAME "static_modes_float"
+#endif
+
+/* Entry point: create one custom mode per "rate frame_size" argument pair and
+ * write their static definitions to BASENAME ".h" in the current directory.
+ * When OVERRIDE_FFT is defined the arch-specific file is opened before and
+ * closed after the main dump.
+ *
+ * Returns 0 on success, 1 on a malformed command line, and EXIT_FAILURE when
+ * allocating the mode table, creating a mode or opening the output file
+ * fails. */
+int main(int argc, char **argv)
+{
+   int i, nb;
+   FILE *file;
+   CELTMode **m;
+   /* Program name plus at least one complete (rate, frame_size) pair. */
+   if (argc%2 != 1 || argc<3)
+   {
+      fprintf (stderr, "Usage: %s rate frame_size [rate frame_size] [rate frame_size]...\n",argv[0]);
+      return 1;
+   }
+   nb = (argc-1)/2;
+   m = malloc(nb*sizeof(CELTMode*));
+   if (m==NULL)
+   {
+      fprintf(stderr,"Error allocating memory for %d modes\n", nb);
+      return EXIT_FAILURE;
+   }
+   for (i=0;i<nb;i++)
+   {
+      int Fs, frame;
+      Fs      = atoi(argv[2*i+1]);
+      frame   = atoi(argv[2*i+2]);
+      m[i] = opus_custom_mode_create(Fs, frame, NULL);
+      if (m[i]==NULL)
+      {
+         fprintf(stderr,"Error creating mode with Fs=%s, frame_size=%s\n",
+               argv[2*i+1],argv[2*i+2]);
+         /* Release the modes created so far before bailing out. */
+         while (i-- > 0)
+            opus_custom_mode_destroy(m[i]);
+         free(m);
+         return EXIT_FAILURE;
+      }
+   }
+   file = fopen(BASENAME ".h", "w");
+   if (file==NULL)
+   {
+      /* Writing to a NULL stream is undefined behaviour, so stop here. */
+      fprintf(stderr,"Error opening %s for writing\n", BASENAME ".h");
+      for (i=0;i<nb;i++)
+         opus_custom_mode_destroy(m[i]);
+      free(m);
+      return EXIT_FAILURE;
+   }
+#ifdef OVERRIDE_FFT
+   dump_modes_arch_init(m, nb);
+#endif
+   dump_modes(file, m, nb);
+   fclose(file);
+#ifdef OVERRIDE_FFT
+   dump_modes_arch_finalize();
+#endif
+   for (i=0;i<nb;i++)
+      opus_custom_mode_destroy(m[i]);
+   free(m);
+   return 0;
+}
diff --git a/celt/dump_modes/dump_modes_arch.h b/celt/dump_modes/dump_modes_arch.h
new file mode 100644
index 00000000..cc0d4be1
--- /dev/null
+++ b/celt/dump_modes/dump_modes_arch.h
@@ -0,0 +1,45 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DUMP_MODE_ARCH_H
+#define DUMP_MODE_ARCH_H
+
+/* Interface of the arch-specific part of the mode dumper. CELTMode must be
+   declared before this header is included. */
+
+/* Open the arch-specific output file and write its preamble for the
+   nb_modes entries of `modes`. */
+void dump_modes_arch_init(CELTMode **modes, int nb_modes);
+/* Write the arch-specific tables for one mode. */
+void dump_mode_arch(CELTMode *mode);
+/* Close the arch-specific output file. */
+void dump_modes_arch_finalize(void);
+
+/* Name of the generated Ne10 header; float and fixed-point builds use
+   different files. */
+#if !defined(FIXED_POINT)
+#define ARM_NE10_ARCH_FILE_NAME "static_modes_float_arm_ne10.h"
+#else
+#define ARM_NE10_ARCH_FILE_NAME "static_modes_fixed_arm_ne10.h"
+#endif
+
+/* Building with Ne10 support enables the code guarded by OVERRIDE_FFT. */
+#if defined(HAVE_ARM_NE10)
+#define OVERRIDE_FFT (1)
+#endif
+
+#endif
diff --git a/celt/dump_modes/dump_modes_arm_ne10.c b/celt/dump_modes/dump_modes_arm_ne10.c
new file mode 100644
index 00000000..47578cda
--- /dev/null
+++ b/celt/dump_modes/dump_modes_arm_ne10.c
@@ -0,0 +1,152 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if defined(HAVE_CONFIG_H)
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "modes.h"
+#include "dump_modes_arch.h"
+#include <NE10_dsp.h>
+
+/* Pick the Ne10 configuration type used by this file and the type names that
+   are written as text into the generated header: the float32 variants for
+   float builds, the int32 variants when FIXED_POINT is defined. */
+#if !defined(FIXED_POINT)
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
+# define NE10_FFT_CPX_TYPE_T_STR "ne10_fft_cpx_float32_t"
+# define NE10_FFT_STATE_TYPE_T_STR "ne10_fft_state_float32_t"
+#else
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
+# define NE10_FFT_CPX_TYPE_T_STR "ne10_fft_cpx_int32_t"
+# define NE10_FFT_STATE_TYPE_T_STR "ne10_fft_state_int32_t"
+#endif
+
+/* Output stream for the generated Ne10 header; opened in
+   dump_modes_arch_init() and closed in dump_modes_arch_finalize(). */
+static FILE *file;
+
+/* Open ARM_NE10_ARCH_FILE_NAME for writing and emit its preamble: a comment
+ * listing the (Fs, frame size) pair of each of the nb_modes entries in
+ * `modes`, followed by the NE10_init.h include line. The stream is stored in
+ * the file-scope `file` variable used by the other functions in this file.
+ *
+ * Terminates the program with EXIT_FAILURE if the file cannot be opened. */
+void dump_modes_arch_init(CELTMode **modes, int nb_modes)
+{
+   int i;
+
+   file = fopen(ARM_NE10_ARCH_FILE_NAME, "w");
+   if (file == NULL)
+   {
+      /* fprintf() on a NULL stream is undefined behaviour; nothing useful
+         can be generated without the output file, so give up cleanly. */
+      fprintf(stderr, "Error opening %s for writing\n", ARM_NE10_ARCH_FILE_NAME);
+      exit(EXIT_FAILURE);
+   }
+   fprintf(file, "/* The contents of this file was automatically generated by\n");
+   fprintf(file, " * dump_mode_arm_ne10.c with arguments:");
+   for (i=0;i<nb_modes;i++)
+   {
+      CELTMode *mode = modes[i];
+      fprintf(file, " %d %d",mode->Fs,mode->shortMdctSize*mode->nbShortMdcts);
+   }
+   fprintf(file, "\n * It contains static definitions for some pre-defined modes. */\n");
+   fprintf(file, "#include <NE10_init.h>\n\n");
+}
+
+/* Close the arch-specific output stream held in the file-scope `file`
+ * variable. Does nothing when no stream is open (never opened, or already
+ * closed by an earlier call), since fclose() must not be given such a
+ * pointer. */
+void dump_modes_arch_finalize()
+{
+   if (file != NULL)
+   {
+      fclose(file);
+      /* Forget the stream so a repeated call cannot close it twice. */
+      file = NULL;
+   }
+}
+
+/* Write the Ne10 FFT tables for `mode` to the arch-specific output file.
+ *
+ * The output is wrapped in an NE10_FFT_PARAMS<Fs>_<frame size> include guard.
+ * The FFTs of the mode (shifts 0..maxshift) are visited in three passes. For
+ * every FFT whose Ne10 configuration pointer (arch_fft->priv) is non-NULL the
+ * passes emit the factor table, the twiddle table, and finally the Ne10 state
+ * together with the cfg_arch_<nfft> entry that points at it. An FFT without a
+ * configuration only gets a cfg_arch_<nfft> entry holding 0 and NULL. */
+void dump_mode_arch(CELTMode *mode)
+{
+   int shift, idx;
+   const int frame_len = mode->shortMdctSize*mode->nbShortMdcts;
+   const int factor_count = (NE10_MAXFACTORS * 2);
+
+   fprintf(file, "#ifndef NE10_FFT_PARAMS%d_%d\n", mode->Fs, frame_len);
+   fprintf(file, "#define NE10_FFT_PARAMS%d_%d\n", mode->Fs, frame_len);
+
+   /* Pass 1: cfg->factors */
+   for (shift=0; shift<=mode->mdct.maxshift; shift++)
+   {
+      const int nfft = mode->mdct.kfft[shift]->nfft;
+      NE10_FFT_CFG_TYPE_T cfg =
+         (NE10_FFT_CFG_TYPE_T)mode->mdct.kfft[shift]->arch_fft->priv;
+
+      if (cfg != NULL)
+      {
+         fprintf(file, "static const ne10_int32_t ne10_factors_%d[%d] = {\n",
+                 nfft, factor_count);
+         /* Line break after every 15th value. */
+         for (idx=0; idx<factor_count; idx++)
+            fprintf(file, "%d,%c", cfg->factors[idx], idx%15==14 ? '\n' : ' ');
+         fprintf (file, "};\n");
+      }
+   }
+
+   /* Pass 2: cfg->twiddles */
+   for (shift=0; shift<=mode->mdct.maxshift; shift++)
+   {
+      const int nfft = mode->mdct.kfft[shift]->nfft;
+      NE10_FFT_CFG_TYPE_T cfg =
+         (NE10_FFT_CFG_TYPE_T)mode->mdct.kfft[shift]->arch_fft->priv;
+
+      if (cfg != NULL)
+      {
+         fprintf(file, "static const %s ne10_twiddles_%d[%d] = {\n",
+                 NE10_FFT_CPX_TYPE_T_STR, nfft, nfft);
+         /* Line break after every 3rd complex value. */
+         for (idx=0; idx<nfft; idx++)
+         {
+            const char sep = idx%3==2 ? '\n' : ' ';
+#if !defined(FIXED_POINT)
+            fprintf(file, "{%#0.8gf,%#0.8gf},%c",
+                    cfg->twiddles[idx].r, cfg->twiddles[idx].i, sep);
+#else
+            fprintf(file, "{%d,%d},%c",
+                    cfg->twiddles[idx].r, cfg->twiddles[idx].i, sep);
+#endif
+         }
+         fprintf (file, "};\n");
+      }
+   }
+
+   /* Pass 3: the Ne10 state and the cfg_arch_<nfft> wrapper */
+   for (shift=0; shift<=mode->mdct.maxshift; shift++)
+   {
+      const int nfft = mode->mdct.kfft[shift]->nfft;
+      NE10_FFT_CFG_TYPE_T cfg =
+         (NE10_FFT_CFG_TYPE_T)mode->mdct.kfft[shift]->arch_fft->priv;
+
+      if (cfg == NULL)
+      {
+         /* No configuration for this length: emit a placeholder entry. */
+         fprintf(file, "/* Ne10 does not support scaled FFT for length = %d */\n",
+                 nfft);
+         fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n", nfft);
+         fprintf(file, "0,\n");
+         fprintf(file, "NULL\n");
+         fprintf(file, "};\n");
+      }
+      else
+      {
+         fprintf(file, "static const %s %s_%d = {\n", NE10_FFT_STATE_TYPE_T_STR,
+                 NE10_FFT_STATE_TYPE_T_STR, nfft);
+         fprintf(file, "%d,\n", cfg->nfft);
+         fprintf(file, "(ne10_int32_t *)ne10_factors_%d,\n", nfft);
+         fprintf(file, "(%s *)ne10_twiddles_%d,\n",
+                 NE10_FFT_CPX_TYPE_T_STR, nfft);
+         fprintf(file, "NULL,\n");  /* buffer */
+         fprintf(file, "(%s *)&ne10_twiddles_%d[%d],\n",
+                 NE10_FFT_CPX_TYPE_T_STR, nfft, cfg->nfft);
+#if !defined(FIXED_POINT)
+         fprintf(file, "/* is_forward_scaled = true */\n");
+         fprintf(file, "(ne10_int32_t) 1,\n");
+         fprintf(file, "/* is_backward_scaled = false */\n");
+         fprintf(file, "(ne10_int32_t) 0,\n");
+#endif
+         fprintf(file, "};\n");
+
+         fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n", nfft);
+         fprintf(file, "1,\n");
+         fprintf(file, "(void *)&%s_%d,\n", NE10_FFT_STATE_TYPE_T_STR, nfft);
+         fprintf(file, "};\n\n");
+      }
+   }
+
+   fprintf(file, "#endif  /* end NE10_FFT_PARAMS%d_%d */\n", mode->Fs, frame_len);
+}
diff --git a/celt/fixed_c5x.h b/celt/fixed_c5x.h
new file mode 100644
index 00000000..ea95a998
--- /dev/null
+++ b/celt/fixed_c5x.h
@@ -0,0 +1,79 @@
+/* Copyright (C) 2003 Jean-Marc Valin */
+/**
+   @file fixed_c5x.h
+   @brief Fixed-point operations for the TI C5x DSP family
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_C5X_H
+#define FIXED_C5X_H
+
+#include "dsplib.h"
+
+/* C5x replacement for the generic IMUL32 macro. The product is assembled from
+   three partial multiplies: the two cross terms (high half of one operand
+   times the other operand) are summed and shifted left by 16, then added to
+   the result of _lmpyu(i,j).
+   NOTE(review): _lmpy/_lmpyu are presumably TI 16x16-bit multiply intrinsics
+   that only look at the low halves of their arguments -- confirm against the
+   C5x compiler documentation. */
+#undef IMUL32
+static OPUS_INLINE long IMUL32(long i, long j)
+{
+   const long cross_hi_lo = _lmpy(i>>16,j);
+   const long cross_sum = cross_hi_lo + _lmpy(i,j>>16);
+
+   return _lmpyu(i,j) + (cross_sum<<16);
+}
+
+/* Each #undef/#define pair below replaces a generic fixed-point macro with a
+   C5x-specific version (presumably TI compiler intrinsics or dsplib routines
+   -- confirm against the toolchain documentation). */
+#undef MAX16
+#define MAX16(a,b) _max(a,b)
+
+#undef MIN16
+#define MIN16(a,b) _min(a,b)
+
+#undef MAX32
+#define MAX32(a,b) _lmax(a,b)
+
+#undef MIN32
+#define MIN32(a,b) _lmin(a,b)
+
+/* The shift is negated before it is handed to _lshl.
+   NOTE(review): this relies on _lshl treating a negative count as a right
+   shift -- confirm. */
+#undef VSHR32
+#define VSHR32(a, shift) _lshl(a,-(shift))
+
+#undef MULT16_16_Q15
+#define MULT16_16_Q15(a,b) (_smpy(a,b))
+
+#undef MULT16_16SU
+#define MULT16_16SU(a,b) _lmpysu(a,b)
+
+/* NOTE(review): MULT16_32_Q15 below uses a macro spelled MULT16_16, so
+   MULT_16_16 may be a misspelling that leaves MULT16_16 un-overridden --
+   confirm before relying on it. */
+#undef MULT_16_16
+#define MULT_16_16(a,b) _lmpy(a,b)
+
+/* FIXME: This is technically incorrect and is bound to cause problems. Is there any cleaner solution? */
+/* Sum of two partial products: a times the upper 16 bits of b, shifted left
+   by 1, plus MULT16_16SU(a,b) shifted right by 15. */
+#undef MULT16_32_Q15
+#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),(b)),15))
+
+/* NOTE(review): the OVERRIDE_* defines presumably tell the generic code that a
+   platform-specific version exists -- verify where they are tested. */
+#define celt_ilog2(x) (30 - _lnorm(x))
+#define OVERRIDE_CELT_ILOG2
+
+/* Larger of maxval() and the negated minval() over the first len entries of
+   x, each widened with EXTEND32. */
+#define celt_maxabs16(x, len) MAX32(EXTEND32(maxval((DATA *)x, len)),-EXTEND32(minval((DATA *)x, len)))
+#define OVERRIDE_CELT_MAXABS16
+
+#endif /* FIXED_C5X_H */
diff --git a/celt/fixed_c6x.h b/celt/fixed_c6x.h
new file mode 100644
index 00000000..bb6ad927
--- /dev/null
+++ b/celt/fixed_c6x.h
@@ -0,0 +1,70 @@
+/* Copyright (C) 2008 CSIRO */
+/**
+   @file fixed_c6x.h
+   @brief Fixed-point operations for the TI C6x DSP family
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_C6X_H
+#define FIXED_C6X_H
+
+/* Each #undef/#define pair below replaces a generic fixed-point macro with a
+   C6x-specific version (presumably TI compiler intrinsics -- confirm against
+   the toolchain documentation). */
+#undef MULT16_16SU
+#define MULT16_16SU(a,b) _mpysu(a,b)
+
+/* NOTE(review): the neighbouring macros are spelled without an underscore
+   after MULT (MULT16_16SU, MULT16_32_Q15), so MULT_16_16 may be a misspelling
+   of MULT16_16 -- confirm before relying on it. */
+#undef MULT_16_16
+#define MULT_16_16(a,b) _mpy(a,b)
+
+#define celt_ilog2(x) (30 - _norm(x))
+#define OVERRIDE_CELT_ILOG2
+
+#undef MULT16_32_Q15
+#define MULT16_32_Q15(a,b) (_mpylill(a, b) >> 15)
+
+/* The dsplib-based overrides below are compiled out. */
+#if 0
+#include "dsplib.h"
+
+#undef MAX16
+#define MAX16(a,b) _max(a,b)
+
+#undef MIN16
+#define MIN16(a,b) _min(a,b)
+
+#undef MAX32
+#define MAX32(a,b) _lmax(a,b)
+
+#undef MIN32
+#define MIN32(a,b) _lmin(a,b)
+
+#undef VSHR32
+#define VSHR32(a, shift) _lshl(a,-(shift))
+
+#undef MULT16_16_Q15
+#define MULT16_16_Q15(a,b) (_smpy(a,b))
+
+#define celt_maxabs16(x, len) MAX32(EXTEND32(maxval((DATA *)x, len)),-EXTEND32(minval((DATA *)x, len)))
+#define OVERRIDE_CELT_MAXABS16
+
+/* Closes the "#if 0" above; without it the include guard is left
+   unterminated at the end of the file. */
+#endif /* 0 */
+
+#endif /* FIXED_C6X_H */
author	Felicia Lim <flim@google.com>	2017-01-25 08:49:31 -0800
committer	Felicia Lim <flim@google.com>	2017-01-25 08:57:38 -0800
commit	0a1406acbe87c63044e9da7e0ab41bcbfa704f3d (patch)
tree	6dfda4da354e420a9ac2e256f168609e7d97571a /celt
parent	e65278181df6dea0ac1dde71f2534d66816119d2 (diff)
download	libopus-0a1406acbe87c63044e9da7e0ab41bcbfa704f3d.tar.gz