aboutsummaryrefslogtreecommitdiff
path: root/celt
diff options
context:
space:
mode:
authorFelicia Lim <flim@google.com>2017-01-25 08:49:31 -0800
committerFelicia Lim <flim@google.com>2017-01-25 08:57:38 -0800
commit0a1406acbe87c63044e9da7e0ab41bcbfa704f3d (patch)
tree6dfda4da354e420a9ac2e256f168609e7d97571a /celt
parente65278181df6dea0ac1dde71f2534d66816119d2 (diff)
downloadlibopus-0a1406acbe87c63044e9da7e0ab41bcbfa704f3d.tar.gz
Test: - verified build for arm*/mips*/x86* - checked functionality using an emulator and stagefright Change-Id: Iab6e7315c51e020dc986d57c89934a1205ec7a61
Diffstat (limited to 'celt')
-rw-r--r--celt/arm/celt_pitch_xcorr_arm-gnu.S_gnu.s552
-rw-r--r--celt/dump_modes/Makefile32
-rw-r--r--celt/dump_modes/dump_modes.c353
-rw-r--r--celt/dump_modes/dump_modes_arch.h45
-rw-r--r--celt/dump_modes/dump_modes_arm_ne10.c152
-rw-r--r--celt/fixed_c5x.h79
-rw-r--r--celt/fixed_c6x.h70
7 files changed, 731 insertions, 552 deletions
diff --git a/celt/arm/celt_pitch_xcorr_arm-gnu.S_gnu.s b/celt/arm/celt_pitch_xcorr_arm-gnu.S_gnu.s
deleted file mode 100644
index b62c5207..00000000
--- a/celt/arm/celt_pitch_xcorr_arm-gnu.S_gnu.s
+++ /dev/null
@@ -1,552 +0,0 @@
- .syntax unified
- .syntax unified
-,: Copyright (c) 2007-2008 CSIRO
-,: Copyright (c) 2007-2009 Xiph.Org Foundation
-,: Copyright (c) 2013 Parrot
-,: Written by Aurélien Zanelli
-,:
-,: Redistribution and use in source and binary forms, with or without
-,: modification, are permitted provided that the following conditions
-,: are met:
-,:
-,: - Redistributions of source code must retain the above copyright
-,: notice, this list of conditions and the following disclaimer.
-,:
-,: - Redistributions in binary form must reproduce the above copyright
-,: notice, this list of conditions and the following disclaimer in the
-,: documentation and/or other materials provided with the distribution.
-,:
-,: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-,: ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-,: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-,: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-,: OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-,: EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-,: PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES@ LOSS OF USE, @ DATA, OR
-,: PROFITS@ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-,: LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-,: NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-,: SOFTWARE, EVEN .if ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- .text@ .p2align 2; .arch armv7-a
- .fpu neon
- .object_arch armv4t
-
- .include "celt/arm/armopts_gnu.s"
-
- .if OPUS_ARM_MAY_HAVE_EDSP
- .global celt_pitch_xcorr_edsp
- .endif
-
- .if OPUS_ARM_MAY_HAVE_NEON
- .global celt_pitch_xcorr_neon
- .endif
-
- .if OPUS_ARM_MAY_HAVE_NEON
-
-,: Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
-@ xcorr_kernel_neon: @ PROC
-xcorr_kernel_neon_start::
- ,: input:
- ,: r3 = int len
- ,: r4 = opus_val16 *x
- ,: r5 = opus_val16 *y
- ,: q0 = opus_val32 sum[4]
- ,: output:
- ,: q0 = opus_val32 sum[4]
- ,: preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
- ,: internal usage:
- ,: r12 = int j
- ,: d3 = y_3|y_2|y_1|y_0
- ,: q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
- ,: q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
- ,: q8 = scratch
- ,:
- ,: Load y[0...3]
- ,: This requires len>0 to always be valid (which we assert in the C code).
- VLD1.16 {d5}, [r5]!
- SUBS r12, r3, #8
- BLE xcorr_kernel_neon_process4
-,: Process 8 samples at a time.
-,: This loop loads one y value more than we actually need. Therefore we have to
-,: stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
-,: reading past the end of the array.
-xcorr_kernel_neon_process8::
- ,: This loop has 19 total instructions (10 cycles to issue, minimum), with
- ,: - 2 cycles of ARM insrtuctions,
- ,: - 10 cycles of load/store/byte permute instructions, and
- ,: - 9 cycles of data processing instructions.
- ,: On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
- ,: latter two categories, meaning the whole loop should run in 10 cycles per
- ,: iteration, barring cache misses.
- ,:
- ,: Load x[0...7]
- VLD1.16 {d6, d7}, [r4]!
- ,: Unlike VMOV, VAND is a data processsing instruction (and doesn't get
- ,: assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
- VAND d3, d5, d5
- SUBS r12, r12, #8
- ,: Load y[4...11]
- VLD1.16 {d4, d5}, [r5]!
- VMLAL.S16 q0, d3, d6[0]
- VEXT.16 d16, d3, d4, #1
- VMLAL.S16 q0, d4, d7[0]
- VEXT.16 d17, d4, d5, #1
- VMLAL.S16 q0, d16, d6[1]
- VEXT.16 d16, d3, d4, #2
- VMLAL.S16 q0, d17, d7[1]
- VEXT.16 d17, d4, d5, #2
- VMLAL.S16 q0, d16, d6[2]
- VEXT.16 d16, d3, d4, #3
- VMLAL.S16 q0, d17, d7[2]
- VEXT.16 d17, d4, d5, #3
- VMLAL.S16 q0, d16, d6[3]
- VMLAL.S16 q0, d17, d7[3]
- BGT xcorr_kernel_neon_process8
-,: Process 4 samples here if we have > 4 left (still reading one extra y value).
-xcorr_kernel_neon_process4::
- ADDS r12, r12, #4
- BLE xcorr_kernel_neon_process2
- ,: Load x[0...3]
- VLD1.16 d6, [r4]!
- ,: Use VAND since it's a data processing instruction again.
- VAND d4, d5, d5
- SUB r12, r12, #4
- ,: Load y[4...7]
- VLD1.16 d5, [r5]!
- VMLAL.S16 q0, d4, d6[0]
- VEXT.16 d16, d4, d5, #1
- VMLAL.S16 q0, d16, d6[1]
- VEXT.16 d16, d4, d5, #2
- VMLAL.S16 q0, d16, d6[2]
- VEXT.16 d16, d4, d5, #3
- VMLAL.S16 q0, d16, d6[3]
-,: Process 2 samples here if we have > 2 left (still reading one extra y value).
-xcorr_kernel_neon_process2::
- ADDS r12, r12, #2
- BLE xcorr_kernel_neon_process1
- ,: Load x[0...1]
- VLD2.16 {d6[],d7[]}, [r4]!
- ,: Use VAND since it's a data processing instruction again.
- VAND d4, d5, d5
- SUB r12, r12, #2
- ,: Load y[4...5]
- VLD1.32 {d5[]}, [r5]!
- VMLAL.S16 q0, d4, d6
- VEXT.16 d16, d4, d5, #1
- ,: Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
- ,: instead of VEXT, since it's a data-processing instruction.
- VSRI.64 d5, d4, #32
- VMLAL.S16 q0, d16, d7
-,: Process 1 sample using the extra y value we loaded above.
-xcorr_kernel_neon_process1::
- ,: Load next *x
- VLD1.16 {d6[]}, [r4]!
- ADDS r12, r12, #1
- ,: y[0...3] are left in d5 from prior iteration(s) (if any)
- VMLAL.S16 q0, d5, d6
- MOVLE pc, lr
-,: Now process 1 last sample, not reading ahead.
- ,: Load last *y
- VLD1.16 {d4[]}, [r5]!
- VSRI.64 d4, d5, #16
- ,: Load last *x
- VLD1.16 {d6[]}, [r4]!
- VMLAL.S16 q0, d4, d6
- MOV pc, lr
- .size xcorr_kernel_neon, .-xcorr_kernel_neon ,: @ ENDP
-
-,: opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
-,: opus_val32 *xcorr, int len, int max_pitch)
-@ celt_pitch_xcorr_neon: @ PROC
- ,: input:
- ,: r0 = opus_val16 *_x
- ,: r1 = opus_val16 *_y
- ,: r2 = opus_val32 *xcorr
- ,: r3 = int len
- ,: output:
- ,: r0 = int maxcorr
- ,: internal usage:
- ,: r4 = opus_val16 *x (for xcorr_kernel_neon())
- ,: r5 = opus_val16 *y (for xcorr_kernel_neon())
- ,: r6 = int max_pitch
- ,: r12 = int j
- ,: q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon())
- STMFD sp!, {r4-r6, lr}
- LDR r6, [sp, #16]
- VMOV.S32 q15, #1
- ,: if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
- SUBS r6, r6, #4
- BLT celt_pitch_xcorr_neon_process4_done
-celt_pitch_xcorr_neon_process4::
- ,: xcorr_kernel_neon parameters:
- ,: r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
- MOV r4, r0
- MOV r5, r1
- VEOR q0, q0, q0
- ,: xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
- ,: So we don't save/restore any other registers.
- BL xcorr_kernel_neon_start
- SUBS r6, r6, #4
- VST1.32 {q0}, [r2]!
- ,: _y += 4
- ADD r1, r1, #8
- VMAX.S32 q15, q15, q0
- ,: if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
- BGE celt_pitch_xcorr_neon_process4
-,: We have less than 4 sums left to compute.
-celt_pitch_xcorr_neon_process4_done::
- ADDS r6, r6, #4
- ,: Reduce maxcorr to a single value
- VMAX.S32 d30, d30, d31
- VPMAX.S32 d30, d30, d30
- ,: if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
- BLE celt_pitch_xcorr_neon_done
-,: Now compute each remaining sum one at a time.
-celt_pitch_xcorr_neon_process_remaining::
- MOV r4, r0
- MOV r5, r1
- VMOV.I32 q0, #0
- SUBS r12, r3, #8
- BLT celt_pitch_xcorr_neon_process_remaining4
-,: Sum terms 8 at a time.
-celt_pitch_xcorr_neon_process_remaining_loop8::
- ,: Load x[0...7]
- VLD1.16 {q1}, [r4]!
- ,: Load y[0...7]
- VLD1.16 {q2}, [r5]!
- SUBS r12, r12, #8
- VMLAL.S16 q0, d4, d2
- VMLAL.S16 q0, d5, d3
- BGE celt_pitch_xcorr_neon_process_remaining_loop8
-,: Sum terms 4 at a time.
-celt_pitch_xcorr_neon_process_remaining4::
- ADDS r12, r12, #4
- BLT celt_pitch_xcorr_neon_process_remaining4_done
- ,: Load x[0...3]
- VLD1.16 {d2}, [r4]!
- ,: Load y[0...3]
- VLD1.16 {d3}, [r5]!
- SUB r12, r12, #4
- VMLAL.S16 q0, d3, d2
-celt_pitch_xcorr_neon_process_remaining4_done::
- ,: Reduce the sum to a single value.
- VADD.S32 d0, d0, d1
- VPADDL.S32 d0, d0
- ADDS r12, r12, #4
- BLE celt_pitch_xcorr_neon_process_remaining_loop_done
-,: Sum terms 1 at a time.
-celt_pitch_xcorr_neon_process_remaining_loop1::
- VLD1.16 {d2[]}, [r4]!
- VLD1.16 {d3[]}, [r5]!
- SUBS r12, r12, #1
- VMLAL.S16 q0, d2, d3
- BGT celt_pitch_xcorr_neon_process_remaining_loop1
-celt_pitch_xcorr_neon_process_remaining_loop_done::
- VST1.32 {d0[0]}, [r2]!
- VMAX.S32 d30, d30, d0
- SUBS r6, r6, #1
- ,: _y++
- ADD r1, r1, #2
- ,: if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
- BGT celt_pitch_xcorr_neon_process_remaining
-celt_pitch_xcorr_neon_done::
- VMOV.32 r0, d30[0]
- LDMFD sp!, {r4-r6, pc}
- .size celt_pitch_xcorr_neon, .-celt_pitch_xcorr_neon ,: @ ENDP
-
- .endif
-
- .if OPUS_ARM_MAY_HAVE_EDSP
-
-,: This will get used on ARMv7 devices without NEON, so it has been optimized
-,: to take advantage of dual-issuing where possible.
-@ xcorr_kernel_edsp: @ PROC
-xcorr_kernel_edsp_start::
- ,: input:
- ,: r3 = int len
- ,: r4 = opus_val16 *_x (must be 32-bit aligned)
- ,: r5 = opus_val16 *_y (must be 32-bit aligned)
- ,: r6...r9 = opus_val32 sum[4]
- ,: output:
- ,: r6...r9 = opus_val32 sum[4]
- ,: preserved: r0-r5
- ,: internal usage
- ,: r2 = int j
- ,: r12,r14 = opus_val16 x[4]
- ,: r10,r11 = opus_val16 y[4]
- STMFD sp!, {r2,r4,r5,lr}
- LDR r10, [r5], #4 ,: Load y[0...1]
- SUBS r2, r3, #4 ,: j = len-4
- LDR r11, [r5], #4 ,: Load y[2...3]
- BLE xcorr_kernel_edsp_process4_done
- LDR r12, [r4], #4 ,: Load x[0...1]
- ,: Stall
-xcorr_kernel_edsp_process4::
- ,: The multiplies must issue from pipeline 0, and can't dual-issue with each
- ,: other. Every other instruction here dual-issues with a multiply, and is
- ,: thus "free". There should be no stalls in the body of the loop.
- SMLABB r6, r12, r10, r6 ,: sum[0] = MAC16_16(sum[0],x_0,y_0)
- LDR r14, [r4], #4 ,: Load x[2...3]
- SMLABT r7, r12, r10, r7 ,: sum[1] = MAC16_16(sum[1],x_0,y_1)
- SUBS r2, r2, #4 ,: j-=4
- SMLABB r8, r12, r11, r8 ,: sum[2] = MAC16_16(sum[2],x_0,y_2)
- SMLABT r9, r12, r11, r9 ,: sum[3] = MAC16_16(sum[3],x_0,y_3)
- SMLATT r6, r12, r10, r6 ,: sum[0] = MAC16_16(sum[0],x_1,y_1)
- LDR r10, [r5], #4 ,: Load y[4...5]
- SMLATB r7, r12, r11, r7 ,: sum[1] = MAC16_16(sum[1],x_1,y_2)
- SMLATT r8, r12, r11, r8 ,: sum[2] = MAC16_16(sum[2],x_1,y_3)
- SMLATB r9, r12, r10, r9 ,: sum[3] = MAC16_16(sum[3],x_1,y_4)
- LDRGT r12, [r4], #4 ,: Load x[0...1]
- SMLABB r6, r14, r11, r6 ,: sum[0] = MAC16_16(sum[0],x_2,y_2)
- SMLABT r7, r14, r11, r7 ,: sum[1] = MAC16_16(sum[1],x_2,y_3)
- SMLABB r8, r14, r10, r8 ,: sum[2] = MAC16_16(sum[2],x_2,y_4)
- SMLABT r9, r14, r10, r9 ,: sum[3] = MAC16_16(sum[3],x_2,y_5)
- SMLATT r6, r14, r11, r6 ,: sum[0] = MAC16_16(sum[0],x_3,y_3)
- LDR r11, [r5], #4 ,: Load y[6...7]
- SMLATB r7, r14, r10, r7 ,: sum[1] = MAC16_16(sum[1],x_3,y_4)
- SMLATT r8, r14, r10, r8 ,: sum[2] = MAC16_16(sum[2],x_3,y_5)
- SMLATB r9, r14, r11, r9 ,: sum[3] = MAC16_16(sum[3],x_3,y_6)
- BGT xcorr_kernel_edsp_process4
-xcorr_kernel_edsp_process4_done::
- ADDS r2, r2, #4
- BLE xcorr_kernel_edsp_done
- LDRH r12, [r4], #2 ,: r12 = *x++
- SUBS r2, r2, #1 ,: j--
- ,: Stall
- SMLABB r6, r12, r10, r6 ,: sum[0] = MAC16_16(sum[0],x,y_0)
- LDRHGT r14, [r4], #2 ,: r14 = *x++
- SMLABT r7, r12, r10, r7 ,: sum[1] = MAC16_16(sum[1],x,y_1)
- SMLABB r8, r12, r11, r8 ,: sum[2] = MAC16_16(sum[2],x,y_2)
- SMLABT r9, r12, r11, r9 ,: sum[3] = MAC16_16(sum[3],x,y_3)
- BLE xcorr_kernel_edsp_done
- SMLABT r6, r14, r10, r6 ,: sum[0] = MAC16_16(sum[0],x,y_1)
- SUBS r2, r2, #1 ,: j--
- SMLABB r7, r14, r11, r7 ,: sum[1] = MAC16_16(sum[1],x,y_2)
- LDRH r10, [r5], #2 ,: r10 = y_4 = *y++
- SMLABT r8, r14, r11, r8 ,: sum[2] = MAC16_16(sum[2],x,y_3)
- LDRHGT r12, [r4], #2 ,: r12 = *x++
- SMLABB r9, r14, r10, r9 ,: sum[3] = MAC16_16(sum[3],x,y_4)
- BLE xcorr_kernel_edsp_done
- SMLABB r6, r12, r11, r6 ,: sum[0] = MAC16_16(sum[0],tmp,y_2)
- CMP r2, #1 ,: j--
- SMLABT r7, r12, r11, r7 ,: sum[1] = MAC16_16(sum[1],tmp,y_3)
- LDRH r2, [r5], #2 ,: r2 = y_5 = *y++
- SMLABB r8, r12, r10, r8 ,: sum[2] = MAC16_16(sum[2],tmp,y_4)
- LDRHGT r14, [r4] ,: r14 = *x
- SMLABB r9, r12, r2, r9 ,: sum[3] = MAC16_16(sum[3],tmp,y_5)
- BLE xcorr_kernel_edsp_done
- SMLABT r6, r14, r11, r6 ,: sum[0] = MAC16_16(sum[0],tmp,y_3)
- LDRH r11, [r5] ,: r11 = y_6 = *y
- SMLABB r7, r14, r10, r7 ,: sum[1] = MAC16_16(sum[1],tmp,y_4)
- SMLABB r8, r14, r2, r8 ,: sum[2] = MAC16_16(sum[2],tmp,y_5)
- SMLABB r9, r14, r11, r9 ,: sum[3] = MAC16_16(sum[3],tmp,y_6)
-xcorr_kernel_edsp_done::
- LDMFD sp!, {r2,r4,r5,pc}
- .size xcorr_kernel_edsp, .-xcorr_kernel_edsp ,: @ ENDP
-
-@ celt_pitch_xcorr_edsp: @ PROC
- ,: input:
- ,: r0 = opus_val16 *_x (must be 32-bit aligned)
- ,: r1 = opus_val16 *_y (only needs to be 16-bit aligned)
- ,: r2 = opus_val32 *xcorr
- ,: r3 = int len
- ,: output:
- ,: r0 = maxcorr
- ,: internal usage
- ,: r4 = opus_val16 *x
- ,: r5 = opus_val16 *y
- ,: r6 = opus_val32 sum0
- ,: r7 = opus_val32 sum1
- ,: r8 = opus_val32 sum2
- ,: r9 = opus_val32 sum3
- ,: r1 = int max_pitch
- ,: r12 = int j
- STMFD sp!, {r4-r11, lr}
- MOV r5, r1
- LDR r1, [sp, #36]
- MOV r4, r0
- TST r5, #3
- ,: maxcorr = 1
- MOV r0, #1
- BEQ celt_pitch_xcorr_edsp_process1u_done
-,: Compute one sum at the start to make y 32-bit aligned.
- SUBS r12, r3, #4
- ,: r14 = sum = 0
- MOV r14, #0
- LDRH r8, [r5], #2
- BLE celt_pitch_xcorr_edsp_process1u_loop4_done
- LDR r6, [r4], #4
- MOV r8, r8, LSL #16
-celt_pitch_xcorr_edsp_process1u_loop4::
- LDR r9, [r5], #4
- SMLABT r14, r6, r8, r14 ,: sum = MAC16_16(sum, x_0, y_0)
- LDR r7, [r4], #4
- SMLATB r14, r6, r9, r14 ,: sum = MAC16_16(sum, x_1, y_1)
- LDR r8, [r5], #4
- SMLABT r14, r7, r9, r14 ,: sum = MAC16_16(sum, x_2, y_2)
- SUBS r12, r12, #4 ,: j-=4
- SMLATB r14, r7, r8, r14 ,: sum = MAC16_16(sum, x_3, y_3)
- LDRGT r6, [r4], #4
- BGT celt_pitch_xcorr_edsp_process1u_loop4
- MOV r8, r8, LSR #16
-celt_pitch_xcorr_edsp_process1u_loop4_done::
- ADDS r12, r12, #4
-celt_pitch_xcorr_edsp_process1u_loop1::
- LDRHGE r6, [r4], #2
- ,: Stall
- SMLABBGE r14, r6, r8, r14 ,: sum = MAC16_16(sum, *x, *y)
- SUBSGE r12, r12, #1
- LDRHGT r8, [r5], #2
- BGT celt_pitch_xcorr_edsp_process1u_loop1
- ,: Restore _x
- SUB r4, r4, r3, LSL #1
- ,: Restore and advance _y
- SUB r5, r5, r3, LSL #1
- ,: maxcorr = max(maxcorr, sum)
- CMP r0, r14
- ADD r5, r5, #2
- MOVLT r0, r14
- SUBS r1, r1, #1
- ,: xcorr[i] = sum
- STR r14, [r2], #4
- BLE celt_pitch_xcorr_edsp_done
-celt_pitch_xcorr_edsp_process1u_done::
- ,: if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
- SUBS r1, r1, #4
- BLT celt_pitch_xcorr_edsp_process2
-celt_pitch_xcorr_edsp_process4::
- ,: xcorr_kernel_edsp parameters:
- ,: r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
- MOV r6, #0
- MOV r7, #0
- MOV r8, #0
- MOV r9, #0
- BL xcorr_kernel_edsp_start ,: xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
- ,: maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
- CMP r0, r6
- ,: _y+=4
- ADD r5, r5, #8
- MOVLT r0, r6
- CMP r0, r7
- MOVLT r0, r7
- CMP r0, r8
- MOVLT r0, r8
- CMP r0, r9
- MOVLT r0, r9
- STMIA r2!, {r6-r9}
- SUBS r1, r1, #4
- BGE celt_pitch_xcorr_edsp_process4
-celt_pitch_xcorr_edsp_process2::
- ADDS r1, r1, #2
- BLT celt_pitch_xcorr_edsp_process1a
- SUBS r12, r3, #4
- ,: {r10, r11} = {sum0, sum1} = {0, 0}
- MOV r10, #0
- MOV r11, #0
- LDR r8, [r5], #4
- BLE celt_pitch_xcorr_edsp_process2_loop_done
- LDR r6, [r4], #4
- LDR r9, [r5], #4
-celt_pitch_xcorr_edsp_process2_loop4::
- SMLABB r10, r6, r8, r10 ,: sum0 = MAC16_16(sum0, x_0, y_0)
- LDR r7, [r4], #4
- SMLABT r11, r6, r8, r11 ,: sum1 = MAC16_16(sum1, x_0, y_1)
- SUBS r12, r12, #4 ,: j-=4
- SMLATT r10, r6, r8, r10 ,: sum0 = MAC16_16(sum0, x_1, y_1)
- LDR r8, [r5], #4
- SMLATB r11, r6, r9, r11 ,: sum1 = MAC16_16(sum1, x_1, y_2)
- LDRGT r6, [r4], #4
- SMLABB r10, r7, r9, r10 ,: sum0 = MAC16_16(sum0, x_2, y_2)
- SMLABT r11, r7, r9, r11 ,: sum1 = MAC16_16(sum1, x_2, y_3)
- SMLATT r10, r7, r9, r10 ,: sum0 = MAC16_16(sum0, x_3, y_3)
- LDRGT r9, [r5], #4
- SMLATB r11, r7, r8, r11 ,: sum1 = MAC16_16(sum1, x_3, y_4)
- BGT celt_pitch_xcorr_edsp_process2_loop4
-celt_pitch_xcorr_edsp_process2_loop_done::
- ADDS r12, r12, #2
- BLE celt_pitch_xcorr_edsp_process2_1
- LDR r6, [r4], #4
- ,: Stall
- SMLABB r10, r6, r8, r10 ,: sum0 = MAC16_16(sum0, x_0, y_0)
- LDR r9, [r5], #4
- SMLABT r11, r6, r8, r11 ,: sum1 = MAC16_16(sum1, x_0, y_1)
- SUB r12, r12, #2
- SMLATT r10, r6, r8, r10 ,: sum0 = MAC16_16(sum0, x_1, y_1)
- MOV r8, r9
- SMLATB r11, r6, r9, r11 ,: sum1 = MAC16_16(sum1, x_1, y_2)
-celt_pitch_xcorr_edsp_process2_1::
- LDRH r6, [r4], #2
- ADDS r12, r12, #1
- ,: Stall
- SMLABB r10, r6, r8, r10 ,: sum0 = MAC16_16(sum0, x_0, y_0)
- LDRHGT r7, [r4], #2
- SMLABT r11, r6, r8, r11 ,: sum1 = MAC16_16(sum1, x_0, y_1)
- BLE celt_pitch_xcorr_edsp_process2_done
- LDRH r9, [r5], #2
- SMLABT r10, r7, r8, r10 ,: sum0 = MAC16_16(sum0, x_0, y_1)
- SMLABB r11, r7, r9, r11 ,: sum1 = MAC16_16(sum1, x_0, y_2)
-celt_pitch_xcorr_edsp_process2_done::
- ,: Restore _x
- SUB r4, r4, r3, LSL #1
- ,: Restore and advance _y
- SUB r5, r5, r3, LSL #1
- ,: maxcorr = max(maxcorr, sum0)
- CMP r0, r10
- ADD r5, r5, #2
- MOVLT r0, r10
- SUB r1, r1, #2
- ,: maxcorr = max(maxcorr, sum1)
- CMP r0, r11
- ,: xcorr[i] = sum
- STR r10, [r2], #4
- MOVLT r0, r11
- STR r11, [r2], #4
-celt_pitch_xcorr_edsp_process1a::
- ADDS r1, r1, #1
- BLT celt_pitch_xcorr_edsp_done
- SUBS r12, r3, #4
- ,: r14 = sum = 0
- MOV r14, #0
- BLT celt_pitch_xcorr_edsp_process1a_loop_done
- LDR r6, [r4], #4
- LDR r8, [r5], #4
- LDR r7, [r4], #4
- LDR r9, [r5], #4
-celt_pitch_xcorr_edsp_process1a_loop4::
- SMLABB r14, r6, r8, r14 ,: sum = MAC16_16(sum, x_0, y_0)
- SUBS r12, r12, #4 ,: j-=4
- SMLATT r14, r6, r8, r14 ,: sum = MAC16_16(sum, x_1, y_1)
- LDRGE r6, [r4], #4
- SMLABB r14, r7, r9, r14 ,: sum = MAC16_16(sum, x_2, y_2)
- LDRGE r8, [r5], #4
- SMLATT r14, r7, r9, r14 ,: sum = MAC16_16(sum, x_3, y_3)
- LDRGE r7, [r4], #4
- LDRGE r9, [r5], #4
- BGE celt_pitch_xcorr_edsp_process1a_loop4
-celt_pitch_xcorr_edsp_process1a_loop_done::
- ADDS r12, r12, #2
- LDRGE r6, [r4], #4
- LDRGE r8, [r5], #4
- ,: Stall
- SMLABBGE r14, r6, r8, r14 ,: sum = MAC16_16(sum, x_0, y_0)
- SUBGE r12, r12, #2
- SMLATTGE r14, r6, r8, r14 ,: sum = MAC16_16(sum, x_1, y_1)
- ADDS r12, r12, #1
- LDRHGE r6, [r4], #2
- LDRHGE r8, [r5], #2
- ,: Stall
- SMLABBGE r14, r6, r8, r14 ,: sum = MAC16_16(sum, *x, *y)
- ,: maxcorr = max(maxcorr, sum)
- CMP r0, r14
- ,: xcorr[i] = sum
- STR r14, [r2], #4
- MOVLT r0, r14
-celt_pitch_xcorr_edsp_done::
- LDMFD sp!, {r4-r11, pc}
- .size celt_pitch_xcorr_edsp, .-celt_pitch_xcorr_edsp ,: @ ENDP
-
- .endif
-
-,: @ END:
- .section .note.GNU-stack,"",%progbits
diff --git a/celt/dump_modes/Makefile b/celt/dump_modes/Makefile
new file mode 100644
index 00000000..93f599fb
--- /dev/null
+++ b/celt/dump_modes/Makefile
@@ -0,0 +1,32 @@
+
+# Builds the dump_modes tool from dump_modes.c plus the CELT sources it needs.
+# Optional variables read below: PREFIX (prepended to the compiler name) and,
+# when HAVE_ARM_NE10 is defined, NE10_INCDIR / NE10_LIBDIR.
+CFLAGS=-O2 -Wall -Wextra -DHAVE_CONFIG_H
+INCLUDES=-I. -I../ -I../.. -I../../include
+
+SOURCES = dump_modes.c \
+        ../modes.c \
+        ../cwrs.c \
+        ../rate.c \
+        ../entcode.c \
+        ../entenc.c \
+        ../entdec.c \
+        ../mathops.c \
+        ../mdct.c \
+        ../kiss_fft.c
+
+# NE10 variant: adds the NE10 FFT glue and the arch-specific dumper.
+ifdef HAVE_ARM_NE10
+CC = gcc
+CFLAGS += -mfpu=neon
+INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_PRESUME_NEON_INTR
+LIBS = -L$(NE10_LIBDIR) -lNE10
+SOURCES += ../arm/celt_ne10_fft.c \
+        dump_modes_arm_ne10.c \
+        ../arm/armcpu.c
+endif
+
+# all/clean name actions, not files; keep them working even if such files exist.
+.PHONY: all clean
+
+all: dump_modes
+
+# The sources are prerequisites so that editing any of them triggers a rebuild;
+# without them an existing dump_modes binary is always considered up to date.
+dump_modes: $(SOURCES)
+	$(PREFIX)$(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES $(SOURCES) -o $@ $(LIBS) -lm
+
+clean:
+	rm -f dump_modes
diff --git a/celt/dump_modes/dump_modes.c b/celt/dump_modes/dump_modes.c
new file mode 100644
index 00000000..9105a534
--- /dev/null
+++ b/celt/dump_modes/dump_modes.c
@@ -0,0 +1,353 @@
+/* Copyright (c) 2008 CSIRO
+ Copyright (c) 2008-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "modes.h"
+#include "celt.h"
+#include "rate.h"
+#include "dump_modes_arch.h"
+
+#define INT16 "%d"
+#define INT32 "%d"
+#define FLOAT "%#0.8gf"
+
+#ifdef FIXED_POINT
+#define WORD16 INT16
+#define WORD32 INT32
+#else
+#define WORD16 FLOAT
+#define WORD32 FLOAT
+#endif
+
+/* Writes C source to `file` containing static definitions for each of the
+   `nb_modes` modes in `modes`, followed by a `static_mode_list` array that
+   points at them.  Every table is wrapped in an #ifndef/#define guard keyed
+   on the values that determine its contents, so modes sharing a table only
+   define it once.  `file` must be a valid, writable stream. */
+void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
+{
+   int i, j, k;
+   int mdct_twiddles_size;
+   /* File banner: records the rate/frame-size pairs the tables were built for. */
+   fprintf(file, "/* The contents of this file was automatically generated by dump_modes.c\n");
+   fprintf(file, " with arguments:");
+   for (i=0;i<nb_modes;i++)
+   {
+      CELTMode *mode = modes[i];
+      fprintf(file, " %d %d",mode->Fs,mode->shortMdctSize*mode->nbShortMdcts);
+   }
+   fprintf(file, "\n It contains static definitions for some pre-defined modes. */\n");
+   fprintf(file, "#include \"modes.h\"\n");
+   fprintf(file, "#include \"rate.h\"\n");
+   /* The generated file pulls in the NE10 companion header when built with NE10. */
+   fprintf(file, "\n#ifdef HAVE_ARM_NE10\n");
+   fprintf(file, "#define OVERRIDE_FFT 1\n");
+   fprintf(file, "#include \"%s\"\n", ARM_NE10_ARCH_FILE_NAME);
+   fprintf(file, "#endif\n");
+
+   fprintf(file, "\n");
+
+   for (i=0;i<nb_modes;i++)
+   {
+      CELTMode *mode = modes[i];
+      int mdctSize;
+      int standard, framerate;
+
+      mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
+      /* "standard" modes reference the shared eband5ms / band_allocation
+         symbols below instead of emitting their own band tables. */
+      standard = (mode->Fs == 400*(opus_int32)mode->shortMdctSize);
+      framerate = mode->Fs/mode->shortMdctSize;
+
+      if (!standard)
+      {
+         fprintf(file, "#ifndef DEF_EBANDS%d_%d\n", mode->Fs, mdctSize);
+         fprintf(file, "#define DEF_EBANDS%d_%d\n", mode->Fs, mdctSize);
+         fprintf (file, "static const opus_int16 eBands%d_%d[%d] = {\n", mode->Fs, mdctSize, mode->nbEBands+2);
+         for (j=0;j<mode->nbEBands+2;j++)
+            fprintf (file, "%d, ", mode->eBands[j]);
+         fprintf (file, "};\n");
+         fprintf(file, "#endif\n");
+         fprintf(file, "\n");
+      }
+
+      /* Window table, keyed on the overlap length. */
+      fprintf(file, "#ifndef DEF_WINDOW%d\n", mode->overlap);
+      fprintf(file, "#define DEF_WINDOW%d\n", mode->overlap);
+      fprintf (file, "static const opus_val16 window%d[%d] = {\n", mode->overlap, mode->overlap);
+      for (j=0;j<mode->overlap;j++)
+         fprintf (file, WORD16 ",%c", mode->window[j],(j+6)%5==0?'\n':' ');
+      fprintf (file, "};\n");
+      fprintf(file, "#endif\n");
+      fprintf(file, "\n");
+
+      if (!standard)
+      {
+         fprintf(file, "#ifndef DEF_ALLOC_VECTORS%d_%d\n", mode->Fs, mdctSize);
+         fprintf(file, "#define DEF_ALLOC_VECTORS%d_%d\n", mode->Fs, mdctSize);
+         fprintf (file, "static const unsigned char allocVectors%d_%d[%d] = {\n", mode->Fs, mdctSize, mode->nbEBands*mode->nbAllocVectors);
+         /* One output row per allocation vector, one column per band. */
+         for (j=0;j<mode->nbAllocVectors;j++)
+         {
+            for (k=0;k<mode->nbEBands;k++)
+               fprintf (file, "%2d, ", mode->allocVectors[j*mode->nbEBands+k]);
+            fprintf (file, "\n");
+         }
+         fprintf (file, "};\n");
+         fprintf(file, "#endif\n");
+         fprintf(file, "\n");
+      }
+
+      fprintf(file, "#ifndef DEF_LOGN%d\n", framerate);
+      fprintf(file, "#define DEF_LOGN%d\n", framerate);
+      fprintf (file, "static const opus_int16 logN%d[%d] = {\n", framerate, mode->nbEBands);
+      for (j=0;j<mode->nbEBands;j++)
+         fprintf (file, "%d, ", mode->logN[j]);
+      fprintf (file, "};\n");
+      fprintf(file, "#endif\n");
+      fprintf(file, "\n");
+
+      /* Pulse cache */
+      fprintf(file, "#ifndef DEF_PULSE_CACHE%d\n", mode->Fs/mdctSize);
+      fprintf(file, "#define DEF_PULSE_CACHE%d\n", mode->Fs/mdctSize);
+      fprintf (file, "static const opus_int16 cache_index%d[%d] = {\n", mode->Fs/mdctSize, (mode->maxLM+2)*mode->nbEBands);
+      for (j=0;j<mode->nbEBands*(mode->maxLM+2);j++)
+         fprintf (file, "%d,%c", mode->cache.index[j],(j+16)%15==0?'\n':' ');
+      fprintf (file, "};\n");
+      fprintf (file, "static const unsigned char cache_bits%d[%d] = {\n", mode->Fs/mdctSize, mode->cache.size);
+      for (j=0;j<mode->cache.size;j++)
+         fprintf (file, "%d,%c", mode->cache.bits[j],(j+16)%15==0?'\n':' ');
+      fprintf (file, "};\n");
+      fprintf (file, "static const unsigned char cache_caps%d[%d] = {\n", mode->Fs/mdctSize, (mode->maxLM+1)*2*mode->nbEBands);
+      for (j=0;j<(mode->maxLM+1)*2*mode->nbEBands;j++)
+         fprintf (file, "%d,%c", mode->cache.caps[j],(j+16)%15==0?'\n':' ');
+      fprintf (file, "};\n");
+
+      fprintf(file, "#endif\n");
+      fprintf(file, "\n");
+
+      /* FFT twiddles */
+      /* This guard stays open across the bitrev and state tables below and is
+         closed by the lone "#endif" emitted just before the MDCT twiddles. */
+      fprintf(file, "#ifndef FFT_TWIDDLES%d_%d\n", mode->Fs, mdctSize);
+      fprintf(file, "#define FFT_TWIDDLES%d_%d\n", mode->Fs, mdctSize);
+      fprintf (file, "static const kiss_twiddle_cpx fft_twiddles%d_%d[%d] = {\n",
+            mode->Fs, mdctSize, mode->mdct.kfft[0]->nfft);
+      for (j=0;j<mode->mdct.kfft[0]->nfft;j++)
+         fprintf (file, "{" WORD16 ", " WORD16 "},%c", mode->mdct.kfft[0]->twiddles[j].r, mode->mdct.kfft[0]->twiddles[j].i,(j+3)%2==0?'\n':' ');
+      fprintf (file, "};\n");
+
+#ifdef OVERRIDE_FFT
+      /* Arch-specific dumper hook; it receives only the mode, not `file`. */
+      dump_mode_arch(mode);
+#endif
+      /* FFT Bitrev tables */
+      for (k=0;k<=mode->mdct.maxshift;k++)
+      {
+         fprintf(file, "#ifndef FFT_BITREV%d\n", mode->mdct.kfft[k]->nfft);
+         fprintf(file, "#define FFT_BITREV%d\n", mode->mdct.kfft[k]->nfft);
+         fprintf (file, "static const opus_int16 fft_bitrev%d[%d] = {\n",
+               mode->mdct.kfft[k]->nfft, mode->mdct.kfft[k]->nfft);
+         for (j=0;j<mode->mdct.kfft[k]->nfft;j++)
+            fprintf (file, "%d,%c", mode->mdct.kfft[k]->bitrev[j],(j+16)%15==0?'\n':' ');
+         fprintf (file, "};\n");
+
+         fprintf(file, "#endif\n");
+         fprintf(file, "\n");
+      }
+
+      /* FFT States */
+      for (k=0;k<=mode->mdct.maxshift;k++)
+      {
+         fprintf(file, "#ifndef FFT_STATE%d_%d_%d\n", mode->Fs, mdctSize, k);
+         fprintf(file, "#define FFT_STATE%d_%d_%d\n", mode->Fs, mdctSize, k);
+         fprintf (file, "static const kiss_fft_state fft_state%d_%d_%d = {\n",
+               mode->Fs, mdctSize, k);
+         fprintf (file, "%d, /* nfft */\n", mode->mdct.kfft[k]->nfft);
+         fprintf (file, WORD16 ", /* scale */\n", mode->mdct.kfft[k]->scale);
+#ifdef FIXED_POINT
+         fprintf (file, "%d, /* scale_shift */\n", mode->mdct.kfft[k]->scale_shift);
+#endif
+         fprintf (file, "%d, /* shift */\n", mode->mdct.kfft[k]->shift);
+         fprintf (file, "{");
+         for (j=0;j<2*MAXFACTORS;j++)
+            fprintf (file, "%d, ", mode->mdct.kfft[k]->factors[j]);
+         fprintf (file, "}, /* factors */\n");
+         fprintf (file, "fft_bitrev%d, /* bitrev */\n", mode->mdct.kfft[k]->nfft);
+         /* Label the twiddle pointer as such (it used to be tagged "bitrev"). */
+         fprintf (file, "fft_twiddles%d_%d, /* twiddles */\n", mode->Fs, mdctSize);
+
+         /* Last initializer: the arch state when OVERRIDE_FFT is set, else NULL. */
+         fprintf (file, "#ifdef OVERRIDE_FFT\n");
+         fprintf (file, "(arch_fft_state *)&cfg_arch_%d,\n", mode->mdct.kfft[k]->nfft);
+         fprintf (file, "#else\n");
+         fprintf (file, "NULL,\n");
+         fprintf(file, "#endif\n");
+
+         fprintf (file, "};\n");
+
+         fprintf(file, "#endif\n");
+         fprintf(file, "\n");
+      }
+
+      /* Closes the FFT_TWIDDLES guard opened above. */
+      fprintf(file, "#endif\n");
+      fprintf(file, "\n");
+
+      /* MDCT twiddles */
+      mdct_twiddles_size = mode->mdct.n-(mode->mdct.n/2>>mode->mdct.maxshift);
+      fprintf(file, "#ifndef MDCT_TWIDDLES%d\n", mdctSize);
+      fprintf(file, "#define MDCT_TWIDDLES%d\n", mdctSize);
+      fprintf (file, "static const opus_val16 mdct_twiddles%d[%d] = {\n",
+            mdctSize, mdct_twiddles_size);
+      for (j=0;j<mdct_twiddles_size;j++)
+         fprintf (file, WORD16 ",%c", mode->mdct.trig[j],(j+6)%5==0?'\n':' ');
+      fprintf (file, "};\n");
+
+      fprintf(file, "#endif\n");
+      fprintf(file, "\n");
+
+
+      /* Print the actual mode data */
+      fprintf(file, "static const CELTMode mode%d_%d_%d = {\n", mode->Fs, mdctSize, mode->overlap);
+      fprintf(file, INT32 ", /* Fs */\n", mode->Fs);
+      fprintf(file, "%d, /* overlap */\n", mode->overlap);
+      fprintf(file, "%d, /* nbEBands */\n", mode->nbEBands);
+      fprintf(file, "%d, /* effEBands */\n", mode->effEBands);
+      fprintf(file, "{");
+      for (j=0;j<4;j++)
+         fprintf(file, WORD16 ", ", mode->preemph[j]);
+      fprintf(file, "}, /* preemph */\n");
+      if (standard)
+         fprintf(file, "eband5ms, /* eBands */\n");
+      else
+         fprintf(file, "eBands%d_%d, /* eBands */\n", mode->Fs, mdctSize);
+
+      fprintf(file, "%d, /* maxLM */\n", mode->maxLM);
+      fprintf(file, "%d, /* nbShortMdcts */\n", mode->nbShortMdcts);
+      fprintf(file, "%d, /* shortMdctSize */\n", mode->shortMdctSize);
+
+      fprintf(file, "%d, /* nbAllocVectors */\n", mode->nbAllocVectors);
+      if (standard)
+         fprintf(file, "band_allocation, /* allocVectors */\n");
+      else
+         fprintf(file, "allocVectors%d_%d, /* allocVectors */\n", mode->Fs, mdctSize);
+
+      fprintf(file, "logN%d, /* logN */\n", framerate);
+      fprintf(file, "window%d, /* window */\n", mode->overlap);
+      fprintf(file, "{%d, %d, {", mode->mdct.n, mode->mdct.maxshift);
+      for (k=0;k<=mode->mdct.maxshift;k++)
+         fprintf(file, "&fft_state%d_%d_%d, ", mode->Fs, mdctSize, k);
+      fprintf (file, "}, mdct_twiddles%d}, /* mdct */\n", mdctSize);
+
+      fprintf(file, "{%d, cache_index%d, cache_bits%d, cache_caps%d}, /* cache */\n",
+            mode->cache.size, mode->Fs/mdctSize, mode->Fs/mdctSize, mode->Fs/mdctSize);
+      fprintf(file, "};\n");
+   }
+   fprintf(file, "\n");
+   fprintf(file, "/* List of all the available modes */\n");
+   fprintf(file, "#define TOTAL_MODES %d\n", nb_modes);
+   fprintf(file, "static const CELTMode * const static_mode_list[TOTAL_MODES] = {\n");
+   for (i=0;i<nb_modes;i++)
+   {
+      CELTMode *mode = modes[i];
+      int mdctSize;
+      mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
+      fprintf(file, "&mode%d_%d_%d,\n", mode->Fs, mdctSize, mode->overlap);
+   }
+   fprintf(file, "};\n");
+}
+
+/* Writes a generated header to `file` defining FRAMESIZE(mode) and
+   OVERLAP(mode) as constants.  Each macro is emitted only when the value is
+   positive and identical across all `nb_modes` entries of `modes`; as soon as
+   two modes disagree the value is set to -1, which suppresses the macro.
+   (The former CHANNELS/DISABLE_STEREO output was unreachable, because the
+   channel count was never set to anything but 0, and has been dropped.) */
+void dump_header(FILE *file, CELTMode **modes, int nb_modes)
+{
+   int i;
+   int frame_size = 0;
+   int overlap = 0;
+   fprintf (file, "/* This header file is generated automatically*/\n");
+   for (i=0;i<nb_modes;i++)
+   {
+      CELTMode *mode = modes[i];
+      /* 0 means "not seen yet"; -1 means "modes disagree". */
+      if (frame_size==0)
+         frame_size = mode->shortMdctSize*mode->nbShortMdcts;
+      else if (frame_size != mode->shortMdctSize*mode->nbShortMdcts)
+         frame_size = -1;
+      if (overlap==0)
+         overlap = mode->overlap;
+      else if (overlap != mode->overlap)
+         overlap = -1;
+   }
+   if (frame_size>0)
+   {
+      fprintf (file, "#define FRAMESIZE(mode) %d\n", frame_size);
+   }
+   if (overlap>0)
+   {
+      fprintf (file, "#define OVERLAP(mode) %d\n", overlap);
+   }
+}
+
+#ifdef FIXED_POINT
+#define BASENAME "static_modes_fixed"
+#else
+#define BASENAME "static_modes_float"
+#endif
+
+/* Usage: dump_modes rate frame_size [rate frame_size]...
+   Creates one custom mode per (rate, frame_size) pair and writes their static
+   definitions to BASENAME ".h" in the current directory.  Returns 0 on
+   success, 1 on a usage error, and EXIT_FAILURE when allocation, mode
+   creation, or writing the output file fails. */
+int main(int argc, char **argv)
+{
+   int i, nb;
+   int ret = 0;
+   FILE *file;
+   CELTMode **m;
+   /* Arguments must come in pairs, and at least one pair is required. */
+   if (argc%2 != 1 || argc<3)
+   {
+      fprintf (stderr, "Usage: %s rate frame_size [rate frame_size] [rate frame_size]...\n",argv[0]);
+      return 1;
+   }
+   nb = (argc-1)/2;
+   m = malloc(nb*sizeof(CELTMode*));
+   if (m==NULL)
+   {
+      fprintf(stderr,"Error allocating the list of %d modes\n", nb);
+      return EXIT_FAILURE;
+   }
+   for (i=0;i<nb;i++)
+   {
+      int Fs, frame;
+      Fs = atoi(argv[2*i+1]);
+      frame = atoi(argv[2*i+2]);
+      m[i] = opus_custom_mode_create(Fs, frame, NULL);
+      if (m[i]==NULL)
+      {
+         fprintf(stderr,"Error creating mode with Fs=%s, frame_size=%s\n",
+               argv[2*i+1],argv[2*i+2]);
+         /* Release the modes created so far before bailing out. */
+         while (i-- > 0)
+            opus_custom_mode_destroy(m[i]);
+         free(m);
+         return EXIT_FAILURE;
+      }
+   }
+   file = fopen(BASENAME ".h", "w");
+   if (file==NULL)
+   {
+      /* dump_modes() writes unconditionally, so a NULL stream must stop here. */
+      fprintf(stderr,"Error opening %s for writing\n", BASENAME ".h");
+      for (i=0;i<nb;i++)
+         opus_custom_mode_destroy(m[i]);
+      free(m);
+      return EXIT_FAILURE;
+   }
+#ifdef OVERRIDE_FFT
+   dump_modes_arch_init(m, nb);
+#endif
+   dump_modes(file, m, nb);
+   /* fclose() flushes buffered output; a failure means the file is incomplete. */
+   if (fclose(file)!=0)
+   {
+      fprintf(stderr,"Error writing %s\n", BASENAME ".h");
+      ret = EXIT_FAILURE;
+   }
+#ifdef OVERRIDE_FFT
+   dump_modes_arch_finalize();
+#endif
+   for (i=0;i<nb;i++)
+      opus_custom_mode_destroy(m[i]);
+   free(m);
+   return ret;
+}
diff --git a/celt/dump_modes/dump_modes_arch.h b/celt/dump_modes/dump_modes_arch.h
new file mode 100644
index 00000000..cc0d4be1
--- /dev/null
+++ b/celt/dump_modes/dump_modes_arch.h
@@ -0,0 +1,45 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+ Written by Viswanath Puttagunta */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DUMP_MODE_ARCH_H
+#define DUMP_MODE_ARCH_H
+
+/* Hooks through which the mode dumper produces an additional,
+   architecture-specific header.  This header does not declare CELTMode
+   itself, so the declaring header must be included before it. */
+
+/* Start the architecture-specific header for the nb_modes entries of modes. */
+void dump_modes_arch_init(CELTMode **modes, int nb_modes);
+/* Emit the architecture-specific data of a single mode. */
+void dump_mode_arch(CELTMode *mode);
+/* Finish the architecture-specific header. */
+void dump_modes_arch_finalize(void);
+
+/* File name of the generated Ne10 header for this build flavour. */
+#if !defined(FIXED_POINT)
+#define ARM_NE10_ARCH_FILE_NAME "static_modes_float_arm_ne10.h"
+#else
+#define ARM_NE10_ARCH_FILE_NAME "static_modes_fixed_arm_ne10.h"
+#endif
+
+/* OVERRIDE_FFT is defined only for builds with HAVE_ARM_NE10. */
+#if defined(HAVE_ARM_NE10)
+#define OVERRIDE_FFT (1)
+#endif
+
+#endif
diff --git a/celt/dump_modes/dump_modes_arm_ne10.c b/celt/dump_modes/dump_modes_arm_ne10.c
new file mode 100644
index 00000000..47578cda
--- /dev/null
+++ b/celt/dump_modes/dump_modes_arm_ne10.c
@@ -0,0 +1,152 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+ Written by Viswanath Puttagunta */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if defined(HAVE_CONFIG_H)
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "modes.h"
+#include "dump_modes_arch.h"
+#include <NE10_dsp.h>
+
+/* Pick the Ne10 configuration type used by this file, and the Ne10 type
+   names that are written as text into the generated header: the float32
+   variants unless FIXED_POINT is defined, the int32 variants otherwise. */
+#if !defined(FIXED_POINT)
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
+# define NE10_FFT_CPX_TYPE_T_STR "ne10_fft_cpx_float32_t"
+# define NE10_FFT_STATE_TYPE_T_STR "ne10_fft_state_float32_t"
+#else
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
+# define NE10_FFT_CPX_TYPE_T_STR "ne10_fft_cpx_int32_t"
+# define NE10_FFT_STATE_TYPE_T_STR "ne10_fft_state_int32_t"
+#endif
+
+/* Output stream shared by the functions below: opened by
+   dump_modes_arch_init() and closed by dump_modes_arch_finalize(). */
+static FILE *file;
+
+/* Create ARM_NE10_ARCH_FILE_NAME and write its preamble.
+
+   The preamble is a comment listing, for each of the nb_modes entries of
+   modes, its Fs and frame size (shortMdctSize*nbShortMdcts), followed by an
+   include of NE10_init.h.  The stream is kept in the file-scope `file` for
+   dump_mode_arch() and is closed by dump_modes_arch_finalize().
+
+   If the file cannot be opened an error is printed to stderr and the
+   program exits with EXIT_FAILURE. */
+void dump_modes_arch_init(CELTMode **modes, int nb_modes)
+{
+   int i;
+
+   file = fopen(ARM_NE10_ARCH_FILE_NAME, "w");
+   if (file == NULL)
+   {
+      /* Every fprintf() below would otherwise be given a NULL stream. */
+      fprintf(stderr, "Error opening %s for writing\n", ARM_NE10_ARCH_FILE_NAME);
+      exit(EXIT_FAILURE);
+   }
+   fprintf(file, "/* The contents of this file was automatically generated by\n");
+   fprintf(file, " * dump_mode_arm_ne10.c with arguments:");
+   for (i=0;i<nb_modes;i++)
+   {
+      CELTMode *mode = modes[i];
+      fprintf(file, " %d %d",mode->Fs,mode->shortMdctSize*mode->nbShortMdcts);
+   }
+   fprintf(file, "\n * It contains static definitions for some pre-defined modes. */\n");
+   fprintf(file, "#include <NE10_init.h>\n\n");
+}
+
+/* Close the stream held in the file-scope `file`.  Does nothing when no
+   stream is open, so calling it twice (or without a prior successful
+   dump_modes_arch_init()) is harmless. */
+void dump_modes_arch_finalize(void)
+{
+   if (file != NULL)
+   {
+      fclose(file);
+      /* Forget the closed stream so it cannot be closed or written again. */
+      file = NULL;
+   }
+}
+
+/* Append the static Ne10 FFT data of one mode to the stream `file`.
+
+   The output is wrapped in an NE10_FFT_PARAMS<Fs>_<frame size> guard and is
+   produced in three passes over mode->mdct.kfft[0..maxshift]:
+     1. the factor table of every FFT whose arch_fft->priv configuration is
+        non-NULL,
+     2. the twiddle table of every such FFT,
+     3. for every FFT, an arch_fft_state initializer named cfg_arch_<nfft>.
+        With a configuration it is preceded by the Ne10 state initializer it
+        points to; without one it holds 0 and NULL. */
+void dump_mode_arch(CELTMode *mode)
+{
+   int shift, n;
+   int mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
+
+   fprintf(file, "#ifndef NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
+   fprintf(file, "#define NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
+
+   /* Pass 1: cfg->factors */
+   for (shift=0; shift<=mode->mdct.maxshift; shift++)
+   {
+      const int nfft = mode->mdct.kfft[shift]->nfft;
+      NE10_FFT_CFG_TYPE_T cfg =
+         (NE10_FFT_CFG_TYPE_T)mode->mdct.kfft[shift]->arch_fft->priv;
+
+      if (cfg)
+      {
+         fprintf(file, "static const ne10_int32_t ne10_factors_%d[%d] = {\n",
+               nfft, (NE10_MAXFACTORS * 2));
+         for (n=0; n<(NE10_MAXFACTORS * 2); n++)
+         {
+            /* Line break after every 15th entry, a space otherwise. */
+            const char sep = (n+16)%15==0 ? '\n' : ' ';
+            fprintf(file, "%d,%c", cfg->factors[n], sep);
+         }
+         fprintf (file, "};\n");
+      }
+   }
+
+   /* Pass 2: cfg->twiddles */
+   for (shift=0; shift<=mode->mdct.maxshift; shift++)
+   {
+      const int nfft = mode->mdct.kfft[shift]->nfft;
+      NE10_FFT_CFG_TYPE_T cfg =
+         (NE10_FFT_CFG_TYPE_T)mode->mdct.kfft[shift]->arch_fft->priv;
+
+      if (cfg)
+      {
+         fprintf(file, "static const %s ne10_twiddles_%d[%d] = {\n",
+               NE10_FFT_CPX_TYPE_T_STR, nfft, nfft);
+         for (n=0; n<nfft; n++)
+         {
+            /* Line break after every 3rd entry, a space otherwise. */
+            const char sep = (n+4)%3==0 ? '\n' : ' ';
+#if !defined(FIXED_POINT)
+            fprintf(file, "{%#0.8gf,%#0.8gf},%c",
+                  cfg->twiddles[n].r, cfg->twiddles[n].i, sep);
+#else
+            fprintf(file, "{%d,%d},%c",
+                  cfg->twiddles[n].r, cfg->twiddles[n].i, sep);
+#endif
+         }
+         fprintf (file, "};\n");
+      }
+   }
+
+   /* Pass 3: the state initializers that reference the tables above */
+   for (shift=0; shift<=mode->mdct.maxshift; shift++)
+   {
+      const int nfft = mode->mdct.kfft[shift]->nfft;
+      NE10_FFT_CFG_TYPE_T cfg =
+         (NE10_FFT_CFG_TYPE_T)mode->mdct.kfft[shift]->arch_fft->priv;
+
+      if (!cfg)
+      {
+         /* No configuration for this size: emit an empty wrapper. */
+         fprintf(file, "/* Ne10 does not support scaled FFT for length = %d */\n",
+               nfft);
+         fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n", nfft);
+         fprintf(file, "0,\n");
+         fprintf(file, "NULL\n");
+         fprintf(file, "};\n");
+      } else {
+         fprintf(file, "static const %s %s_%d = {\n", NE10_FFT_STATE_TYPE_T_STR,
+               NE10_FFT_STATE_TYPE_T_STR, nfft);
+         fprintf(file, "%d,\n", cfg->nfft);
+         fprintf(file, "(ne10_int32_t *)ne10_factors_%d,\n", nfft);
+         fprintf(file, "(%s *)ne10_twiddles_%d,\n",
+               NE10_FFT_CPX_TYPE_T_STR, nfft);
+         fprintf(file, "NULL,\n"); /* buffer */
+         fprintf(file, "(%s *)&ne10_twiddles_%d[%d],\n",
+               NE10_FFT_CPX_TYPE_T_STR, nfft, cfg->nfft);
+#if !defined(FIXED_POINT)
+         fprintf(file, "/* is_forward_scaled = true */\n");
+         fprintf(file, "(ne10_int32_t) 1,\n");
+         fprintf(file, "/* is_backward_scaled = false */\n");
+         fprintf(file, "(ne10_int32_t) 0,\n");
+#endif
+         fprintf(file, "};\n");
+
+         fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n", nfft);
+         fprintf(file, "1,\n");
+         fprintf(file, "(void *)&%s_%d,\n",
+               NE10_FFT_STATE_TYPE_T_STR, nfft);
+         fprintf(file, "};\n\n");
+      }
+   }
+
+   fprintf(file, "#endif /* end NE10_FFT_PARAMS%d_%d */\n", mode->Fs, mdctSize);
+}
diff --git a/celt/fixed_c5x.h b/celt/fixed_c5x.h
new file mode 100644
index 00000000..ea95a998
--- /dev/null
+++ b/celt/fixed_c5x.h
@@ -0,0 +1,79 @@
+/* Copyright (C) 2003 Jean-Marc Valin */
+/**
+ @file fixed_c5x.h
+ @brief Fixed-point operations for the TI C5x DSP family
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_C5X_H
+#define FIXED_C5X_H
+
+#include "dsplib.h"
+
+#undef IMUL32
+/* Replacement for the generic IMUL32 built from three compiler multiply
+   intrinsics: _lmpy(i>>16,j) and _lmpy(i,j>>16) are summed and shifted left
+   by 16 bits, then added to _lmpyu(i,j).
+   NOTE(review): presumably each intrinsic multiplies the low 16 bits of its
+   operands - confirm against the TI compiler documentation. */
+static OPUS_INLINE long IMUL32(long i, long j)
+{
+   long cross;
+   cross = _lmpy(i>>16,j);
+   cross += _lmpy(i,j>>16);
+   return (cross<<16) + _lmpyu(i,j);
+}
+
+/* Each macro below drops any earlier definition and maps the operation
+   onto a compiler intrinsic instead. */
+
+/* 16-bit maximum / minimum. */
+#undef MAX16
+#define MAX16(a,b) _max(a,b)
+
+#undef MIN16
+#define MIN16(a,b) _min(a,b)
+
+/* 32-bit maximum / minimum. */
+#undef MAX32
+#define MAX32(a,b) _lmax(a,b)
+
+#undef MIN32
+#define MIN32(a,b) _lmin(a,b)
+
+/* Variable shift expressed as _lshl() with the negated shift count. */
+#undef VSHR32
+#define VSHR32(a, shift) _lshl(a,-(shift))
+
+#undef MULT16_16_Q15
+#define MULT16_16_Q15(a,b) (_smpy(a,b))
+
+#undef MULT16_16SU
+#define MULT16_16SU(a,b) _lmpysu(a,b)
+
+/* NOTE(review): the MULT16_32_Q15 definition below calls MULT16_16 (no
+   underscore after MULT), so this MULT_16_16 spelling may be a typo that
+   leaves the override unused - confirm before renaming. */
+#undef MULT_16_16
+#define MULT_16_16(a,b) _lmpy(a,b)
+
+/* FIXME: This is technically incorrect and is bound to cause problems. Is there any cleaner solution? */
+#undef MULT16_32_Q15
+#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),(b)),15))
+
+/* celt_ilog2 via _lnorm(); OVERRIDE_CELT_ILOG2 flags that this header
+   provides it. */
+#define celt_ilog2(x) (30 - _lnorm(x))
+#define OVERRIDE_CELT_ILOG2
+
+/* Largest absolute value among the len entries of x, taken as the larger of
+   maxval() and the negated minval() over the buffer.  The arguments are
+   parenthesised so that an expression argument such as `buf+offset` is cast
+   and passed as a whole rather than being split by the (DATA *) cast.
+   Both arguments are evaluated twice. */
+#define celt_maxabs16(x, len) MAX32(EXTEND32(maxval((DATA *)(x), (len))),-EXTEND32(minval((DATA *)(x), (len))))
+#define OVERRIDE_CELT_MAXABS16
+
+#endif /* FIXED_C5X_H */
diff --git a/celt/fixed_c6x.h b/celt/fixed_c6x.h
new file mode 100644
index 00000000..bb6ad927
--- /dev/null
+++ b/celt/fixed_c6x.h
@@ -0,0 +1,70 @@
+/* Copyright (C) 2008 CSIRO */
+/**
+ @file fixed_c6x.h
+ @brief Fixed-point operations for the TI C6x DSP family
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_C6X_H
+#define FIXED_C6X_H
+
+/* Each #undef/#define pair below drops any earlier definition of the macro
+   and maps the operation onto a compiler intrinsic instead. */
+
+#undef MULT16_16SU
+#define MULT16_16SU(a,b) _mpysu(a,b)
+
+/* NOTE(review): celt/fixed_c5x.h calls a macro spelled MULT16_16 (no
+   underscore after MULT); this MULT_16_16 spelling may be a typo that leaves
+   the override unused - confirm before renaming. */
+#undef MULT_16_16
+#define MULT_16_16(a,b) _mpy(a,b)
+
+/* celt_ilog2 via _norm(); OVERRIDE_CELT_ILOG2 flags that this header
+   provides it. */
+#define celt_ilog2(x) (30 - _norm(x))
+#define OVERRIDE_CELT_ILOG2
+
+#undef MULT16_32_Q15
+#define MULT16_32_Q15(a,b) (_mpylill(a, b) >> 15)
+
+/* The overrides below are compiled out.  This section previously had no
+   #endif of its own, so the single closing #endif paired with "#if 0" and
+   left the include guard unterminated. */
+#if 0
+#include "dsplib.h"
+
+#undef MAX16
+#define MAX16(a,b) _max(a,b)
+
+#undef MIN16
+#define MIN16(a,b) _min(a,b)
+
+#undef MAX32
+#define MAX32(a,b) _lmax(a,b)
+
+#undef MIN32
+#define MIN32(a,b) _lmin(a,b)
+
+#undef VSHR32
+#define VSHR32(a, shift) _lshl(a,-(shift))
+
+#undef MULT16_16_Q15
+#define MULT16_16_Q15(a,b) (_smpy(a,b))
+
+#define celt_maxabs16(x, len) MAX32(EXTEND32(maxval((DATA *)x, len)),-EXTEND32(minval((DATA *)x, len)))
+#define OVERRIDE_CELT_MAXABS16
+
+#endif /* 0 */
+
+#endif /* FIXED_C6X_H */