improve handling at end of buffer

android-mainline-10.0.0_r9 android-mainline-10.0.0_r7 android-mainline-10.0.0_r5 android-mainline-10.0.0_r4 android-mainline-10.0.0_r10 android-10.0.0_r45 android-10.0.0_r44 android-10.0.0_r43 android-10.0.0_r42 android-10.0.0_r41 android-10.0.0_r40 android-10.0.0_r39 android-10.0.0_r38 android-10.0.0_r37 android-10.0.0_r36 android-10.0.0_r35 android-10.0.0_r34 android-10.0.0_r33 android-10.0.0_r32 android-10.0.0_r31 android-10.0.0_r30 android10-qpr3-s1-release android10-qpr3-release android10-qpr2-s4-release android10-qpr2-s3-release android10-qpr2-s2-release android10-qpr2-s1-release android10-qpr2-release android10-qpr1-mainline-release android10-mainline-media-release android10-d4-s1-release android10-d4-release

A prior change reduced the number of iterations through the input buffer to prevent the NEON operations from overrunning the end of the locally allocated buffer. While that avoided the overrun, it generated bad results. Here we instead extend the locally allocated buffers enough that the original iteration count won't overrun. Some pre-existing bit-exact issues remain.

Bug: 136616344
Test: CTS + bit-exact cross-checks.
(cherry picked from commit aae866aed579da4e1c3299a1e9b94a1713a0decb)
Merged-In: Ifb790a7d1d09a4ce7da900b43e3fa1f7ab01ac53
Change-Id: Ia4a94c89979d6b3b0ddead135036aec40259f53a
author: Ray Essick <essick@google.com> 2019-09-04 09:09:52 -0700
committer: Ray Essick <essick@google.com> 2019-09-04 18:41:08 +0000
commit: 5d2e0a05b7df5aca4525cc547cd8538f3339b526 (patch)
tree: 92b213cf1c81a89895c63d1840c68c2e14208802
parent: 385b87c83fa3b6be4016babfce40c63fc359e28a (diff)
download: libopus-5d2e0a05b7df5aca4525cc547cd8538f3339b526.tar.gz
1 files changed, 8 insertions, 3 deletions
diff --git a/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c b/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c
index ee06f986..6f3be025 100644
--- a/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c
+++ b/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c
@@ -84,7 +84,9 @@ void silk_warped_autocorrelation_FIX_neon(
         silk_assert( ( order & 1 ) == 0 );
         silk_assert( 2 * QS - QC >= 0 );
 
-        ALLOC( input_QST, length + 2 * MAX_SHAPE_LPC_ORDER, opus_int32 );
+        /* The additional +4 is to ensure a later vld1q_s32 call does not overflow.               */
+        /* Strictly, only +3 is needed but +4 simplifies initialization using the 4x32 neon load. */
+        ALLOC( input_QST, length + 2 * MAX_SHAPE_LPC_ORDER + 4, opus_int32 );
 
         input_QS = input_QST;
         /* input_QS has zero paddings in the beginning and end. */
@@ -121,6 +123,8 @@ void silk_warped_autocorrelation_FIX_neon(
         vst1q_s32( input_QS, vdupq_n_s32( 0 ) );
         input_QS += 4;
         vst1q_s32( input_QS, vdupq_n_s32( 0 ) );
+        input_QS += 4;
+        vst1q_s32( input_QS, vdupq_n_s32( 0 ) );
         input_QS = input_QST + MAX_SHAPE_LPC_ORDER - orderT;
 
         /* The following loop runs ( length + order ) times, with ( order ) extra epilogues.                  */
@@ -153,7 +157,8 @@ void silk_warped_autocorrelation_FIX_neon(
             opus_int o = orderT;
             int32x4_t state_QS_s32x4[ 3 ][ 2 ];
 
-            ALLOC( state, length + orderT, opus_int32 );
+            /* The additional +4 is to ensure a later vld1q_s32 call does not overflow. */
+            ALLOC( state, length + order + 4, opus_int32 );
             state_QS_s32x4[ 2 ][ 1 ] = vdupq_n_s32( 0 );
 
             /* Calculate 8 taps of all inputs in each loop. */
@@ -172,7 +177,7 @@ void silk_warped_autocorrelation_FIX_neon(
                     state_QS_s32x4[ 0 ][ 1 ] = calc_state( state_QS_s32x4[ 0 ][ 1 ], state_QS_s32x4[ 2 ][ 1 ], state_QS_s32x4[ 1 ][ 1 ], warping_Q16_s32x4 );
                     state_QS_s32x4[ 1 ][ 0 ] = state_QS_s32x4[ 2 ][ 0 ];
                     state_QS_s32x4[ 1 ][ 1 ] = state_QS_s32x4[ 2 ][ 1 ];
-                } while( ++n < ( length + order - 3) );
+                } while( ++n < ( length + order ) );
                 in = state;
                 o -= 8;
             } while( o > 4 );
author	Ray Essick <essick@google.com>	2019-09-04 09:09:52 -0700
committer	Ray Essick <essick@google.com>	2019-09-04 18:41:08 +0000
commit	5d2e0a05b7df5aca4525cc547cd8538f3339b526 (patch)
tree	92b213cf1c81a89895c63d1840c68c2e14208802
parent	385b87c83fa3b6be4016babfce40c63fc359e28a (diff)
download	libopus-5d2e0a05b7df5aca4525cc547cd8538f3339b526.tar.gz