aboutsummaryrefslogtreecommitdiff
path: root/celt/x86
diff options
context:
space:
mode:
authorJean-Marc Valin <jmvalin@jmvalin.ca>2013-06-05 18:53:48 -0400
committerJean-Marc Valin <jmvalin@jmvalin.ca>2013-06-05 18:56:07 -0400
commita092aa8f80be25f62b5b62032d996f26fa188ed8 (patch)
treef431d6d57c04636beb05305ce4b77349f552059a /celt/x86
parentcd4c8249bc0e091789495a09b8942d28b687273c (diff)
downloadlibopus-a092aa8f80be25f62b5b62032d996f26fa188ed8.tar.gz
Adds SSE support (only xcorr_kernel() for now)
There's no CPU detection for it, it only gets enabled by __SSE__ which gcc (other compilers?) defines automatically when supported by -march=, which means at least all x86-64. For ia32, the user needs to enable it in the CFLAGS.
Diffstat (limited to 'celt/x86')
-rw-r--r--celt/x86/pitch_sse.h97
1 files changed, 97 insertions, 0 deletions
diff --git a/celt/x86/pitch_sse.h b/celt/x86/pitch_sse.h
new file mode 100644
index 00000000..6f5a2d1b
--- /dev/null
+++ b/celt/x86/pitch_sse.h
@@ -0,0 +1,97 @@
+/* Copyright (c) 2013 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/**
+ @file pitch_sse.h
+ @brief Pitch analysis
+ */
+
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_SSE_H
+#define PITCH_SSE_H
+
+#include <xmmintrin.h>
+#include "arch.h"
+
+#define OVERRIDE_XCORR_KERNEL
+
+static inline void xcorr_kernel(const opus_val16 * _x, const opus_val16 * _y, opus_val32 _sum[4], int len)
+{
+ int j;
+ __m128 sum;
+ __m128 x;
+ __m128 y;
+ __m128 y2;
+ __m128 y1;
+ __m128 y3;
+ __m128 tmp;
+ sum = _mm_loadu_ps(_sum);
+
+ x = _mm_loadu_ps(_x);
+ y = _mm_loadu_ps(_y);
+ y1 = _mm_loadu_ps(_y+1);
+ for (j=0;j<len-3;j+=4)
+ {
+ _x+=4;
+ _y+=4;
+ y2 = _mm_loadu_ps(_y);
+ y3 = _mm_loadu_ps(_y+1);
+ tmp = _mm_shuffle_ps(x, x, 0x00);
+ sum = _mm_add_ps(sum, _mm_mul_ps(tmp, y));
+ tmp = _mm_shuffle_ps(x, x, 0x55);
+ sum = _mm_add_ps(sum, _mm_mul_ps(tmp, y1));
+ tmp = _mm_shuffle_ps(x, x, 0xaa);
+ y = _mm_shuffle_ps(y, y2, 0x4e);
+ sum = _mm_add_ps(sum, _mm_mul_ps(tmp, y));
+ tmp = _mm_shuffle_ps(x, x, 0xff);
+ y = _mm_shuffle_ps(y1, y3, 0x4e);
+ sum = _mm_add_ps(sum, _mm_mul_ps(tmp, y));
+ x = _mm_loadu_ps(_x);
+ y = y2;
+ y1 = y3;
+ }
+ _y++;
+ if (j++<len)
+ {
+ tmp = _mm_shuffle_ps(x, x, 0x00);
+ sum = _mm_add_ps(sum, _mm_mul_ps(tmp, y));
+ }
+ if (j++<len)
+ {
+ tmp = _mm_shuffle_ps(x, x, 0x55);
+ y = _mm_loadu_ps(_y++);
+ sum = _mm_add_ps(sum, _mm_mul_ps(tmp, y));
+ }
+ if (j++<len)
+ {
+ tmp = _mm_shuffle_ps(x, x, 0xaa);
+ y = _mm_loadu_ps(_y++);
+ sum = _mm_add_ps(sum, _mm_mul_ps(tmp, y));
+ }
+ _mm_storeu_ps(_sum, sum);
+}
+
+#endif