aboutsummaryrefslogtreecommitdiff
path: root/celt/arm
diff options
context:
space:
mode:
author: Jean-Marc Valin <jmvalin@jmvalin.ca> 2014-01-21 11:07:57 -0500
committer: Jean-Marc Valin <jmvalin@jmvalin.ca> 2014-01-21 11:07:57 -0500
commit: 17b197837fb5bf6361e4cae7fbe0d0163e74b8a8 (patch)
tree: 3f253af8b9e9cb8d872db33a8ae58eb034d7fec1 /celt/arm
parent: 29354ff6e05c9ead9454981a7404a9b9ea203d2e (diff)
download: libopus-17b197837fb5bf6361e4cae7fbe0d0163e74b8a8.tar.gz
Speed up the comb filter on ARM by using MAC16_32_Q16()
Diffstat (limited to 'celt/arm')
-rw-r--r-- celt/arm/fixed_armv4.h 4
-rw-r--r-- celt/arm/fixed_armv5e.h 17
2 files changed, 21 insertions, 0 deletions
diff --git a/celt/arm/fixed_armv4.h b/celt/arm/fixed_armv4.h
index b690bc8c..efb3b189 100644
--- a/celt/arm/fixed_armv4.h
+++ b/celt/arm/fixed_armv4.h
@@ -68,6 +68,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
#undef MAC16_32_Q15
#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
+/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
+ Result fits in 32 bits. */
+#undef MAC16_32_Q16
+#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b))
/** 32x32 multiplication, followed by a 31-bit shift right. Result fits in 32 bits */
#undef MULT32_32_Q31
diff --git a/celt/arm/fixed_armv5e.h b/celt/arm/fixed_armv5e.h
index 1194a7d3..36d6bed0 100644
--- a/celt/arm/fixed_armv5e.h
+++ b/celt/arm/fixed_armv5e.h
@@ -82,6 +82,23 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
}
#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
+/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
+ Implemented as a single smlawb instruction; c is the addend, a the 16-bit factor, b the 32-bit factor. Result fits in 32 bits. */
+#undef MAC16_32_Q16 /* drop any earlier definition before the redefinition below */
+static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a,
+ opus_val32 b)
+{
+ int res;
+ __asm__(
+ "#MAC16_32_Q16\n\t"
+ "smlawb %0, %1, %2, %3;\n"
+ : "=r"(res) /* %0: output register, returned to the caller */
+ : "r"(b), "r"(a), "r"(c) /* %1 = b, %2 = a, %3 = c (addend) */
+ );
+ return res;
+}
+#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b))
+
/** 16x16 multiply-add where the result fits in 32 bits */
#undef MAC16_16
static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,