aboutsummaryrefslogtreecommitdiff
path: root/silk
diff options
context:
space:
mode:
author: Timothy B. Terriberry <tterribe@xiph.org> 2013-05-20 15:29:04 -0700
committer: Timothy B. Terriberry <tterribe@xiph.org> 2013-05-20 23:15:15 -0700
commit: b518b56fe11bf53f88fe30d57ea9d668337983a9 (patch)
tree: 1cf7d43f65b1122942d5175df40b01c78ac31bc9 /silk
parent: 9880c4cdebf7e4db5616546e801749d36fdd7202 (diff)
download: libopus-b518b56fe11bf53f88fe30d57ea9d668337983a9.tar.gz
Clean up register constraints.
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0068b/CIHBJEHG.html says that "Rd cannot be the same as Rm" (this applies to the 32-bit multiplies such as MLA).
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0068b/CIHBJEHG.html says that "RdLo, RdHi, and Rm must all be different registers" (this applies to the long multiplies such as SMULL).

This means that some of the early clobbers I removed really should have been there (to prevent aliasing Rd, RdLo, or RdHi with Rm).
It also means that we should reverse some of the operands in the FFT's complex multiplies.
This should only affect the ARMv4 optimizations.

Thanks to Nils Wallménius for the report.

While we're here, audit the commutative pair flags again, since I screwed up at least one of them, and eliminate some dead code.
Diffstat (limited to 'silk')
-rw-r--r-- silk/SigProc_FIX_armv4.h 2
-rw-r--r-- silk/SigProc_FIX_armv5e.h 4
-rw-r--r-- silk/macros_armv4.h 14
-rw-r--r-- silk/macros_armv5e.h 2
4 files changed, 11 insertions, 11 deletions
diff --git a/silk/SigProc_FIX_armv4.h b/silk/SigProc_FIX_armv4.h
index ea372020..d69573e3 100644
--- a/silk/SigProc_FIX_armv4.h
+++ b/silk/SigProc_FIX_armv4.h
@@ -37,7 +37,7 @@ static inline opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
__asm__(
"#silk_MLA\n\t"
"mla %0, %1, %2, %3\n\t"
- : "=r"(res)
+ : "=&r"(res)
: "r"(b), "r"(c), "r"(a)
);
return res;
diff --git a/silk/SigProc_FIX_armv5e.h b/silk/SigProc_FIX_armv5e.h
index 804e2bc5..81a6324f 100644
--- a/silk/SigProc_FIX_armv5e.h
+++ b/silk/SigProc_FIX_armv5e.h
@@ -37,7 +37,7 @@ static inline opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
"#silk_SMULTT\n\t"
"smultt %0, %1, %2\n\t"
: "=r"(res)
- : "r"(a), "r"(b)
+ : "%r"(a), "r"(b)
);
return res;
}
@@ -52,7 +52,7 @@ static inline opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
"#silk_SMLATT\n\t"
"smlatt %0, %1, %2, %3\n\t"
: "=r"(res)
- : "r"(b), "r"(c), "r"(a)
+ : "%r"(b), "r"(c), "r"(a)
);
return res;
}
diff --git a/silk/macros_armv4.h b/silk/macros_armv4.h
index e5dfe69c..58df6c2e 100644
--- a/silk/macros_armv4.h
+++ b/silk/macros_armv4.h
@@ -37,7 +37,7 @@ static inline opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
__asm__(
"#silk_SMULWB\n\t"
"smull %0, %1, %2, %3\n\t"
- : "=r"(rd_lo), "=r"(rd_hi)
+ : "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(a), "r"(b<<16)
);
return rd_hi;
@@ -57,7 +57,7 @@ static inline opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
__asm__(
"#silk_SMULWT\n\t"
"smull %0, %1, %2, %3\n\t"
- : "=r"(rd_lo), "=r"(rd_hi)
+ : "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(a), "r"(b&~0xFFFF)
);
return rd_hi;
@@ -77,10 +77,10 @@ static inline opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
__asm__(
"#silk_SMULWW\n\t"
"smull %0, %1, %2, %3\n\t"
- : "=r"(rd_lo), "=r"(rd_hi)
+ : "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(a), "r"(b)
);
- return (rd_lo>>16)|(rd_hi<<16);
+ return (rd_hi<<16)+(rd_lo>>16);
}
#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
@@ -91,12 +91,12 @@ static inline opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
unsigned rd_lo;
int rd_hi;
__asm__(
- "#silk_SMULWW\n\t"
+ "#silk_SMLAWW\n\t"
"smull %0, %1, %2, %3\n\t"
- : "=r"(rd_lo), "=r"(rd_hi)
+ : "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(b), "r"(c)
);
- return a+((rd_lo>>16)|(rd_hi<<16));
+ return a+(rd_hi<<16)+(rd_lo>>16);
}
#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c))
diff --git a/silk/macros_armv5e.h b/silk/macros_armv5e.h
index a86586b3..63b1e30f 100644
--- a/silk/macros_armv5e.h
+++ b/silk/macros_armv5e.h
@@ -203,7 +203,7 @@ static inline opus_int32 silk_CLZ32_armv5(opus_int32 in32)
__asm__(
"#silk_CLZ32\n\t"
"clz %0, %1\n\t"
- : "=&r"(res)
+ : "=r"(res)
: "r"(in32)
);
return res;