summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author andreas128 <Andreas> 2017-08-21 12:01:36 +0200
committer andreas128 <Andreas> 2017-08-21 12:01:36 +0200
commit cc12ba55d31f13a231da76d78b9764696217d6c6 (patch)
tree 5baa95b3250ed572f4ab2f882a34ab99a85f8b96 /src
parent 83497fbdaf6486571ed70b41dae42f3c13432c1c (diff)
download dabmod-cc12ba55d31f13a231da76d78b9764696217d6c6.tar.gz
dabmod-cc12ba55d31f13a231da76d78b9764696217d6c6.tar.bz2
dabmod-cc12ba55d31f13a231da76d78b9764696217d6c6.zip
Add loop unrolling for MemlessPoly.cpp
Diffstat (limited to 'src')
-rw-r--r-- src/MemlessPoly.cpp 44
1 files changed, 31 insertions, 13 deletions
diff --git a/src/MemlessPoly.cpp b/src/MemlessPoly.cpp
index dc0dd31..2a16ebe 100644
--- a/src/MemlessPoly.cpp
+++ b/src/MemlessPoly.cpp
@@ -129,7 +129,7 @@ static void apply_coeff(
const complexf *__restrict in, size_t start, size_t stop,
complexf *__restrict out)
{
- for (size_t i = start; i < stop; i++) {
+ for (size_t i = start; i < stop; i+=2) {
/* Implement
a1*x + a3*x*|x|^2 + a5*x*|x|^4 + a7*x*|x|^6 + a9*x*|x|^8 + a11*x*|x|^10
@@ -142,20 +142,38 @@ static void apply_coeff(
// Complex polynomial, all operations are on complex values.
// Usually this is the representation we use when speaking
// about the real-valued passband signal that the PA receives.
- float in_mag = std::abs(in[i]);
- float in_2 = in_mag * in_mag;
- float in_4 = in_2 * in_2;
- float in_6 = in_2 * in_4;
- float in_8 = in_4 * in_4;
- float in_10 = in_6 * in_4;
- out[i] = in[i] *
+ float in_1_mag = std::abs(in[i]);
+ float in_1_2 = in_1_mag * in_1_mag;
+ float in_1_4 = in_1_2 * in_1_2;
+ float in_1_6 = in_1_2 * in_1_4;
+ float in_1_8 = in_1_4 * in_1_4;
+ float in_1_10 = in_1_6 * in_1_4;
+
+ float in_2_mag = std::abs(in[i]);
+ float in_2_2 = in_2_mag * in_2_mag;
+ float in_2_4 = in_2_2 * in_2_2;
+ float in_2_6 = in_2_2 * in_2_4;
+ float in_2_8 = in_2_4 * in_2_4;
+ float in_2_10 = in_2_6 * in_2_4;
+
+ out[i+0] = in[i+0] *
(
coefs[0] +
- coefs[1] * in_2 +
- coefs[2] * in_4 +
- coefs[3] * in_6 +
- coefs[4] * in_8 +
- coefs[5] * in_10
+ coefs[1] * in_1_2 +
+ coefs[2] * in_1_4 +
+ coefs[3] * in_1_6 +
+ coefs[4] * in_1_8 +
+ coefs[5] * in_1_10
+ );
+
+ out[i+1] = in[i+1] *
+ (
+ coefs[0] +
+ coefs[1] * in_2_2 +
+ coefs[2] * in_2_4 +
+ coefs[3] * in_2_6 +
+ coefs[4] * in_2_8 +
+ coefs[5] * in_2_10
);
}
}