diff options
author | andreas128 <Andreas> | 2017-08-21 12:01:36 +0200 |
---|---|---|
committer | andreas128 <Andreas> | 2017-08-21 12:01:36 +0200 |
commit | cc12ba55d31f13a231da76d78b9764696217d6c6 (patch) | |
tree | 5baa95b3250ed572f4ab2f882a34ab99a85f8b96 | |
parent | 83497fbdaf6486571ed70b41dae42f3c13432c1c (diff) | |
download | dabmod-cc12ba55d31f13a231da76d78b9764696217d6c6.tar.gz dabmod-cc12ba55d31f13a231da76d78b9764696217d6c6.tar.bz2 dabmod-cc12ba55d31f13a231da76d78b9764696217d6c6.zip |
Add loop unrolling for MemlessPoly.cpp
-rw-r--r-- | src/MemlessPoly.cpp | 44 |
1 file changed, 31 insertions, 13 deletions
diff --git a/src/MemlessPoly.cpp b/src/MemlessPoly.cpp index dc0dd31..2a16ebe 100644 --- a/src/MemlessPoly.cpp +++ b/src/MemlessPoly.cpp @@ -129,7 +129,7 @@ static void apply_coeff( const complexf *__restrict in, size_t start, size_t stop, complexf *__restrict out) { - for (size_t i = start; i < stop; i++) { + for (size_t i = start; i < stop; i+=2) { /* Implement a1*x + a3*x*|x|^2 + a5*x*|x|^4 + a5*x*|x|^4 + a5*x*|x|^4 + a7*x*|x|^6 @@ -142,20 +142,38 @@ static void apply_coeff( // Complex polynomial, all operations are on complex values. // Usually this is the representation we use when speaking // about the real-valued passband signal that the PA receives. - float in_mag = std::abs(in[i]); - float in_2 = in_mag * in_mag; - float in_4 = in_2 * in_2; - float in_6 = in_2 * in_4; - float in_8 = in_4 * in_4; - float in_10 = in_6 * in_4; - out[i] = in[i] * + float in_1_mag = std::abs(in[i]); + float in_1_2 = in_1_mag * in_1_mag; + float in_1_4 = in_1_2 * in_1_2; + float in_1_6 = in_1_2 * in_1_4; + float in_1_8 = in_1_4 * in_1_4; + float in_1_10 = in_1_6 * in_1_4; + + float in_2_mag = std::abs(in[i]); + float in_2_2 = in_2_mag * in_2_mag; + float in_2_4 = in_2_2 * in_2_2; + float in_2_6 = in_2_2 * in_2_4; + float in_2_8 = in_2_4 * in_2_4; + float in_2_10 = in_2_6 * in_2_4; + + out[i+0] = in[i+0] * ( coefs[0] + - coefs[1] * in_2 + - coefs[2] * in_4 + - coefs[3] * in_6 + - coefs[4] * in_8 + - coefs[5] * in_10 + coefs[1] * in_1_2 + + coefs[2] * in_1_4 + + coefs[3] * in_1_6 + + coefs[4] * in_1_8 + + coefs[5] * in_1_10 + ); + + out[i+1] = in[i+1] * + ( + coefs[0] + + coefs[1] * in_2_2 + + coefs[2] * in_2_4 + + coefs[3] * in_2_6 + + coefs[4] * in_2_8 + + coefs[5] * in_2_10 ); } } |