From cc12ba55d31f13a231da76d78b9764696217d6c6 Mon Sep 17 00:00:00 2001
From: andreas128
Date: Mon, 21 Aug 2017 12:01:36 +0200
Subject: Add loop unrolling for MemlessPoly.cpp

Process two samples per loop iteration in apply_coeff().

Review fixes folded into this patch:

- The magnitude used for the second sample of each pair is now taken
  from in[i+1]. It was read from in[i], so the second output of every
  pair was scaled by a polynomial of its neighbour's magnitude.
- The unrolled loop only runs while a complete pair is available
  (i + 1 < stop); a trailing single sample is handled after the loop.
  Previously a range holding an odd number of samples made the last
  iteration read in[stop] and write out[stop].
---
Reviewer notes (not part of the commit message):

* Line breaks and indentation were reconstructed from a copy of the
  patch whose whitespace had been collapsed. Compare the context lines
  with the tree before applying, or apply with whitespace tolerance.
* The blob ids on the "index" line are those of the patch as originally
  published; the post-image id was not recomputed for the review fixes.

 src/MemlessPoly.cpp | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 55 insertions(+), 13 deletions(-)

diff --git a/src/MemlessPoly.cpp b/src/MemlessPoly.cpp
index dc0dd31..2a16ebe 100644
--- a/src/MemlessPoly.cpp
+++ b/src/MemlessPoly.cpp
@@ -129,7 +129,8 @@ static void apply_coeff(
         const complexf *__restrict in, size_t start, size_t stop,
         complexf *__restrict out)
 {
-    for (size_t i = start; i < stop; i++) {
+    size_t i = start;
+    for (; i + 1 < stop; i += 2) {
 
         /* Implement
            a1*x + a3*x*|x|^2 + a5*x*|x|^4 + a5*x*|x|^4 + a5*x*|x|^4 + a7*x*|x|^6
@@ -142,20 +143,61 @@ static void apply_coeff(
         // Complex polynomial, all operations are on complex values.
         // Usually this is the representation we use when speaking
         // about the real-valued passband signal that the PA receives.
-        float in_mag = std::abs(in[i]);
-        float in_2 = in_mag * in_mag;
-        float in_4 = in_2 * in_2;
-        float in_6 = in_2 * in_4;
-        float in_8 = in_4 * in_4;
-        float in_10 = in_6 * in_4;
-        out[i] = in[i] *
+        float in_1_mag = std::abs(in[i]);
+        float in_1_2 = in_1_mag * in_1_mag;
+        float in_1_4 = in_1_2 * in_1_2;
+        float in_1_6 = in_1_2 * in_1_4;
+        float in_1_8 = in_1_4 * in_1_4;
+        float in_1_10 = in_1_6 * in_1_4;
+
+        // Second sample of the pair.
+        float in_2_mag = std::abs(in[i+1]);
+        float in_2_2 = in_2_mag * in_2_mag;
+        float in_2_4 = in_2_2 * in_2_2;
+        float in_2_6 = in_2_2 * in_2_4;
+        float in_2_8 = in_2_4 * in_2_4;
+        float in_2_10 = in_2_6 * in_2_4;
+
+        out[i+0] = in[i+0] *
             (
              coefs[0] +
-             coefs[1] * in_2 +
-             coefs[2] * in_4 +
-             coefs[3] * in_6 +
-             coefs[4] * in_8 +
-             coefs[5] * in_10
+             coefs[1] * in_1_2 +
+             coefs[2] * in_1_4 +
+             coefs[3] * in_1_6 +
+             coefs[4] * in_1_8 +
+             coefs[5] * in_1_10
+            );
+
+        out[i+1] = in[i+1] *
+            (
+             coefs[0] +
+             coefs[1] * in_2_2 +
+             coefs[2] * in_2_4 +
+             coefs[3] * in_2_6 +
+             coefs[4] * in_2_8 +
+             coefs[5] * in_2_10
             );
     }
+
+    // The loop above consumes samples in pairs. When the range holds an
+    // odd number of samples, the last one is processed here so that
+    // nothing at or beyond index stop is read or written.
+    if (i < stop) {
+        float in_mag = std::abs(in[i]);
+        float in_2 = in_mag * in_mag;
+        float in_4 = in_2 * in_2;
+        float in_6 = in_2 * in_4;
+        float in_8 = in_4 * in_4;
+        float in_10 = in_6 * in_4;
+
+        out[i] = in[i] *
+            (
+             coefs[0] +
+             coefs[1] * in_2 +
+             coefs[2] * in_4 +
+             coefs[3] * in_6 +
+             coefs[4] * in_8 +
+             coefs[5] * in_10
+            );
+    }
 }
-- 
cgit v1.2.3