diff options
Diffstat (limited to 'src/MemlessPoly.cpp')
-rw-r--r-- | src/MemlessPoly.cpp | 244 |
1 files changed, 182 insertions, 62 deletions
diff --git a/src/MemlessPoly.cpp b/src/MemlessPoly.cpp index 7e074eb..d7f9a96 100644 --- a/src/MemlessPoly.cpp +++ b/src/MemlessPoly.cpp @@ -29,6 +29,8 @@ along with ODR-DabMod. If not, see <http://www.gnu.org/licenses/>. */ +#pragma GCC optimize ("O3") + #include "MemlessPoly.h" #include "PcDebug.h" #include "Utils.h" @@ -36,31 +38,60 @@ #include <stdio.h> #include <stdexcept> +#include <future> #include <array> #include <iostream> #include <fstream> #include <memory> +#include <complex> using namespace std; +// Number of AM/AM coefs, identical to number of AM/PM coefs +#define NUM_COEFS 5 -// By default the signal is unchanged -static const std::array<float, 8> default_coefficients({ - 1, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0 - }); - - -MemlessPoly::MemlessPoly(const std::string& coefs_file) : +MemlessPoly::MemlessPoly(const std::string& coefs_file, unsigned int num_threads) : PipelinedModCodec(), RemoteControllable("memlesspoly"), - m_coefs_file(coefs_file) + m_coefs_am(), + m_coefs_pm(), + m_coefs_file(coefs_file), + m_coefs_mutex() { PDEBUG("MemlessPoly::MemlessPoly(%s) @ %p\n", coefs_file.c_str(), this); RC_ADD_PARAMETER(ncoefs, "(Read-only) number of coefficients."); - RC_ADD_PARAMETER(coeffile, "Filename containing coefficients. When written to, the new file gets automatically loaded."); + RC_ADD_PARAMETER(coeffile, "Filename containing coefficients. " + "When set, the file gets loaded."); + + if (num_threads == 0) { + const unsigned int hw_concurrency = std::thread::hardware_concurrency(); + etiLog.level(info) << "Polynomial Predistorter will use " << + hw_concurrency << " threads (auto detected)"; + + for (size_t i = 0; i < hw_concurrency; i++) { + m_workers.emplace_back(); + } + + for (auto& worker : m_workers) { + worker.thread = std::thread( + &MemlessPoly::worker_thread, &worker); + } + } + else { + etiLog.level(info) << "Polynomial Predistorter will use " << + num_threads << " threads (set in config file)"; + + for (size_t i = 0; i < num_threads; i++) { + m_workers.emplace_back(); + } + + for (auto& worker : m_workers) { + worker.thread = std::thread( + &MemlessPoly::worker_thread, &worker); + } + } load_coefficients(m_coefs_file); @@ -69,76 +100,165 @@ MemlessPoly::MemlessPoly(const std::string& coefs_file) : void MemlessPoly::load_coefficients(const std::string &coefFile) { - std::vector<float> coefs; - if (coefFile == "default") { - std::copy(default_coefficients.begin(), default_coefficients.end(), - std::back_inserter(coefs)); + std::vector<float> coefs_am; + std::vector<float> coefs_pm; + std::ifstream coef_fstream(coefFile.c_str()); + if (!coef_fstream) { + throw std::runtime_error("MemlessPoly: Could not open file with coefs!"); } - else { - std::ifstream coef_fstream(coefFile.c_str()); - if(!coef_fstream) { - fprintf(stderr, "MemlessPoly: file %s could not be opened !\n", coefFile.c_str()); - throw std::runtime_error("MemlessPoly: Could not open file with coefs! "); - } - int n_coefs; - coef_fstream >> n_coefs; + int n_coefs; + coef_fstream >> n_coefs; - if (n_coefs <= 0) { - fprintf(stderr, "MemlessPoly: warning: coefs file has invalid format\n"); - throw std::runtime_error("MemlessPoly: coefs file has invalid format."); - } + if (n_coefs <= 0) { + throw std::runtime_error("MemlessPoly: coefs file has invalid format."); + } + else if (n_coefs != NUM_COEFS) { + throw std::runtime_error("MemlessPoly: invalid number of coefs: " + + std::to_string(n_coefs) + " expected " + std::to_string(NUM_COEFS)); + } - if (n_coefs != 8) { - throw std::runtime_error( "MemlessPoly: error: coefs file does not have 8 coefs\n"); - } + const int n_entries = 2 * n_coefs; - fprintf(stderr, "MemlessPoly: Reading %d coefs...\n", n_coefs); + etiLog.log(debug, "MemlessPoly: Reading %d coefs...", n_entries); - coefs.resize(n_coefs); + coefs_am.resize(n_coefs); + coefs_pm.resize(n_coefs); - int n; - for (n = 0; n < n_coefs; n++) { - coef_fstream >> coefs[n]; - PDEBUG("MemlessPoly: coef: %f\n", coefs[n] ); - if (coef_fstream.eof()) { - fprintf(stderr, "MemlessPoly: file %s should contains %d coefs, but EOF reached "\ - "after %d coefs !\n", coefFile.c_str(), n_coefs, n); - throw std::runtime_error("MemlessPoly: coefs file invalid ! "); - } + for (int n = 0; n < n_entries; n++) { + float a; + coef_fstream >> a; + + if (n < n_coefs) { + coefs_am[n] = a; + } + else { + coefs_pm[n - n_coefs] = a; + } + + if (coef_fstream.eof()) { + etiLog.log(error, "MemlessPoly: file %s should contains %d coefs, " + "but EOF reached after %d coefs !", + coefFile.c_str(), n_entries, n); + throw std::runtime_error("MemlessPoly: coefs file invalid !"); } } { std::lock_guard<std::mutex> lock(m_coefs_mutex); - m_coefs = coefs; + m_coefs_am = coefs_am; + m_coefs_pm = coefs_pm; } } +/* The restrict keyword is C99, g++ and clang++ however support __restrict + * instead, and this allows the compiler to auto-vectorize the loop. + */ +static void apply_coeff( + const float *__restrict coefs_am, const float *__restrict coefs_pm, + const complexf *__restrict in, size_t start, size_t stop, + complexf *__restrict out) +{ + for (size_t i = start; i < stop; i+=1) { + + float in_mag_sq = in[i].real() * in[i].real() + in[i].imag() * in[i].imag(); + + float amplitude_correction = + ( coefs_am[0] + in_mag_sq * + ( coefs_am[1] + in_mag_sq * + ( coefs_am[2] + in_mag_sq * + ( coefs_am[3] + in_mag_sq * + coefs_am[4])))); + + float phase_correction = -1 * + ( coefs_pm[0] + in_mag_sq * + ( coefs_pm[1] + in_mag_sq * + ( coefs_pm[2] + in_mag_sq * + ( coefs_pm[3] + in_mag_sq * + coefs_pm[4])))); + + float phase_correction_sq = phase_correction * phase_correction; + + // Approximation for Cosinus 1 - 1/2 x^2 + 1/24 x^4 - 1/720 x^6 + float re = (1.0f - phase_correction_sq * + ( -0.5f + phase_correction_sq * + ( 0.486666f + phase_correction_sq * + ( -0.00138888f)))); + + // Approximation for Sinus x + 1/6 x^3 + 1/120 x^5 + float im = phase_correction * + (1.0f + phase_correction_sq * + (0.166666f + phase_correction_sq * + (0.00833333f))); + + out[i] = in[i] * amplitude_correction * complex<float>(re, im); + } +} + +void MemlessPoly::worker_thread(MemlessPoly::worker_t *workerdata) +{ + while (true) { + worker_t::input_data_t in_data; + workerdata->in_queue.wait_and_pop(in_data); + + if (in_data.terminate) { + break; + } + + apply_coeff(in_data.coefs_am, in_data.coefs_pm, + in_data.in, in_data.start, in_data.stop, + in_data.out); + + workerdata->out_queue.push(1); + } +} int MemlessPoly::internal_process(Buffer* const dataIn, Buffer* dataOut) { - const float* in = reinterpret_cast<const float*>(dataIn->getData()); - float* out = reinterpret_cast<float*>(dataOut->getData()); - size_t sizeIn = dataIn->getLength() / sizeof(float); - - { - std::lock_guard<std::mutex> lock(m_coefs_mutex); - for (size_t i = 0; i < sizeIn; i += 1) { - float mag = std::abs(in[i]); - //out[i] = in[i]; - out[i] = in[i] * ( - m_coefs[0] + - m_coefs[1] * mag + - m_coefs[2] * mag*mag + - m_coefs[3] * mag*mag*mag + - m_coefs[4] * mag*mag*mag*mag + - m_coefs[5] * mag*mag*mag*mag*mag + - m_coefs[6] * mag*mag*mag*mag*mag*mag + - m_coefs[7] * mag*mag*mag*mag*mag*mag*mag - ); + dataOut->setLength(dataIn->getLength()); + + const complexf* in = reinterpret_cast<const complexf*>(dataIn->getData()); + complexf* out = reinterpret_cast<complexf*>(dataOut->getData()); + size_t sizeOut = dataOut->getLength() / sizeof(complexf); + + { + std::lock_guard<std::mutex> lock(m_coefs_mutex); + const size_t num_threads = m_workers.size(); + + if (num_threads > 0) { + const size_t step = sizeOut / num_threads; + + size_t start = 0; + for (auto& worker : m_workers) { + worker_t::input_data_t dat; + dat.terminate = false; + dat.coefs_am = m_coefs_am.data(); + dat.coefs_pm = m_coefs_pm.data(); + dat.in = in; + dat.start = start; + dat.stop = start + step; + dat.out = out; + + worker.in_queue.push(dat); + + start += step; + } + + // Do the last in this thread + apply_coeff(m_coefs_am.data(), m_coefs_pm.data(), + in, start, sizeOut, out); + + // Wait for completion of the tasks + for (auto& worker : m_workers) { + int ret; + worker.out_queue.wait_and_pop(ret); } } + else { + apply_coeff(m_coefs_am.data(), m_coefs_pm.data(), + in, 0, sizeOut, out); + } + } return dataOut->getLength(); } @@ -172,9 +292,9 @@ const string MemlessPoly::get_parameter(const string& parameter) const { stringstream ss; if (parameter == "ncoefs") { - ss << m_coefs.size(); + ss << m_coefs_am.size(); } - else if (parameter == "coefFile") { + else if (parameter == "coeffile") { ss << m_coefs_file; } else { |