From 0c31bc95ca1ca4ee420a6e2d82bad6b1e50177c6 Mon Sep 17 00:00:00 2001 From: "Matthias P. Braendli" Date: Fri, 4 Aug 2017 20:39:24 +0200 Subject: Parallelise MemlessPoly using std::async --- src/MemlessPoly.cpp | 73 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 17 deletions(-) (limited to 'src/MemlessPoly.cpp') diff --git a/src/MemlessPoly.cpp b/src/MemlessPoly.cpp index 90b859d..b0b5ddd 100644 --- a/src/MemlessPoly.cpp +++ b/src/MemlessPoly.cpp @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -117,6 +118,29 @@ void MemlessPoly::load_coefficients(const std::string &coefFile) } } +static void apply_coeff( + const vector &coefs, + const complexf* in, size_t start, size_t stop, + complexf* out) +{ + for (size_t i = start; i < stop; i++) { + + /* Implement + a0 + a1*x + a2*x^2 + a3*x^3 + a4*x^4 + a5*x^5; + with less multiplications: + a0 + x*(a1 + x*(a2 + x*(a3 + x*(a3 + x*(a4 + a5*x))))); + */ + + /* Make sure to adapt NUM_COEFS when you change this */ + out[i] = + coefs[0] + in[i] * + ( coefs[1] + in[i] * + ( coefs[2] + in[i] * + ( coefs[3] + in[i] * + ( coefs[4] + in[i] * + ( coefs[5] + in[i] ))))); + } +} int MemlessPoly::internal_process(Buffer* const dataIn, Buffer* dataOut) { @@ -127,23 +151,38 @@ int MemlessPoly::internal_process(Buffer* const dataIn, Buffer* dataOut) size_t sizeOut = dataOut->getLength() / sizeof(complexf); { - std::lock_guard lock(m_coefs_mutex); - for (size_t i = 0; i < sizeOut; i += 1) { - - /* Implement - a0 + a1*x + a2*x^2 + a3*x^3 + a4*x^4 + a5*x^5; - with less multiplications: - a0 + x*(a1 + x*(a2 + x*(a3 + x*(a3 + x*(a4 + a5*x))))); - */ - - /* Make sure to adapt NUM_COEFS when you change this */ - out[i] = - m_coefs[0] + in[i] * - ( m_coefs[1] + in[i] * - ( m_coefs[2] + in[i] * - ( m_coefs[3] + in[i] * - ( m_coefs[4] + in[i] * - ( m_coefs[5] + in[i] ))))); + std::lock_guard lock(m_coefs_mutex); + const unsigned int hw_concurrency = std::thread::hardware_concurrency(); + + if (hw_concurrency) { + const size_t step = sizeOut / hw_concurrency; + vector > flags; + + size_t start = 0; + for (size_t i = 0; i < hw_concurrency - 1; i++) { + flags.push_back(async(launch::async, apply_coeff, + m_coefs, in, start, start + step, out)); + + start += step; + } + + // Do the last in this thread + apply_coeff(m_coefs, in, start, sizeOut, out); + + // Wait for completion of the tasks + for (auto& f : flags) { + f.get(); + } + } + else { + static bool error_printed = false; + if (not error_printed) { + etiLog.level(warn) << + "Your platform doesn't seem to have hardware concurrency. " + "MemlessPoly will run single-threaded"; + } + // For some reason we don't have hw concurrency. + apply_coeff(m_coefs, in, 0, sizeOut, out); } } -- cgit v1.2.3