From 7452ff0fd85da423a5be1fdc3873106160c6ccae Mon Sep 17 00:00:00 2001 From: "Matthias P. Braendli" Date: Tue, 5 Nov 2024 16:45:35 +0100 Subject: Debug DEXTER FFT Acccelerator --- src/DabModulator.cpp | 4 + src/DifferentialModulator.cpp | 2 +- src/Flowgraph.cpp | 10 +- src/FrequencyInterleaver.cpp | 8 +- src/GuardIntervalInserter.cpp | 210 ++++++++++++++++++++++-------------------- src/OfdmGenerator.cpp | 16 ++-- src/PhaseReference.cpp | 4 +- src/SignalMultiplexer.cpp | 8 +- 8 files changed, 132 insertions(+), 130 deletions(-) (limited to 'src') diff --git a/src/DabModulator.cpp b/src/DabModulator.cpp index 4cbd0f5..39c1d4a 100644 --- a/src/DabModulator.cpp +++ b/src/DabModulator.cpp @@ -220,6 +220,7 @@ int DabModulator::process(Buffer* dataOut) m_settings.cfrErrorClip); break; case FFTEngine::DEXTER: +#if defined(HAVE_DEXTER) cifOfdm = make_shared( (1 + m_nbSymbols), m_nbCarriers, @@ -227,6 +228,9 @@ int DabModulator::process(Buffer* dataOut) m_settings.enableCfr, m_settings.cfrClip, m_settings.cfrErrorClip); +#else + throw std::runtime_error("Cannot use DEXTER fft engine without --enable-dexter"); +#endif break; } diff --git a/src/DifferentialModulator.cpp b/src/DifferentialModulator.cpp index cfebf65..21b4c3e 100644 --- a/src/DifferentialModulator.cpp +++ b/src/DifferentialModulator.cpp @@ -43,7 +43,7 @@ DifferentialModulator::~DifferentialModulator() template -void do_process(size_t carriers, std::vector dataIn, Buffer* dataOut) +void do_process(size_t carriers, const std::vector& dataIn, Buffer* dataOut) { size_t phaseSize = dataIn[0]->getLength() / sizeof(T); size_t dataSize = dataIn[1]->getLength() / sizeof(T); diff --git a/src/Flowgraph.cpp b/src/Flowgraph.cpp index 3d4cdcc..339e326 100644 --- a/src/Flowgraph.cpp +++ b/src/Flowgraph.cpp @@ -27,12 +27,10 @@ #include "Flowgraph.h" #include "PcDebug.h" #include "Log.h" -#include #include #include #include #include -#include #include #include @@ -254,15 +252,15 @@ Flowgraph::~Flowgraph() char node_time_sz[1024] = {}; for (const auto &node : nodes) { - snprintf(node_time_sz, 1023, " %30s: %10lu us (%2.2f %%)\n", + snprintf(node_time_sz, 1023, " %30s: %10lld us (%2.2f %%)\n", node->plugin()->name(), - node->processTime(), + (long long)node->processTime(), node->processTime() * 100.0 / myProcessTime); ss << node_time_sz; } - snprintf(node_time_sz, 1023, " %30s: %10lu us (100.00 %%)\n", "total", - myProcessTime); + snprintf(node_time_sz, 1023, " %30s: %10lld us (100.00 %%)\n", "total", + (long long)myProcessTime); ss << node_time_sz; etiLog.level(debug) << ss.str(); diff --git a/src/FrequencyInterleaver.cpp b/src/FrequencyInterleaver.cpp index 856e8d0..6f36dcb 100644 --- a/src/FrequencyInterleaver.cpp +++ b/src/FrequencyInterleaver.cpp @@ -61,7 +61,7 @@ FrequencyInterleaver::FrequencyInterleaver(size_t mode, bool fixedPoint) : beta = 255; break; default: - PDEBUG("Carriers: %zu\n", (d_carriers >> 1) << 1); + PDEBUG("Carriers: %zu\n", (m_carriers >> 1) << 1); throw std::runtime_error("FrequencyInterleaver: invalid dab mode"); } @@ -79,10 +79,10 @@ FrequencyInterleaver::FrequencyInterleaver(size_t mode, bool fixedPoint) : && perm <= (num - (num - m_carriers) / 2) && perm != (num / 2)) { PDEBUG("i: %4zu, R: %4zu, d: %4zu, n: %4zu, k: %5zi, index: %zu\n", - j, perm, perm, index - d_indexes, perm - num / 2, + j, perm, perm, index - m_indices, perm - num / 2, perm > num / 2 ? perm - (1 + (num / 2)) - : perm + (d_carriers - (num / 2))); + : perm + (m_carriers - (num / 2))); *(index++) = perm > num / 2 ? perm - (1 + (num / 2)) : perm + (m_carriers - (num / 2)); } @@ -114,7 +114,7 @@ void do_process(Buffer* const dataIn, Buffer* dataOut, } for (size_t i = 0; i < sizeIn;) { -// memset(out, 0, d_carriers * sizeof(T)); +// memset(out, 0, m_carriers * sizeof(T)); for (size_t j = 0; j < carriers; i += 4, j += 4) { out[indices[j]] = in[i]; out[indices[j + 1]] = in[i + 1]; diff --git a/src/GuardIntervalInserter.cpp b/src/GuardIntervalInserter.cpp index 7061e47..26d4fd1 100644 --- a/src/GuardIntervalInserter.cpp +++ b/src/GuardIntervalInserter.cpp @@ -140,118 +140,123 @@ int do_process(const GuardIntervalInserter::Params& p, Buffer* const dataIn, Buf // windowing too. std::lock_guard lock(p.windowMutex); - if (p.windowOverlap) { if constexpr (std::is_same_v) { - { - // Handle Null symbol separately because it is longer - const size_t prefixlength = p.nullSize - p.spacing; - - // end = spacing - memcpy(out, &in[p.spacing - prefixlength], - prefixlength * sizeof(T)); - - memcpy(&out[prefixlength], in, (p.spacing - p.windowOverlap) * sizeof(T)); - - // The remaining part of the symbol must have half of the window applied, - // sloping down from 1 to 0.5 - for (size_t i = 0; i < p.windowOverlap; i++) { - const size_t out_ix = prefixlength + p.spacing - p.windowOverlap + i; - const size_t in_ix = p.spacing - p.windowOverlap + i; - out[out_ix] = in[in_ix] * p.window[2*p.windowOverlap - (i+1)]; - } - - // Suffix is taken from the beginning of the symbol, and sees the other - // half of the window applied. - for (size_t i = 0; i < p.windowOverlap; i++) { - const size_t out_ix = prefixlength + p.spacing + i; - out[out_ix] = in[i] * p.window[p.windowOverlap - (i+1)]; - } + if (p.windowOverlap) { + if constexpr (std::is_same_v) { + { + // Handle Null symbol separately because it is longer + const size_t prefixlength = p.nullSize - p.spacing; + + // end = spacing + memcpy(out, &in[p.spacing - prefixlength], + prefixlength * sizeof(T)); + + memcpy(&out[prefixlength], in, (p.spacing - p.windowOverlap) * sizeof(T)); + + // The remaining part of the symbol must have half of the window applied, + // sloping down from 1 to 0.5 + for (size_t i = 0; i < p.windowOverlap; i++) { + const size_t out_ix = prefixlength + p.spacing - p.windowOverlap + i; + const size_t in_ix = p.spacing - p.windowOverlap + i; + out[out_ix] = in[in_ix] * p.window[2*p.windowOverlap - (i+1)]; + } - in += p.spacing; - out += p.nullSize; - // out is now pointing to the proper end of symbol. There are - // windowOverlap samples ahead that were already written. - } + // Suffix is taken from the beginning of the symbol, and sees the other + // half of the window applied. + for (size_t i = 0; i < p.windowOverlap; i++) { + const size_t out_ix = prefixlength + p.spacing + i; + out[out_ix] = in[i] * p.window[p.windowOverlap - (i+1)]; + } - // Data symbols - for (size_t sym_ix = 0; sym_ix < p.nbSymbols; sym_ix++) { - /* _ix variables are indices into in[], _ox variables are - * indices for out[] */ - const ssize_t start_rise_ox = -p.windowOverlap; - const size_t start_rise_ix = 2 * p.spacing - p.symSize - p.windowOverlap; - /* - const size_t start_real_symbol_ox = 0; - const size_t start_real_symbol_ix = 2 * p.spacing - p.symSize; - */ - const ssize_t end_rise_ox = p.windowOverlap; - const size_t end_rise_ix = 2 * p.spacing - p.symSize + p.windowOverlap; - const ssize_t end_cyclic_prefix_ox = p.symSize - p.spacing; - /* end_cyclic_prefix_ix = end of symbol - const size_t begin_fall_ox = p.symSize - p.windowOverlap; - const size_t begin_fall_ix = p.spacing - p.windowOverlap; - const size_t end_real_symbol_ox = p.symSize; - end_real_symbol_ix = end of symbol - const size_t end_fall_ox = p.symSize + p.windowOverlap; - const size_t end_fall_ix = p.spacing + p.windowOverlap; - */ - - ssize_t ox = start_rise_ox; - size_t ix = start_rise_ix; - - for (size_t i = 0; ix < end_rise_ix; i++) { - out[ox] += in[ix] * p.window.at(i); - ix++; - ox++; - } - assert(ox == end_rise_ox); - - const size_t remaining_prefix_length = end_cyclic_prefix_ox - end_rise_ox; - memcpy( &out[ox], &in[ix], - remaining_prefix_length * sizeof(T)); - ox += remaining_prefix_length; - assert(ox == end_cyclic_prefix_ox); - ix = 0; - - const bool last_symbol = (sym_ix + 1 >= p.nbSymbols); - if (last_symbol) { - // No windowing at all at end - memcpy(&out[ox], &in[ix], p.spacing * sizeof(T)); - ox += p.spacing; + in += p.spacing; + out += p.nullSize; + // out is now pointing to the proper end of symbol. There are + // windowOverlap samples ahead that were already written. } - else { - // Copy the middle part of the symbol, p.windowOverlap samples - // short of the end. - memcpy( &out[ox], - &in[ix], - (p.spacing - p.windowOverlap) * sizeof(T)); - ox += p.spacing - p.windowOverlap; - ix += p.spacing - p.windowOverlap; - assert(ox == (ssize_t)(p.symSize - p.windowOverlap)); - - // Apply window from 1 to 0.5 for the end of the symbol - for (size_t i = 0; ox < (ssize_t)p.symSize; i++) { - out[ox] = in[ix] * p.window[2*p.windowOverlap - (i+1)]; - ox++; + + // Data symbols + for (size_t sym_ix = 0; sym_ix < p.nbSymbols; sym_ix++) { + /* _ix variables are indices into in[], _ox variables are + * indices for out[] */ + const ssize_t start_rise_ox = -p.windowOverlap; + const size_t start_rise_ix = 2 * p.spacing - p.symSize - p.windowOverlap; + /* + const size_t start_real_symbol_ox = 0; + const size_t start_real_symbol_ix = 2 * p.spacing - p.symSize; + */ + const ssize_t end_rise_ox = p.windowOverlap; + const size_t end_rise_ix = 2 * p.spacing - p.symSize + p.windowOverlap; + const ssize_t end_cyclic_prefix_ox = p.symSize - p.spacing; + /* end_cyclic_prefix_ix = end of symbol + const size_t begin_fall_ox = p.symSize - p.windowOverlap; + const size_t begin_fall_ix = p.spacing - p.windowOverlap; + const size_t end_real_symbol_ox = p.symSize; + end_real_symbol_ix = end of symbol + const size_t end_fall_ox = p.symSize + p.windowOverlap; + const size_t end_fall_ix = p.spacing + p.windowOverlap; + */ + + ssize_t ox = start_rise_ox; + size_t ix = start_rise_ix; + + for (size_t i = 0; ix < end_rise_ix; i++) { + out[ox] += in[ix] * p.window.at(i); ix++; + ox++; } - assert(ix == p.spacing); + assert(ox == end_rise_ox); + const size_t remaining_prefix_length = end_cyclic_prefix_ox - end_rise_ox; + memcpy( &out[ox], &in[ix], + remaining_prefix_length * sizeof(T)); + ox += remaining_prefix_length; + assert(ox == end_cyclic_prefix_ox); ix = 0; - // Cyclic suffix, with window from 0.5 to 0 - for (size_t i = 0; ox < (ssize_t)(p.symSize + p.windowOverlap); i++) { - out[ox] = in[ix] * p.window[p.windowOverlap - (i+1)]; - ox++; - ix++; + + const bool last_symbol = (sym_ix + 1 >= p.nbSymbols); + if (last_symbol) { + // No windowing at all at end + memcpy(&out[ox], &in[ix], p.spacing * sizeof(T)); + ox += p.spacing; + } + else { + // Copy the middle part of the symbol, p.windowOverlap samples + // short of the end. + memcpy( &out[ox], + &in[ix], + (p.spacing - p.windowOverlap) * sizeof(T)); + ox += p.spacing - p.windowOverlap; + ix += p.spacing - p.windowOverlap; + assert(ox == (ssize_t)(p.symSize - p.windowOverlap)); + + // Apply window from 1 to 0.5 for the end of the symbol + for (size_t i = 0; ox < (ssize_t)p.symSize; i++) { + out[ox] = in[ix] * p.window[2*p.windowOverlap - (i+1)]; + ox++; + ix++; + } + assert(ix == p.spacing); + + ix = 0; + // Cyclic suffix, with window from 0.5 to 0 + for (size_t i = 0; ox < (ssize_t)(p.symSize + p.windowOverlap); i++) { + out[ox] = in[ix] * p.window[p.windowOverlap - (i+1)]; + ox++; + ix++; + } + + assert(ix == p.windowOverlap); } - assert(ix == p.windowOverlap); + out += p.symSize; + in += p.spacing; + // out is now pointing to the proper end of symbol. There are + // windowOverlap samples ahead that were already written. } - - out += p.symSize; - in += p.spacing; - // out is now pointing to the proper end of symbol. There are - // windowOverlap samples ahead that were already written. } - } } + else { + throw std::runtime_error("fixed-point doesn't support window overlap"); + } + } else { // Handle Null symbol separately because it is longer // end - (nullSize - spacing) = 2 * spacing - nullSize @@ -272,7 +277,8 @@ int do_process(const GuardIntervalInserter::Params& p, Buffer* const dataIn, Buf } } - return sizeIn; + const auto sizeOut = dataOut->getLength(); + return sizeOut; } int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut) diff --git a/src/OfdmGenerator.cpp b/src/OfdmGenerator.cpp index cfdf31d..32b5f76 100644 --- a/src/OfdmGenerator.cpp +++ b/src/OfdmGenerator.cpp @@ -219,7 +219,6 @@ int OfdmGeneratorCF32::process(Buffer* const dataIn, Buffer* dataOut) memcpy(&myFftIn[myNegDst], &in[myNegSrc], myNegSize * sizeof(FFTW_TYPE)); - if (myCfr) { reference.resize(mySpacing); memcpy(reinterpret_cast(reference.data()), @@ -228,7 +227,6 @@ int OfdmGeneratorCF32::process(Buffer* const dataIn, Buffer* dataOut) fftwf_execute(myFftPlan); // IFFT from myFftIn to myFftOut - if (myCfr) { complexf *symbol = reinterpret_cast(myFftOut); myPaprBeforeCFR.process_block(symbol, mySpacing); @@ -634,7 +632,7 @@ OfdmGeneratorDEXTER::OfdmGeneratorDEXTER(size_t nbSymbols, PDEBUG(" myZeroSize: %u\n", myZeroSize); const size_t nbytes_in = mySpacing * sizeof(complexfix); - const size_t nbytes_out = mySpacing * 2 * sizeof(int32_t); + const size_t nbytes_out = mySpacing * sizeof(complexfix_wide); #define IIO_ENSURE(expr, err) { \ if (!(expr)) { \ @@ -721,14 +719,14 @@ int OfdmGeneratorDEXTER::process(Buffer* const dataIn, Buffer* dataOut) throw std::runtime_error("OfdmGenerator::process incorrect iio buffer size!"); } - complexfix *fft_in = reinterpret_cast(iio_buffer_start(m_buf_in)); + for (size_t i = 0; i < myNbSymbols; i++) { + complexfix *fft_in = reinterpret_cast(iio_buffer_start(m_buf_in)); - fft_in[0] = static_cast(0); - for (size_t i = 0; i < myZeroSize; i++) { - fft_in[myZeroDst + i] = static_cast(0); - } + fft_in[0] = static_cast(0); + for (size_t i = 0; i < myZeroSize; i++) { + fft_in[myZeroDst + i] = static_cast(0); + } - for (size_t i = 0; i < myNbSymbols; i++) { /* For TM I this is: * ZeroDst=769 ZeroSize=511 * PosSrc=0 PosDst=1 PosSize=768 diff --git a/src/PhaseReference.cpp b/src/PhaseReference.cpp index e2fb9a9..71dec87 100644 --- a/src/PhaseReference.cpp +++ b/src/PhaseReference.cpp @@ -176,10 +176,10 @@ int PhaseReference::process(Buffer* dataOut) PDEBUG("PhaseReference::process(dataOut: %p)\n", dataOut); if (d_fixedPoint) { - dataOut->setData(&d_phaseRefFixed.dataIn[0], d_carriers * sizeof(complexfix)); + dataOut->setData(d_phaseRefFixed.dataIn.data(), d_carriers * sizeof(complexfix)); } else { - dataOut->setData(&d_phaseRefCF32.dataIn[0], d_carriers * sizeof(complexf)); + dataOut->setData(d_phaseRefCF32.dataIn.data(), d_carriers * sizeof(complexf)); } return 1; diff --git a/src/SignalMultiplexer.cpp b/src/SignalMultiplexer.cpp index 8ecbe78..d4955d0 100644 --- a/src/SignalMultiplexer.cpp +++ b/src/SignalMultiplexer.cpp @@ -22,24 +22,20 @@ #include "SignalMultiplexer.h" #include "PcDebug.h" -#include -#include +#include #include -#include SignalMultiplexer::SignalMultiplexer() : ModMux() { - PDEBUG("SignalMultiplexer::SignalMultiplexer(%zu) @ %p\n", framesize, this); - + PDEBUG("SignalMultiplexer::SignalMultiplexer() @ %p\n", this); } SignalMultiplexer::~SignalMultiplexer() { PDEBUG("SignalMultiplexer::~SignalMultiplexer() @ %p\n", this); - } -- cgit v1.2.3