diff options
Diffstat (limited to 'src')
45 files changed, 1368 insertions, 731 deletions
diff --git a/src/Buffer.h b/src/Buffer.h index af52e93..2c2a65e 100644 --- a/src/Buffer.h +++ b/src/Buffer.h @@ -33,9 +33,17 @@ #include <vector> #include <memory> +#include <complex> +#include "fpm/fixed.hpp" + +typedef std::complex<float> complexf; + +using fixed_16 = fpm::fixed<std::int16_t, std::int32_t, 14>; +typedef std::complex<fixed_16> complexfix; +typedef std::complex<fpm::fixed_16_16> complexfix_wide; /* Buffer is a container for a byte array, which is memory-aligned - * to 32 bytes for SSE performance. + * to 32 bytes for SIMD performance. * * The allocation/freeing of the data is handled internally. */ diff --git a/src/CicEqualizer.h b/src/CicEqualizer.h index 792da02..4510d0c 100644 --- a/src/CicEqualizer.h +++ b/src/CicEqualizer.h @@ -25,18 +25,10 @@ # include <config.h> #endif - #include "ModPlugin.h" #include <vector> #include <sys/types.h> -#include <complex> -#ifdef __SSE__ -# include <xmmintrin.h> -#endif - - -typedef std::complex<float> complexf; class CicEqualizer : public ModCodec { diff --git a/src/ConfigParser.cpp b/src/ConfigParser.cpp index fb2c1a1..c92a520 100644 --- a/src/ConfigParser.cpp +++ b/src/ConfigParser.cpp @@ -63,6 +63,27 @@ static GainMode parse_gainmode(const std::string &gainMode_setting) throw std::runtime_error("Configuration error"); } +static FFTEngine parse_fft_engine(const std::string &fft_engine_setting) +{ + string fft_engine_minuscule(fft_engine_setting); + std::transform(fft_engine_minuscule.begin(), fft_engine_minuscule.end(), + fft_engine_minuscule.begin(), ::tolower); + + if (fft_engine_minuscule == "fftw") { + return FFTEngine::FFTW; + } + else if (fft_engine_minuscule == "kiss") { + return FFTEngine::KISS; + } + else if (fft_engine_minuscule == "dexter") { + return FFTEngine::DEXTER; + } + + cerr << "Modulator fft_engine setting '" << fft_engine_setting << + "' not recognised." << endl; + throw std::runtime_error("Configuration error"); +} + static void parse_configfile( const std::string& configuration_file, mod_settings_t& mod_settings) @@ -156,6 +177,9 @@ static void parse_configfile( mod_settings.showProcessTime); // modulator parameters: + const string fft_engine_setting = pt.Get("modulator.fft_engine", "fftw"); + mod_settings.fftEngine = parse_fft_engine(fft_engine_setting); + const string gainMode_setting = pt.Get("modulator.gainmode", "var"); mod_settings.gainMode = parse_gainmode(gainMode_setting); mod_settings.gainmodeVariance = pt.GetReal("modulator.normalise_variance", diff --git a/src/ConfigParser.h b/src/ConfigParser.h index ae76dee..3bacfdd 100644 --- a/src/ConfigParser.h +++ b/src/ConfigParser.h @@ -36,6 +36,12 @@ #include "TII.h" #include "output/SDRDevice.h" +enum class FFTEngine { + FFTW, // floating point in software + KISS, // fixed-point in software + DEXTER // fixed-point in FPGA +}; + struct mod_settings_t { std::string startupCheck; @@ -51,6 +57,8 @@ struct mod_settings_t { bool useLimeOutput = false; bool useBladeRFOutput = false; + FFTEngine fftEngine = FFTEngine::FFTW; + size_t outputRate = 2048000; size_t clockRate = 0; unsigned dabMode = 1; diff --git a/src/DabMod.cpp b/src/DabMod.cpp index 3b072c1..7866818 100644 --- a/src/DabMod.cpp +++ b/src/DabMod.cpp @@ -31,10 +31,8 @@ #endif #include <memory> -#include <complex> #include <string> #include <iostream> -#include <iomanip> #include <cstdlib> #include <stdexcept> #include <cstdio> @@ -51,7 +49,6 @@ #include "Utils.h" #include "Log.h" #include "DabModulator.h" -#include "InputMemory.h" #include "OutputFile.h" #include "FormatConverter.h" #include "FrameMultiplexer.h" @@ -75,16 +72,16 @@ * samples can have peaks up to about 48000. The value of 50000 * should guarantee that with a digital gain of 1.0, UHD never clips * our samples. + * + * This only applies when fixed_point == false. */ static const float normalise_factor = 50000.0f; -//Empirical normalisation factors used to normalise the samples to amplitude 1. +// Empirical normalisation factors used to normalise the samples to amplitude 1. static const float normalise_factor_file_fix = 81000.0f; static const float normalise_factor_file_var = 46000.0f; static const float normalise_factor_file_max = 46000.0f; -typedef std::complex<float> complexf; - using namespace std; volatile sig_atomic_t running = 1; @@ -255,7 +252,11 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s) shared_ptr<ModOutput> output; if (s.useFileOutput) { - if (s.fileOutputFormat == "complexf") { + if (s.fftEngine != FFTEngine::FFTW) { + // Intentionally ignore fileOutputFormat, it is always sc16 + output = make_shared<OutputFile>(s.outputName, s.fileOutputShowMetadata); + } + else if (s.fileOutputFormat == "complexf") { output = make_shared<OutputFile>(s.outputName, s.fileOutputShowMetadata); } else if (s.fileOutputFormat == "complexf_normalised") { @@ -291,6 +292,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s) else if (s.useUHDOutput) { s.normalise = 1.0f / normalise_factor; s.sdr_device_config.sampleRate = s.outputRate; + s.sdr_device_config.fixedPoint = (s.fftEngine != FFTEngine::FFTW); auto uhddevice = make_shared<Output::UHD>(s.sdr_device_config); output = make_shared<Output::SDR>(s.sdr_device_config, uhddevice); rcs.enrol((Output::SDR*)output.get()); @@ -301,6 +303,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s) /* We normalise the same way as for the UHD output */ s.normalise = 1.0f / normalise_factor; s.sdr_device_config.sampleRate = s.outputRate; + if (s.fftEngine != FFTEngine::FFTW) throw runtime_error("soapy fixed_point unsupported"); auto soapydevice = make_shared<Output::Soapy>(s.sdr_device_config); output = make_shared<Output::SDR>(s.sdr_device_config, soapydevice); rcs.enrol((Output::SDR*)output.get()); @@ -320,6 +323,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s) else if (s.useLimeOutput) { /* We normalise the same way as for the UHD output */ s.normalise = 1.0f / normalise_factor; + if (s.fftEngine != FFTEngine::FFTW) throw runtime_error("limesdr fixed_point unsupported"); s.sdr_device_config.sampleRate = s.outputRate; auto limedevice = make_shared<Output::Lime>(s.sdr_device_config); output = make_shared<Output::SDR>(s.sdr_device_config, limedevice); @@ -330,6 +334,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s) else if (s.useBladeRFOutput) { /* We normalise specifically for the BladeRF output : range [-2048; 2047] */ s.normalise = 2047.0f / normalise_factor; + if (s.fftEngine != FFTEngine::FFTW) throw runtime_error("bladerf fixed_point unsupported"); s.sdr_device_config.sampleRate = s.outputRate; auto bladerfdevice = make_shared<Output::BladeRF>(s.sdr_device_config); output = make_shared<Output::SDR>(s.sdr_device_config, bladerfdevice); @@ -420,7 +425,8 @@ int launch_modulator(int argc, char* argv[]) ModulatorData m; rcs.enrol(&m); - { + // Neither KISS FFT used for fixedpoint nor the FFT Accelerator used for DEXTER need planning. + if (mod_settings.fftEngine == FFTEngine::FFTW) { // This is mostly useful on ARM systems where FFTW planning takes some time. If we do it here // it will be done before the modulator starts up etiLog.level(debug) << "Running FFTW planning..."; @@ -442,7 +448,14 @@ int launch_modulator(int argc, char* argv[]) } std::string output_format; - if (mod_settings.useFileOutput and + if (mod_settings.fftEngine == FFTEngine::KISS) { + output_format = ""; //fixed point is native sc16, no converter needed + } + else if (mod_settings.fftEngine == FFTEngine::DEXTER) { + output_format = "s16"; // FPGA FFT Engine outputs s32 + } + // else FFTW, i.e. floating point + else if (mod_settings.useFileOutput and (mod_settings.fileOutputFormat == "s8" or mod_settings.fileOutputFormat == "u8" or mod_settings.fileOutputFormat == "s16")) { diff --git a/src/DabModulator.cpp b/src/DabModulator.cpp index 4a29132..5f7aaf6 100644 --- a/src/DabModulator.cpp +++ b/src/DabModulator.cpp @@ -3,7 +3,7 @@ Her Majesty the Queen in Right of Canada (Communications Research Center Canada) - Copyright (C) 2023 + Copyright (C) 2024 Matthias P. Braendli, matthias.braendli@mpb.li http://opendigitalradio.org @@ -54,7 +54,6 @@ #include "SignalMultiplexer.h" #include "TII.h" #include "TimeInterleaver.h" -#include "TimestampDecoder.h" using namespace std; @@ -142,14 +141,15 @@ int DabModulator::process(Buffer* dataOut) auto cifMux = make_shared<FrameMultiplexer>(m_etiSource); auto cifPart = make_shared<BlockPartitioner>(mode); - auto cifMap = make_shared<QpskSymbolMapper>(m_nbCarriers); - auto cifRef = make_shared<PhaseReference>(mode); - auto cifFreq = make_shared<FrequencyInterleaver>(mode); - auto cifDiff = make_shared<DifferentialModulator>(m_nbCarriers); + const bool fixedPoint = m_settings.fftEngine != FFTEngine::FFTW; + auto cifMap = make_shared<QpskSymbolMapper>(m_nbCarriers, fixedPoint); + auto cifRef = make_shared<PhaseReference>(mode, fixedPoint); + auto cifFreq = make_shared<FrequencyInterleaver>(mode, fixedPoint); + auto cifDiff = make_shared<DifferentialModulator>(m_nbCarriers, fixedPoint); - auto cifNull = make_shared<NullSymbol>(m_nbCarriers); - auto cifSig = make_shared<SignalMultiplexer>( - (1 + m_nbSymbols) * m_nbCarriers * sizeof(complexf)); + auto cifNull = make_shared<NullSymbol>(m_nbCarriers, + fixedPoint ? sizeof(complexfix) : sizeof(complexf)); + auto cifSig = make_shared<SignalMultiplexer>(); // TODO this needs a review bool useCicEq = false; @@ -180,46 +180,79 @@ int DabModulator::process(Buffer* dataOut) try { tii = make_shared<TII>( m_settings.dabMode, - m_settings.tiiConfig); + m_settings.tiiConfig, + fixedPoint); rcs.enrol(tii.get()); - tiiRef = make_shared<PhaseReference>(mode); + tiiRef = make_shared<PhaseReference>(mode, fixedPoint); } catch (const TIIError& e) { etiLog.level(error) << "Could not initialise TII: " << e.what(); } - auto cifOfdm = make_shared<OfdmGenerator>( - (1 + m_nbSymbols), - m_nbCarriers, - m_spacing, - m_settings.enableCfr, - m_settings.cfrClip, - m_settings.cfrErrorClip); + shared_ptr<ModPlugin> cifOfdm; + + switch (m_settings.fftEngine) { + case FFTEngine::FFTW: + { + auto ofdm = make_shared<OfdmGeneratorCF32>( + (1 + m_nbSymbols), + m_nbCarriers, + m_spacing, + m_settings.enableCfr, + m_settings.cfrClip, + m_settings.cfrErrorClip); + rcs.enrol(ofdm.get()); + cifOfdm = ofdm; + } + break; + case FFTEngine::KISS: + cifOfdm = make_shared<OfdmGeneratorFixed>( + (1 + m_nbSymbols), + m_nbCarriers, + m_spacing); + break; + case FFTEngine::DEXTER: +#if defined(HAVE_DEXTER) + cifOfdm = make_shared<OfdmGeneratorDEXTER>( + (1 + m_nbSymbols), + m_nbCarriers, + m_spacing); +#else + throw std::runtime_error("Cannot use DEXTER fft engine without --enable-dexter"); +#endif + break; + } - rcs.enrol(cifOfdm.get()); + shared_ptr<GainControl> cifGain; - auto cifGain = make_shared<GainControl>( - m_spacing, - m_settings.gainMode, - m_settings.digitalgain, - m_settings.normalise, - m_settings.gainmodeVariance); + if (not fixedPoint) { + cifGain = make_shared<GainControl>( + m_spacing, + m_settings.gainMode, + m_settings.digitalgain, + m_settings.normalise, + m_settings.gainmodeVariance); - rcs.enrol(cifGain.get()); + rcs.enrol(cifGain.get()); + } auto cifGuard = make_shared<GuardIntervalInserter>( m_nbSymbols, m_spacing, m_nullSize, m_symSize, - m_settings.ofdmWindowOverlap); + m_settings.ofdmWindowOverlap, m_settings.fftEngine); rcs.enrol(cifGuard.get()); shared_ptr<FIRFilter> cifFilter; if (not m_settings.filterTapsFilename.empty()) { + if (fixedPoint) throw std::runtime_error("fixed point doesn't support fir filter"); + cifFilter = make_shared<FIRFilter>(m_settings.filterTapsFilename); rcs.enrol(cifFilter.get()); } shared_ptr<MemlessPoly> cifPoly; if (not m_settings.polyCoefFilename.empty()) { + if (fixedPoint) throw std::runtime_error("fixed point doesn't support predistortion"); + cifPoly = make_shared<MemlessPoly>(m_settings.polyCoefFilename, m_settings.polyNumThreads); rcs.enrol(cifPoly.get()); @@ -227,15 +260,21 @@ int DabModulator::process(Buffer* dataOut) shared_ptr<Resampler> cifRes; if (m_settings.outputRate != 2048000) { + if (fixedPoint) throw std::runtime_error("fixed point doesn't support resampler"); + cifRes = make_shared<Resampler>( 2048000, m_settings.outputRate, m_spacing); } - if (not m_format.empty()) { - m_formatConverter = make_shared<FormatConverter>(m_format); + if (m_settings.fftEngine == FFTEngine::FFTW and not m_format.empty()) { + m_formatConverter = make_shared<FormatConverter>(false, m_format); + } + else if (m_settings.fftEngine == FFTEngine::DEXTER) { + m_formatConverter = make_shared<FormatConverter>(true, m_format); } + // KISS is already in s16 m_output = make_shared<OutputMemory>(dataOut); diff --git a/src/DabModulator.h b/src/DabModulator.h index 093a782..82782cd 100644 --- a/src/DabModulator.h +++ b/src/DabModulator.h @@ -40,12 +40,8 @@ #include "EtiReader.h" #include "Flowgraph.h" #include "FormatConverter.h" -#include "GainControl.h" #include "OutputMemory.h" #include "RemoteControl.h" -#include "Log.h" -#include "TII.h" - class DabModulator : public ModInput, public ModMetadata, public RemoteControllable { diff --git a/src/DifferentialModulator.cpp b/src/DifferentialModulator.cpp index 97a7998..21b4c3e 100644 --- a/src/DifferentialModulator.cpp +++ b/src/DifferentialModulator.cpp @@ -22,17 +22,14 @@ #include "DifferentialModulator.h" #include "PcDebug.h" -#include <stdio.h> +#include <cstdio> #include <stdexcept> -#include <complex> -#include <string.h> +#include <cstring> -typedef std::complex<float> complexf; - - -DifferentialModulator::DifferentialModulator(size_t carriers) : +DifferentialModulator::DifferentialModulator(size_t carriers, bool fixedPoint) : ModMux(), - d_carriers(carriers) + m_carriers(carriers), + m_fixedPoint(fixedPoint) { PDEBUG("DifferentialModulator::DifferentialModulator(%zu)\n", carriers); @@ -42,10 +39,42 @@ DifferentialModulator::DifferentialModulator(size_t carriers) : DifferentialModulator::~DifferentialModulator() { PDEBUG("DifferentialModulator::~DifferentialModulator()\n"); - } +template<typename T> +void do_process(size_t carriers, const std::vector<Buffer*>& dataIn, Buffer* dataOut) +{ + size_t phaseSize = dataIn[0]->getLength() / sizeof(T); + size_t dataSize = dataIn[1]->getLength() / sizeof(T); + dataOut->setLength((phaseSize + dataSize) * sizeof(T)); + + const T* phase = reinterpret_cast<const T*>(dataIn[0]->getData()); + const T* in = reinterpret_cast<const T*>(dataIn[1]->getData()); + T* out = reinterpret_cast<T*>(dataOut->getData()); + + if (phaseSize != carriers) { + throw std::runtime_error( + "DifferentialModulator::process input phase size not valid!"); + } + if (dataSize % carriers != 0) { + throw std::runtime_error( + "DifferentialModulator::process input data size not valid!"); + } + + memcpy(dataOut->getData(), phase, phaseSize * sizeof(T)); + for (size_t i = 0; i < dataSize; i += carriers) { + for (size_t j = 0; j < carriers; j += 4) { + out[carriers + j] = out[j] * in[j]; + out[carriers + j + 1] = out[j + 1] * in[j + 1]; + out[carriers + j + 2] = out[j + 2] * in[j + 2]; + out[carriers + j + 3] = out[j + 3] * in[j + 3]; + } + in += carriers; + out += carriers; + } +} + // dataIn[0] -> phase reference // dataIn[1] -> data symbols int DifferentialModulator::process(std::vector<Buffer*> dataIn, Buffer* dataOut) @@ -67,33 +96,11 @@ int DifferentialModulator::process(std::vector<Buffer*> dataIn, Buffer* dataOut) "DifferentialModulator::process nb of input streams not 2!"); } - size_t phaseSize = dataIn[0]->getLength() / sizeof(complexf); - size_t dataSize = dataIn[1]->getLength() / sizeof(complexf); - dataOut->setLength((phaseSize + dataSize) * sizeof(complexf)); - - const complexf* phase = reinterpret_cast<const complexf*>(dataIn[0]->getData()); - const complexf* in = reinterpret_cast<const complexf*>(dataIn[1]->getData()); - complexf* out = reinterpret_cast<complexf*>(dataOut->getData()); - - if (phaseSize != d_carriers) { - throw std::runtime_error( - "DifferentialModulator::process input phase size not valid!"); - } - if (dataSize % d_carriers != 0) { - throw std::runtime_error( - "DifferentialModulator::process input data size not valid!"); + if (m_fixedPoint) { + do_process<complexfix>(m_carriers, dataIn, dataOut); } - - memcpy(dataOut->getData(), phase, phaseSize * sizeof(complexf)); - for (size_t i = 0; i < dataSize; i += d_carriers) { - for (size_t j = 0; j < d_carriers; j += 4) { - out[d_carriers + j] = out[j] * in[j]; - out[d_carriers + j + 1] = out[j + 1] * in[j + 1]; - out[d_carriers + j + 2] = out[j + 2] * in[j + 2]; - out[d_carriers + j + 3] = out[j + 3] * in[j + 3]; - } - in += d_carriers; - out += d_carriers; + else { + do_process<complexf>(m_carriers, dataIn, dataOut); } return dataOut->getLength(); diff --git a/src/DifferentialModulator.h b/src/DifferentialModulator.h index b26ea8b..9cc5081 100644 --- a/src/DifferentialModulator.h +++ b/src/DifferentialModulator.h @@ -35,7 +35,7 @@ class DifferentialModulator : public ModMux { public: - DifferentialModulator(size_t carriers); + DifferentialModulator(size_t carriers, bool fixedPoint); virtual ~DifferentialModulator(); DifferentialModulator(const DifferentialModulator&); DifferentialModulator& operator=(const DifferentialModulator&); @@ -45,6 +45,7 @@ public: const char* name() { return "DifferentialModulator"; } protected: - size_t d_carriers; + size_t m_carriers; + size_t m_fixedPoint; }; diff --git a/src/FIRFilter.h b/src/FIRFilter.h index a4effa1..2d8fba9 100644 --- a/src/FIRFilter.h +++ b/src/FIRFilter.h @@ -33,21 +33,14 @@ #include "RemoteControl.h" #include "ModPlugin.h" -#include "PcDebug.h" #include <sys/types.h> -#include <complex> -#include <thread> #include <vector> -#include <time.h> #include <cstdio> #include <string> -#include <memory> #define FIRFILTER_PIPELINE_DELAY 1 -typedef std::complex<float> complexf; - class FIRFilter : public PipelinedModCodec, public RemoteControllable { public: diff --git a/src/Flowgraph.cpp b/src/Flowgraph.cpp index 3d4cdcc..339e326 100644 --- a/src/Flowgraph.cpp +++ b/src/Flowgraph.cpp @@ -27,12 +27,10 @@ #include "Flowgraph.h" #include "PcDebug.h" #include "Log.h" -#include <string> #include <memory> #include <algorithm> #include <sstream> #include <sys/types.h> -#include <stdexcept> #include <assert.h> #include <sys/time.h> @@ -254,15 +252,15 @@ Flowgraph::~Flowgraph() char node_time_sz[1024] = {}; for (const auto &node : nodes) { - snprintf(node_time_sz, 1023, " %30s: %10lu us (%2.2f %%)\n", + snprintf(node_time_sz, 1023, " %30s: %10lld us (%2.2f %%)\n", node->plugin()->name(), - node->processTime(), + (long long)node->processTime(), node->processTime() * 100.0 / myProcessTime); ss << node_time_sz; } - snprintf(node_time_sz, 1023, " %30s: %10lu us (100.00 %%)\n", "total", - myProcessTime); + snprintf(node_time_sz, 1023, " %30s: %10lld us (100.00 %%)\n", "total", + (long long)myProcessTime); ss << node_time_sz; etiLog.level(debug) << ss.str(); diff --git a/src/FormatConverter.cpp b/src/FormatConverter.cpp index e8e76ed..517f26e 100644 --- a/src/FormatConverter.cpp +++ b/src/FormatConverter.cpp @@ -28,17 +28,37 @@ #include "FormatConverter.h" #include "PcDebug.h" +#include "Log.h" -#include <sys/types.h> -#include <string.h> #include <stdexcept> +#include <cstring> #include <assert.h> +#include <sys/types.h> +#if defined(__ARM_NEON) +#include <arm_neon.h> +#endif -FormatConverter::FormatConverter(const std::string& format) : +FormatConverter::FormatConverter(bool input_is_complexfix_wide, const std::string& format_out) : ModCodec(), - m_format(format) + m_input_complexfix_wide(input_is_complexfix_wide), + m_format_out(format_out) { } +FormatConverter::~FormatConverter() +{ + if ( +#if defined(__ARM_NEON) + not m_input_complexfix_wide +#else + true +#endif + ) { + etiLog.level(debug) << "FormatConverter: " << + m_num_clipped_samples.load() << " clipped"; + } +} + + /* Expect the input samples to be in the correct range for the required format */ int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut) { @@ -47,71 +67,113 @@ int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut) size_t num_clipped_samples = 0; - size_t sizeIn = dataIn->getLength() / sizeof(float); - float* in = reinterpret_cast<float*>(dataIn->getData()); + if (m_input_complexfix_wide) { + size_t sizeIn = dataIn->getLength() / sizeof(int32_t); + if (m_format_out == "s16") { + dataOut->setLength(sizeIn * sizeof(int16_t)); + const int32_t *in = reinterpret_cast<int32_t*>(dataIn->getData()); + int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData()); - if (m_format == "s16") { - dataOut->setLength(sizeIn * sizeof(int16_t)); - int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData()); + constexpr int shift = 6; - for (size_t i = 0; i < sizeIn; i++) { - if (in[i] < INT16_MIN) { - out[i] = INT16_MIN; - num_clipped_samples++; +#if defined(__ARM_NEON) + if (sizeIn % 4 != 0) { + throw std::logic_error("Unexpected length not multiple of 4"); } - else if (in[i] > INT16_MAX) { - out[i] = INT16_MAX; - num_clipped_samples++; + + for (size_t i = 0; i < sizeIn; i += 4) { + int32x4_t input_vec = vld1q_s32(&in[i]); + // Apply shift right, saturate on conversion to int16_t + int16x4_t output_vec = vqshrn_n_s32(input_vec, shift); + vst1_s16(&out[i], output_vec); } - else { - out[i] = in[i]; +#else + for (size_t i = 0; i < sizeIn; i++) { + const int32_t val = in[i] >> shift; + if (val < INT16_MIN) { + out[i] = INT16_MIN; + num_clipped_samples++; + } + else if (val > INT16_MAX) { + out[i] = INT16_MAX; + num_clipped_samples++; + } + else { + out[i] = val; + } } +#endif } - } - else if (m_format == "u8") { - dataOut->setLength(sizeIn * sizeof(int8_t)); - uint8_t* out = reinterpret_cast<uint8_t*>(dataOut->getData()); - - for (size_t i = 0; i < sizeIn; i++) { - const auto samp = in[i] + 128.0f; - if (samp < 0) { - out[i] = 0; - num_clipped_samples++; - } - else if (samp > UINT8_MAX) { - out[i] = UINT8_MAX; - num_clipped_samples++; - } - else { - out[i] = samp; - } - + else { + throw std::runtime_error("FormatConverter: Invalid fix format " + m_format_out); } } - else if (m_format == "s8") { - dataOut->setLength(sizeIn * sizeof(int8_t)); - int8_t* out = reinterpret_cast<int8_t*>(dataOut->getData()); - - for (size_t i = 0; i < sizeIn; i++) { - if (in[i] < INT8_MIN) { - out[i] = INT8_MIN; - num_clipped_samples++; + else { + size_t sizeIn = dataIn->getLength() / sizeof(float); + const float* in = reinterpret_cast<float*>(dataIn->getData()); + + if (m_format_out == "s16") { + dataOut->setLength(sizeIn * sizeof(int16_t)); + int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData()); + + for (size_t i = 0; i < sizeIn; i++) { + if (in[i] < INT16_MIN) { + out[i] = INT16_MIN; + num_clipped_samples++; + } + else if (in[i] > INT16_MAX) { + out[i] = INT16_MAX; + num_clipped_samples++; + } + else { + out[i] = in[i]; + } } - else if (in[i] > INT8_MAX) { - out[i] = INT8_MAX; - num_clipped_samples++; + } + else if (m_format_out == "u8") { + dataOut->setLength(sizeIn * sizeof(int8_t)); + uint8_t* out = reinterpret_cast<uint8_t*>(dataOut->getData()); + + for (size_t i = 0; i < sizeIn; i++) { + const auto samp = in[i] + 128.0f; + if (samp < 0) { + out[i] = 0; + num_clipped_samples++; + } + else if (samp > UINT8_MAX) { + out[i] = UINT8_MAX; + num_clipped_samples++; + } + else { + out[i] = samp; + } + } - else { - out[i] = in[i]; + } + else if (m_format_out == "s8") { + dataOut->setLength(sizeIn * sizeof(int8_t)); + int8_t* out = reinterpret_cast<int8_t*>(dataOut->getData()); + + for (size_t i = 0; i < sizeIn; i++) { + if (in[i] < INT8_MIN) { + out[i] = INT8_MIN; + num_clipped_samples++; + } + else if (in[i] > INT8_MAX) { + out[i] = INT8_MAX; + num_clipped_samples++; + } + else { + out[i] = in[i]; + } } } - } - else { - throw std::runtime_error("FormatConverter: Invalid format " + m_format); + else { + throw std::runtime_error("FormatConverter: Invalid format " + m_format_out); + } } m_num_clipped_samples.store(num_clipped_samples); - return dataOut->getLength(); } diff --git a/src/FormatConverter.h b/src/FormatConverter.h index 05511c0..1ed2283 100644 --- a/src/FormatConverter.h +++ b/src/FormatConverter.h @@ -33,18 +33,19 @@ #endif #include "ModPlugin.h" -#include <complex> #include <atomic> #include <string> -#include <cstdint> class FormatConverter : public ModCodec { public: static size_t get_format_size(const std::string& format); - // Allowed formats: s8, u8 and s16 - FormatConverter(const std::string& format); + // floating-point input allows output formats: s8, u8 and s16 + // complexfix_wide input allows output formats: s16 + // complexfix input is already in s16, and needs no converter + FormatConverter(bool input_is_complexfix_wide, const std::string& format_out); + virtual ~FormatConverter(); int process(Buffer* const dataIn, Buffer* dataOut); const char* name(); @@ -52,7 +53,8 @@ class FormatConverter : public ModCodec size_t get_num_clipped_samples() const; private: - std::string m_format; + bool m_input_complexfix_wide; + std::string m_format_out; std::atomic<size_t> m_num_clipped_samples = 0; }; diff --git a/src/FrameMultiplexer.cpp b/src/FrameMultiplexer.cpp index e893120..ebd8b76 100644 --- a/src/FrameMultiplexer.cpp +++ b/src/FrameMultiplexer.cpp @@ -25,17 +25,11 @@ */ #include "FrameMultiplexer.h" -#include "PcDebug.h" -#include <stdio.h> #include <string> -#include <stdexcept> -#include <complex> -#include <memory> -#include <assert.h> -#include <string.h> - -typedef std::complex<float> complexf; +#include <cstdio> +#include <cassert> +#include <cstring> FrameMultiplexer::FrameMultiplexer( const EtiSource& etiSource) : diff --git a/src/FrequencyInterleaver.cpp b/src/FrequencyInterleaver.cpp index e76d525..6f36dcb 100644 --- a/src/FrequencyInterleaver.cpp +++ b/src/FrequencyInterleaver.cpp @@ -22,17 +22,15 @@ #include "FrequencyInterleaver.h" #include "PcDebug.h" -#include <stdio.h> #include <stdexcept> #include <string> -#include <stdlib.h> -#include <complex> +#include <cstdio> +#include <cstdlib> -typedef std::complex<float> complexf; - -FrequencyInterleaver::FrequencyInterleaver(size_t mode) : - ModCodec() +FrequencyInterleaver::FrequencyInterleaver(size_t mode, bool fixedPoint) : + ModCodec(), + m_fixedPoint(fixedPoint) { PDEBUG("FrequencyInterleaver::FrequencyInterleaver(%zu) @ %p\n", mode, this); @@ -42,54 +40,53 @@ FrequencyInterleaver::FrequencyInterleaver(size_t mode) : size_t beta; switch (mode) { case 1: - d_carriers = 1536; + m_carriers = 1536; num = 2048; beta = 511; break; case 2: - d_carriers = 384; + m_carriers = 384; num = 512; beta = 127; break; case 3: - d_carriers = 192; + m_carriers = 192; num = 256; beta = 63; break; case 0: case 4: - d_carriers = 768; + m_carriers = 768; num = 1024; beta = 255; break; default: - PDEBUG("Carriers: %zu\n", (d_carriers >> 1) << 1); - throw std::runtime_error("FrequencyInterleaver::FrequencyInterleaver " - "nb of carriers invalid!"); - break; + PDEBUG("Carriers: %zu\n", (m_carriers >> 1) << 1); + throw std::runtime_error("FrequencyInterleaver: invalid dab mode"); } - const int ret = posix_memalign((void**)(&d_indexes), 16, d_carriers * sizeof(size_t)); + const int ret = posix_memalign((void**)(&m_indices), 16, m_carriers * sizeof(size_t)); if (ret != 0) { throw std::runtime_error("memory allocation failed: " + std::to_string(ret)); } - size_t* index = d_indexes; + size_t *index = m_indices; size_t perm = 0; PDEBUG("i: %4u, R: %4u\n", 0, 0); for (size_t j = 1; j < num; ++j) { perm = (alpha * perm + beta) & (num - 1); - if (perm >= ((num - d_carriers) / 2) - && perm <= (num - (num - d_carriers) / 2) + if (perm >= ((num - m_carriers) / 2) + && perm <= (num - (num - m_carriers) / 2) && perm != (num / 2)) { PDEBUG("i: %4zu, R: %4zu, d: %4zu, n: %4zu, k: %5zi, index: %zu\n", - j, perm, perm, index - d_indexes, perm - num / 2, + j, perm, perm, index - m_indices, perm - num / 2, perm > num / 2 ? perm - (1 + (num / 2)) - : perm + (d_carriers - (num / 2))); + : perm + (m_carriers - (num / 2))); *(index++) = perm > num / 2 ? - perm - (1 + (num / 2)) : perm + (d_carriers - (num / 2)); - } else { + perm - (1 + (num / 2)) : perm + (m_carriers - (num / 2)); + } + else { PDEBUG("i: %4zu, R: %4zu\n", j, perm); } } @@ -100,9 +97,33 @@ FrequencyInterleaver::~FrequencyInterleaver() { PDEBUG("FrequencyInterleaver::~FrequencyInterleaver() @ %p\n", this); - free(d_indexes); + free(m_indices); } +template<typename T> +void do_process(Buffer* const dataIn, Buffer* dataOut, + size_t carriers, const size_t * const indices) +{ + const T* in = reinterpret_cast<const T*>(dataIn->getData()); + T* out = reinterpret_cast<T*>(dataOut->getData()); + size_t sizeIn = dataIn->getLength() / sizeof(T); + + if (sizeIn % carriers != 0) { + throw std::runtime_error( + "FrequencyInterleaver::process input size not valid!"); + } + + for (size_t i = 0; i < sizeIn;) { +// memset(out, 0, m_carriers * sizeof(T)); + for (size_t j = 0; j < carriers; i += 4, j += 4) { + out[indices[j]] = in[i]; + out[indices[j + 1]] = in[i + 1]; + out[indices[j + 2]] = in[i + 2]; + out[indices[j + 3]] = in[i + 3]; + } + out += carriers; + } +} int FrequencyInterleaver::process(Buffer* const dataIn, Buffer* dataOut) { @@ -112,24 +133,11 @@ int FrequencyInterleaver::process(Buffer* const dataIn, Buffer* dataOut) dataOut->setLength(dataIn->getLength()); - const complexf* in = reinterpret_cast<const complexf*>(dataIn->getData()); - complexf* out = reinterpret_cast<complexf*>(dataOut->getData()); - size_t sizeIn = dataIn->getLength() / sizeof(complexf); - - if (sizeIn % d_carriers != 0) { - throw std::runtime_error( - "FrequencyInterleaver::process input size not valid!"); + if (m_fixedPoint) { + do_process<complexfix>(dataIn, dataOut, m_carriers, m_indices); } - - for (size_t i = 0; i < sizeIn;) { -// memset(out, 0, d_carriers * sizeof(complexf)); - for (size_t j = 0; j < d_carriers; i += 4, j += 4) { - out[d_indexes[j]] = in[i]; - out[d_indexes[j + 1]] = in[i + 1]; - out[d_indexes[j + 2]] = in[i + 2]; - out[d_indexes[j + 3]] = in[i + 3]; - } - out += d_carriers; + else { + do_process<complexf>(dataIn, dataOut, m_carriers, m_indices); } return 1; diff --git a/src/FrequencyInterleaver.h b/src/FrequencyInterleaver.h index 43ca21a..b31b968 100644 --- a/src/FrequencyInterleaver.h +++ b/src/FrequencyInterleaver.h @@ -25,16 +25,14 @@ # include <config.h> #endif - #include "ModPlugin.h" #include <sys/types.h> - class FrequencyInterleaver : public ModCodec { public: - FrequencyInterleaver(size_t mode); + FrequencyInterleaver(size_t mode, bool fixedPoint); virtual ~FrequencyInterleaver(); FrequencyInterleaver(const FrequencyInterleaver&) = delete; FrequencyInterleaver& operator=(const FrequencyInterleaver&) = delete; @@ -43,7 +41,8 @@ public: const char* name() override { return "FrequencyInterleaver"; } protected: - size_t d_carriers; - size_t* d_indexes; + bool m_fixedPoint; + size_t m_carriers; + size_t *m_indices; }; diff --git a/src/GainControl.h b/src/GainControl.h index 04f6b58..d40a7d7 100644 --- a/src/GainControl.h +++ b/src/GainControl.h @@ -35,7 +35,6 @@ #include "RemoteControl.h" #include <sys/types.h> -#include <complex> #include <string> #include <mutex> @@ -43,9 +42,6 @@ # include <xmmintrin.h> #endif - -typedef std::complex<float> complexf; - enum class GainMode { GAIN_FIX = 0, GAIN_MAX = 1, GAIN_VAR = 2 }; class GainControl : public PipelinedModCodec, public RemoteControllable diff --git a/src/GuardIntervalInserter.cpp b/src/GuardIntervalInserter.cpp index 3c2db14..26d4fd1 100644 --- a/src/GuardIntervalInserter.cpp +++ b/src/GuardIntervalInserter.cpp @@ -29,39 +29,47 @@ #include <cstring> #include <cassert> #include <stdexcept> -#include <complex> #include <mutex> -typedef std::complex<float> complexf; +GuardIntervalInserter::Params::Params( + size_t nbSymbols, + size_t spacing, + size_t nullSize, + size_t symSize, + size_t& windowOverlap) : + nbSymbols(nbSymbols), + spacing(spacing), + nullSize(nullSize), + symSize(symSize), + windowOverlap(windowOverlap) {} GuardIntervalInserter::GuardIntervalInserter( size_t nbSymbols, size_t spacing, size_t nullSize, size_t symSize, - size_t& windowOverlap) : + size_t& windowOverlap, + FFTEngine fftEngine) : ModCodec(), RemoteControllable("guardinterval"), - d_nbSymbols(nbSymbols), - d_spacing(spacing), - d_nullSize(nullSize), - d_symSize(symSize), - d_windowOverlap(windowOverlap) + m_fftEngine(fftEngine), + m_params(nbSymbols, spacing, nullSize, symSize, windowOverlap) { - if (d_nullSize == 0) { + if (nullSize == 0) { throw std::logic_error("NULL symbol must be present"); } + RC_ADD_PARAMETER(windowlen, "Window length for OFDM windowng [0 to disable]"); /* We use a raised-cosine window for the OFDM windowing. - * Each symbol is extended on both sides by d_windowOverlap samples. + * Each symbol is extended on both sides by windowOverlap samples. * * * Sym n |####################| * Sym n+1 |####################| * - * We now extend the symbols by d_windowOverlap (one dash) + * We now extend the symbols by windowOverlap (one dash) * * Sym n extended -|####################|- * Sym n+1 extended -|####################|- @@ -75,7 +83,7 @@ GuardIntervalInserter::GuardIntervalInserter( * / \ * ... ________________/ \__ ... * - * The window length is 2*d_windowOverlap. + * The window length is 2*windowOverlap. */ update_window(windowOverlap); @@ -87,44 +95,43 @@ GuardIntervalInserter::GuardIntervalInserter( void GuardIntervalInserter::update_window(size_t new_window_overlap) { - std::lock_guard<std::mutex> lock(d_windowMutex); + std::lock_guard<std::mutex> lock(m_params.windowMutex); - d_windowOverlap = new_window_overlap; + m_params.windowOverlap = new_window_overlap; - // d_window only contains the rising window edge. - d_window.resize(2*d_windowOverlap); - for (size_t i = 0; i < 2*d_windowOverlap; i++) { - d_window[i] = (float)(0.5 * (1.0 - cos(M_PI * i / (2*d_windowOverlap - 1)))); + // m_params.window only contains the rising window edge. + m_params.window.resize(2*m_params.windowOverlap); + for (size_t i = 0; i < 2*m_params.windowOverlap; i++) { + m_params.window[i] = (float)(0.5 * (1.0 - cos(M_PI * i / (2*m_params.windowOverlap - 1)))); } } -int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut) +template<typename T> +int do_process(const GuardIntervalInserter::Params& p, Buffer* const dataIn, Buffer* dataOut) { - PDEBUG("GuardIntervalInserter::process(dataIn: %p, dataOut: %p)\n", + PDEBUG("GuardIntervalInserter do_process(dataIn: %p, dataOut: %p)\n", dataIn, dataOut); - std::lock_guard<std::mutex> lock(d_windowMutex); - - // Every symbol overlaps over a length of d_windowOverlap with + // Every symbol overlaps over a length of windowOverlap with // the previous symbol, and with the next symbol. First symbol // receives no prefix window, because we don't remember the // last symbol from the previous TF (yet). Last symbol also // receives no suffix window, for the same reason. // Overall output buffer length must stay independent of the windowing. - dataOut->setLength((d_nullSize + (d_nbSymbols * d_symSize)) * sizeof(complexf)); + dataOut->setLength((p.nullSize + (p.nbSymbols * p.symSize)) * sizeof(T)); - const complexf* in = reinterpret_cast<const complexf*>(dataIn->getData()); - complexf* out = reinterpret_cast<complexf*>(dataOut->getData()); - size_t sizeIn = dataIn->getLength() / sizeof(complexf); + const T* in = reinterpret_cast<const T*>(dataIn->getData()); + T* out = reinterpret_cast<T*>(dataOut->getData()); + size_t sizeIn = dataIn->getLength() / sizeof(T); - const size_t num_symbols = d_nbSymbols + 1; - if (sizeIn != num_symbols * d_spacing) + const size_t num_symbols = p.nbSymbols + 1; + if (sizeIn != num_symbols * p.spacing) { - PDEBUG("Nb symbols: %zu\n", d_nbSymbols); - PDEBUG("Spacing: %zu\n", d_spacing); - PDEBUG("Null size: %zu\n", d_nullSize); - PDEBUG("Sym size: %zu\n", d_symSize); - PDEBUG("\n%zu != %zu\n", sizeIn, (d_nbSymbols + 1) * d_spacing); + PDEBUG("Nb symbols: %zu\n", p.nbSymbols); + PDEBUG("Spacing: %zu\n", p.spacing); + PDEBUG("Null size: %zu\n", p.nullSize); + PDEBUG("Sym size: %zu\n", p.symSize); + PDEBUG("\n%zu != %zu\n", sizeIn, (p.nbSymbols + 1) * p.spacing); throw std::runtime_error( "GuardIntervalInserter::process input size not valid!"); } @@ -132,139 +139,162 @@ int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut) // TODO remember the end of the last TF so that we can do some // windowing too. - if (d_windowOverlap) { - { - // Handle Null symbol separately because it is longer - const size_t prefixlength = d_nullSize - d_spacing; - - // end = spacing - memcpy(out, &in[d_spacing - prefixlength], - prefixlength * sizeof(complexf)); - - memcpy(&out[prefixlength], in, (d_spacing - d_windowOverlap) * sizeof(complexf)); + std::lock_guard<std::mutex> lock(p.windowMutex); + if (p.windowOverlap) { + if constexpr (std::is_same_v<complexf, T>) { + { + // Handle Null symbol separately because it is longer + const size_t prefixlength = p.nullSize - p.spacing; + + // end = spacing + memcpy(out, &in[p.spacing - prefixlength], + prefixlength * sizeof(T)); + + memcpy(&out[prefixlength], in, (p.spacing - p.windowOverlap) * sizeof(T)); + + // The remaining part of the symbol must have half of the window applied, + // sloping down from 1 to 0.5 + for (size_t i = 0; i < p.windowOverlap; i++) { + const size_t out_ix = prefixlength + p.spacing - p.windowOverlap + i; + const size_t in_ix = p.spacing - p.windowOverlap + i; + out[out_ix] = in[in_ix] * p.window[2*p.windowOverlap - (i+1)]; + } - // The remaining part of the symbol must have half of the window applied, - // sloping down from 1 to 0.5 - for (size_t i = 0; i < d_windowOverlap; i++) { - const size_t out_ix = prefixlength + d_spacing - d_windowOverlap + i; - const size_t in_ix = d_spacing - d_windowOverlap + i; - out[out_ix] = in[in_ix] * d_window[2*d_windowOverlap - (i+1)]; - } + // Suffix is taken from the beginning of the symbol, and sees the other + // half of the window applied. + for (size_t i = 0; i < p.windowOverlap; i++) { + const size_t out_ix = prefixlength + p.spacing + i; + out[out_ix] = in[i] * p.window[p.windowOverlap - (i+1)]; + } - // Suffix is taken from the beginning of the symbol, and sees the other - // half of the window applied. - for (size_t i = 0; i < d_windowOverlap; i++) { - const size_t out_ix = prefixlength + d_spacing + i; - out[out_ix] = in[i] * d_window[d_windowOverlap - (i+1)]; + in += p.spacing; + out += p.nullSize; + // out is now pointing to the proper end of symbol. There are + // windowOverlap samples ahead that were already written. } - in += d_spacing; - out += d_nullSize; - // out is now pointing to the proper end of symbol. There are - // d_windowOverlap samples ahead that were already written. - } - - // Data symbols - for (size_t sym_ix = 0; sym_ix < d_nbSymbols; sym_ix++) { - /* _ix variables are indices into in[], _ox variables are - * indices for out[] */ - const ssize_t start_rise_ox = -d_windowOverlap; - const size_t start_rise_ix = 2 * d_spacing - d_symSize - d_windowOverlap; - /* - const size_t start_real_symbol_ox = 0; - const size_t start_real_symbol_ix = 2 * d_spacing - d_symSize; - */ - const ssize_t end_rise_ox = d_windowOverlap; - const size_t end_rise_ix = 2 * d_spacing - d_symSize + d_windowOverlap; - const ssize_t end_cyclic_prefix_ox = d_symSize - d_spacing; - /* end_cyclic_prefix_ix = end of symbol - const size_t begin_fall_ox = d_symSize - d_windowOverlap; - const size_t begin_fall_ix = d_spacing - d_windowOverlap; - const size_t end_real_symbol_ox = d_symSize; - end_real_symbol_ix = end of symbol - const size_t end_fall_ox = d_symSize + d_windowOverlap; - const size_t end_fall_ix = d_spacing + d_windowOverlap; - */ - - ssize_t ox = start_rise_ox; - size_t ix = start_rise_ix; - - for (size_t i = 0; ix < end_rise_ix; i++) { - out[ox] += in[ix] * d_window.at(i); - ix++; - ox++; - } - assert(ox == end_rise_ox); - - const size_t remaining_prefix_length = end_cyclic_prefix_ox - end_rise_ox; - memcpy( &out[ox], &in[ix], - remaining_prefix_length * sizeof(complexf)); - ox += remaining_prefix_length; - assert(ox == end_cyclic_prefix_ox); - ix = 0; - - const bool last_symbol = (sym_ix + 1 >= d_nbSymbols); - if (last_symbol) { - // No windowing at all at end - memcpy(&out[ox], &in[ix], d_spacing * sizeof(complexf)); - ox += d_spacing; - } - else { - // Copy the middle part of the symbol, d_windowOverlap samples - // short of the end. - memcpy( &out[ox], - &in[ix], - (d_spacing - d_windowOverlap) * sizeof(complexf)); - ox += d_spacing - d_windowOverlap; - ix += d_spacing - d_windowOverlap; - assert(ox == (ssize_t)(d_symSize - d_windowOverlap)); - - // Apply window from 1 to 0.5 for the end of the symbol - for (size_t i = 0; ox < (ssize_t)d_symSize; i++) { - out[ox] = in[ix] * d_window[2*d_windowOverlap - (i+1)]; - ox++; + // Data symbols + for (size_t sym_ix = 0; sym_ix < p.nbSymbols; sym_ix++) { + /* _ix variables are indices into in[], _ox variables are + * indices for out[] */ + const ssize_t start_rise_ox = -p.windowOverlap; + const size_t start_rise_ix = 2 * p.spacing - p.symSize - p.windowOverlap; + /* + const size_t start_real_symbol_ox = 0; + const size_t start_real_symbol_ix = 2 * p.spacing - p.symSize; + */ + const ssize_t end_rise_ox = p.windowOverlap; + const size_t end_rise_ix = 2 * p.spacing - p.symSize + p.windowOverlap; + const ssize_t end_cyclic_prefix_ox = p.symSize - p.spacing; + /* end_cyclic_prefix_ix = end of symbol + const size_t begin_fall_ox = p.symSize - p.windowOverlap; + const size_t begin_fall_ix = p.spacing - p.windowOverlap; + const size_t end_real_symbol_ox = p.symSize; + end_real_symbol_ix = end of symbol + const size_t end_fall_ox = p.symSize + p.windowOverlap; + const size_t end_fall_ix = p.spacing + p.windowOverlap; + */ + + ssize_t ox = start_rise_ox; + size_t ix = start_rise_ix; + + for (size_t i = 0; ix < end_rise_ix; i++) { + out[ox] += in[ix] * p.window.at(i); ix++; + ox++; } - assert(ix == d_spacing); + assert(ox == end_rise_ox); + const size_t remaining_prefix_length = end_cyclic_prefix_ox - end_rise_ox; + memcpy( &out[ox], &in[ix], + remaining_prefix_length * sizeof(T)); + ox += remaining_prefix_length; + assert(ox == end_cyclic_prefix_ox); ix = 0; - // Cyclic suffix, with window from 0.5 to 0 - for (size_t i = 0; ox < (ssize_t)(d_symSize + d_windowOverlap); i++) { - out[ox] = in[ix] * d_window[d_windowOverlap - (i+1)]; - ox++; - ix++; + + const bool last_symbol = (sym_ix + 1 >= p.nbSymbols); + if (last_symbol) { + // No windowing at all at end + memcpy(&out[ox], &in[ix], p.spacing * sizeof(T)); + ox += p.spacing; + } + else { + // Copy the middle part of the symbol, p.windowOverlap samples + // short of the end. + memcpy( &out[ox], + &in[ix], + (p.spacing - p.windowOverlap) * sizeof(T)); + ox += p.spacing - p.windowOverlap; + ix += p.spacing - p.windowOverlap; + assert(ox == (ssize_t)(p.symSize - p.windowOverlap)); + + // Apply window from 1 to 0.5 for the end of the symbol + for (size_t i = 0; ox < (ssize_t)p.symSize; i++) { + out[ox] = in[ix] * p.window[2*p.windowOverlap - (i+1)]; + ox++; + ix++; + } + assert(ix == p.spacing); + + ix = 0; + // Cyclic suffix, with window from 0.5 to 0 + for (size_t i = 0; ox < (ssize_t)(p.symSize + p.windowOverlap); i++) { + out[ox] = in[ix] * p.window[p.windowOverlap - (i+1)]; + ox++; + ix++; + } + + assert(ix == p.windowOverlap); } - assert(ix == d_windowOverlap); + out += p.symSize; + in += p.spacing; + // out is now pointing to the proper end of symbol. There are + // windowOverlap samples ahead that were already written. } - - out += d_symSize; - in += d_spacing; - // out is now pointing to the proper end of symbol. There are - // d_windowOverlap samples ahead that were already written. + } + else { + throw std::runtime_error("fixed-point doesn't support window overlap"); } } else { // Handle Null symbol separately because it is longer // end - (nullSize - spacing) = 2 * spacing - nullSize - memcpy(out, &in[2 * d_spacing - d_nullSize], - (d_nullSize - d_spacing) * sizeof(complexf)); - memcpy(&out[d_nullSize - d_spacing], in, d_spacing * sizeof(complexf)); - in += d_spacing; - out += d_nullSize; + memcpy(out, &in[2 * p.spacing - p.nullSize], + (p.nullSize - p.spacing) * sizeof(T)); + memcpy(&out[p.nullSize - p.spacing], in, p.spacing * sizeof(T)); + in += p.spacing; + out += p.nullSize; // Data symbols - for (size_t i = 0; i < d_nbSymbols; ++i) { + for (size_t i = 0; i < p.nbSymbols; ++i) { // end - (symSize - spacing) = 2 * spacing - symSize - memcpy(out, &in[2 * d_spacing - d_symSize], - (d_symSize - d_spacing) * sizeof(complexf)); - memcpy(&out[d_symSize - d_spacing], in, d_spacing * sizeof(complexf)); - in += d_spacing; - out += d_symSize; + memcpy(out, &in[2 * p.spacing - p.symSize], + (p.symSize - p.spacing) * sizeof(T)); + memcpy(&out[p.symSize - p.spacing], in, p.spacing * sizeof(T)); + in += p.spacing; + out += p.symSize; } } - return sizeIn; + const auto sizeOut = dataOut->getLength(); + return sizeOut; +} + +int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut) +{ + switch (m_fftEngine) { + case FFTEngine::FFTW: + return do_process<complexf>(m_params, dataIn, dataOut); + case FFTEngine::KISS: + if (m_params.windowOverlap) { + throw std::runtime_error("fixed point and ofdm windowing not supported"); + } + return do_process<complexfix>(m_params, dataIn, dataOut); + case FFTEngine::DEXTER: + return do_process<complexfix_wide>(m_params, dataIn, dataOut); + } + throw std::logic_error("Unhandled fftEngine variant"); } void GuardIntervalInserter::set_parameter( @@ -293,7 +323,7 @@ const std::string GuardIntervalInserter::get_parameter(const std::string& parame using namespace std; stringstream ss; if (parameter == "windowlen") { - ss << d_windowOverlap; + ss << m_params.windowOverlap; } else { ss << "Parameter '" << parameter << @@ -306,6 +336,6 @@ const std::string GuardIntervalInserter::get_parameter(const std::string& parame const json::map_t GuardIntervalInserter::get_all_values() const { json::map_t map; - map["windowlen"].v = d_windowOverlap; + map["windowlen"].v = m_params.windowOverlap; return map; } diff --git a/src/GuardIntervalInserter.h b/src/GuardIntervalInserter.h index f78ac91..8d329ff 100644 --- a/src/GuardIntervalInserter.h +++ b/src/GuardIntervalInserter.h @@ -30,6 +30,7 @@ # include <config.h> #endif +#include "ConfigParser.h" #include "ModPlugin.h" #include "RemoteControl.h" #include <stdint.h> @@ -50,7 +51,8 @@ class GuardIntervalInserter : public ModCodec, public RemoteControllable size_t spacing, size_t nullSize, size_t symSize, - size_t& windowOverlap); + size_t& windowOverlap, + FFTEngine fftEngine); virtual ~GuardIntervalInserter() {} @@ -62,16 +64,30 @@ class GuardIntervalInserter : public ModCodec, public RemoteControllable virtual const std::string get_parameter(const std::string& parameter) const override; virtual const json::map_t get_all_values() const override; + struct Params { + Params( + size_t nbSymbols, + size_t spacing, + size_t nullSize, + size_t symSize, + size_t& windowOverlap); + + size_t nbSymbols; + size_t spacing; + size_t nullSize; + size_t symSize; + size_t& windowOverlap; + + mutable std::mutex windowMutex; + std::vector<float> window; + }; + protected: void update_window(size_t new_window_overlap); - size_t d_nbSymbols; - size_t d_spacing; - size_t d_nullSize; - size_t d_symSize; + FFTEngine m_fftEngine; + + Params m_params; - mutable std::mutex d_windowMutex; - size_t& d_windowOverlap; - std::vector<float> d_window; }; diff --git a/src/MemlessPoly.h b/src/MemlessPoly.h index 91e6860..72de62c 100644 --- a/src/MemlessPoly.h +++ b/src/MemlessPoly.h @@ -32,13 +32,10 @@ #include "RemoteControl.h" #include "ModPlugin.h" -#include "PcDebug.h" #include "ThreadsafeQueue.h" #include <sys/types.h> #include <array> -#include <complex> -#include <memory> #include <string> #include <thread> #include <vector> @@ -47,8 +44,6 @@ #define MEMLESSPOLY_PIPELINE_DELAY 1 -typedef std::complex<float> complexf; - enum class dpd_type_t { odd_only_poly, lookup_table diff --git a/src/ModPlugin.h b/src/ModPlugin.h index 470508f..bb3ee2c 100644 --- a/src/ModPlugin.h +++ b/src/ModPlugin.h @@ -33,9 +33,7 @@ #include "Buffer.h" #include "ThreadsafeQueue.h" #include "TimestampDecoder.h" -#include <cstddef> #include <vector> -#include <memory> #include <thread> #include <atomic> diff --git a/src/NullSymbol.cpp b/src/NullSymbol.cpp index 4684dfe..526e662 100644 --- a/src/NullSymbol.cpp +++ b/src/NullSymbol.cpp @@ -27,18 +27,16 @@ #include "NullSymbol.h" #include "PcDebug.h" -#include <stdio.h> -#include <stdlib.h> -#include <complex> -#include <string.h> +#include <cstdio> +#include <cstdlib> +#include <cstring> -typedef std::complex<float> complexf; - -NullSymbol::NullSymbol(size_t nbCarriers) : +NullSymbol::NullSymbol(size_t numCarriers, size_t typeSize) : ModInput(), - myNbCarriers(nbCarriers) + m_numCarriers(numCarriers), + m_typeSize(typeSize) { - PDEBUG("NullSymbol::NullSymbol(%zu) @ %p\n", nbCarriers, this); + PDEBUG("NullSymbol::NullSymbol(%zu) @ %p\n", numCarriers, this); } @@ -52,7 +50,7 @@ int NullSymbol::process(Buffer* dataOut) { PDEBUG("NullSymbol::process(dataOut: %p)\n", dataOut); - dataOut->setLength(myNbCarriers * 2 * sizeof(float)); + dataOut->setLength(m_numCarriers * m_typeSize); memset(dataOut->getData(), 0, dataOut->getLength()); return dataOut->getLength(); diff --git a/src/NullSymbol.h b/src/NullSymbol.h index 814e434..6ba9e63 100644 --- a/src/NullSymbol.h +++ b/src/NullSymbol.h @@ -39,14 +39,14 @@ class NullSymbol : public ModInput { public: - NullSymbol(size_t nbCarriers); + NullSymbol(size_t nunCarriers, size_t typeSize); virtual ~NullSymbol(); int process(Buffer* dataOut); const char* name() { return "NullSymbol"; } private: - size_t myNbCarriers; - + size_t m_numCarriers; + size_t m_typeSize; }; diff --git a/src/OfdmGenerator.cpp b/src/OfdmGenerator.cpp index cb799d3..38648c9 100644 --- a/src/OfdmGenerator.cpp +++ b/src/OfdmGenerator.cpp @@ -2,7 +2,7 @@ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty the Queen in Right of Canada (Communications Research Center Canada) - Copyright (C) 2023 + Copyright (C) 2024 Matthias P. Braendli, matthias.braendli@mpb.li http://opendigitalradio.org @@ -27,17 +27,19 @@ #include "OfdmGenerator.h" #include "PcDebug.h" -#define FFT_TYPE fftwf_complex - -#include <string.h> #include <stdexcept> #include <assert.h> #include <string> #include <numeric> +#include <vector> +#include <cstring> +#include <complex> static const size_t MAX_CLIP_STATS = 10; -OfdmGenerator::OfdmGenerator(size_t nbSymbols, +using FFTW_TYPE = fftwf_complex; + +OfdmGeneratorCF32::OfdmGeneratorCF32(size_t nbSymbols, size_t nbCarriers, size_t spacing, bool& enableCfr, @@ -62,8 +64,7 @@ OfdmGenerator::OfdmGenerator(size_t nbSymbols, nbSymbols, nbCarriers, spacing, inverse ? "true" : "false", this); if (nbCarriers > spacing) { - throw std::runtime_error( - "OfdmGenerator::OfdmGenerator nbCarriers > spacing!"); + throw std::runtime_error("OfdmGenerator nbCarriers > spacing!"); } /* register the parameters that can be remote controlled */ @@ -102,29 +103,29 @@ OfdmGenerator::OfdmGenerator(size_t nbSymbols, PDEBUG(" myZeroSize: %u\n", myZeroSize); const int N = mySpacing; // The size of the FFT - myFftIn = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N); - myFftOut = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N); + myFftIn = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N); + myFftOut = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N); fftwf_set_timelimit(2); myFftPlan = fftwf_plan_dft_1d(N, myFftIn, myFftOut, FFTW_BACKWARD, FFTW_MEASURE); - myCfrPostClip = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N); - myCfrPostFft = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N); + myCfrPostClip = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N); + myCfrPostFft = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N); myCfrFft = fftwf_plan_dft_1d(N, myCfrPostClip, myCfrPostFft, FFTW_FORWARD, FFTW_MEASURE); - if (sizeof(complexf) != sizeof(FFT_TYPE)) { + if (sizeof(complexf) != sizeof(FFTW_TYPE)) { printf("sizeof(complexf) %zu\n", sizeof(complexf)); - printf("sizeof(FFT_TYPE) %zu\n", sizeof(FFT_TYPE)); + printf("sizeof(FFT_TYPE) %zu\n", sizeof(FFTW_TYPE)); throw std::runtime_error( "OfdmGenerator::process complexf size is not FFT_TYPE size!"); } } -OfdmGenerator::~OfdmGenerator() +OfdmGeneratorCF32::~OfdmGeneratorCF32() { PDEBUG("OfdmGenerator::~OfdmGenerator() @ %p\n", this); @@ -153,15 +154,15 @@ OfdmGenerator::~OfdmGenerator() } } -int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut) +int OfdmGeneratorCF32::process(Buffer* const dataIn, Buffer* dataOut) { PDEBUG("OfdmGenerator::process(dataIn: %p, dataOut: %p)\n", dataIn, dataOut); dataOut->setLength(myNbSymbols * mySpacing * sizeof(complexf)); - FFT_TYPE* in = reinterpret_cast<FFT_TYPE*>(dataIn->getData()); - FFT_TYPE* out = reinterpret_cast<FFT_TYPE*>(dataOut->getData()); + FFTW_TYPE *in = reinterpret_cast<FFTW_TYPE*>(dataIn->getData()); + FFTW_TYPE *out = reinterpret_cast<FFTW_TYPE*>(dataOut->getData()); size_t sizeIn = dataIn->getLength() / sizeof(complexf); size_t sizeOut = dataOut->getLength() / sizeof(complexf); @@ -203,7 +204,7 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut) myPaprAfterCFR.clear(); } - for (size_t i = 0; i < myNbSymbols; ++i) { + for (size_t i = 0; i < myNbSymbols; i++) { myFftIn[0][0] = 0; myFftIn[0][1] = 0; @@ -212,22 +213,20 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut) * PosSrc=0 PosDst=1 PosSize=768 * NegSrc=768 NegDst=1280 NegSize=768 */ - memset(&myFftIn[myZeroDst], 0, myZeroSize * sizeof(FFT_TYPE)); + memset(&myFftIn[myZeroDst], 0, myZeroSize * sizeof(FFTW_TYPE)); memcpy(&myFftIn[myPosDst], &in[myPosSrc], - myPosSize * sizeof(FFT_TYPE)); + myPosSize * sizeof(FFTW_TYPE)); memcpy(&myFftIn[myNegDst], &in[myNegSrc], - myNegSize * sizeof(FFT_TYPE)); - + myNegSize * sizeof(FFTW_TYPE)); if (myCfr) { reference.resize(mySpacing); memcpy(reinterpret_cast<fftwf_complex*>(reference.data()), - myFftIn, mySpacing * sizeof(FFT_TYPE)); + myFftIn, mySpacing * sizeof(FFTW_TYPE)); } fftwf_execute(myFftPlan); // IFFT from myFftIn to myFftOut - if (myCfr) { complexf *symbol = reinterpret_cast<complexf*>(myFftOut); myPaprBeforeCFR.process_block(symbol, mySpacing); @@ -235,7 +234,7 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut) if (myMERCalcIndex == i) { before_cfr.resize(mySpacing); memcpy(reinterpret_cast<fftwf_complex*>(before_cfr.data()), - myFftOut, mySpacing * sizeof(FFT_TYPE)); + myFftOut, mySpacing * sizeof(FFTW_TYPE)); } /* cfr_one_iteration runs the myFftPlan again at the end, and @@ -277,7 +276,7 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut) num_error_clip += stat.errclip_count; } - memcpy(out, myFftOut, mySpacing * sizeof(FFT_TYPE)); + memcpy(out, myFftOut, mySpacing * sizeof(FFTW_TYPE)); in += myNbCarriers; out += mySpacing; @@ -308,14 +307,14 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut) return sizeOut; } -OfdmGenerator::cfr_iter_stat_t OfdmGenerator::cfr_one_iteration( +OfdmGeneratorCF32::cfr_iter_stat_t OfdmGeneratorCF32::cfr_one_iteration( complexf *symbol, const complexf *reference) { // use std::norm instead of std::abs to avoid calculating the // square roots const float clip_squared = myCfrClip * myCfrClip; - OfdmGenerator::cfr_iter_stat_t ret; + OfdmGeneratorCF32::cfr_iter_stat_t ret; // Clip for (size_t i = 0; i < mySpacing; i++) { @@ -331,7 +330,7 @@ OfdmGenerator::cfr_iter_stat_t OfdmGenerator::cfr_one_iteration( } // Take FFT of our clipped signal - memcpy(myCfrPostClip, symbol, mySpacing * sizeof(FFT_TYPE)); + memcpy(myCfrPostClip, symbol, mySpacing * sizeof(FFTW_TYPE)); fftwf_execute(myCfrFft); // FFT from myCfrPostClip to myCfrPostFft // Calculate the error in frequency domain by subtracting our reference @@ -374,7 +373,7 @@ OfdmGenerator::cfr_iter_stat_t OfdmGenerator::cfr_one_iteration( } -void OfdmGenerator::set_parameter(const std::string& parameter, +void OfdmGeneratorCF32::set_parameter(const std::string& parameter, const std::string& value) { using namespace std; @@ -404,7 +403,7 @@ void OfdmGenerator::set_parameter(const std::string& parameter, } } -const std::string OfdmGenerator::get_parameter(const std::string& parameter) const +const std::string OfdmGeneratorCF32::get_parameter(const std::string& parameter) const { using namespace std; stringstream ss; @@ -458,9 +457,333 @@ const std::string OfdmGenerator::get_parameter(const std::string& parameter) con return ss.str(); } -const json::map_t OfdmGenerator::get_all_values() const +const json::map_t OfdmGeneratorCF32::get_all_values() const { json::map_t map; // TODO needs rework of the values return map; } + +OfdmGeneratorFixed::OfdmGeneratorFixed(size_t nbSymbols, + size_t nbCarriers, + size_t spacing, + bool inverse) : + ModCodec(), + myNbSymbols(nbSymbols), + myNbCarriers(nbCarriers), + mySpacing(spacing) +{ + PDEBUG("OfdmGenerator::OfdmGenerator(%zu, %zu, %zu, %s) @ %p\n", + nbSymbols, nbCarriers, spacing, inverse ? "true" : "false", this); + + etiLog.level(info) << "Using KISS FFT by Mark Borgerding for fixed-point transform"; + + if (nbCarriers > spacing) { + throw std::runtime_error("OfdmGenerator nbCarriers > spacing!"); + } + + if (inverse) { + myPosDst = (nbCarriers & 1 ? 0 : 1); + myPosSrc = 0; + myPosSize = (nbCarriers + 1) / 2; + myNegDst = spacing - (nbCarriers / 2); + myNegSrc = (nbCarriers + 1) / 2; + myNegSize = nbCarriers / 2; + } + else { + myPosDst = (nbCarriers & 1 ? 0 : 1); + myPosSrc = nbCarriers / 2; + myPosSize = (nbCarriers + 1) / 2; + myNegDst = spacing - (nbCarriers / 2); + myNegSrc = 0; + myNegSize = nbCarriers / 2; + } + myZeroDst = myPosDst + myPosSize; + myZeroSize = myNegDst - myZeroDst; + + PDEBUG(" myPosDst: %u\n", myPosDst); + PDEBUG(" myPosSrc: %u\n", myPosSrc); + PDEBUG(" myPosSize: %u\n", myPosSize); + PDEBUG(" myNegDst: %u\n", myNegDst); + PDEBUG(" myNegSrc: %u\n", myNegSrc); + PDEBUG(" myNegSize: %u\n", myNegSize); + PDEBUG(" myZeroDst: %u\n", myZeroDst); + PDEBUG(" myZeroSize: %u\n", myZeroSize); + + const int N = mySpacing; // The size of the FFT + + const size_t nbytes = N * sizeof(kiss_fft_cpx); + myFftIn = (kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes); + myFftOut = (kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes); + memset(myFftIn, 0, nbytes); + + myKissCfg = kiss_fft_alloc(N, inverse, nullptr, nullptr); +} + +OfdmGeneratorFixed::~OfdmGeneratorFixed() +{ + if (myKissCfg) KISS_FFT_FREE(myKissCfg); + if (myFftIn) KISS_FFT_FREE(myFftIn); + if (myFftOut) KISS_FFT_FREE(myFftOut); +} + +int OfdmGeneratorFixed::process(Buffer* const dataIn, Buffer* dataOut) +{ + dataOut->setLength(myNbSymbols * mySpacing * sizeof(kiss_fft_cpx)); + + kiss_fft_cpx *in = reinterpret_cast<kiss_fft_cpx*>(dataIn->getData()); + kiss_fft_cpx *out = reinterpret_cast<kiss_fft_cpx*>(dataOut->getData()); + + size_t sizeIn = dataIn->getLength() / sizeof(kiss_fft_cpx); + size_t sizeOut = dataOut->getLength() / sizeof(kiss_fft_cpx); + + if (sizeIn != myNbSymbols * myNbCarriers) { + PDEBUG("Nb symbols: %zu\n", myNbSymbols); + PDEBUG("Nb carriers: %zu\n", myNbCarriers); + PDEBUG("Spacing: %zu\n", mySpacing); + PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * myNbCarriers); + throw std::runtime_error( + "OfdmGenerator::process input size not valid!"); + } + if (sizeOut != myNbSymbols * mySpacing) { + PDEBUG("Nb symbols: %zu\n", myNbSymbols); + PDEBUG("Nb carriers: %zu\n", myNbCarriers); + PDEBUG("Spacing: %zu\n", mySpacing); + PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * mySpacing); + throw std::runtime_error( + "OfdmGenerator::process output size not valid!"); + } + + for (size_t i = 0; i < myNbSymbols; i++) { + myFftIn[0].r = 0; + myFftIn[0].i = 0; + + /* For TM I this is: + * ZeroDst=769 ZeroSize=511 + * PosSrc=0 PosDst=1 PosSize=768 + * NegSrc=768 NegDst=1280 NegSize=768 + */ + memset(&myFftIn[myZeroDst], 0, myZeroSize * sizeof(kiss_fft_cpx)); + memcpy(&myFftIn[myPosDst], &in[myPosSrc], myPosSize * sizeof(kiss_fft_cpx)); + memcpy(&myFftIn[myNegDst], &in[myNegSrc], myNegSize * sizeof(kiss_fft_cpx)); + + kiss_fft(myKissCfg, myFftIn, myFftOut); + + memcpy(out, myFftOut, mySpacing * sizeof(kiss_fft_cpx)); + + in += myNbCarriers; + out += mySpacing; + } + + return sizeOut; +} + +#ifdef HAVE_DEXTER +OfdmGeneratorDEXTER::OfdmGeneratorDEXTER(size_t nbSymbols, + size_t nbCarriers, + size_t spacing) : + ModCodec(), + myNbSymbols(nbSymbols), + myNbCarriers(nbCarriers), + mySpacing(spacing) +{ + PDEBUG("OfdmGeneratorDEXTER::OfdmGeneratorDEXTER(%zu, %zu, %zu) @ %p\n", + nbSymbols, nbCarriers, spacing, this); + + etiLog.level(info) << "Using DEXTER FFT Accelerator for fixed-point transform"; + + if (nbCarriers > spacing) { + throw std::runtime_error("OfdmGenerator nbCarriers > spacing!"); + } + + myPosDst = (nbCarriers & 1 ? 0 : 1); + myPosSrc = 0; + myPosSize = (nbCarriers + 1) / 2; + myNegDst = spacing - (nbCarriers / 2); + myNegSrc = (nbCarriers + 1) / 2; + myNegSize = nbCarriers / 2; + + myZeroDst = myPosDst + myPosSize; + myZeroSize = myNegDst - myZeroDst; + + PDEBUG(" myPosDst: %u\n", myPosDst); + PDEBUG(" myPosSrc: %u\n", myPosSrc); + PDEBUG(" myPosSize: %u\n", myPosSize); + PDEBUG(" myNegDst: %u\n", myNegDst); + PDEBUG(" myNegSrc: %u\n", myNegSrc); + PDEBUG(" myNegSize: %u\n", myNegSize); + PDEBUG(" myZeroDst: %u\n", myZeroDst); + PDEBUG(" myZeroSize: %u\n", myZeroSize); + + const size_t nbytes_in = mySpacing * sizeof(complexfix); + const size_t nbytes_out = mySpacing * sizeof(complexfix_wide); + +#define IIO_ENSURE(expr, err) { \ + if (!(expr)) { \ + etiLog.log(error, "%s (%s:%d)\n", err, __FILE__, __LINE__); \ + throw std::runtime_error("Failed to set FFT for OfdmGeneratorDEXTER"); \ + } \ +} + IIO_ENSURE((m_ctx = iio_create_default_context()), "No context"); + IIO_ENSURE(m_dev_in = iio_context_find_device(m_ctx, "fft-accelerator-in"), "no dev"); + IIO_ENSURE(m_dev_out = iio_context_find_device(m_ctx, "fft-accelerator-out"), "no dev"); + IIO_ENSURE(m_channel_in = iio_device_find_channel(m_dev_in, "voltage0", true), "no channel"); + IIO_ENSURE(m_channel_out = iio_device_find_channel(m_dev_out, "voltage0", false), "no channel"); + + iio_channel_enable(m_channel_in); + iio_channel_enable(m_channel_out); + + m_buf_in = iio_device_create_buffer(m_dev_in, nbytes_in, false); + if (!m_buf_in) { + throw std::runtime_error("OfdmGeneratorDEXTER could not create in buffer"); + } + + m_buf_out = iio_device_create_buffer(m_dev_out, nbytes_out, false); + if (!m_buf_out) { + throw std::runtime_error("OfdmGeneratorDEXTER could not create out buffer"); + } +} + +OfdmGeneratorDEXTER::~OfdmGeneratorDEXTER() +{ + if (m_buf_in) { + iio_buffer_destroy(m_buf_in); + m_buf_in = nullptr; + } + + if (m_buf_out) { + iio_buffer_destroy(m_buf_out); + m_buf_out = nullptr; + } + + if (m_channel_in) { + iio_channel_disable(m_channel_in); + m_channel_in = nullptr; + } + + if (m_channel_out) { + iio_channel_disable(m_channel_out); + m_channel_out = nullptr; + } + + if (m_ctx) { + iio_context_destroy(m_ctx); + m_ctx = nullptr; + } +} + +int OfdmGeneratorDEXTER::process(Buffer* const dataIn, Buffer* dataOut) +{ + dataOut->setLength(myNbSymbols * mySpacing * sizeof(complexfix_wide)); + + complexfix *in = reinterpret_cast<complexfix*>(dataIn->getData()); + complexfix_wide *out = reinterpret_cast<complexfix_wide*>(dataOut->getData()); + + size_t sizeIn = dataIn->getLength() / sizeof(complexfix); + size_t sizeOut = dataOut->getLength() / sizeof(complexfix_wide); + + if (sizeIn != myNbSymbols * myNbCarriers) { + PDEBUG("Nb symbols: %zu\n", myNbSymbols); + PDEBUG("Nb carriers: %zu\n", myNbCarriers); + PDEBUG("Spacing: %zu\n", mySpacing); + PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * myNbCarriers); + throw std::runtime_error( + "OfdmGenerator::process input size not valid!"); + } + if (sizeOut != myNbSymbols * mySpacing) { + PDEBUG("Nb symbols: %zu\n", myNbSymbols); + PDEBUG("Nb carriers: %zu\n", myNbCarriers); + PDEBUG("Spacing: %zu\n", mySpacing); + PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * mySpacing); + throw std::runtime_error("OfdmGenerator::process output size not valid!"); + } + + ptrdiff_t iio_buf_size = (uint8_t*)iio_buffer_end(m_buf_in) - (uint8_t*)iio_buffer_start(m_buf_in); + if (iio_buf_size != (ssize_t)(mySpacing * sizeof(complexfix))) { + throw std::runtime_error("OfdmGenerator::process incorrect iio buffer size!"); + } + + for (size_t i = 0; i < myNbSymbols; i++) { + complexfix *fft_in = reinterpret_cast<complexfix*>(iio_buffer_start(m_buf_in)); + + /* For TM I this is: + * ZeroDst=769 ZeroSize=511 + * PosSrc=0 PosDst=1 PosSize=768 + * NegSrc=768 NegDst=1280 NegSize=768 + */ + + fft_in[0] = static_cast<complexfix::value_type>(0); + for (size_t i = 0; i < myZeroSize; i++) { + fft_in[myZeroDst + i] = static_cast<complexfix::value_type>(0); + } + + memcpy(&fft_in[myPosDst], &in[myPosSrc], myPosSize * sizeof(complexfix)); + memcpy(&fft_in[myNegDst], &in[myNegSrc], myNegSize * sizeof(complexfix)); + + ssize_t nbytes_tx = iio_buffer_push(m_buf_in); + if (nbytes_tx < 0) { + throw std::runtime_error("OfdmGenerator::process error pushing IIO buffer!"); + } + + in += myNbCarriers; + + // Keep one buffer in flight while we're doing shuffling data around here, + // this improves performance. + // I believe that, by default, IIO allocates four buffers in total. + if (i > 0) { + ssize_t nbytes_rx = iio_buffer_refill(m_buf_out); + if (nbytes_rx < 0) { + throw std::runtime_error("OfdmGenerator::process error refilling IIO buffer!"); + } + + ptrdiff_t p_inc = iio_buffer_step(m_buf_out); + if (p_inc != 1) { + throw std::runtime_error("OfdmGenerator::process Wrong p_inc"); + } + + // The FFT Accelerator takes 16-bit I + 16-bit Q, and outputs 32-bit I and 32-bit Q. + // The formatconvert will take care of this + const uint8_t *fft_out = (const uint8_t*)iio_buffer_first(m_buf_out, m_channel_out); + const uint8_t *fft_out_end = (const uint8_t*)iio_buffer_end(m_buf_out); + constexpr size_t sizeof_out_iq = sizeof(complexfix_wide); + if ((fft_out_end - fft_out) != (ssize_t)(mySpacing * sizeof_out_iq)) { + fprintf(stderr, "FFT_OUT: %p %p %zu %zu\n", + fft_out, fft_out_end, (fft_out_end - fft_out), + mySpacing * sizeof_out_iq); + throw std::runtime_error("OfdmGenerator::process fft_out length invalid!"); + } + + memcpy(out, fft_out, mySpacing * sizeof_out_iq); + + out += mySpacing; + } + } + + ssize_t nbytes_rx = iio_buffer_refill(m_buf_out); + if (nbytes_rx < 0) { + throw std::runtime_error("OfdmGenerator::process error refilling IIO buffer!"); + } + + ptrdiff_t p_inc = iio_buffer_step(m_buf_out); + if (p_inc != 1) { + throw std::runtime_error("OfdmGenerator::process Wrong p_inc"); + } + + // The FFT Accelerator takes 16-bit I + 16-bit Q, and outputs 32-bit I and 32-bit Q. + // The formatconvert will take care of this + const uint8_t *fft_out = (const uint8_t*)iio_buffer_first(m_buf_out, m_channel_out); + const uint8_t *fft_out_end = (const uint8_t*)iio_buffer_end(m_buf_out); + constexpr size_t sizeof_out_iq = sizeof(complexfix_wide); + if ((fft_out_end - fft_out) != (ssize_t)(mySpacing * sizeof_out_iq)) { + fprintf(stderr, "FFT_OUT: %p %p %zu %zu\n", + fft_out, fft_out_end, (fft_out_end - fft_out), + mySpacing * sizeof_out_iq); + throw std::runtime_error("OfdmGenerator::process fft_out length invalid!"); + } + + memcpy(out, fft_out, mySpacing * sizeof_out_iq); + + return sizeOut; +} + +#endif // HAVE_DEXTER diff --git a/src/OfdmGenerator.h b/src/OfdmGenerator.h index dc1ad46..475b2a4 100644 --- a/src/OfdmGenerator.h +++ b/src/OfdmGenerator.h @@ -2,7 +2,7 @@ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty the Queen in Right of Canada (Communications Research Center Canada) - Copyright (C) 2023 + Copyright (C) 2024 Matthias P. Braendli, matthias.braendli@mpb.li http://opendigitalradio.org @@ -33,27 +33,30 @@ #include "ModPlugin.h" #include "RemoteControl.h" #include "PAPRStats.h" -#include "fftw3.h" +#include "kiss_fft.h" + #include <cstddef> -#include <vector> -#include <complex> #include <atomic> +#include <fftw3.h> -typedef std::complex<float> complexf; +#ifdef HAVE_DEXTER +# include <iio.h> +#endif -class OfdmGenerator : public ModCodec, public RemoteControllable +// Complex Float uses FFTW +class OfdmGeneratorCF32 : public ModCodec, public RemoteControllable { public: - OfdmGenerator(size_t nbSymbols, + OfdmGeneratorCF32(size_t nbSymbols, size_t nbCarriers, size_t spacing, bool& enableCfr, float& cfrClip, float& cfrErrorClip, bool inverse = true); - virtual ~OfdmGenerator(); - OfdmGenerator(const OfdmGenerator&) = delete; - OfdmGenerator& operator=(const OfdmGenerator&) = delete; + virtual ~OfdmGeneratorCF32(); + OfdmGeneratorCF32(const OfdmGeneratorCF32&) = delete; + OfdmGeneratorCF32& operator=(const OfdmGeneratorCF32&) = delete; int process(Buffer* const dataIn, Buffer* dataOut) override; const char* name() override { return "OfdmGenerator"; } @@ -107,4 +110,76 @@ class OfdmGenerator : public ModCodec, public RemoteControllable std::deque<double> myMERs; }; +// Fixed point implementation uses KISS FFT with -DFIXED_POINT=32 +class OfdmGeneratorFixed : public ModCodec +{ + public: + OfdmGeneratorFixed(size_t nbSymbols, + size_t nbCarriers, + size_t spacing, + bool inverse = true); + virtual ~OfdmGeneratorFixed(); + OfdmGeneratorFixed(const OfdmGeneratorFixed&) = delete; + OfdmGeneratorFixed& operator=(const OfdmGeneratorFixed&) = delete; + + int process(Buffer* const dataIn, Buffer* dataOut) override; + const char* name() override { return "OfdmGenerator"; } + + private: + kiss_fft_cfg myKissCfg = nullptr; + kiss_fft_cpx *myFftIn, *myFftOut; + + const size_t myNbSymbols; + const size_t myNbCarriers; + const size_t mySpacing; + unsigned myPosSrc; + unsigned myPosDst; + unsigned myPosSize; + unsigned myNegSrc; + unsigned myNegDst; + unsigned myNegSize; + unsigned myZeroDst; + unsigned myZeroSize; +}; + +#ifdef HAVE_DEXTER +// The PrecisionWave DEXTER device contains an FFT accelerator in FPGA +// It only does inverse FFTs +class OfdmGeneratorDEXTER : public ModCodec +{ + public: + OfdmGeneratorDEXTER(size_t nbSymbols, + size_t nbCarriers, + size_t spacing); + virtual ~OfdmGeneratorDEXTER(); + OfdmGeneratorDEXTER(const OfdmGeneratorDEXTER&) = delete; + OfdmGeneratorDEXTER& operator=(const OfdmGeneratorDEXTER&) = delete; + + int process(Buffer* const dataIn, Buffer* dataOut) override; + const char* name() override { return "OfdmGenerator"; } + + private: + struct iio_context *m_ctx = nullptr; + // "in" and "out" are from the point of view of the FFT Accelerator block + struct iio_device *m_dev_in = nullptr; + struct iio_channel *m_channel_in = nullptr; + struct iio_buffer *m_buf_in = nullptr; + + struct iio_device *m_dev_out = nullptr; + struct iio_channel *m_channel_out = nullptr; + struct iio_buffer *m_buf_out = nullptr; + + const size_t myNbSymbols; + const size_t myNbCarriers; + const size_t mySpacing; + unsigned myPosSrc; + unsigned myPosDst; + unsigned myPosSize; + unsigned myNegSrc; + unsigned myNegDst; + unsigned myNegSize; + unsigned myZeroDst; + unsigned myZeroSize; +}; +#endif // HAVE_DEXTER diff --git a/src/OutputMemory.cpp b/src/OutputMemory.cpp index d6ef917..f673555 100644 --- a/src/OutputMemory.cpp +++ b/src/OutputMemory.cpp @@ -26,20 +26,14 @@ #include "OutputMemory.h" #include "PcDebug.h" -#include "Log.h" -#include "TimestampDecoder.h" - -#include <stdexcept> -#include <string.h> -#include <math.h> - +#include <cmath> OutputMemory::OutputMemory(Buffer* dataOut) : ModOutput() { PDEBUG("OutputMemory::OutputMemory(%p) @ %p\n", dataOut, this); - setOutput(dataOut); + m_dataOut = dataOut; #if OUTPUT_MEM_HISTOGRAM myMax = 0.0f; @@ -49,7 +43,6 @@ OutputMemory::OutputMemory(Buffer* dataOut) #endif } - OutputMemory::~OutputMemory() { #if OUTPUT_MEM_HISTOGRAM @@ -66,19 +59,12 @@ OutputMemory::~OutputMemory() PDEBUG("OutputMemory::~OutputMemory() @ %p\n", this); } - -void OutputMemory::setOutput(Buffer* dataOut) -{ - myDataOut = dataOut; -} - - int OutputMemory::process(Buffer* dataIn) { PDEBUG("OutputMemory::process(dataIn: %p)\n", dataIn); - *myDataOut = *dataIn; + *m_dataOut = *dataIn; #if OUTPUT_MEM_HISTOGRAM const float* in = (const float*)dataIn->getData(); @@ -93,17 +79,17 @@ int OutputMemory::process(Buffer* dataIn) } #endif - return myDataOut->getLength(); + return m_dataOut->getLength(); } meta_vec_t OutputMemory::process_metadata(const meta_vec_t& metadataIn) { - myMetadata = metadataIn; + m_metadata = metadataIn; return {}; } meta_vec_t OutputMemory::get_latest_metadata() { - return myMetadata; + return m_metadata; } diff --git a/src/OutputMemory.h b/src/OutputMemory.h index f0a5fbb..299d31d 100644 --- a/src/OutputMemory.h +++ b/src/OutputMemory.h @@ -61,11 +61,9 @@ public: meta_vec_t get_latest_metadata(void); - void setOutput(Buffer* dataOut); - protected: - Buffer* myDataOut; - meta_vec_t myMetadata; + Buffer* m_dataOut; + meta_vec_t m_metadata; #if OUTPUT_MEM_HISTOGRAM // keep track of max value diff --git a/src/PAPRStats.cpp b/src/PAPRStats.cpp index 0c9764a..103f02f 100644 --- a/src/PAPRStats.cpp +++ b/src/PAPRStats.cpp @@ -33,7 +33,6 @@ # include <iostream> #endif - PAPRStats::PAPRStats(size_t num_blocks_to_accumulate) : m_num_blocks_to_accumulate(num_blocks_to_accumulate) { diff --git a/src/PAPRStats.h b/src/PAPRStats.h index 86ad8b0..a4ded86 100644 --- a/src/PAPRStats.h +++ b/src/PAPRStats.h @@ -31,12 +31,9 @@ #endif #include <cstddef> -#include <vector> #include <deque> #include <complex> -typedef std::complex<float> complexf; - /* Helper class to calculate Peak-to-average-power ratio. * Definition of PAPR: * @@ -53,6 +50,8 @@ typedef std::complex<float> complexf; */ class PAPRStats { + typedef std::complex<float> complexf; + public: PAPRStats(size_t num_blocks_to_accumulate); diff --git a/src/PhaseReference.cpp b/src/PhaseReference.cpp index 568e15e..71dec87 100644 --- a/src/PhaseReference.cpp +++ b/src/PhaseReference.cpp @@ -29,12 +29,10 @@ #include <stdexcept> -using complexf = std::complex<float>; - /* ETSI EN 300 401 Table 43 (Clause 14.3.2) * Contains h_{i,k} values */ -const uint8_t PhaseReference::d_h[4][32] = { +static const uint8_t d_h[4][32] = { /* h0 */ { 0, 2, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 2, 2, 1, 1, 0, 2, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 2, 2, 1, 1 }, /* h1 */ { 0, 3, 2, 3, 0, 1, 3, 0, 2, 1, 2, 3, 2, 3, 3, 0, @@ -54,41 +52,80 @@ const uint8_t PhaseReference::d_h[4][32] = { * Tables 44 to 47 describe the frequency interleaving done in * FrequencyInterleaver. */ -PhaseReference::PhaseReference(unsigned int dabmode) : +PhaseReference::PhaseReference(unsigned int dabmode, bool fixedPoint) : ModInput(), - d_dabmode(dabmode) + d_dabmode(dabmode), + d_fixedPoint(fixedPoint) { PDEBUG("PhaseReference::PhaseReference(%u) @ %p\n", dabmode, this); switch (d_dabmode) { case 1: d_carriers = 1536; - d_num = 2048; break; case 2: d_carriers = 384; - d_num = 512; break; case 3: d_carriers = 192; - d_num = 256; break; case 4: d_dabmode = 0; case 0: d_carriers = 768; - d_num = 1024; break; default: throw std::runtime_error( "PhaseReference::PhaseReference DAB mode not valid!"); } - d_dataIn.resize(d_carriers); - fillData(); + + if (d_fixedPoint) { + d_phaseRefFixed.fillData(d_dabmode, d_carriers); + } + else { + d_phaseRefCF32.fillData(d_dabmode, d_carriers); + } } -complexf convert(uint8_t data) { +static const int table[][48][2] = { + { // Mode 0/4 + // Positive part + { 0, 0 }, { 3, 1 }, { 2, 0 }, { 1, 2 }, { 0, 0 }, { 3, 1 }, + { 2, 2 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 3 }, { 1, 0 }, + // Negative part + { 0, 0 }, { 1, 1 }, { 2, 1 }, { 3, 2 }, { 0, 2 }, { 1, 2 }, + { 2, 0 }, { 3, 3 }, { 0, 3 }, { 1, 1 }, { 2, 3 }, { 3, 2 }, + }, + { // Mode 1 + // Positive part + { 0, 3 }, { 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 2 }, { 3, 2 }, + { 2, 1 }, { 1, 0 }, { 0, 2 }, { 3, 2 }, { 2, 3 }, { 1, 3 }, + { 0, 0 }, { 3, 2 }, { 2, 1 }, { 1, 3 }, { 0, 3 }, { 3, 3 }, + { 2, 3 }, { 1, 0 }, { 0, 3 }, { 3, 0 }, { 2, 1 }, { 1, 1 }, + // Negative part + { 0, 1 }, { 1, 2 }, { 2, 0 }, { 3, 1 }, { 0, 3 }, { 1, 2 }, + { 2, 2 }, { 3, 3 }, { 0, 2 }, { 1, 1 }, { 2, 2 }, { 3, 3 }, + { 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 3 }, { 0, 2 }, { 1, 2 }, + { 2, 2 }, { 3, 1 }, { 0, 1 }, { 1, 3 }, { 2, 1 }, { 3, 2 }, + }, + { // Mode 2 + // Positive part + { 2, 0 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 0 }, { 1, 3 }, + // Negative part + { 0, 2 }, { 1, 3 }, { 2, 2 }, { 3, 2 }, { 0, 1 }, { 1, 2 }, + }, + { // Mode 3 + // Positive part + { 3, 2 }, { 2, 2 }, { 1, 2 }, + // Negative part + { 0, 2 }, { 1, 3 }, { 2, 0 }, + }, +}; + + +template <> +complexf PhaseRefGen<complexf>::convert(uint8_t data) { const complexf value[] = { complexf(1, 0), complexf(0, 1), @@ -98,62 +135,37 @@ complexf convert(uint8_t data) { return value[data % 4]; } +template <> +complexfix PhaseRefGen<complexfix>::convert(uint8_t data) { + constexpr auto one = fixed_16{1}; + constexpr auto zero = fixed_16{0}; -void PhaseReference::fillData() -{ - const int table[][48][2] = { - { // Mode 0/4 - // Positive part - { 0, 0 }, { 3, 1 }, { 2, 0 }, { 1, 2 }, { 0, 0 }, { 3, 1 }, - { 2, 2 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 3 }, { 1, 0 }, - // Negative part - { 0, 0 }, { 1, 1 }, { 2, 1 }, { 3, 2 }, { 0, 2 }, { 1, 2 }, - { 2, 0 }, { 3, 3 }, { 0, 3 }, { 1, 1 }, { 2, 3 }, { 3, 2 }, - }, - { // Mode 1 - // Positive part - { 0, 3 }, { 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 2 }, { 3, 2 }, - { 2, 1 }, { 1, 0 }, { 0, 2 }, { 3, 2 }, { 2, 3 }, { 1, 3 }, - { 0, 0 }, { 3, 2 }, { 2, 1 }, { 1, 3 }, { 0, 3 }, { 3, 3 }, - { 2, 3 }, { 1, 0 }, { 0, 3 }, { 3, 0 }, { 2, 1 }, { 1, 1 }, - // Negative part - { 0, 1 }, { 1, 2 }, { 2, 0 }, { 3, 1 }, { 0, 3 }, { 1, 2 }, - { 2, 2 }, { 3, 3 }, { 0, 2 }, { 1, 1 }, { 2, 2 }, { 3, 3 }, - { 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 3 }, { 0, 2 }, { 1, 2 }, - { 2, 2 }, { 3, 1 }, { 0, 1 }, { 1, 3 }, { 2, 1 }, { 3, 2 }, - }, - { // Mode 2 - // Positive part - { 2, 0 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 0 }, { 1, 3 }, - // Negative part - { 0, 2 }, { 1, 3 }, { 2, 2 }, { 3, 2 }, { 0, 1 }, { 1, 2 }, - }, - { // Mode 3 - // Positive part - { 3, 2 }, { 2, 2 }, { 1, 2 }, - // Negative part - { 0, 2 }, { 1, 3 }, { 2, 0 }, - }, + const complexfix value[] = { + complexfix(one, zero), + complexfix(zero, one), + complexfix(-one, zero), + complexfix(zero, -one), }; + return value[data % 4]; +} - if (d_dabmode > 3) { - throw std::runtime_error( - "PhaseReference::fillData invalid DAB mode!"); - } - - if (d_dataIn.size() != d_carriers) { +template <typename T> +void PhaseRefGen<T>::fillData(unsigned int dabmode, size_t carriers) +{ + dataIn.resize(carriers); + if (dataIn.size() != carriers) { throw std::runtime_error( - "PhaseReference::fillData d_dataIn has incorrect size!"); + "PhaseReference::fillData dataIn has incorrect size!"); } for (size_t index = 0, offset = 0; - index < d_dataIn.size(); + index < dataIn.size(); ++offset) { for (size_t k = 0; k < 32; ++k) { - d_dataIn[index++] = convert( - d_h[ table[d_dabmode][offset][0] ][k] + - table[d_dabmode][offset][1] ); + dataIn[index++] = convert( + d_h[ table[dabmode][offset][0] ][k] + + table[dabmode][offset][1] ); } } } @@ -163,7 +175,12 @@ int PhaseReference::process(Buffer* dataOut) { PDEBUG("PhaseReference::process(dataOut: %p)\n", dataOut); - dataOut->setData(&d_dataIn[0], d_carriers * sizeof(complexf)); + if (d_fixedPoint) { + dataOut->setData(d_phaseRefFixed.dataIn.data(), d_carriers * sizeof(complexfix)); + } + else { + dataOut->setData(d_phaseRefCF32.dataIn.data(), d_carriers * sizeof(complexf)); + } return 1; } diff --git a/src/PhaseReference.h b/src/PhaseReference.h index 6ecdc4e..735009c 100644 --- a/src/PhaseReference.h +++ b/src/PhaseReference.h @@ -32,25 +32,33 @@ #include "ModPlugin.h" -#include <cstddef> -#include <complex> #include <vector> +#include <cstddef> + +template <typename T> +struct PhaseRefGen { + std::vector<T> dataIn; + void fillData(unsigned int dabmode, size_t carriers); + + private: + T convert(uint8_t data); +}; + class PhaseReference : public ModInput { public: - PhaseReference(unsigned int dabmode); + PhaseReference(unsigned int dabmode, bool fixedPoint); int process(Buffer* dataOut) override; const char* name() override { return "PhaseReference"; } protected: unsigned int d_dabmode; + bool d_fixedPoint; size_t d_carriers; - size_t d_num; - const static uint8_t d_h[4][32]; - std::vector<std::complex<float> > d_dataIn; - void fillData(); + PhaseRefGen<complexf> d_phaseRefCF32; + PhaseRefGen<complexfix> d_phaseRefFixed; }; diff --git a/src/QpskSymbolMapper.cpp b/src/QpskSymbolMapper.cpp index e26853a..c12ad80 100644 --- a/src/QpskSymbolMapper.cpp +++ b/src/QpskSymbolMapper.cpp @@ -23,7 +23,6 @@ #include <cstdio> #include <cstring> #include <stdexcept> -#include <complex> #include <cmath> #ifdef __SSE__ # include <xmmintrin.h> @@ -32,12 +31,10 @@ #include "QpskSymbolMapper.h" #include "PcDebug.h" - -typedef std::complex<float> complexf; - -QpskSymbolMapper::QpskSymbolMapper(size_t carriers) : +QpskSymbolMapper::QpskSymbolMapper(size_t carriers, bool fixedPoint) : ModCodec(), - d_carriers(carriers) { } + m_fixedPoint(fixedPoint), + m_carriers(carriers) { } int QpskSymbolMapper::process(Buffer* const dataIn, Buffer* dataOut) { @@ -45,112 +42,172 @@ int QpskSymbolMapper::process(Buffer* const dataIn, Buffer* dataOut) "(dataIn: %p, dataOut: %p)\n", dataIn, dataOut); - dataOut->setLength(dataIn->getLength() * 4 * 2 * sizeof(float)); // 4 output complex symbols per input byte -#ifdef __SSE__ - const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData()); - __m128* out = reinterpret_cast<__m128*>(dataOut->getData()); - - if (dataIn->getLength() % (d_carriers / 4) != 0) { - throw std::runtime_error( - "QpskSymbolMapper::process input size not valid: " + - std::to_string(dataIn->getLength()) + - "(input size) % (" + std::to_string(d_carriers) + - " (carriers) / 4) != 0"); - } + // 4 output complex symbols per input byte + + if (m_fixedPoint) { + dataOut->setLength(dataIn->getLength() * 4 * sizeof(complexfix)); + + using fixed_t = complexfix::value_type; - const static __m128 symbols[16] = { - _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2) - }; - size_t inOffset = 0; - size_t outOffset = 0; - uint8_t tmp = 0; - for (size_t i = 0; i < dataIn->getLength(); i += d_carriers / 4) { - for (size_t j = 0; j < d_carriers / 8; ++j) { - tmp = (in[inOffset] & 0xc0) >> 4; - tmp |= (in[inOffset + (d_carriers / 8)] & 0xc0) >> 6; - out[outOffset] = symbols[tmp]; - tmp = (in[inOffset] & 0x30) >> 2; - tmp |= (in[inOffset + (d_carriers / 8)] & 0x30) >> 4; - out[outOffset + 1] = symbols[tmp]; - tmp = (in[inOffset] & 0x0c); - tmp |= (in[inOffset + (d_carriers / 8)] & 0x0c) >> 2; - out[outOffset + 2] = symbols[tmp]; - tmp = (in[inOffset] & 0x03) << 2; - tmp |= (in[inOffset + (d_carriers / 8)] & 0x03); - out[outOffset + 3] = symbols[tmp]; - ++inOffset; - outOffset += 4; + const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData()); + fixed_t* out = reinterpret_cast<fixed_t*>(dataOut->getData()); + + if (dataIn->getLength() % (m_carriers / 4) != 0) { + throw std::runtime_error( + "QpskSymbolMapper::process input size not valid!"); + } + + constexpr fixed_t v = static_cast<fixed_t>(M_SQRT1_2); + + const static fixed_t symbols[16][4] = { + { v, v, v, v}, + { v, v, v, -v}, + { v, -v, v, v}, + { v, -v, v, -v}, + { v, v, -v, v}, + { v, v, -v, -v}, + { v, -v, -v, v}, + { v, -v, -v, -v}, + {-v, v, v, v}, + {-v, v, v, -v}, + {-v, -v, v, v}, + {-v, -v, v, -v}, + {-v, v, -v, v}, + {-v, v, -v, -v}, + {-v, -v, -v, v}, + {-v, -v, -v, -v} + }; + size_t inOffset = 0; + size_t outOffset = 0; + uint8_t tmp; + for (size_t i = 0; i < dataIn->getLength(); i += m_carriers / 4) { + for (size_t j = 0; j < m_carriers / 8; ++j) { + tmp = (in[inOffset] & 0xc0) >> 4; + tmp |= (in[inOffset + (m_carriers / 8)] & 0xc0) >> 6; + memcpy(&out[outOffset], symbols[tmp], sizeof(fixed_t) * 4); + tmp = (in[inOffset] & 0x30) >> 2; + tmp |= (in[inOffset + (m_carriers / 8)] & 0x30) >> 4; + memcpy(&out[outOffset + 4], symbols[tmp], sizeof(fixed_t) * 4); + tmp = (in[inOffset] & 0x0c); + tmp |= (in[inOffset + (m_carriers / 8)] & 0x0c) >> 2; + memcpy(&out[outOffset + 8], symbols[tmp], sizeof(fixed_t) * 4); + tmp = (in[inOffset] & 0x03) << 2; + tmp |= (in[inOffset + (m_carriers / 8)] & 0x03); + memcpy(&out[outOffset + 12], symbols[tmp], sizeof(fixed_t) * 4); + ++inOffset; + outOffset += 4*4; + } + inOffset += m_carriers / 8; } - inOffset += d_carriers / 8; } + else { + dataOut->setLength(dataIn->getLength() * 4 * sizeof(complexf)); +#ifdef __SSE__ + const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData()); + __m128* out = reinterpret_cast<__m128*>(dataOut->getData()); + + if (dataIn->getLength() % (m_carriers / 4) != 0) { + throw std::runtime_error( + "QpskSymbolMapper::process input size not valid: " + + std::to_string(dataIn->getLength()) + + "(input size) % (" + std::to_string(m_carriers) + + " (carriers) / 4) != 0"); + } + + const static __m128 symbols[16] = { + _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2) + }; + size_t inOffset = 0; + size_t outOffset = 0; + uint8_t tmp = 0; + for (size_t i = 0; i < dataIn->getLength(); i += m_carriers / 4) { + for (size_t j = 0; j < m_carriers / 8; ++j) { + tmp = (in[inOffset] & 0xc0) >> 4; + tmp |= (in[inOffset + (m_carriers / 8)] & 0xc0) >> 6; + out[outOffset] = symbols[tmp]; + tmp = (in[inOffset] & 0x30) >> 2; + tmp |= (in[inOffset + (m_carriers / 8)] & 0x30) >> 4; + out[outOffset + 1] = symbols[tmp]; + tmp = (in[inOffset] & 0x0c); + tmp |= (in[inOffset + (m_carriers / 8)] & 0x0c) >> 2; + out[outOffset + 2] = symbols[tmp]; + tmp = (in[inOffset] & 0x03) << 2; + tmp |= (in[inOffset + (m_carriers / 8)] & 0x03); + out[outOffset + 3] = symbols[tmp]; + ++inOffset; + outOffset += 4; + } + inOffset += m_carriers / 8; + } #else // !__SSE__ - const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData()); - float* out = reinterpret_cast<float*>(dataOut->getData()); - if (dataIn->getLength() % (d_carriers / 4) != 0) { - throw std::runtime_error( - "QpskSymbolMapper::process input size not valid!"); - } - if (dataOut->getLength() / sizeof(float) != dataIn->getLength() * 4 * 2) { // 4 output complex symbols per input byte - throw std::runtime_error( - "QpskSymbolMapper::process output size not valid!"); - } + const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData()); + float* out = reinterpret_cast<float*>(dataOut->getData()); + if (dataIn->getLength() % (m_carriers / 4) != 0) { + throw std::runtime_error( + "QpskSymbolMapper::process input size not valid!"); + } + if (dataOut->getLength() / sizeof(float) != dataIn->getLength() * 4 * 2) { // 4 output complex symbols per input byte + throw std::runtime_error( + "QpskSymbolMapper::process output size not valid!"); + } - const static float symbols[16][4] = { - { M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, - { M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, - { M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, - { M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, - { M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, - { M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}, - { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, - { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}, - {-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, - {-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, - {-M_SQRT1_2,- M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, - {-M_SQRT1_2,- M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, - {-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, - {-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}, - {-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, - {-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2} - }; - size_t inOffset = 0; - size_t outOffset = 0; - uint8_t tmp; - for (size_t i = 0; i < dataIn->getLength(); i += d_carriers / 4) { - for (size_t j = 0; j < d_carriers / 8; ++j) { - tmp = (in[inOffset] & 0xc0) >> 4; - tmp |= (in[inOffset + (d_carriers / 8)] & 0xc0) >> 6; - memcpy(&out[outOffset], symbols[tmp], sizeof(float) * 4); - tmp = (in[inOffset] & 0x30) >> 2; - tmp |= (in[inOffset + (d_carriers / 8)] & 0x30) >> 4; - memcpy(&out[outOffset + 4], symbols[tmp], sizeof(float) * 4); - tmp = (in[inOffset] & 0x0c); - tmp |= (in[inOffset + (d_carriers / 8)] & 0x0c) >> 2; - memcpy(&out[outOffset + 8], symbols[tmp], sizeof(float) * 4); - tmp = (in[inOffset] & 0x03) << 2; - tmp |= (in[inOffset + (d_carriers / 8)] & 0x03); - memcpy(&out[outOffset + 12], symbols[tmp], sizeof(float) * 4); - ++inOffset; - outOffset += 4*4; + const static float symbols[16][4] = { + { M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, + { M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, + { M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, + { M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, + { M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, + { M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}, + { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, + { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}, + {-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, + {-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, + {-M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, + {-M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, + {-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, + {-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}, + {-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, + {-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2} + }; + size_t inOffset = 0; + size_t outOffset = 0; + uint8_t tmp; + for (size_t i = 0; i < dataIn->getLength(); i += m_carriers / 4) { + for (size_t j = 0; j < m_carriers / 8; ++j) { + tmp = (in[inOffset] & 0xc0) >> 4; + tmp |= (in[inOffset + (m_carriers / 8)] & 0xc0) >> 6; + memcpy(&out[outOffset], symbols[tmp], sizeof(float) * 4); + tmp = (in[inOffset] & 0x30) >> 2; + tmp |= (in[inOffset + (m_carriers / 8)] & 0x30) >> 4; + memcpy(&out[outOffset + 4], symbols[tmp], sizeof(float) * 4); + tmp = (in[inOffset] & 0x0c); + tmp |= (in[inOffset + (m_carriers / 8)] & 0x0c) >> 2; + memcpy(&out[outOffset + 8], symbols[tmp], sizeof(float) * 4); + tmp = (in[inOffset] & 0x03) << 2; + tmp |= (in[inOffset + (m_carriers / 8)] & 0x03); + memcpy(&out[outOffset + 12], symbols[tmp], sizeof(float) * 4); + ++inOffset; + outOffset += 4*4; + } + inOffset += m_carriers / 8; } - inOffset += d_carriers / 8; - } #endif // __SSE__ + } return 1; } diff --git a/src/QpskSymbolMapper.h b/src/QpskSymbolMapper.h index dbcf4dd..6cf7a2e 100644 --- a/src/QpskSymbolMapper.h +++ b/src/QpskSymbolMapper.h @@ -31,12 +31,13 @@ class QpskSymbolMapper : public ModCodec { public: - QpskSymbolMapper(size_t carriers); + QpskSymbolMapper(size_t carriers, bool fixedPoint); int process(Buffer* const dataIn, Buffer* dataOut); const char* name() { return "QpskSymbolMapper"; } protected: - size_t d_carriers; + bool m_fixedPoint; + size_t m_carriers; }; diff --git a/src/Resampler.h b/src/Resampler.h index d1a9f7a..2c810f6 100644 --- a/src/Resampler.h +++ b/src/Resampler.h @@ -37,9 +37,6 @@ #define FFT_TYPE fftwf_complex #define FFT_PLAN fftwf_plan -#include <complex> -typedef std::complex<float> complexf; - class Resampler : public ModCodec { diff --git a/src/SignalMultiplexer.cpp b/src/SignalMultiplexer.cpp index 1d95bdd..d4955d0 100644 --- a/src/SignalMultiplexer.cpp +++ b/src/SignalMultiplexer.cpp @@ -22,25 +22,20 @@ #include "SignalMultiplexer.h" #include "PcDebug.h" -#include <stdio.h> -#include <stdexcept> +#include <cstdio> #include <assert.h> -#include <string.h> -SignalMultiplexer::SignalMultiplexer(size_t framesize) : - ModMux(), - d_frameSize(framesize) +SignalMultiplexer::SignalMultiplexer() : + ModMux() { - PDEBUG("SignalMultiplexer::SignalMultiplexer(%zu) @ %p\n", framesize, this); - + PDEBUG("SignalMultiplexer::SignalMultiplexer() @ %p\n", this); } SignalMultiplexer::~SignalMultiplexer() { PDEBUG("SignalMultiplexer::~SignalMultiplexer() @ %p\n", this); - } diff --git a/src/SignalMultiplexer.h b/src/SignalMultiplexer.h index 5186a8d..1f6bc12 100644 --- a/src/SignalMultiplexer.h +++ b/src/SignalMultiplexer.h @@ -36,7 +36,7 @@ class SignalMultiplexer : public ModMux { public: - SignalMultiplexer(size_t frameSize); + SignalMultiplexer(); virtual ~SignalMultiplexer(); SignalMultiplexer(const SignalMultiplexer&); SignalMultiplexer& operator=(const SignalMultiplexer&); @@ -44,8 +44,5 @@ public: int process(std::vector<Buffer*> dataIn, Buffer* dataOut); const char* name() { return "SignalMultiplexer"; } - -protected: - size_t d_frameSize; }; diff --git a/src/TII.cpp b/src/TII.cpp index 2656cbf..bce15aa 100644 --- a/src/TII.cpp +++ b/src/TII.cpp @@ -2,7 +2,7 @@ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty the Queen in Right of Canada (Communications Research Center Canada) - Copyright (C) 2023 + Copyright (C) 2024 Matthias P. Braendli, matthias.braendli@mpb.li http://opendigitalradio.org @@ -27,11 +27,8 @@ #include "TII.h" #include "PcDebug.h" -#include <stdio.h> -#include <stdexcept> -#include <string.h> - -typedef std::complex<float> complexf; +#include <cstdio> +#include <cstring> /* TII pattern for TM I, II, IV */ const int pattern_tm1_2_4[][8] = { // {{{ @@ -106,11 +103,12 @@ const int pattern_tm1_2_4[][8] = { // {{{ {1,1,1,0,1,0,0,0}, {1,1,1,1,0,0,0,0} }; // }}} -TII::TII(unsigned int dabmode, tii_config_t& tii_config) : +TII::TII(unsigned int dabmode, tii_config_t& tii_config, bool fixedPoint) : ModCodec(), RemoteControllable("tii"), m_dabmode(dabmode), - m_conf(tii_config) + m_conf(tii_config), + m_fixedPoint(fixedPoint) { PDEBUG("TII::TII(%u) @ %p\n", dabmode, this); @@ -171,56 +169,72 @@ const char* TII::name() return m_name.c_str(); } +template<typename T> +void do_process(size_t carriers, bool old_variant, const std::vector<bool>& Acp, Buffer* dataIn, Buffer* dataOut) +{ + const T* in = reinterpret_cast<const T*>(dataIn->getData()); + T* out = reinterpret_cast<T*>(dataOut->getData()); + + /* Normalise the TII carrier power according to ETSI TR 101 496-3 + * Clause 5.4.2.2 Paragraph 7: + * + * > The ratio of carriers in a TII symbol to a normal DAB symbol + * > is 1:48 for all Modes, so that the signal power in a TII symbol is + * > 16 dB below the signal power of the other symbols. + * + * This is because we only enable 32 out of 1536 carriers, not because + * every carrier is lower power. + */ + for (size_t i = 0; i < Acp.size(); i++) { + /* See header file for an explanation of the old variant. + * + * A_{c,p}(k) and A_{c,p}(k-1) are never both simultaneously true, + * so instead of doing the sum inside z_{m,0,k}, we could do + * + * if (m_Acp[i]) out[i] = in[i]; + * if (m_Acp[i-1]) out[i] = in[i-1] + * + * (Considering only the new variant) + * + * To avoid messing with indices, we substitute j = i-1 + * + * if (m_Acp[i]) out[i] = in[i]; + * if (m_Acp[j]) out[j+1] = in[j] + * + * and fuse the two conditionals together: + */ + if (Acp[i]) { + out[i] = in[i]; + out[i+1] = (old_variant ? in[i+1] : in[i]); + } + } +} int TII::process(Buffer* dataIn, Buffer* dataOut) { + const size_t sizeof_samples = m_fixedPoint ? sizeof(complexfix) : sizeof(complexf); + PDEBUG("TII::process(dataOut: %p)\n", dataOut); if ( (dataIn == NULL) or - (dataIn->getLength() != m_carriers * sizeof(complexf))) { + (dataIn->getLength() != m_carriers * sizeof_samples)) { throw TIIError("TII::process input size not valid!"); } - dataOut->setLength(m_carriers * sizeof(complexf)); - memset(dataOut->getData(), 0, dataOut->getLength()); + dataOut->setLength(m_carriers * sizeof_samples); + memset(dataOut->getData(), 0, dataOut->getLength()); if (m_conf.enable and m_insert) { std::lock_guard<std::mutex> lock(m_enabled_carriers_mutex); - complexf* in = reinterpret_cast<complexf*>(dataIn->getData()); - complexf* out = reinterpret_cast<complexf*>(dataOut->getData()); - - /* Normalise the TII carrier power according to ETSI TR 101 496-3 - * Clause 5.4.2.2 Paragraph 7: - * - * > The ratio of carriers in a TII symbol to a normal DAB symbol - * > is 1:48 for all Modes, so that the signal power in a TII symbol is - * > 16 dB below the signal power of the other symbols. - * - * This is because we only enable 32 out of 1536 carriers, not because - * every carrier is lower power. - */ - for (size_t i = 0; i < m_Acp.size(); i++) { - /* See header file for an explanation of the old variant. - * - * A_{c,p}(k) and A_{c,p}(k-1) are never both simultaneously true, - * so instead of doing the sum inside z_{m,0,k}, we could do - * - * if (m_Acp[i]) out[i] = in[i]; - * if (m_Acp[i-1]) out[i] = in[i-1] - * - * (Considering only the new variant) - * - * To avoid messing with indices, we substitute j = i-1 - * - * if (m_Acp[i]) out[i] = in[i]; - * if (m_Acp[j]) out[j+1] = in[j] - * - * and fuse the two conditionals together: - */ - if (m_Acp[i]) { - out[i] = in[i]; - out[i+1] = (m_conf.old_variant ? in[i+1] : in[i]); - } + if (m_fixedPoint) { + do_process<complexfix>( + m_carriers, m_conf.old_variant, m_Acp, + dataIn, dataOut); + } + else { + do_process<complexf>( + m_carriers, m_conf.old_variant, m_Acp, + dataIn, dataOut); } } @@ -2,7 +2,7 @@ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty the Queen in Right of Canada (Communications Research Center Canada) - Copyright (C) 2023 + Copyright (C) 2024 Matthias P. Braendli, matthias.braendli@mpb.li http://opendigitalradio.org @@ -36,8 +36,6 @@ #include "RemoteControl.h" #include <cstddef> -#include <thread> -#include <complex> #include <vector> #include <string> @@ -81,7 +79,7 @@ class TIIError : public std::runtime_error { class TII : public ModCodec, public RemoteControllable { public: - TII(unsigned int dabmode, tii_config_t& tii_config); + TII(unsigned int dabmode, tii_config_t& tii_config, bool fixedPoint); virtual ~TII() {} int process(Buffer* dataIn, Buffer* dataOut) override; @@ -106,6 +104,8 @@ class TII : public ModCodec, public RemoteControllable // Remote-controllable settings tii_config_t& m_conf; + bool m_fixedPoint = false; + // Internal flag when to insert TII bool m_insert = true; diff --git a/src/Utils.cpp b/src/Utils.cpp index fa2fd5d..f947acd 100644 --- a/src/Utils.cpp +++ b/src/Utils.cpp @@ -66,6 +66,9 @@ static void printHeader() #if defined(__SSE__) "SSE " << #endif +#if defined(__ARM_NEON) + "NEON " << +#endif "\n"; } diff --git a/src/output/Dexter.h b/src/output/Dexter.h index d4f425f..f8a17ba 100644 --- a/src/output/Dexter.h +++ b/src/output/Dexter.h @@ -98,16 +98,16 @@ class Dexter : public Output::SDRDevice SDRDeviceConfig& m_conf; - struct iio_context* m_ctx = nullptr; - struct iio_device* m_dexter_dsp_tx = nullptr; + struct iio_context *m_ctx = nullptr; + struct iio_device *m_dexter_dsp_tx = nullptr; - struct iio_device* m_ad9957 = nullptr; - struct iio_device* m_ad9957_tx0 = nullptr; - struct iio_channel* m_tx_channel = nullptr; + struct iio_device *m_ad9957 = nullptr; + struct iio_device *m_ad9957_tx0 = nullptr; + struct iio_channel *m_tx_channel = nullptr; struct iio_buffer *m_buffer = nullptr; /* Underflows are counted in a separate thread */ - struct iio_context* m_underflow_ctx = nullptr; + struct iio_context *m_underflow_ctx = nullptr; std::atomic<bool> m_running = ATOMIC_VAR_INIT(false); std::thread m_underflow_read_thread; void underflow_read_process(); diff --git a/src/output/SDR.cpp b/src/output/SDR.cpp index 594171f..22398c7 100644 --- a/src/output/SDR.cpp +++ b/src/output/SDR.cpp @@ -34,6 +34,7 @@ #include "RemoteControl.h" #include "Utils.h" +#include <chrono> #include <cmath> #include <iostream> #include <assert.h> diff --git a/src/output/SDR.h b/src/output/SDR.h index 960de0c..86bf295 100644 --- a/src/output/SDR.h +++ b/src/output/SDR.h @@ -34,16 +34,12 @@ DESCRIPTION: # include <config.h> #endif -#include <chrono> #include "ModPlugin.h" -#include "EtiReader.h" #include "output/SDRDevice.h" #include "output/Feedback.h" namespace Output { -using complexf = std::complex<float>; - class SDR : public ModOutput, public ModMetadata, public RemoteControllable { public: SDR(SDRDeviceConfig& config, std::shared_ptr<SDRDevice> device); diff --git a/src/output/SDRDevice.h b/src/output/SDRDevice.h index 378829c..ec9373d 100644 --- a/src/output/SDRDevice.h +++ b/src/output/SDRDevice.h @@ -38,9 +38,7 @@ DESCRIPTION: #include <string> #include <vector> #include <complex> -#include <variant> #include <optional> -#include <unordered_map> #include "TimestampDecoder.h" @@ -59,6 +57,8 @@ struct SDRDeviceConfig { std::string tx_antenna; std::string rx_antenna; + bool fixedPoint = false; + long masterClockRate = 32768000; unsigned sampleRate = 2048000; double frequency = 0.0; diff --git a/src/output/UHD.cpp b/src/output/UHD.cpp index e097692..b30f9e1 100644 --- a/src/output/UHD.cpp +++ b/src/output/UHD.cpp @@ -31,10 +31,7 @@ //#define MDEBUG(fmt, args...) fprintf(LOG, fmt , ## args) #define MDEBUG(fmt, args...) -#include "PcDebug.h" #include "Log.h" -#include "RemoteControl.h" -#include "Utils.h" #include <thread> #include <iomanip> @@ -52,14 +49,12 @@ # include <uhd/utils/thread_priority.hpp> #endif - -#include <cmath> #include <iostream> -#include <assert.h> +#include <cmath> +#include <cassert> #include <stdexcept> -#include <stdio.h> +#include <cstdio> #include <time.h> -#include <errno.h> #include <unistd.h> #include <pthread.h> @@ -235,7 +230,8 @@ UHD::UHD(SDRDeviceConfig& config) : m_usrp->set_rx_gain(m_conf.rxgain); etiLog.log(debug, "OutputUHD:Actual RX Gain: %f", m_usrp->get_rx_gain()); - const uhd::stream_args_t stream_args("fc32"); //complex floats + const uhd::stream_args_t stream_args( + m_conf.fixedPoint ? "sc16" : "fc32"); m_rx_stream = m_usrp->get_rx_stream(stream_args); m_tx_stream = m_usrp->get_tx_stream(stream_args); @@ -319,8 +315,9 @@ double UHD::get_bandwidth(void) const void UHD::transmit_frame(struct FrameData&& frame) { const double tx_timeout = 20.0; - const size_t sizeIn = frame.buf.size() / sizeof(complexf); - const complexf* in_data = reinterpret_cast<const complexf*>(&frame.buf[0]); + + const size_t sample_size = m_conf.fixedPoint ? (2 * sizeof(int16_t)) : sizeof(complexf); + const size_t sizeIn = frame.buf.size() / sample_size; uhd::tx_metadata_t md_tx; @@ -353,9 +350,9 @@ void UHD::transmit_frame(struct FrameData&& frame) samps_to_send <= usrp_max_num_samps ); m_require_timestamp_refresh = false; - //send a single packet + // send a single packet size_t num_tx_samps = m_tx_stream->send( - &in_data[num_acc_samps], + frame.buf.data() + sample_size * num_acc_samps, samps_to_send, md_tx, tx_timeout); etiLog.log(trace, "UHD,sent %zu of %zu", num_tx_samps, samps_to_send); diff --git a/src/output/UHD.h b/src/output/UHD.h index 9891c7a..c4f1a45 100644 --- a/src/output/UHD.h +++ b/src/output/UHD.h @@ -45,12 +45,9 @@ DESCRIPTION: #include <atomic> #include <thread> -#include "Log.h" #include "output/SDR.h" #include "output/USRPTime.h" #include "TimestampDecoder.h" -#include "RemoteControl.h" -#include "ThreadsafeQueue.h" #include <stdio.h> #include <sys/types.h> |