45 files changed, 1368 insertions, 731 deletions
diff --git a/src/Buffer.h b/src/Buffer.h
index af52e93..2c2a65e 100644
--- a/src/Buffer.h
+++ b/src/Buffer.h
@@ -33,9 +33,17 @@
 
 #include <vector>
 #include <memory>
+#include <complex>
+#include "fpm/fixed.hpp"
+
+typedef std::complex<float> complexf;
+
+using fixed_16 = fpm::fixed<std::int16_t, std::int32_t, 14>;
+typedef std::complex<fixed_16> complexfix;
+typedef std::complex<fpm::fixed_16_16> complexfix_wide;
 
 /* Buffer is a container for a byte array, which is memory-aligned
- * to 32 bytes for SSE performance.
+ * to 32 bytes for SIMD performance.
  *
  * The allocation/freeing of the data is handled internally.
  */
diff --git a/src/CicEqualizer.h b/src/CicEqualizer.h
index 792da02..4510d0c 100644
--- a/src/CicEqualizer.h
+++ b/src/CicEqualizer.h
@@ -25,18 +25,10 @@
 #   include <config.h>
 #endif
 
-
 #include "ModPlugin.h"
 
 #include <vector>
 #include <sys/types.h>
-#include <complex>
-#ifdef __SSE__
-#   include <xmmintrin.h>
-#endif
-
-
-typedef std::complex<float> complexf;
 
 class CicEqualizer : public ModCodec
 {
diff --git a/src/ConfigParser.cpp b/src/ConfigParser.cpp
index fb2c1a1..c92a520 100644
--- a/src/ConfigParser.cpp
+++ b/src/ConfigParser.cpp
@@ -63,6 +63,27 @@ static GainMode parse_gainmode(const std::string &gainMode_setting)
     throw std::runtime_error("Configuration error");
 }
 
+static FFTEngine parse_fft_engine(const std::string &fft_engine_setting)
+{
+    string fft_engine_minuscule(fft_engine_setting);
+    std::transform(fft_engine_minuscule.begin(), fft_engine_minuscule.end(),
+            fft_engine_minuscule.begin(), ::tolower);
+
+    if (fft_engine_minuscule == "fftw") {
+        return FFTEngine::FFTW;
+    }
+    else if (fft_engine_minuscule == "kiss") {
+        return FFTEngine::KISS;
+    }
+    else if (fft_engine_minuscule == "dexter") {
+        return FFTEngine::DEXTER;
+    }
+
+    cerr << "Modulator fft_engine setting '" << fft_engine_setting <<
+        "' not recognised." << endl;
+    throw std::runtime_error("Configuration error");
+}
+
 static void parse_configfile(
         const std::string& configuration_file,
         mod_settings_t& mod_settings)
@@ -156,6 +177,9 @@ static void parse_configfile(
             mod_settings.showProcessTime);
 
     // modulator parameters:
+    const string fft_engine_setting = pt.Get("modulator.fft_engine", "fftw");
+    mod_settings.fftEngine = parse_fft_engine(fft_engine_setting);
+
     const string gainMode_setting = pt.Get("modulator.gainmode", "var");
     mod_settings.gainMode = parse_gainmode(gainMode_setting);
     mod_settings.gainmodeVariance = pt.GetReal("modulator.normalise_variance",
diff --git a/src/ConfigParser.h b/src/ConfigParser.h
index ae76dee..3bacfdd 100644
--- a/src/ConfigParser.h
+++ b/src/ConfigParser.h
@@ -36,6 +36,12 @@
 #include "TII.h"
 #include "output/SDRDevice.h"
 
+enum class FFTEngine {
+    FFTW, // floating point in software
+    KISS, // fixed-point in software
+    DEXTER // fixed-point in FPGA
+};
+
 struct mod_settings_t {
     std::string startupCheck;
 
@@ -51,6 +57,8 @@ struct mod_settings_t {
     bool useLimeOutput = false;
     bool useBladeRFOutput = false;
 
+    FFTEngine fftEngine = FFTEngine::FFTW;
+
     size_t outputRate = 2048000;
     size_t clockRate = 0;
     unsigned dabMode = 1;
diff --git a/src/DabMod.cpp b/src/DabMod.cpp
index 3b072c1..7866818 100644
--- a/src/DabMod.cpp
+++ b/src/DabMod.cpp
@@ -31,10 +31,8 @@
 #endif
 
 #include <memory>
-#include <complex>
 #include <string>
 #include <iostream>
-#include <iomanip>
 #include <cstdlib>
 #include <stdexcept>
 #include <cstdio>
@@ -51,7 +49,6 @@
 #include "Utils.h"
 #include "Log.h"
 #include "DabModulator.h"
-#include "InputMemory.h"
 #include "OutputFile.h"
 #include "FormatConverter.h"
 #include "FrameMultiplexer.h"
@@ -75,16 +72,16 @@
  * samples can have peaks up to about 48000. The value of 50000
  * should guarantee that with a digital gain of 1.0, UHD never clips
  * our samples.
+ *
+ * This only applies when fixed_point == false.
  */
 static const float normalise_factor = 50000.0f;
 
-//Empirical normalisation factors used to normalise the samples to amplitude 1.
+// Empirical normalisation factors used to normalise the samples to amplitude 1.
 static const float normalise_factor_file_fix = 81000.0f;
 static const float normalise_factor_file_var = 46000.0f;
 static const float normalise_factor_file_max = 46000.0f;
 
-typedef std::complex<float> complexf;
-
 using namespace std;
 
 volatile sig_atomic_t running = 1;
@@ -255,7 +252,11 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s)
     shared_ptr<ModOutput> output;
 
     if (s.useFileOutput) {
-        if (s.fileOutputFormat == "complexf") {
+        if (s.fftEngine != FFTEngine::FFTW) {
+            // Intentionally ignore fileOutputFormat, it is always sc16
+            output = make_shared<OutputFile>(s.outputName, s.fileOutputShowMetadata);
+        }
+        else if (s.fileOutputFormat == "complexf") {
             output = make_shared<OutputFile>(s.outputName, s.fileOutputShowMetadata);
         }
         else if (s.fileOutputFormat == "complexf_normalised") {
@@ -291,6 +292,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s)
     else if (s.useUHDOutput) {
         s.normalise = 1.0f / normalise_factor;
         s.sdr_device_config.sampleRate = s.outputRate;
+        s.sdr_device_config.fixedPoint = (s.fftEngine != FFTEngine::FFTW);
         auto uhddevice = make_shared<Output::UHD>(s.sdr_device_config);
         output = make_shared<Output::SDR>(s.sdr_device_config, uhddevice);
         rcs.enrol((Output::SDR*)output.get());
@@ -301,6 +303,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s)
         /* We normalise the same way as for the UHD output */
         s.normalise = 1.0f / normalise_factor;
         s.sdr_device_config.sampleRate = s.outputRate;
+        if (s.fftEngine != FFTEngine::FFTW) throw runtime_error("soapy fixed_point unsupported");
         auto soapydevice = make_shared<Output::Soapy>(s.sdr_device_config);
         output = make_shared<Output::SDR>(s.sdr_device_config, soapydevice);
         rcs.enrol((Output::SDR*)output.get());
@@ -320,6 +323,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s)
     else if (s.useLimeOutput) {
         /* We normalise the same way as for the UHD output */
         s.normalise = 1.0f / normalise_factor;
+        if (s.fftEngine != FFTEngine::FFTW) throw runtime_error("limesdr fixed_point unsupported");
         s.sdr_device_config.sampleRate = s.outputRate;
         auto limedevice = make_shared<Output::Lime>(s.sdr_device_config);
         output = make_shared<Output::SDR>(s.sdr_device_config, limedevice);
@@ -330,6 +334,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s)
     else if (s.useBladeRFOutput) {
         /* We normalise specifically for the BladeRF output : range [-2048; 2047] */
         s.normalise = 2047.0f / normalise_factor;
+        if (s.fftEngine != FFTEngine::FFTW) throw runtime_error("bladerf fixed_point unsupported");
         s.sdr_device_config.sampleRate = s.outputRate;
         auto bladerfdevice = make_shared<Output::BladeRF>(s.sdr_device_config);
         output = make_shared<Output::SDR>(s.sdr_device_config, bladerfdevice);
@@ -420,7 +425,8 @@ int launch_modulator(int argc, char* argv[])
     ModulatorData m;
     rcs.enrol(&m);
 
-    {
+    // Neither KISS FFT used for fixedpoint nor the FFT Accelerator used for DEXTER need planning.
+    if (mod_settings.fftEngine == FFTEngine::FFTW) {
         // This is mostly useful on ARM systems where FFTW planning takes some time. If we do it here
         // it will be done before the modulator starts up
         etiLog.level(debug) << "Running FFTW planning...";
@@ -442,7 +448,14 @@ int launch_modulator(int argc, char* argv[])
     }
 
     std::string output_format;
-    if (mod_settings.useFileOutput and
+    if (mod_settings.fftEngine == FFTEngine::KISS) {
+        output_format = ""; //fixed point is native sc16, no converter needed
+    }
+    else if (mod_settings.fftEngine == FFTEngine::DEXTER) {
+        output_format = "s16"; // FPGA FFT Engine outputs s32
+    }
+    // else FFTW, i.e. floating point
+    else if (mod_settings.useFileOutput and
             (mod_settings.fileOutputFormat == "s8" or
              mod_settings.fileOutputFormat == "u8" or
              mod_settings.fileOutputFormat == "s16")) {
diff --git a/src/DabModulator.cpp b/src/DabModulator.cpp
index 4a29132..5f7aaf6 100644
--- a/src/DabModulator.cpp
+++ b/src/DabModulator.cpp
@@ -3,7 +3,7 @@
    Her Majesty the Queen in Right of Canada (Communications Research
    Center Canada)
 
-   Copyright (C) 2023
+   Copyright (C) 2024
    Matthias P. Braendli, matthias.braendli@mpb.li
 
     http://opendigitalradio.org
@@ -54,7 +54,6 @@
 #include "SignalMultiplexer.h"
 #include "TII.h"
 #include "TimeInterleaver.h"
-#include "TimestampDecoder.h"
 
 using namespace std;
 
@@ -142,14 +141,15 @@ int DabModulator::process(Buffer* dataOut)
         auto cifMux = make_shared<FrameMultiplexer>(m_etiSource);
         auto cifPart = make_shared<BlockPartitioner>(mode);
 
-        auto cifMap = make_shared<QpskSymbolMapper>(m_nbCarriers);
-        auto cifRef = make_shared<PhaseReference>(mode);
-        auto cifFreq = make_shared<FrequencyInterleaver>(mode);
-        auto cifDiff = make_shared<DifferentialModulator>(m_nbCarriers);
+        const bool fixedPoint = m_settings.fftEngine != FFTEngine::FFTW;
+        auto cifMap = make_shared<QpskSymbolMapper>(m_nbCarriers, fixedPoint);
+        auto cifRef = make_shared<PhaseReference>(mode, fixedPoint);
+        auto cifFreq = make_shared<FrequencyInterleaver>(mode, fixedPoint);
+        auto cifDiff = make_shared<DifferentialModulator>(m_nbCarriers, fixedPoint);
 
-        auto cifNull = make_shared<NullSymbol>(m_nbCarriers);
-        auto cifSig = make_shared<SignalMultiplexer>(
-                (1 + m_nbSymbols) * m_nbCarriers * sizeof(complexf));
+        auto cifNull = make_shared<NullSymbol>(m_nbCarriers,
+                fixedPoint ? sizeof(complexfix) : sizeof(complexf));
+        auto cifSig = make_shared<SignalMultiplexer>();
 
         // TODO this needs a review
         bool useCicEq = false;
@@ -180,46 +180,79 @@ int DabModulator::process(Buffer* dataOut)
         try {
             tii = make_shared<TII>(
                     m_settings.dabMode,
-                    m_settings.tiiConfig);
+                    m_settings.tiiConfig,
+                    fixedPoint);
             rcs.enrol(tii.get());
-            tiiRef = make_shared<PhaseReference>(mode);
+            tiiRef = make_shared<PhaseReference>(mode, fixedPoint);
         }
         catch (const TIIError& e) {
             etiLog.level(error) << "Could not initialise TII: " << e.what();
         }
 
-        auto cifOfdm = make_shared<OfdmGenerator>(
-                (1 + m_nbSymbols),
-                m_nbCarriers,
-                m_spacing,
-                m_settings.enableCfr,
-                m_settings.cfrClip,
-                m_settings.cfrErrorClip);
+        shared_ptr<ModPlugin> cifOfdm;
+
+        switch (m_settings.fftEngine) {
+            case FFTEngine::FFTW:
+                {
+                    auto ofdm = make_shared<OfdmGeneratorCF32>(
+                            (1 + m_nbSymbols),
+                            m_nbCarriers,
+                            m_spacing,
+                            m_settings.enableCfr,
+                            m_settings.cfrClip,
+                            m_settings.cfrErrorClip);
+                    rcs.enrol(ofdm.get());
+                    cifOfdm = ofdm;
+                }
+                break;
+            case FFTEngine::KISS:
+                cifOfdm = make_shared<OfdmGeneratorFixed>(
+                        (1 + m_nbSymbols),
+                        m_nbCarriers,
+                        m_spacing);
+                break;
+            case FFTEngine::DEXTER:
+#if defined(HAVE_DEXTER)
+                cifOfdm = make_shared<OfdmGeneratorDEXTER>(
+                        (1 + m_nbSymbols),
+                        m_nbCarriers,
+                        m_spacing);
+#else
+                throw std::runtime_error("Cannot use DEXTER fft engine without --enable-dexter");
+#endif
+                break;
+        }
 
-        rcs.enrol(cifOfdm.get());
+        shared_ptr<GainControl> cifGain;
 
-        auto cifGain = make_shared<GainControl>(
-                m_spacing,
-                m_settings.gainMode,
-                m_settings.digitalgain,
-                m_settings.normalise,
-                m_settings.gainmodeVariance);
+        if (not fixedPoint) {
+            cifGain = make_shared<GainControl>(
+                    m_spacing,
+                    m_settings.gainMode,
+                    m_settings.digitalgain,
+                    m_settings.normalise,
+                    m_settings.gainmodeVariance);
 
-        rcs.enrol(cifGain.get());
+            rcs.enrol(cifGain.get());
+        }
 
         auto cifGuard = make_shared<GuardIntervalInserter>(
                 m_nbSymbols, m_spacing, m_nullSize, m_symSize,
-                m_settings.ofdmWindowOverlap);
+                m_settings.ofdmWindowOverlap, m_settings.fftEngine);
         rcs.enrol(cifGuard.get());
 
         shared_ptr<FIRFilter> cifFilter;
         if (not m_settings.filterTapsFilename.empty()) {
+            if (fixedPoint) throw std::runtime_error("fixed point doesn't support fir filter");
+
             cifFilter = make_shared<FIRFilter>(m_settings.filterTapsFilename);
             rcs.enrol(cifFilter.get());
         }
 
         shared_ptr<MemlessPoly> cifPoly;
         if (not m_settings.polyCoefFilename.empty()) {
+            if (fixedPoint) throw std::runtime_error("fixed point doesn't support predistortion");
+
             cifPoly = make_shared<MemlessPoly>(m_settings.polyCoefFilename,
                                                m_settings.polyNumThreads);
             rcs.enrol(cifPoly.get());
@@ -227,15 +260,21 @@ int DabModulator::process(Buffer* dataOut)
 
         shared_ptr<Resampler> cifRes;
         if (m_settings.outputRate != 2048000) {
+            if (fixedPoint) throw std::runtime_error("fixed point doesn't support resampler");
+
             cifRes = make_shared<Resampler>(
                     2048000,
                     m_settings.outputRate,
                     m_spacing);
         }
 
-        if (not m_format.empty()) {
-            m_formatConverter = make_shared<FormatConverter>(m_format);
+        if (m_settings.fftEngine == FFTEngine::FFTW and not m_format.empty()) {
+            m_formatConverter = make_shared<FormatConverter>(false, m_format);
+        }
+        else if (m_settings.fftEngine == FFTEngine::DEXTER) {
+            m_formatConverter = make_shared<FormatConverter>(true, m_format);
         }
+        // KISS is already in s16
 
         m_output = make_shared<OutputMemory>(dataOut);
 
diff --git a/src/DabModulator.h b/src/DabModulator.h
index 093a782..82782cd 100644
--- a/src/DabModulator.h
+++ b/src/DabModulator.h
@@ -40,12 +40,8 @@
 #include "EtiReader.h"
 #include "Flowgraph.h"
 #include "FormatConverter.h"
-#include "GainControl.h"
 #include "OutputMemory.h"
 #include "RemoteControl.h"
-#include "Log.h"
-#include "TII.h"
-
 
 class DabModulator : public ModInput, public ModMetadata, public RemoteControllable
 {
diff --git a/src/DifferentialModulator.cpp b/src/DifferentialModulator.cpp
index 97a7998..21b4c3e 100644
--- a/src/DifferentialModulator.cpp
+++ b/src/DifferentialModulator.cpp
@@ -22,17 +22,14 @@
 #include "DifferentialModulator.h"
 #include "PcDebug.h"
 
-#include <stdio.h>
+#include <cstdio>
 #include <stdexcept>
-#include <complex>
-#include <string.h>
+#include <cstring>
 
-typedef std::complex<float> complexf;
-
-
-DifferentialModulator::DifferentialModulator(size_t carriers) :
+DifferentialModulator::DifferentialModulator(size_t carriers, bool fixedPoint) :
     ModMux(),
-    d_carriers(carriers)
+    m_carriers(carriers),
+    m_fixedPoint(fixedPoint)
 {
     PDEBUG("DifferentialModulator::DifferentialModulator(%zu)\n", carriers);
 
@@ -42,10 +39,42 @@ DifferentialModulator::DifferentialModulator(size_t carriers) :
 DifferentialModulator::~DifferentialModulator()
 {
     PDEBUG("DifferentialModulator::~DifferentialModulator()\n");
-
 }
 
 
+template<typename T>
+void do_process(size_t carriers, const std::vector<Buffer*>& dataIn, Buffer* dataOut)
+{
+    size_t phaseSize = dataIn[0]->getLength() / sizeof(T);
+    size_t dataSize = dataIn[1]->getLength() / sizeof(T);
+    dataOut->setLength((phaseSize + dataSize) * sizeof(T));
+
+    const T* phase = reinterpret_cast<const T*>(dataIn[0]->getData());
+    const T* in = reinterpret_cast<const T*>(dataIn[1]->getData());
+    T* out = reinterpret_cast<T*>(dataOut->getData());
+
+    if (phaseSize != carriers) {
+        throw std::runtime_error(
+                "DifferentialModulator::process input phase size not valid!");
+    }
+    if (dataSize % carriers != 0) {
+        throw std::runtime_error(
+                "DifferentialModulator::process input data size not valid!");
+    }
+
+    memcpy(dataOut->getData(), phase, phaseSize * sizeof(T));
+    for (size_t i = 0; i < dataSize; i += carriers) {
+        for (size_t j = 0; j < carriers; j += 4) {
+            out[carriers + j] = out[j] * in[j];
+            out[carriers + j + 1] = out[j + 1] * in[j + 1];
+            out[carriers + j + 2] = out[j + 2] * in[j + 2];
+            out[carriers + j + 3] = out[j + 3] * in[j + 3];
+        }
+        in += carriers;
+        out += carriers;
+    }
+}
+
 // dataIn[0] -> phase reference
 // dataIn[1] -> data symbols
 int DifferentialModulator::process(std::vector<Buffer*> dataIn, Buffer* dataOut)
@@ -67,33 +96,11 @@ int DifferentialModulator::process(std::vector<Buffer*> dataIn, Buffer* dataOut)
                 "DifferentialModulator::process nb of input streams not 2!");
     }
 
-    size_t phaseSize = dataIn[0]->getLength() / sizeof(complexf);
-    size_t dataSize = dataIn[1]->getLength() / sizeof(complexf);
-    dataOut->setLength((phaseSize + dataSize) * sizeof(complexf));
-
-    const complexf* phase = reinterpret_cast<const complexf*>(dataIn[0]->getData());
-    const complexf* in = reinterpret_cast<const complexf*>(dataIn[1]->getData());
-    complexf* out = reinterpret_cast<complexf*>(dataOut->getData());
-
-    if (phaseSize != d_carriers) {
-        throw std::runtime_error(
-                "DifferentialModulator::process input phase size not valid!");
-    }
-    if (dataSize % d_carriers != 0) {
-        throw std::runtime_error(
-                "DifferentialModulator::process input data size not valid!");
+    if (m_fixedPoint) {
+        do_process<complexfix>(m_carriers, dataIn, dataOut);
     }
-
-    memcpy(dataOut->getData(), phase, phaseSize * sizeof(complexf));
-    for (size_t i = 0; i < dataSize; i += d_carriers) {
-        for (size_t j = 0; j < d_carriers; j += 4) {
-            out[d_carriers + j] = out[j] * in[j];
-            out[d_carriers + j + 1] = out[j + 1] * in[j + 1];
-            out[d_carriers + j + 2] = out[j + 2] * in[j + 2];
-            out[d_carriers + j + 3] = out[j + 3] * in[j + 3];
-        }
-        in += d_carriers;
-        out += d_carriers;
+    else {
+        do_process<complexf>(m_carriers, dataIn, dataOut);
     }
 
     return dataOut->getLength();
diff --git a/src/DifferentialModulator.h b/src/DifferentialModulator.h
index b26ea8b..9cc5081 100644
--- a/src/DifferentialModulator.h
+++ b/src/DifferentialModulator.h
@@ -35,7 +35,7 @@
 class DifferentialModulator : public ModMux
 {
 public:
-    DifferentialModulator(size_t carriers);
+    DifferentialModulator(size_t carriers, bool fixedPoint);
     virtual ~DifferentialModulator();
     DifferentialModulator(const DifferentialModulator&);
     DifferentialModulator& operator=(const DifferentialModulator&);
@@ -45,6 +45,7 @@ public:
     const char* name() { return "DifferentialModulator"; }
 
 protected:
-    size_t d_carriers;
+    size_t m_carriers;
+    size_t m_fixedPoint;
 };
 
diff --git a/src/FIRFilter.h b/src/FIRFilter.h
index a4effa1..2d8fba9 100644
--- a/src/FIRFilter.h
+++ b/src/FIRFilter.h
@@ -33,21 +33,14 @@
 
 #include "RemoteControl.h"
 #include "ModPlugin.h"
-#include "PcDebug.h"
 
 #include <sys/types.h>
-#include <complex>
-#include <thread>
 #include <vector>
-#include <time.h>
 #include <cstdio>
 #include <string>
-#include <memory>
 
 #define FIRFILTER_PIPELINE_DELAY 1
 
-typedef std::complex<float> complexf;
-
 class FIRFilter : public PipelinedModCodec, public RemoteControllable
 {
 public:
diff --git a/src/Flowgraph.cpp b/src/Flowgraph.cpp
index 3d4cdcc..339e326 100644
--- a/src/Flowgraph.cpp
+++ b/src/Flowgraph.cpp
@@ -27,12 +27,10 @@
 #include "Flowgraph.h"
 #include "PcDebug.h"
 #include "Log.h"
-#include <string>
 #include <memory>
 #include <algorithm>
 #include <sstream>
 #include <sys/types.h>
-#include <stdexcept>
 #include <assert.h>
 #include <sys/time.h>
 
@@ -254,15 +252,15 @@ Flowgraph::~Flowgraph()
         char node_time_sz[1024] = {};
 
         for (const auto &node : nodes) {
-            snprintf(node_time_sz, 1023, "  %30s: %10lu us (%2.2f %%)\n",
+            snprintf(node_time_sz, 1023, "  %30s: %10lld us (%2.2f %%)\n",
                     node->plugin()->name(),
-                    node->processTime(),
+                    (long long)node->processTime(),
                     node->processTime() * 100.0 / myProcessTime);
             ss << node_time_sz;
         }
 
-        snprintf(node_time_sz, 1023, "  %30s: %10lu us (100.00 %%)\n", "total",
-                myProcessTime);
+        snprintf(node_time_sz, 1023, "  %30s: %10lld us (100.00 %%)\n", "total",
+                (long long)myProcessTime);
         ss << node_time_sz;
 
         etiLog.level(debug) << ss.str();
diff --git a/src/FormatConverter.cpp b/src/FormatConverter.cpp
index e8e76ed..517f26e 100644
--- a/src/FormatConverter.cpp
+++ b/src/FormatConverter.cpp
@@ -28,17 +28,37 @@
 
 #include "FormatConverter.h"
 #include "PcDebug.h"
+#include "Log.h"
 
-#include <sys/types.h>
-#include <string.h>
 #include <stdexcept>
+#include <cstring>
 #include <assert.h>
+#include <sys/types.h>
+#if defined(__ARM_NEON)
+#include <arm_neon.h>
+#endif
 
-FormatConverter::FormatConverter(const std::string& format) :
+FormatConverter::FormatConverter(bool input_is_complexfix_wide, const std::string& format_out) :
     ModCodec(),
-    m_format(format)
+    m_input_complexfix_wide(input_is_complexfix_wide),
+    m_format_out(format_out)
 { }
 
+FormatConverter::~FormatConverter()
+{
+    if (
+#if defined(__ARM_NEON)
+    not m_input_complexfix_wide
+#else
+    true
+#endif
+    ) {
+        etiLog.level(debug) << "FormatConverter: " <<
+            m_num_clipped_samples.load() << " clipped";
+    }
+}
+
+
 /* Expect the input samples to be in the correct range for the required format */
 int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut)
 {
@@ -47,71 +67,113 @@ int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut)
 
     size_t num_clipped_samples = 0;
 
-    size_t sizeIn = dataIn->getLength() / sizeof(float);
-    float* in = reinterpret_cast<float*>(dataIn->getData());
+    if (m_input_complexfix_wide) {
+        size_t sizeIn = dataIn->getLength() / sizeof(int32_t);
+        if (m_format_out == "s16") {
+            dataOut->setLength(sizeIn * sizeof(int16_t));
+            const int32_t *in = reinterpret_cast<int32_t*>(dataIn->getData());
+            int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData());
 
-    if (m_format == "s16") {
-        dataOut->setLength(sizeIn * sizeof(int16_t));
-        int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData());
+            constexpr int shift = 6;
 
-        for (size_t i = 0; i < sizeIn; i++) {
-            if (in[i] < INT16_MIN) {
-                out[i] = INT16_MIN;
-                num_clipped_samples++;
+#if defined(__ARM_NEON)
+            if (sizeIn % 4 != 0) {
+                throw std::logic_error("Unexpected length not multiple of 4");
             }
-            else if (in[i] > INT16_MAX) {
-                out[i] = INT16_MAX;
-                num_clipped_samples++;
+
+            for (size_t i = 0; i < sizeIn; i += 4) {
+                int32x4_t input_vec = vld1q_s32(&in[i]);
+                // Apply shift right, saturate on conversion to int16_t
+                int16x4_t output_vec = vqshrn_n_s32(input_vec, shift);
+                vst1_s16(&out[i], output_vec);
             }
-            else {
-                out[i] = in[i];
+#else
+            for (size_t i = 0; i < sizeIn; i++) {
+                const int32_t val = in[i] >> shift;
+                if (val < INT16_MIN) {
+                    out[i] = INT16_MIN;
+                    num_clipped_samples++;
+                }
+                else if (val > INT16_MAX) {
+                    out[i] = INT16_MAX;
+                    num_clipped_samples++;
+                }
+                else {
+                    out[i] = val;
+                }
             }
+#endif
         }
-    }
-    else if (m_format == "u8") {
-        dataOut->setLength(sizeIn * sizeof(int8_t));
-        uint8_t* out = reinterpret_cast<uint8_t*>(dataOut->getData());
-
-        for (size_t i = 0; i < sizeIn; i++) {
-            const auto samp = in[i] + 128.0f;
-            if (samp < 0) {
-                out[i] = 0;
-                num_clipped_samples++;
-            }
-            else if (samp > UINT8_MAX) {
-                out[i] = UINT8_MAX;
-                num_clipped_samples++;
-            }
-            else {
-                out[i] = samp;
-            }
-
+        else {
+            throw std::runtime_error("FormatConverter: Invalid fix format " + m_format_out);
         }
     }
-    else if (m_format == "s8") {
-        dataOut->setLength(sizeIn * sizeof(int8_t));
-        int8_t* out = reinterpret_cast<int8_t*>(dataOut->getData());
-
-        for (size_t i = 0; i < sizeIn; i++) {
-            if (in[i] < INT8_MIN) {
-                out[i] = INT8_MIN;
-                num_clipped_samples++;
+    else {
+        size_t sizeIn = dataIn->getLength() / sizeof(float);
+        const float* in = reinterpret_cast<float*>(dataIn->getData());
+
+        if (m_format_out == "s16") {
+            dataOut->setLength(sizeIn * sizeof(int16_t));
+            int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData());
+
+            for (size_t i = 0; i < sizeIn; i++) {
+                if (in[i] < INT16_MIN) {
+                    out[i] = INT16_MIN;
+                    num_clipped_samples++;
+                }
+                else if (in[i] > INT16_MAX) {
+                    out[i] = INT16_MAX;
+                    num_clipped_samples++;
+                }
+                else {
+                    out[i] = in[i];
+                }
             }
-            else if (in[i] > INT8_MAX) {
-                out[i] = INT8_MAX;
-                num_clipped_samples++;
+        }
+        else if (m_format_out == "u8") {
+            dataOut->setLength(sizeIn * sizeof(int8_t));
+            uint8_t* out = reinterpret_cast<uint8_t*>(dataOut->getData());
+
+            for (size_t i = 0; i < sizeIn; i++) {
+                const auto samp = in[i] + 128.0f;
+                if (samp < 0) {
+                    out[i] = 0;
+                    num_clipped_samples++;
+                }
+                else if (samp > UINT8_MAX) {
+                    out[i] = UINT8_MAX;
+                    num_clipped_samples++;
+                }
+                else {
+                    out[i] = samp;
+                }
+
             }
-            else {
-                out[i] = in[i];
+        }
+        else if (m_format_out == "s8") {
+            dataOut->setLength(sizeIn * sizeof(int8_t));
+            int8_t* out = reinterpret_cast<int8_t*>(dataOut->getData());
+
+            for (size_t i = 0; i < sizeIn; i++) {
+                if (in[i] < INT8_MIN) {
+                    out[i] = INT8_MIN;
+                    num_clipped_samples++;
+                }
+                else if (in[i] > INT8_MAX) {
+                    out[i] = INT8_MAX;
+                    num_clipped_samples++;
+                }
+                else {
+                    out[i] = in[i];
+                }
             }
         }
-    }
-    else {
-        throw std::runtime_error("FormatConverter: Invalid format " + m_format);
+        else {
+            throw std::runtime_error("FormatConverter: Invalid format " + m_format_out);
+        }
     }
 
     m_num_clipped_samples.store(num_clipped_samples);
-
     return dataOut->getLength();
 }
 
diff --git a/src/FormatConverter.h b/src/FormatConverter.h
index 05511c0..1ed2283 100644
--- a/src/FormatConverter.h
+++ b/src/FormatConverter.h
@@ -33,18 +33,19 @@
 #endif
 
 #include "ModPlugin.h"
-#include <complex>
 #include <atomic>
 #include <string>
-#include <cstdint>
 
 class FormatConverter : public ModCodec
 {
     public:
         static size_t get_format_size(const std::string& format);
 
-        // Allowed formats: s8, u8 and s16
-        FormatConverter(const std::string& format);
+        // floating-point input allows output formats: s8, u8 and s16
+        // complexfix_wide input allows output formats: s16
+        // complexfix input is already in s16, and needs no converter
+        FormatConverter(bool input_is_complexfix_wide, const std::string& format_out);
+        virtual ~FormatConverter();
 
         int process(Buffer* const dataIn, Buffer* dataOut);
         const char* name();
@@ -52,7 +53,8 @@ class FormatConverter : public ModCodec
         size_t get_num_clipped_samples() const;
 
     private:
-        std::string m_format;
+        bool m_input_complexfix_wide;
+        std::string m_format_out;
 
         std::atomic<size_t> m_num_clipped_samples = 0;
 };
diff --git a/src/FrameMultiplexer.cpp b/src/FrameMultiplexer.cpp
index e893120..ebd8b76 100644
--- a/src/FrameMultiplexer.cpp
+++ b/src/FrameMultiplexer.cpp
@@ -25,17 +25,11 @@
  */
 
 #include "FrameMultiplexer.h"
-#include "PcDebug.h"
 
-#include <stdio.h>
 #include <string>
-#include <stdexcept>
-#include <complex>
-#include <memory>
-#include <assert.h>
-#include <string.h>
-
-typedef std::complex<float> complexf;
+#include <cstdio>
+#include <cassert>
+#include <cstring>
 
 FrameMultiplexer::FrameMultiplexer(
         const EtiSource& etiSource) :
diff --git a/src/FrequencyInterleaver.cpp b/src/FrequencyInterleaver.cpp
index e76d525..6f36dcb 100644
--- a/src/FrequencyInterleaver.cpp
+++ b/src/FrequencyInterleaver.cpp
@@ -22,17 +22,15 @@
 #include "FrequencyInterleaver.h"
 #include "PcDebug.h"
 
-#include <stdio.h>
 #include <stdexcept>
 #include <string>
-#include <stdlib.h>
-#include <complex>
+#include <cstdio>
+#include <cstdlib>
 
-typedef std::complex<float> complexf;
 
-
-FrequencyInterleaver::FrequencyInterleaver(size_t mode) :
-    ModCodec()
+FrequencyInterleaver::FrequencyInterleaver(size_t mode, bool fixedPoint) :
+    ModCodec(),
+    m_fixedPoint(fixedPoint)
 {
     PDEBUG("FrequencyInterleaver::FrequencyInterleaver(%zu) @ %p\n",
             mode, this);
@@ -42,54 +40,53 @@ FrequencyInterleaver::FrequencyInterleaver(size_t mode) :
     size_t beta;
     switch (mode) {
     case 1:
-        d_carriers = 1536;
+        m_carriers = 1536;
         num = 2048;
         beta = 511;
         break;
     case 2:
-        d_carriers = 384;
+        m_carriers = 384;
         num = 512;
         beta = 127;
         break;
     case 3:
-        d_carriers = 192;
+        m_carriers = 192;
         num = 256;
         beta = 63;
         break;
     case 0:
     case 4:
-        d_carriers = 768;
+        m_carriers = 768;
         num = 1024;
         beta = 255;
         break;
     default:
-        PDEBUG("Carriers: %zu\n", (d_carriers >> 1) << 1);
-        throw std::runtime_error("FrequencyInterleaver::FrequencyInterleaver "
-                "nb of carriers invalid!");
-        break;
+        PDEBUG("Carriers: %zu\n", (m_carriers >> 1) << 1);
+        throw std::runtime_error("FrequencyInterleaver: invalid dab mode");
     }
 
-    const int ret = posix_memalign((void**)(&d_indexes), 16, d_carriers * sizeof(size_t));
+    const int ret = posix_memalign((void**)(&m_indices), 16, m_carriers * sizeof(size_t));
     if (ret != 0) {
         throw std::runtime_error("memory allocation failed: " + std::to_string(ret));
     }
 
-    size_t* index = d_indexes;
+    size_t *index = m_indices;
     size_t perm = 0;
     PDEBUG("i: %4u, R: %4u\n", 0, 0);
     for (size_t j = 1; j < num; ++j) {
         perm = (alpha * perm + beta) & (num - 1);
-        if (perm >= ((num - d_carriers) / 2)
-                && perm <= (num - (num - d_carriers) / 2)
+        if (perm >= ((num - m_carriers) / 2)
+                && perm <= (num - (num - m_carriers) / 2)
                 && perm != (num / 2)) {
             PDEBUG("i: %4zu, R: %4zu, d: %4zu, n: %4zu, k: %5zi, index: %zu\n",
-                    j, perm, perm, index - d_indexes, perm - num / 2,
+                    j, perm, perm, index - m_indices, perm - num / 2,
                     perm > num / 2
                     ?  perm - (1 + (num / 2))
-                    : perm + (d_carriers - (num / 2)));
+                    : perm + (m_carriers - (num / 2)));
             *(index++) = perm > num / 2 ?
-                perm - (1 + (num / 2)) : perm + (d_carriers - (num / 2));
-        } else {
+                perm - (1 + (num / 2)) : perm + (m_carriers - (num / 2));
+        }
+        else {
             PDEBUG("i: %4zu, R: %4zu\n", j, perm);
         }
     }
@@ -100,9 +97,33 @@ FrequencyInterleaver::~FrequencyInterleaver()
 {
     PDEBUG("FrequencyInterleaver::~FrequencyInterleaver() @ %p\n", this);
 
-    free(d_indexes);
+    free(m_indices);
 }
 
+template<typename T>
+void do_process(Buffer* const dataIn, Buffer* dataOut,
+        size_t carriers, const size_t * const indices)
+{
+    const T* in = reinterpret_cast<const T*>(dataIn->getData());
+    T* out = reinterpret_cast<T*>(dataOut->getData());
+    size_t sizeIn = dataIn->getLength() / sizeof(T);
+
+    if (sizeIn % carriers != 0) {
+        throw std::runtime_error(
+                "FrequencyInterleaver::process input size not valid!");
+    }
+
+    for (size_t i = 0; i < sizeIn;) {
+//      memset(out, 0, m_carriers * sizeof(T));
+        for (size_t j = 0; j < carriers; i += 4, j += 4) {
+            out[indices[j]] = in[i];
+            out[indices[j + 1]] = in[i + 1];
+            out[indices[j + 2]] = in[i + 2];
+            out[indices[j + 3]] = in[i + 3];
+        }
+        out += carriers;
+    }
+}
 
 int FrequencyInterleaver::process(Buffer* const dataIn, Buffer* dataOut)
 {
@@ -112,24 +133,11 @@ int FrequencyInterleaver::process(Buffer* const dataIn, Buffer* dataOut)
 
     dataOut->setLength(dataIn->getLength());
 
-    const complexf* in = reinterpret_cast<const complexf*>(dataIn->getData());
-    complexf* out = reinterpret_cast<complexf*>(dataOut->getData());
-    size_t sizeIn = dataIn->getLength() / sizeof(complexf);
-
-    if (sizeIn % d_carriers != 0) {
-        throw std::runtime_error(
-                "FrequencyInterleaver::process input size not valid!");
+    if (m_fixedPoint) {
+        do_process<complexfix>(dataIn, dataOut, m_carriers, m_indices);
     }
-
-    for (size_t i = 0; i < sizeIn;) {
-//        memset(out, 0, d_carriers * sizeof(complexf));
-        for (size_t j = 0; j < d_carriers; i += 4, j += 4) {
-            out[d_indexes[j]] = in[i];
-            out[d_indexes[j + 1]] = in[i + 1];
-            out[d_indexes[j + 2]] = in[i + 2];
-            out[d_indexes[j + 3]] = in[i + 3];
-        }
-        out += d_carriers;
+    else {
+        do_process<complexf>(dataIn, dataOut, m_carriers, m_indices);
     }
 
     return 1;
diff --git a/src/FrequencyInterleaver.h b/src/FrequencyInterleaver.h
index 43ca21a..b31b968 100644
--- a/src/FrequencyInterleaver.h
+++ b/src/FrequencyInterleaver.h
@@ -25,16 +25,14 @@
 #   include <config.h>
 #endif
 
-
 #include "ModPlugin.h"
 
 #include <sys/types.h>
 
-
 class FrequencyInterleaver : public ModCodec
 {
 public:
-    FrequencyInterleaver(size_t mode);
+    FrequencyInterleaver(size_t mode, bool fixedPoint);
     virtual ~FrequencyInterleaver();
     FrequencyInterleaver(const FrequencyInterleaver&) = delete;
     FrequencyInterleaver& operator=(const FrequencyInterleaver&) = delete;
@@ -43,7 +41,8 @@ public:
     const char* name() override { return "FrequencyInterleaver"; }
 
 protected:
-    size_t d_carriers;
-    size_t* d_indexes;
+    bool m_fixedPoint;
+    size_t m_carriers;
+    size_t *m_indices;
 };
 
diff --git a/src/GainControl.h b/src/GainControl.h
index 04f6b58..d40a7d7 100644
--- a/src/GainControl.h
+++ b/src/GainControl.h
@@ -35,7 +35,6 @@
 #include "RemoteControl.h"
 
 #include <sys/types.h>
-#include <complex>
 #include <string>
 #include <mutex>
 
@@ -43,9 +42,6 @@
 #   include <xmmintrin.h>
 #endif
 
-
-typedef std::complex<float> complexf;
-
 enum class GainMode { GAIN_FIX = 0, GAIN_MAX = 1, GAIN_VAR = 2 };
 
 class GainControl : public PipelinedModCodec, public RemoteControllable
diff --git a/src/GuardIntervalInserter.cpp b/src/GuardIntervalInserter.cpp
index 3c2db14..26d4fd1 100644
--- a/src/GuardIntervalInserter.cpp
+++ b/src/GuardIntervalInserter.cpp
@@ -29,39 +29,47 @@
 #include <cstring>
 #include <cassert>
 #include <stdexcept>
-#include <complex>
 #include <mutex>
 
-typedef std::complex<float> complexf;
+GuardIntervalInserter::Params::Params(
+        size_t nbSymbols,
+        size_t spacing,
+        size_t nullSize,
+        size_t symSize,
+        size_t& windowOverlap) :
+    nbSymbols(nbSymbols),
+    spacing(spacing),
+    nullSize(nullSize),
+    symSize(symSize),
+    windowOverlap(windowOverlap) {}
 
 GuardIntervalInserter::GuardIntervalInserter(
         size_t nbSymbols,
         size_t spacing,
         size_t nullSize,
         size_t symSize,
-        size_t& windowOverlap) :
+        size_t& windowOverlap,
+        FFTEngine fftEngine) :
     ModCodec(),
     RemoteControllable("guardinterval"),
-    d_nbSymbols(nbSymbols),
-    d_spacing(spacing),
-    d_nullSize(nullSize),
-    d_symSize(symSize),
-    d_windowOverlap(windowOverlap)
+    m_fftEngine(fftEngine),
+    m_params(nbSymbols, spacing, nullSize, symSize, windowOverlap)
 {
-    if (d_nullSize == 0) {
+    if (nullSize == 0) {
         throw std::logic_error("NULL symbol must be present");
     }
 
+
     RC_ADD_PARAMETER(windowlen, "Window length for OFDM windowng [0 to disable]");
 
     /* We use a raised-cosine window for the OFDM windowing.
-     * Each symbol is extended on both sides by d_windowOverlap samples.
+     * Each symbol is extended on both sides by windowOverlap samples.
      *
      *
      * Sym n             |####################|
      * Sym n+1                                 |####################|
      *
-     * We now extend the symbols by d_windowOverlap (one dash)
+     * We now extend the symbols by windowOverlap (one dash)
      *
      * Sym n extended   -|####################|-
      * Sym n+1 extended                       -|####################|-
@@ -75,7 +83,7 @@ GuardIntervalInserter::GuardIntervalInserter(
      *                                         /                    \
      *                    ... ________________/                      \__ ...
      *
-     * The window length is 2*d_windowOverlap.
+     * The window length is 2*windowOverlap.
      */
 
     update_window(windowOverlap);
@@ -87,44 +95,43 @@ GuardIntervalInserter::GuardIntervalInserter(
 
 void GuardIntervalInserter::update_window(size_t new_window_overlap)
 {
-    std::lock_guard<std::mutex> lock(d_windowMutex);
+    std::lock_guard<std::mutex> lock(m_params.windowMutex);
 
-    d_windowOverlap = new_window_overlap;
+    m_params.windowOverlap = new_window_overlap;
 
-    // d_window only contains the rising window edge.
-    d_window.resize(2*d_windowOverlap);
-    for (size_t i = 0; i < 2*d_windowOverlap; i++) {
-        d_window[i] = (float)(0.5 * (1.0 - cos(M_PI * i / (2*d_windowOverlap - 1))));
+    // m_params.window only contains the rising window edge.
+    m_params.window.resize(2*m_params.windowOverlap);
+    for (size_t i = 0; i < 2*m_params.windowOverlap; i++) {
+        m_params.window[i] = (float)(0.5 * (1.0 - cos(M_PI * i / (2*m_params.windowOverlap - 1))));
     }
 }
 
-int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut)
+template<typename T>
+int do_process(const GuardIntervalInserter::Params& p, Buffer* const dataIn, Buffer* dataOut)
 {
-    PDEBUG("GuardIntervalInserter::process(dataIn: %p, dataOut: %p)\n",
+    PDEBUG("GuardIntervalInserter do_process(dataIn: %p, dataOut: %p)\n",
             dataIn, dataOut);
 
-    std::lock_guard<std::mutex> lock(d_windowMutex);
-
-    // Every symbol overlaps over a length of d_windowOverlap with
+    // Every symbol overlaps over a length of windowOverlap with
     // the previous symbol, and with the next symbol. First symbol
     // receives no prefix window, because we don't remember the
     // last symbol from the previous TF (yet). Last symbol also
     // receives no suffix window, for the same reason.
     // Overall output buffer length must stay independent of the windowing.
-    dataOut->setLength((d_nullSize + (d_nbSymbols * d_symSize)) * sizeof(complexf));
+    dataOut->setLength((p.nullSize + (p.nbSymbols * p.symSize)) * sizeof(T));
 
-    const complexf* in = reinterpret_cast<const complexf*>(dataIn->getData());
-    complexf* out = reinterpret_cast<complexf*>(dataOut->getData());
-    size_t sizeIn = dataIn->getLength() / sizeof(complexf);
+    const T* in = reinterpret_cast<const T*>(dataIn->getData());
+    T* out = reinterpret_cast<T*>(dataOut->getData());
+    size_t sizeIn = dataIn->getLength() / sizeof(T);
 
-    const size_t num_symbols = d_nbSymbols + 1;
-    if (sizeIn != num_symbols * d_spacing)
+    const size_t num_symbols = p.nbSymbols + 1;
+    if (sizeIn != num_symbols * p.spacing)
     {
-        PDEBUG("Nb symbols: %zu\n", d_nbSymbols);
-        PDEBUG("Spacing: %zu\n", d_spacing);
-        PDEBUG("Null size: %zu\n", d_nullSize);
-        PDEBUG("Sym size: %zu\n", d_symSize);
-        PDEBUG("\n%zu != %zu\n", sizeIn, (d_nbSymbols + 1) * d_spacing);
+        PDEBUG("Nb symbols: %zu\n", p.nbSymbols);
+        PDEBUG("Spacing: %zu\n", p.spacing);
+        PDEBUG("Null size: %zu\n", p.nullSize);
+        PDEBUG("Sym size: %zu\n", p.symSize);
+        PDEBUG("\n%zu != %zu\n", sizeIn, (p.nbSymbols + 1) * p.spacing);
         throw std::runtime_error(
                 "GuardIntervalInserter::process input size not valid!");
     }
@@ -132,139 +139,162 @@ int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut)
     // TODO remember the end of the last TF so that we can do some
     //      windowing too.
 
-    if (d_windowOverlap) {
-        {
-            // Handle Null symbol separately because it is longer
-            const size_t prefixlength = d_nullSize - d_spacing;
-
-            // end = spacing
-            memcpy(out, &in[d_spacing - prefixlength],
-                    prefixlength * sizeof(complexf));
-
-            memcpy(&out[prefixlength], in, (d_spacing - d_windowOverlap) * sizeof(complexf));
+    std::lock_guard<std::mutex> lock(p.windowMutex);
+    if (p.windowOverlap) {
+        if constexpr (std::is_same_v<complexf, T>) {
+            {
+                // Handle Null symbol separately because it is longer
+                const size_t prefixlength = p.nullSize - p.spacing;
+
+                // end = spacing
+                memcpy(out, &in[p.spacing - prefixlength],
+                        prefixlength * sizeof(T));
+
+                memcpy(&out[prefixlength], in, (p.spacing - p.windowOverlap) * sizeof(T));
+
+                // The remaining part of the symbol must have half of the window applied,
+                // sloping down from 1 to 0.5
+                for (size_t i = 0; i < p.windowOverlap; i++) {
+                    const size_t out_ix = prefixlength + p.spacing - p.windowOverlap + i;
+                    const size_t in_ix = p.spacing - p.windowOverlap + i;
+                    out[out_ix] = in[in_ix] * p.window[2*p.windowOverlap - (i+1)];
+                }
 
-            // The remaining part of the symbol must have half of the window applied,
-            // sloping down from 1 to 0.5
-            for (size_t i = 0; i < d_windowOverlap; i++) {
-                const size_t out_ix = prefixlength + d_spacing - d_windowOverlap + i;
-                const size_t in_ix = d_spacing - d_windowOverlap + i;
-                out[out_ix] = in[in_ix] * d_window[2*d_windowOverlap - (i+1)];
-            }
+                // Suffix is taken from the beginning of the symbol, and sees the other
+                // half of the window applied.
+                for (size_t i = 0; i < p.windowOverlap; i++) {
+                    const size_t out_ix = prefixlength + p.spacing + i;
+                    out[out_ix] = in[i] * p.window[p.windowOverlap - (i+1)];
+                }
 
-            // Suffix is taken from the beginning of the symbol, and sees the other
-            // half of the window applied.
-            for (size_t i = 0; i < d_windowOverlap; i++) {
-                const size_t out_ix = prefixlength + d_spacing + i;
-                out[out_ix] = in[i] * d_window[d_windowOverlap - (i+1)];
+                in += p.spacing;
+                out += p.nullSize;
+                // out is now pointing to the proper end of symbol. There are
+                // windowOverlap samples ahead that were already written.
             }
 
-            in += d_spacing;
-            out += d_nullSize;
-            // out is now pointing to the proper end of symbol. There are
-            // d_windowOverlap samples ahead that were already written.
-        }
-
-        // Data symbols
-        for (size_t sym_ix = 0; sym_ix < d_nbSymbols; sym_ix++) {
-            /* _ix variables are indices into in[], _ox variables are
-             * indices for out[] */
-            const ssize_t start_rise_ox = -d_windowOverlap;
-            const size_t start_rise_ix = 2 * d_spacing - d_symSize - d_windowOverlap;
-            /*
-            const size_t start_real_symbol_ox = 0;
-            const size_t start_real_symbol_ix = 2 * d_spacing - d_symSize;
-            */
-            const ssize_t end_rise_ox = d_windowOverlap;
-            const size_t end_rise_ix = 2 * d_spacing - d_symSize + d_windowOverlap;
-            const ssize_t end_cyclic_prefix_ox = d_symSize - d_spacing;
-            /* end_cyclic_prefix_ix = end of symbol
-            const size_t begin_fall_ox = d_symSize - d_windowOverlap;
-            const size_t begin_fall_ix = d_spacing - d_windowOverlap;
-            const size_t end_real_symbol_ox = d_symSize;
-             end_real_symbol_ix = end of symbol
-            const size_t end_fall_ox = d_symSize + d_windowOverlap;
-            const size_t end_fall_ix = d_spacing + d_windowOverlap;
-            */
-
-            ssize_t ox = start_rise_ox;
-            size_t ix = start_rise_ix;
-
-            for (size_t i = 0; ix < end_rise_ix; i++) {
-                out[ox] += in[ix] * d_window.at(i);
-                ix++;
-                ox++;
-            }
-            assert(ox == end_rise_ox);
-
-            const size_t remaining_prefix_length = end_cyclic_prefix_ox - end_rise_ox;
-            memcpy( &out[ox], &in[ix],
-                    remaining_prefix_length * sizeof(complexf));
-            ox += remaining_prefix_length;
-            assert(ox == end_cyclic_prefix_ox);
-            ix = 0;
-
-            const bool last_symbol = (sym_ix + 1 >= d_nbSymbols);
-            if (last_symbol) {
-                // No windowing at all at end
-                memcpy(&out[ox], &in[ix], d_spacing * sizeof(complexf));
-                ox += d_spacing;
-            }
-            else {
-                // Copy the middle part of the symbol, d_windowOverlap samples
-                // short of the end.
-                memcpy( &out[ox],
-                        &in[ix],
-                        (d_spacing - d_windowOverlap) * sizeof(complexf));
-                ox += d_spacing - d_windowOverlap;
-                ix += d_spacing - d_windowOverlap;
-                assert(ox == (ssize_t)(d_symSize - d_windowOverlap));
-
-                // Apply window from 1 to 0.5 for the end of the symbol
-                for (size_t i = 0; ox < (ssize_t)d_symSize; i++) {
-                    out[ox] = in[ix] * d_window[2*d_windowOverlap - (i+1)];
-                    ox++;
+            // Data symbols
+            for (size_t sym_ix = 0; sym_ix < p.nbSymbols; sym_ix++) {
+                /* _ix variables are indices into in[], _ox variables are
+                 * indices for out[] */
+                const ssize_t start_rise_ox = -p.windowOverlap;
+                const size_t start_rise_ix = 2 * p.spacing - p.symSize - p.windowOverlap;
+                /*
+                   const size_t start_real_symbol_ox = 0;
+                   const size_t start_real_symbol_ix = 2 * p.spacing - p.symSize;
+                   */
+                const ssize_t end_rise_ox = p.windowOverlap;
+                const size_t end_rise_ix = 2 * p.spacing - p.symSize + p.windowOverlap;
+                const ssize_t end_cyclic_prefix_ox = p.symSize - p.spacing;
+                /* end_cyclic_prefix_ix = end of symbol
+                   const size_t begin_fall_ox = p.symSize - p.windowOverlap;
+                   const size_t begin_fall_ix = p.spacing - p.windowOverlap;
+                   const size_t end_real_symbol_ox = p.symSize;
+                   end_real_symbol_ix = end of symbol
+                   const size_t end_fall_ox = p.symSize + p.windowOverlap;
+                   const size_t end_fall_ix = p.spacing + p.windowOverlap;
+                   */
+
+                ssize_t ox = start_rise_ox;
+                size_t ix = start_rise_ix;
+
+                for (size_t i = 0; ix < end_rise_ix; i++) {
+                    out[ox] += in[ix] * p.window.at(i);
                     ix++;
+                    ox++;
                 }
-                assert(ix == d_spacing);
+                assert(ox == end_rise_ox);
 
+                const size_t remaining_prefix_length = end_cyclic_prefix_ox - end_rise_ox;
+                memcpy( &out[ox], &in[ix],
+                        remaining_prefix_length * sizeof(T));
+                ox += remaining_prefix_length;
+                assert(ox == end_cyclic_prefix_ox);
                 ix = 0;
-                // Cyclic suffix, with window from 0.5 to 0
-                for (size_t i = 0; ox < (ssize_t)(d_symSize + d_windowOverlap); i++) {
-                    out[ox] = in[ix] * d_window[d_windowOverlap - (i+1)];
-                    ox++;
-                    ix++;
+
+                const bool last_symbol = (sym_ix + 1 >= p.nbSymbols);
+                if (last_symbol) {
+                    // No windowing at all at end
+                    memcpy(&out[ox], &in[ix], p.spacing * sizeof(T));
+                    ox += p.spacing;
+                }
+                else {
+                    // Copy the middle part of the symbol, p.windowOverlap samples
+                    // short of the end.
+                    memcpy( &out[ox],
+                            &in[ix],
+                            (p.spacing - p.windowOverlap) * sizeof(T));
+                    ox += p.spacing - p.windowOverlap;
+                    ix += p.spacing - p.windowOverlap;
+                    assert(ox == (ssize_t)(p.symSize - p.windowOverlap));
+
+                    // Apply window from 1 to 0.5 for the end of the symbol
+                    for (size_t i = 0; ox < (ssize_t)p.symSize; i++) {
+                        out[ox] = in[ix] * p.window[2*p.windowOverlap - (i+1)];
+                        ox++;
+                        ix++;
+                    }
+                    assert(ix == p.spacing);
+
+                    ix = 0;
+                    // Cyclic suffix, with window from 0.5 to 0
+                    for (size_t i = 0; ox < (ssize_t)(p.symSize + p.windowOverlap); i++) {
+                        out[ox] = in[ix] * p.window[p.windowOverlap - (i+1)];
+                        ox++;
+                        ix++;
+                    }
+
+                    assert(ix == p.windowOverlap);
                 }
 
-                assert(ix == d_windowOverlap);
+                out += p.symSize;
+                in += p.spacing;
+                // out is now pointing to the proper end of symbol. There are
+                // windowOverlap samples ahead that were already written.
             }
-
-            out += d_symSize;
-            in += d_spacing;
-            // out is now pointing to the proper end of symbol. There are
-            // d_windowOverlap samples ahead that were already written.
+        }
+        else {
+            throw std::runtime_error("fixed-point doesn't support window overlap");
         }
     }
     else {
         // Handle Null symbol separately because it is longer
         // end - (nullSize - spacing) = 2 * spacing - nullSize
-        memcpy(out, &in[2 * d_spacing - d_nullSize],
-                (d_nullSize - d_spacing) * sizeof(complexf));
-        memcpy(&out[d_nullSize - d_spacing], in, d_spacing * sizeof(complexf));
-        in += d_spacing;
-        out += d_nullSize;
+        memcpy(out, &in[2 * p.spacing - p.nullSize],
+                (p.nullSize - p.spacing) * sizeof(T));
+        memcpy(&out[p.nullSize - p.spacing], in, p.spacing * sizeof(T));
+        in += p.spacing;
+        out += p.nullSize;
 
         // Data symbols
-        for (size_t i = 0; i < d_nbSymbols; ++i) {
+        for (size_t i = 0; i < p.nbSymbols; ++i) {
             // end - (symSize - spacing) = 2 * spacing - symSize
-            memcpy(out, &in[2 * d_spacing - d_symSize],
-                    (d_symSize - d_spacing) * sizeof(complexf));
-            memcpy(&out[d_symSize - d_spacing], in, d_spacing * sizeof(complexf));
-            in += d_spacing;
-            out += d_symSize;
+            memcpy(out, &in[2 * p.spacing - p.symSize],
+                    (p.symSize - p.spacing) * sizeof(T));
+            memcpy(&out[p.symSize - p.spacing], in, p.spacing * sizeof(T));
+            in += p.spacing;
+            out += p.symSize;
         }
     }
 
-    return sizeIn;
+    const auto sizeOut = dataOut->getLength();
+    return sizeOut;
+}
+
+int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut)
+{
+    switch (m_fftEngine) {
+        case FFTEngine::FFTW:
+            return do_process<complexf>(m_params, dataIn, dataOut);
+        case FFTEngine::KISS:
+            if (m_params.windowOverlap) {
+                throw std::runtime_error("fixed point and ofdm windowing not supported");
+            }
+            return do_process<complexfix>(m_params, dataIn, dataOut);
+        case FFTEngine::DEXTER:
+            return do_process<complexfix_wide>(m_params, dataIn, dataOut);
+    }
+    throw std::logic_error("Unhandled fftEngine variant");
 }
 
 void GuardIntervalInserter::set_parameter(
@@ -293,7 +323,7 @@ const std::string GuardIntervalInserter::get_parameter(const std::string& parame
     using namespace std;
     stringstream ss;
     if (parameter == "windowlen") {
-        ss << d_windowOverlap;
+        ss << m_params.windowOverlap;
     }
     else {
         ss << "Parameter '" << parameter <<
@@ -306,6 +336,6 @@ const std::string GuardIntervalInserter::get_parameter(const std::string& parame
 const json::map_t GuardIntervalInserter::get_all_values() const
 {
     json::map_t map;
-    map["windowlen"].v = d_windowOverlap;
+    map["windowlen"].v = m_params.windowOverlap;
     return map;
 }
diff --git a/src/GuardIntervalInserter.h b/src/GuardIntervalInserter.h
index f78ac91..8d329ff 100644
--- a/src/GuardIntervalInserter.h
+++ b/src/GuardIntervalInserter.h
@@ -30,6 +30,7 @@
 #   include <config.h>
 #endif
 
+#include "ConfigParser.h"
 #include "ModPlugin.h"
 #include "RemoteControl.h"
 #include <stdint.h>
@@ -50,7 +51,8 @@ class GuardIntervalInserter : public ModCodec, public RemoteControllable
                 size_t spacing,
                 size_t nullSize,
                 size_t symSize,
-                size_t& windowOverlap);
+                size_t& windowOverlap,
+                FFTEngine fftEngine);
 
         virtual ~GuardIntervalInserter() {}
 
@@ -62,16 +64,30 @@ class GuardIntervalInserter : public ModCodec, public RemoteControllable
         virtual const std::string get_parameter(const std::string& parameter) const override;
         virtual const json::map_t get_all_values() const override;
 
+        struct Params {
+            Params(
+                size_t nbSymbols,
+                size_t spacing,
+                size_t nullSize,
+                size_t symSize,
+                size_t& windowOverlap);
+
+            size_t nbSymbols;
+            size_t spacing;
+            size_t nullSize;
+            size_t symSize;
+            size_t& windowOverlap;
+
+            mutable std::mutex windowMutex;
+            std::vector<float> window;
+        };
+
     protected:
         void update_window(size_t new_window_overlap);
 
-        size_t d_nbSymbols;
-        size_t d_spacing;
-        size_t d_nullSize;
-        size_t d_symSize;
+        FFTEngine m_fftEngine;
+
+        Params m_params;
 
-        mutable std::mutex d_windowMutex;
-        size_t& d_windowOverlap;
-        std::vector<float> d_window;
 };
 
diff --git a/src/MemlessPoly.h b/src/MemlessPoly.h
index 91e6860..72de62c 100644
--- a/src/MemlessPoly.h
+++ b/src/MemlessPoly.h
@@ -32,13 +32,10 @@
 
 #include "RemoteControl.h"
 #include "ModPlugin.h"
-#include "PcDebug.h"
 #include "ThreadsafeQueue.h"
 
 #include <sys/types.h>
 #include <array>
-#include <complex>
-#include <memory>
 #include <string>
 #include <thread>
 #include <vector>
@@ -47,8 +44,6 @@
 
 #define MEMLESSPOLY_PIPELINE_DELAY 1
 
-typedef std::complex<float> complexf;
-
 enum class dpd_type_t {
     odd_only_poly,
     lookup_table
diff --git a/src/ModPlugin.h b/src/ModPlugin.h
index 470508f..bb3ee2c 100644
--- a/src/ModPlugin.h
+++ b/src/ModPlugin.h
@@ -33,9 +33,7 @@
 #include "Buffer.h"
 #include "ThreadsafeQueue.h"
 #include "TimestampDecoder.h"
-#include <cstddef>
 #include <vector>
-#include <memory>
 #include <thread>
 #include <atomic>
 
diff --git a/src/NullSymbol.cpp b/src/NullSymbol.cpp
index 4684dfe..526e662 100644
--- a/src/NullSymbol.cpp
+++ b/src/NullSymbol.cpp
@@ -27,18 +27,16 @@
 #include "NullSymbol.h"
 #include "PcDebug.h"
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <complex>
-#include <string.h>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
 
-typedef std::complex<float> complexf;
-
-NullSymbol::NullSymbol(size_t nbCarriers) :
+NullSymbol::NullSymbol(size_t numCarriers, size_t typeSize) :
     ModInput(),
-    myNbCarriers(nbCarriers)
+    m_numCarriers(numCarriers),
+    m_typeSize(typeSize)
 {
-    PDEBUG("NullSymbol::NullSymbol(%zu) @ %p\n", nbCarriers, this);
+    PDEBUG("NullSymbol::NullSymbol(%zu) @ %p\n", numCarriers, this);
 }
 
 
@@ -52,7 +50,7 @@ int NullSymbol::process(Buffer* dataOut)
 {
     PDEBUG("NullSymbol::process(dataOut: %p)\n", dataOut);
 
-    dataOut->setLength(myNbCarriers * 2 * sizeof(float));
+    dataOut->setLength(m_numCarriers * m_typeSize);
     memset(dataOut->getData(), 0, dataOut->getLength());
 
     return dataOut->getLength();
diff --git a/src/NullSymbol.h b/src/NullSymbol.h
index 814e434..6ba9e63 100644
--- a/src/NullSymbol.h
+++ b/src/NullSymbol.h
@@ -39,14 +39,14 @@
 class NullSymbol : public ModInput
 {
 public:
-    NullSymbol(size_t nbCarriers);
+    NullSymbol(size_t nunCarriers, size_t typeSize);
     virtual ~NullSymbol();
 
     int process(Buffer* dataOut);
     const char* name() { return "NullSymbol"; }
 
 private:
-    size_t myNbCarriers;
-
+    size_t m_numCarriers;
+    size_t m_typeSize;
 };
 
diff --git a/src/OfdmGenerator.cpp b/src/OfdmGenerator.cpp
index cb799d3..38648c9 100644
--- a/src/OfdmGenerator.cpp
+++ b/src/OfdmGenerator.cpp
@@ -2,7 +2,7 @@
    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty
    the Queen in Right of Canada (Communications Research Center Canada)
 
-   Copyright (C) 2023
+   Copyright (C) 2024
    Matthias P. Braendli, matthias.braendli@mpb.li
 
     http://opendigitalradio.org
@@ -27,17 +27,19 @@
 #include "OfdmGenerator.h"
 #include "PcDebug.h"
 
-#define FFT_TYPE fftwf_complex
-
-#include <string.h>
 #include <stdexcept>
 #include <assert.h>
 #include <string>
 #include <numeric>
+#include <vector>
+#include <cstring>
+#include <complex>
 
 static const size_t MAX_CLIP_STATS = 10;
 
-OfdmGenerator::OfdmGenerator(size_t nbSymbols,
+using FFTW_TYPE = fftwf_complex;
+
+OfdmGeneratorCF32::OfdmGeneratorCF32(size_t nbSymbols,
                              size_t nbCarriers,
                              size_t spacing,
                              bool& enableCfr,
@@ -62,8 +64,7 @@ OfdmGenerator::OfdmGenerator(size_t nbSymbols,
             nbSymbols, nbCarriers, spacing, inverse ? "true" : "false", this);
 
     if (nbCarriers > spacing) {
-        throw std::runtime_error(
-                "OfdmGenerator::OfdmGenerator nbCarriers > spacing!");
+        throw std::runtime_error("OfdmGenerator nbCarriers > spacing!");
     }
 
     /* register the parameters that can be remote controlled */
@@ -102,29 +103,29 @@ OfdmGenerator::OfdmGenerator(size_t nbSymbols,
     PDEBUG("  myZeroSize: %u\n", myZeroSize);
 
     const int N = mySpacing; // The size of the FFT
-    myFftIn = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N);
-    myFftOut = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N);
+    myFftIn = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N);
+    myFftOut = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N);
     fftwf_set_timelimit(2);
     myFftPlan = fftwf_plan_dft_1d(N,
             myFftIn, myFftOut,
             FFTW_BACKWARD, FFTW_MEASURE);
 
-    myCfrPostClip = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N);
-    myCfrPostFft = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N);
+    myCfrPostClip = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N);
+    myCfrPostFft = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N);
     myCfrFft = fftwf_plan_dft_1d(N,
             myCfrPostClip, myCfrPostFft,
             FFTW_FORWARD, FFTW_MEASURE);
 
-    if (sizeof(complexf) != sizeof(FFT_TYPE)) {
+    if (sizeof(complexf) != sizeof(FFTW_TYPE)) {
         printf("sizeof(complexf) %zu\n", sizeof(complexf));
-        printf("sizeof(FFT_TYPE) %zu\n", sizeof(FFT_TYPE));
+        printf("sizeof(FFT_TYPE) %zu\n", sizeof(FFTW_TYPE));
         throw std::runtime_error(
                 "OfdmGenerator::process complexf size is not FFT_TYPE size!");
     }
 }
 
 
-OfdmGenerator::~OfdmGenerator()
+OfdmGeneratorCF32::~OfdmGeneratorCF32()
 {
     PDEBUG("OfdmGenerator::~OfdmGenerator() @ %p\n", this);
 
@@ -153,15 +154,15 @@ OfdmGenerator::~OfdmGenerator()
     }
 }
 
-int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut)
+int OfdmGeneratorCF32::process(Buffer* const dataIn, Buffer* dataOut)
 {
     PDEBUG("OfdmGenerator::process(dataIn: %p, dataOut: %p)\n",
             dataIn, dataOut);
 
     dataOut->setLength(myNbSymbols * mySpacing * sizeof(complexf));
 
-    FFT_TYPE* in = reinterpret_cast<FFT_TYPE*>(dataIn->getData());
-    FFT_TYPE* out = reinterpret_cast<FFT_TYPE*>(dataOut->getData());
+    FFTW_TYPE *in = reinterpret_cast<FFTW_TYPE*>(dataIn->getData());
+    FFTW_TYPE *out = reinterpret_cast<FFTW_TYPE*>(dataOut->getData());
 
     size_t sizeIn = dataIn->getLength() / sizeof(complexf);
     size_t sizeOut = dataOut->getLength() / sizeof(complexf);
@@ -203,7 +204,7 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut)
         myPaprAfterCFR.clear();
     }
 
-    for (size_t i = 0; i < myNbSymbols; ++i) {
+    for (size_t i = 0; i < myNbSymbols; i++) {
         myFftIn[0][0] = 0;
         myFftIn[0][1] = 0;
 
@@ -212,22 +213,20 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut)
          * PosSrc=0 PosDst=1 PosSize=768
          * NegSrc=768 NegDst=1280 NegSize=768
          */
-        memset(&myFftIn[myZeroDst], 0, myZeroSize * sizeof(FFT_TYPE));
+        memset(&myFftIn[myZeroDst], 0, myZeroSize * sizeof(FFTW_TYPE));
         memcpy(&myFftIn[myPosDst], &in[myPosSrc],
-                myPosSize * sizeof(FFT_TYPE));
+                myPosSize * sizeof(FFTW_TYPE));
         memcpy(&myFftIn[myNegDst], &in[myNegSrc],
-                myNegSize * sizeof(FFT_TYPE));
-
+                myNegSize * sizeof(FFTW_TYPE));
 
         if (myCfr) {
             reference.resize(mySpacing);
             memcpy(reinterpret_cast<fftwf_complex*>(reference.data()),
-                    myFftIn, mySpacing * sizeof(FFT_TYPE));
+                    myFftIn, mySpacing * sizeof(FFTW_TYPE));
         }
 
         fftwf_execute(myFftPlan); // IFFT from myFftIn to myFftOut
 
-        
         if (myCfr) {
             complexf *symbol = reinterpret_cast<complexf*>(myFftOut);
             myPaprBeforeCFR.process_block(symbol, mySpacing);
@@ -235,7 +234,7 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut)
             if (myMERCalcIndex == i) {
                 before_cfr.resize(mySpacing);
                 memcpy(reinterpret_cast<fftwf_complex*>(before_cfr.data()),
-                        myFftOut, mySpacing * sizeof(FFT_TYPE));
+                        myFftOut, mySpacing * sizeof(FFTW_TYPE));
             }
 
             /* cfr_one_iteration runs the myFftPlan again at the end, and
@@ -277,7 +276,7 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut)
             num_error_clip += stat.errclip_count;
         }
 
-        memcpy(out, myFftOut, mySpacing * sizeof(FFT_TYPE));
+        memcpy(out, myFftOut, mySpacing * sizeof(FFTW_TYPE));
 
         in += myNbCarriers;
         out += mySpacing;
@@ -308,14 +307,14 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut)
     return sizeOut;
 }
 
-OfdmGenerator::cfr_iter_stat_t OfdmGenerator::cfr_one_iteration(
+OfdmGeneratorCF32::cfr_iter_stat_t OfdmGeneratorCF32::cfr_one_iteration(
         complexf *symbol, const complexf *reference)
 {
     // use std::norm instead of std::abs to avoid calculating the
     // square roots
     const float clip_squared = myCfrClip * myCfrClip;
 
-    OfdmGenerator::cfr_iter_stat_t ret;
+    OfdmGeneratorCF32::cfr_iter_stat_t ret;
 
     // Clip
     for (size_t i = 0; i < mySpacing; i++) {
@@ -331,7 +330,7 @@ OfdmGenerator::cfr_iter_stat_t OfdmGenerator::cfr_one_iteration(
     }
 
     // Take FFT of our clipped signal
-    memcpy(myCfrPostClip, symbol, mySpacing * sizeof(FFT_TYPE));
+    memcpy(myCfrPostClip, symbol, mySpacing * sizeof(FFTW_TYPE));
     fftwf_execute(myCfrFft); // FFT from myCfrPostClip to myCfrPostFft
 
     // Calculate the error in frequency domain by subtracting our reference
@@ -374,7 +373,7 @@ OfdmGenerator::cfr_iter_stat_t OfdmGenerator::cfr_one_iteration(
 }
 
 
-void OfdmGenerator::set_parameter(const std::string& parameter,
+void OfdmGeneratorCF32::set_parameter(const std::string& parameter,
                                   const std::string& value)
 {
     using namespace std;
@@ -404,7 +403,7 @@ void OfdmGenerator::set_parameter(const std::string& parameter,
     }
 }
 
-const std::string OfdmGenerator::get_parameter(const std::string& parameter) const
+const std::string OfdmGeneratorCF32::get_parameter(const std::string& parameter) const
 {
     using namespace std;
     stringstream ss;
@@ -458,9 +457,333 @@ const std::string OfdmGenerator::get_parameter(const std::string& parameter) con
     return ss.str();
 }
 
-const json::map_t OfdmGenerator::get_all_values() const
+const json::map_t OfdmGeneratorCF32::get_all_values() const
 {
     json::map_t map;
     // TODO needs rework of the values
     return map;
 }
+
+OfdmGeneratorFixed::OfdmGeneratorFixed(size_t nbSymbols,
+                             size_t nbCarriers,
+                             size_t spacing,
+                             bool inverse) :
+    ModCodec(),
+    myNbSymbols(nbSymbols),
+    myNbCarriers(nbCarriers),
+    mySpacing(spacing)
+{
+    PDEBUG("OfdmGenerator::OfdmGenerator(%zu, %zu, %zu, %s) @ %p\n",
+            nbSymbols, nbCarriers, spacing, inverse ? "true" : "false", this);
+
+    etiLog.level(info) << "Using KISS FFT by Mark Borgerding for fixed-point transform";
+
+    if (nbCarriers > spacing) {
+        throw std::runtime_error("OfdmGenerator nbCarriers > spacing!");
+    }
+
+    if (inverse) {
+        myPosDst = (nbCarriers & 1 ? 0 : 1);
+        myPosSrc = 0;
+        myPosSize = (nbCarriers + 1) / 2;
+        myNegDst = spacing - (nbCarriers / 2);
+        myNegSrc = (nbCarriers + 1) / 2;
+        myNegSize = nbCarriers / 2;
+    }
+    else {
+        myPosDst = (nbCarriers & 1 ? 0 : 1);
+        myPosSrc = nbCarriers / 2;
+        myPosSize = (nbCarriers + 1) / 2;
+        myNegDst = spacing - (nbCarriers / 2);
+        myNegSrc = 0;
+        myNegSize = nbCarriers / 2;
+    }
+    myZeroDst = myPosDst + myPosSize;
+    myZeroSize = myNegDst - myZeroDst;
+
+    PDEBUG("  myPosDst: %u\n", myPosDst);
+    PDEBUG("  myPosSrc: %u\n", myPosSrc);
+    PDEBUG("  myPosSize: %u\n", myPosSize);
+    PDEBUG("  myNegDst: %u\n", myNegDst);
+    PDEBUG("  myNegSrc: %u\n", myNegSrc);
+    PDEBUG("  myNegSize: %u\n", myNegSize);
+    PDEBUG("  myZeroDst: %u\n", myZeroDst);
+    PDEBUG("  myZeroSize: %u\n", myZeroSize);
+
+    const int N = mySpacing; // The size of the FFT
+
+    const size_t nbytes = N * sizeof(kiss_fft_cpx);
+    myFftIn = (kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes);
+    myFftOut = (kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes);
+    memset(myFftIn, 0, nbytes);
+
+    myKissCfg = kiss_fft_alloc(N, inverse, nullptr, nullptr);
+}
+
+OfdmGeneratorFixed::~OfdmGeneratorFixed()
+{
+    if (myKissCfg) KISS_FFT_FREE(myKissCfg);
+    if (myFftIn) KISS_FFT_FREE(myFftIn);
+    if (myFftOut) KISS_FFT_FREE(myFftOut);
+}
+
+int OfdmGeneratorFixed::process(Buffer* const dataIn, Buffer* dataOut)
+{
+    dataOut->setLength(myNbSymbols * mySpacing * sizeof(kiss_fft_cpx));
+
+    kiss_fft_cpx *in = reinterpret_cast<kiss_fft_cpx*>(dataIn->getData());
+    kiss_fft_cpx *out = reinterpret_cast<kiss_fft_cpx*>(dataOut->getData());
+
+    size_t sizeIn = dataIn->getLength() / sizeof(kiss_fft_cpx);
+    size_t sizeOut = dataOut->getLength() / sizeof(kiss_fft_cpx);
+
+    if (sizeIn != myNbSymbols * myNbCarriers) {
+        PDEBUG("Nb symbols: %zu\n", myNbSymbols);
+        PDEBUG("Nb carriers: %zu\n", myNbCarriers);
+        PDEBUG("Spacing: %zu\n", mySpacing);
+        PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * myNbCarriers);
+        throw std::runtime_error(
+                "OfdmGenerator::process input size not valid!");
+    }
+    if (sizeOut != myNbSymbols * mySpacing) {
+        PDEBUG("Nb symbols: %zu\n", myNbSymbols);
+        PDEBUG("Nb carriers: %zu\n", myNbCarriers);
+        PDEBUG("Spacing: %zu\n", mySpacing);
+        PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * mySpacing);
+        throw std::runtime_error(
+                "OfdmGenerator::process output size not valid!");
+    }
+
+    for (size_t i = 0; i < myNbSymbols; i++) {
+        myFftIn[0].r = 0;
+        myFftIn[0].i = 0;
+
+        /* For TM I this is:
+         * ZeroDst=769 ZeroSize=511
+         * PosSrc=0 PosDst=1 PosSize=768
+         * NegSrc=768 NegDst=1280 NegSize=768
+         */
+        memset(&myFftIn[myZeroDst], 0, myZeroSize * sizeof(kiss_fft_cpx));
+        memcpy(&myFftIn[myPosDst], &in[myPosSrc], myPosSize * sizeof(kiss_fft_cpx));
+        memcpy(&myFftIn[myNegDst], &in[myNegSrc], myNegSize * sizeof(kiss_fft_cpx));
+
+        kiss_fft(myKissCfg, myFftIn, myFftOut);
+
+        memcpy(out, myFftOut, mySpacing * sizeof(kiss_fft_cpx));
+
+        in += myNbCarriers;
+        out += mySpacing;
+    }
+
+    return sizeOut;
+}
+
+#ifdef HAVE_DEXTER
+OfdmGeneratorDEXTER::OfdmGeneratorDEXTER(size_t nbSymbols,
+                             size_t nbCarriers,
+                             size_t spacing) :
+    ModCodec(),
+    myNbSymbols(nbSymbols),
+    myNbCarriers(nbCarriers),
+    mySpacing(spacing)
+{
+    PDEBUG("OfdmGeneratorDEXTER::OfdmGeneratorDEXTER(%zu, %zu, %zu) @ %p\n",
+            nbSymbols, nbCarriers, spacing, this);
+
+    etiLog.level(info) << "Using DEXTER FFT Accelerator for fixed-point transform";
+
+    if (nbCarriers > spacing) {
+        throw std::runtime_error("OfdmGenerator nbCarriers > spacing!");
+    }
+
+    myPosDst = (nbCarriers & 1 ? 0 : 1);
+    myPosSrc = 0;
+    myPosSize = (nbCarriers + 1) / 2;
+    myNegDst = spacing - (nbCarriers / 2);
+    myNegSrc = (nbCarriers + 1) / 2;
+    myNegSize = nbCarriers / 2;
+
+    myZeroDst = myPosDst + myPosSize;
+    myZeroSize = myNegDst - myZeroDst;
+
+    PDEBUG("  myPosDst: %u\n", myPosDst);
+    PDEBUG("  myPosSrc: %u\n", myPosSrc);
+    PDEBUG("  myPosSize: %u\n", myPosSize);
+    PDEBUG("  myNegDst: %u\n", myNegDst);
+    PDEBUG("  myNegSrc: %u\n", myNegSrc);
+    PDEBUG("  myNegSize: %u\n", myNegSize);
+    PDEBUG("  myZeroDst: %u\n", myZeroDst);
+    PDEBUG("  myZeroSize: %u\n", myZeroSize);
+
+    const size_t nbytes_in = mySpacing * sizeof(complexfix);
+    const size_t nbytes_out = mySpacing * sizeof(complexfix_wide);
+
+#define IIO_ENSURE(expr, err) { \
+    if (!(expr)) { \
+        etiLog.log(error, "%s (%s:%d)\n", err, __FILE__, __LINE__); \
+        throw std::runtime_error("Failed to set FFT for OfdmGeneratorDEXTER"); \
+    } \
+}
+    IIO_ENSURE((m_ctx = iio_create_default_context()), "No context");
+    IIO_ENSURE(m_dev_in = iio_context_find_device(m_ctx, "fft-accelerator-in"), "no dev");
+    IIO_ENSURE(m_dev_out = iio_context_find_device(m_ctx, "fft-accelerator-out"), "no dev");
+    IIO_ENSURE(m_channel_in = iio_device_find_channel(m_dev_in, "voltage0", true), "no channel");
+    IIO_ENSURE(m_channel_out = iio_device_find_channel(m_dev_out, "voltage0", false), "no channel");
+
+    iio_channel_enable(m_channel_in);
+    iio_channel_enable(m_channel_out);
+
+    m_buf_in = iio_device_create_buffer(m_dev_in, nbytes_in, false);
+    if (!m_buf_in) {
+        throw std::runtime_error("OfdmGeneratorDEXTER could not create in buffer");
+    }
+
+    m_buf_out = iio_device_create_buffer(m_dev_out, nbytes_out, false);
+    if (!m_buf_out) {
+        throw std::runtime_error("OfdmGeneratorDEXTER could not create out buffer");
+    }
+}
+
+OfdmGeneratorDEXTER::~OfdmGeneratorDEXTER()
+{
+    if (m_buf_in) {
+        iio_buffer_destroy(m_buf_in);
+        m_buf_in = nullptr;
+    }
+
+    if (m_buf_out) {
+        iio_buffer_destroy(m_buf_out);
+        m_buf_out = nullptr;
+    }
+
+    if (m_channel_in) {
+        iio_channel_disable(m_channel_in);
+        m_channel_in = nullptr;
+    }
+
+    if (m_channel_out) {
+        iio_channel_disable(m_channel_out);
+        m_channel_out = nullptr;
+    }
+
+    if (m_ctx) {
+        iio_context_destroy(m_ctx);
+        m_ctx = nullptr;
+    }
+}
+
+int OfdmGeneratorDEXTER::process(Buffer* const dataIn, Buffer* dataOut)
+{
+    dataOut->setLength(myNbSymbols * mySpacing * sizeof(complexfix_wide));
+
+    complexfix *in = reinterpret_cast<complexfix*>(dataIn->getData());
+    complexfix_wide *out = reinterpret_cast<complexfix_wide*>(dataOut->getData());
+
+    size_t sizeIn = dataIn->getLength() / sizeof(complexfix);
+    size_t sizeOut = dataOut->getLength() / sizeof(complexfix_wide);
+
+    if (sizeIn != myNbSymbols * myNbCarriers) {
+        PDEBUG("Nb symbols: %zu\n", myNbSymbols);
+        PDEBUG("Nb carriers: %zu\n", myNbCarriers);
+        PDEBUG("Spacing: %zu\n", mySpacing);
+        PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * myNbCarriers);
+        throw std::runtime_error(
+                "OfdmGenerator::process input size not valid!");
+    }
+    if (sizeOut != myNbSymbols * mySpacing) {
+        PDEBUG("Nb symbols: %zu\n", myNbSymbols);
+        PDEBUG("Nb carriers: %zu\n", myNbCarriers);
+        PDEBUG("Spacing: %zu\n", mySpacing);
+        PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * mySpacing);
+        throw std::runtime_error("OfdmGenerator::process output size not valid!");
+    }
+
+    ptrdiff_t iio_buf_size = (uint8_t*)iio_buffer_end(m_buf_in) - (uint8_t*)iio_buffer_start(m_buf_in);
+    if (iio_buf_size != (ssize_t)(mySpacing * sizeof(complexfix))) {
+        throw std::runtime_error("OfdmGenerator::process incorrect iio buffer size!");
+    }
+
+    for (size_t i = 0; i < myNbSymbols; i++) {
+        complexfix *fft_in = reinterpret_cast<complexfix*>(iio_buffer_start(m_buf_in));
+
+        /* For TM I this is:
+         * ZeroDst=769 ZeroSize=511
+         * PosSrc=0 PosDst=1 PosSize=768
+         * NegSrc=768 NegDst=1280 NegSize=768
+         */
+
+        fft_in[0] = static_cast<complexfix::value_type>(0);
+        for (size_t i = 0; i < myZeroSize; i++) {
+            fft_in[myZeroDst + i] = static_cast<complexfix::value_type>(0);
+        }
+
+        memcpy(&fft_in[myPosDst], &in[myPosSrc], myPosSize * sizeof(complexfix));
+        memcpy(&fft_in[myNegDst], &in[myNegSrc], myNegSize * sizeof(complexfix));
+
+        ssize_t nbytes_tx = iio_buffer_push(m_buf_in);
+        if (nbytes_tx < 0) {
+            throw std::runtime_error("OfdmGenerator::process error pushing IIO buffer!");
+        }
+
+        in += myNbCarriers;
+
+        // Keep one buffer in flight while we're doing shuffling data around here,
+        // this improves performance.
+        // I believe that, by default, IIO allocates four buffers in total.
+        if (i > 0) {
+            ssize_t nbytes_rx = iio_buffer_refill(m_buf_out);
+            if (nbytes_rx < 0) {
+                throw std::runtime_error("OfdmGenerator::process error refilling IIO buffer!");
+            }
+
+            ptrdiff_t p_inc = iio_buffer_step(m_buf_out);
+            if (p_inc != 1) {
+                throw std::runtime_error("OfdmGenerator::process Wrong p_inc");
+            }
+
+            // The FFT Accelerator takes 16-bit I + 16-bit Q, and outputs 32-bit I and 32-bit Q.
+            // The formatconvert will take care of this
+            const uint8_t *fft_out = (const uint8_t*)iio_buffer_first(m_buf_out, m_channel_out);
+            const uint8_t *fft_out_end = (const uint8_t*)iio_buffer_end(m_buf_out);
+            constexpr size_t sizeof_out_iq = sizeof(complexfix_wide);
+            if ((fft_out_end - fft_out) != (ssize_t)(mySpacing * sizeof_out_iq)) {
+                fprintf(stderr, "FFT_OUT: %p %p %zu %zu\n",
+                        fft_out, fft_out_end, (fft_out_end - fft_out),
+                        mySpacing * sizeof_out_iq);
+                throw std::runtime_error("OfdmGenerator::process fft_out length invalid!");
+            }
+
+            memcpy(out, fft_out, mySpacing * sizeof_out_iq);
+
+            out += mySpacing;
+        }
+    }
+
+    ssize_t nbytes_rx = iio_buffer_refill(m_buf_out);
+    if (nbytes_rx < 0) {
+        throw std::runtime_error("OfdmGenerator::process error refilling IIO buffer!");
+    }
+
+    ptrdiff_t p_inc = iio_buffer_step(m_buf_out);
+    if (p_inc != 1) {
+        throw std::runtime_error("OfdmGenerator::process Wrong p_inc");
+    }
+
+    // The FFT Accelerator takes 16-bit I + 16-bit Q, and outputs 32-bit I and 32-bit Q.
+    // The formatconvert will take care of this
+    const uint8_t *fft_out = (const uint8_t*)iio_buffer_first(m_buf_out, m_channel_out);
+    const uint8_t *fft_out_end = (const uint8_t*)iio_buffer_end(m_buf_out);
+    constexpr size_t sizeof_out_iq = sizeof(complexfix_wide);
+    if ((fft_out_end - fft_out) != (ssize_t)(mySpacing * sizeof_out_iq)) {
+        fprintf(stderr, "FFT_OUT: %p %p %zu %zu\n",
+                fft_out, fft_out_end, (fft_out_end - fft_out),
+                mySpacing * sizeof_out_iq);
+        throw std::runtime_error("OfdmGenerator::process fft_out length invalid!");
+    }
+
+    memcpy(out, fft_out, mySpacing * sizeof_out_iq);
+
+    return sizeOut;
+}
+
+#endif // HAVE_DEXTER
diff --git a/src/OfdmGenerator.h b/src/OfdmGenerator.h
index dc1ad46..475b2a4 100644
--- a/src/OfdmGenerator.h
+++ b/src/OfdmGenerator.h
@@ -2,7 +2,7 @@
    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty
    the Queen in Right of Canada (Communications Research Center Canada)
 
-   Copyright (C) 2023
+   Copyright (C) 2024
    Matthias P. Braendli, matthias.braendli@mpb.li
 
     http://opendigitalradio.org
@@ -33,27 +33,30 @@
 #include "ModPlugin.h"
 #include "RemoteControl.h"
 #include "PAPRStats.h"
-#include "fftw3.h"
+#include "kiss_fft.h"
+
 #include <cstddef>
-#include <vector>
-#include <complex>
 #include <atomic>
+#include <fftw3.h>
 
-typedef std::complex<float> complexf;
+#ifdef HAVE_DEXTER
+#   include <iio.h>
+#endif
 
-class OfdmGenerator : public ModCodec, public RemoteControllable
+// Complex Float uses FFTW
+class OfdmGeneratorCF32 : public ModCodec, public RemoteControllable
 {
     public:
-        OfdmGenerator(size_t nbSymbols,
+        OfdmGeneratorCF32(size_t nbSymbols,
                       size_t nbCarriers,
                       size_t spacing,
                       bool& enableCfr,
                       float& cfrClip,
                       float& cfrErrorClip,
                       bool inverse = true);
-        virtual ~OfdmGenerator();
-        OfdmGenerator(const OfdmGenerator&) = delete;
-        OfdmGenerator& operator=(const OfdmGenerator&) = delete;
+        virtual ~OfdmGeneratorCF32();
+        OfdmGeneratorCF32(const OfdmGeneratorCF32&) = delete;
+        OfdmGeneratorCF32& operator=(const OfdmGeneratorCF32&) = delete;
 
         int process(Buffer* const dataIn, Buffer* dataOut) override;
         const char* name() override { return "OfdmGenerator"; }
@@ -107,4 +110,76 @@ class OfdmGenerator : public ModCodec, public RemoteControllable
         std::deque<double> myMERs;
 };
 
+// Fixed point implementation uses KISS FFT with -DFIXED_POINT=32
+class OfdmGeneratorFixed : public ModCodec
+{
+    public:
+        OfdmGeneratorFixed(size_t nbSymbols,
+                      size_t nbCarriers,
+                      size_t spacing,
+                      bool inverse = true);
+        virtual ~OfdmGeneratorFixed();
+        OfdmGeneratorFixed(const OfdmGeneratorFixed&) = delete;
+        OfdmGeneratorFixed& operator=(const OfdmGeneratorFixed&) = delete;
+
+        int process(Buffer* const dataIn, Buffer* dataOut) override;
+        const char* name() override { return "OfdmGenerator"; }
+
+    private:
+        kiss_fft_cfg myKissCfg = nullptr;
+        kiss_fft_cpx *myFftIn, *myFftOut;
+
+        const size_t myNbSymbols;
+        const size_t myNbCarriers;
+        const size_t mySpacing;
+        unsigned myPosSrc;
+        unsigned myPosDst;
+        unsigned myPosSize;
+        unsigned myNegSrc;
+        unsigned myNegDst;
+        unsigned myNegSize;
+        unsigned myZeroDst;
+        unsigned myZeroSize;
+};
+
+#ifdef HAVE_DEXTER
+// The PrecisionWave DEXTER device contains an FFT accelerator in FPGA
+// It only does inverse FFTs
+class OfdmGeneratorDEXTER : public ModCodec
+{
+    public:
+        OfdmGeneratorDEXTER(size_t nbSymbols,
+                      size_t nbCarriers,
+                      size_t spacing);
+        virtual ~OfdmGeneratorDEXTER();
+        OfdmGeneratorDEXTER(const OfdmGeneratorDEXTER&) = delete;
+        OfdmGeneratorDEXTER& operator=(const OfdmGeneratorDEXTER&) = delete;
+
+        int process(Buffer* const dataIn, Buffer* dataOut) override;
+        const char* name() override { return "OfdmGenerator"; }
+
+    private:
+        struct iio_context *m_ctx = nullptr;
 
+        // "in" and "out" are from the point of view of the FFT Accelerator block
+        struct iio_device *m_dev_in = nullptr;
+        struct iio_channel *m_channel_in = nullptr;
+        struct iio_buffer *m_buf_in = nullptr;
+
+        struct iio_device *m_dev_out = nullptr;
+        struct iio_channel *m_channel_out = nullptr;
+        struct iio_buffer *m_buf_out = nullptr;
+
+        const size_t myNbSymbols;
+        const size_t myNbCarriers;
+        const size_t mySpacing;
+        unsigned myPosSrc;
+        unsigned myPosDst;
+        unsigned myPosSize;
+        unsigned myNegSrc;
+        unsigned myNegDst;
+        unsigned myNegSize;
+        unsigned myZeroDst;
+        unsigned myZeroSize;
+};
+#endif // HAVE_DEXTER
diff --git a/src/OutputMemory.cpp b/src/OutputMemory.cpp
index d6ef917..f673555 100644
--- a/src/OutputMemory.cpp
+++ b/src/OutputMemory.cpp
@@ -26,20 +26,14 @@
 
 #include "OutputMemory.h"
 #include "PcDebug.h"
-#include "Log.h"
-#include "TimestampDecoder.h"
-
-#include <stdexcept>
-#include <string.h>
-#include <math.h>
-
+#include <cmath>
 
 OutputMemory::OutputMemory(Buffer* dataOut)
     : ModOutput()
 {
     PDEBUG("OutputMemory::OutputMemory(%p) @ %p\n", dataOut, this);
 
-    setOutput(dataOut);
+    m_dataOut = dataOut;
 
 #if OUTPUT_MEM_HISTOGRAM
     myMax = 0.0f;
@@ -49,7 +43,6 @@ OutputMemory::OutputMemory(Buffer* dataOut)
 #endif
 }
 
-
 OutputMemory::~OutputMemory()
 {
 #if OUTPUT_MEM_HISTOGRAM
@@ -66,19 +59,12 @@ OutputMemory::~OutputMemory()
     PDEBUG("OutputMemory::~OutputMemory() @ %p\n", this);
 }
 
-
-void OutputMemory::setOutput(Buffer* dataOut)
-{
-    myDataOut = dataOut;
-}
-
-
 int OutputMemory::process(Buffer* dataIn)
 {
     PDEBUG("OutputMemory::process(dataIn: %p)\n",
             dataIn);
 
-    *myDataOut = *dataIn;
+    *m_dataOut = *dataIn;
 
 #if OUTPUT_MEM_HISTOGRAM
     const float* in = (const float*)dataIn->getData();
@@ -93,17 +79,17 @@ int OutputMemory::process(Buffer* dataIn)
     }
 #endif
 
-    return myDataOut->getLength();
+    return m_dataOut->getLength();
 }
 
 meta_vec_t OutputMemory::process_metadata(const meta_vec_t& metadataIn)
 {
-    myMetadata = metadataIn;
+    m_metadata = metadataIn;
     return {};
 }
 
 meta_vec_t OutputMemory::get_latest_metadata()
 {
-    return myMetadata;
+    return m_metadata;
 }
 
diff --git a/src/OutputMemory.h b/src/OutputMemory.h
index f0a5fbb..299d31d 100644
--- a/src/OutputMemory.h
+++ b/src/OutputMemory.h
@@ -61,11 +61,9 @@ public:
 
     meta_vec_t get_latest_metadata(void);
 
-    void setOutput(Buffer* dataOut);
-
 protected:
-    Buffer* myDataOut;
-    meta_vec_t myMetadata;
+    Buffer* m_dataOut;
+    meta_vec_t m_metadata;
 
 #if OUTPUT_MEM_HISTOGRAM
     // keep track of max value
diff --git a/src/PAPRStats.cpp b/src/PAPRStats.cpp
index 0c9764a..103f02f 100644
--- a/src/PAPRStats.cpp
+++ b/src/PAPRStats.cpp
@@ -33,7 +33,6 @@
 #  include <iostream>
 #endif
 
-
 PAPRStats::PAPRStats(size_t num_blocks_to_accumulate) :
     m_num_blocks_to_accumulate(num_blocks_to_accumulate)
 {
diff --git a/src/PAPRStats.h b/src/PAPRStats.h
index 86ad8b0..a4ded86 100644
--- a/src/PAPRStats.h
+++ b/src/PAPRStats.h
@@ -31,12 +31,9 @@
 #endif
 
 #include <cstddef>
-#include <vector>
 #include <deque>
 #include <complex>
 
-typedef std::complex<float> complexf;
-
 /* Helper class to calculate Peak-to-average-power ratio.
  * Definition of PAPR:
  *
@@ -53,6 +50,8 @@ typedef std::complex<float> complexf;
  */
 class PAPRStats
 {
+    typedef std::complex<float> complexf;
+
     public:
         PAPRStats(size_t num_blocks_to_accumulate);
 
diff --git a/src/PhaseReference.cpp b/src/PhaseReference.cpp
index 568e15e..71dec87 100644
--- a/src/PhaseReference.cpp
+++ b/src/PhaseReference.cpp
@@ -29,12 +29,10 @@
 
 #include <stdexcept>
 
-using complexf = std::complex<float>;
-
 /* ETSI EN 300 401 Table 43 (Clause 14.3.2)
  * Contains h_{i,k} values
  */
-const uint8_t PhaseReference::d_h[4][32] = {
+static const uint8_t d_h[4][32] = {
     /* h0 */ { 0, 2, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 2, 2, 1, 1,
         0, 2, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 2, 2, 1, 1 },
     /* h1 */ { 0, 3, 2, 3, 0, 1, 3, 0, 2, 1, 2, 3, 2, 3, 3, 0,
@@ -54,41 +52,80 @@ const uint8_t PhaseReference::d_h[4][32] = {
  * Tables 44 to 47 describe the frequency interleaving done in
  * FrequencyInterleaver.
  */
-PhaseReference::PhaseReference(unsigned int dabmode) :
+PhaseReference::PhaseReference(unsigned int dabmode, bool fixedPoint) :
     ModInput(),
-    d_dabmode(dabmode)
+    d_dabmode(dabmode),
+    d_fixedPoint(fixedPoint)
 {
     PDEBUG("PhaseReference::PhaseReference(%u) @ %p\n", dabmode, this);
 
     switch (d_dabmode) {
         case 1:
             d_carriers = 1536;
-            d_num = 2048;
             break;
         case 2:
             d_carriers = 384;
-            d_num = 512;
             break;
         case 3:
             d_carriers = 192;
-            d_num = 256;
             break;
         case 4:
             d_dabmode = 0;
         case 0:
             d_carriers = 768;
-            d_num = 1024;
             break;
         default:
             throw std::runtime_error(
                     "PhaseReference::PhaseReference DAB mode not valid!");
     }
-    d_dataIn.resize(d_carriers);
-    fillData();
+
+    if (d_fixedPoint) {
+        d_phaseRefFixed.fillData(d_dabmode, d_carriers);
+    }
+    else {
+        d_phaseRefCF32.fillData(d_dabmode, d_carriers);
+    }
 }
 
 
-complexf convert(uint8_t data) {
+static const int table[][48][2] = {
+    { // Mode 0/4
+        // Positive part
+        { 0, 0 }, { 3, 1 }, { 2, 0 }, { 1, 2 }, { 0, 0 }, { 3, 1 },
+        { 2, 2 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 3 }, { 1, 0 },
+        // Negative part
+        { 0, 0 }, { 1, 1 }, { 2, 1 }, { 3, 2 }, { 0, 2 }, { 1, 2 },
+        { 2, 0 }, { 3, 3 }, { 0, 3 }, { 1, 1 }, { 2, 3 }, { 3, 2 },
+    },
+    { // Mode 1
+        // Positive part
+        { 0, 3 }, { 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 2 }, { 3, 2 },
+        { 2, 1 }, { 1, 0 }, { 0, 2 }, { 3, 2 }, { 2, 3 }, { 1, 3 },
+        { 0, 0 }, { 3, 2 }, { 2, 1 }, { 1, 3 }, { 0, 3 }, { 3, 3 },
+        { 2, 3 }, { 1, 0 }, { 0, 3 }, { 3, 0 }, { 2, 1 }, { 1, 1 },
+        // Negative part
+        { 0, 1 }, { 1, 2 }, { 2, 0 }, { 3, 1 }, { 0, 3 }, { 1, 2 },
+        { 2, 2 }, { 3, 3 }, { 0, 2 }, { 1, 1 }, { 2, 2 }, { 3, 3 },
+        { 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 3 }, { 0, 2 }, { 1, 2 },
+        { 2, 2 }, { 3, 1 }, { 0, 1 }, { 1, 3 }, { 2, 1 }, { 3, 2 },
+    },
+    { // Mode 2
+        // Positive part
+        { 2, 0 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 0 }, { 1, 3 },
+        // Negative part
+        { 0, 2 }, { 1, 3 }, { 2, 2 }, { 3, 2 }, { 0, 1 }, { 1, 2 },
+    },
+    { // Mode 3
+        // Positive part
+        { 3, 2 }, { 2, 2 }, { 1, 2 },
+        // Negative part
+        { 0, 2 }, { 1, 3 }, { 2, 0 },
+    },
+};
+
+
+template <>
+complexf PhaseRefGen<complexf>::convert(uint8_t data) {
     const complexf value[] = {
         complexf(1, 0),
         complexf(0, 1),
@@ -98,62 +135,37 @@ complexf convert(uint8_t data) {
     return value[data % 4];
 }
 
+template <>
+complexfix PhaseRefGen<complexfix>::convert(uint8_t data) {
+    constexpr auto one = fixed_16{1};
+    constexpr auto zero = fixed_16{0};
 
-void PhaseReference::fillData()
-{
-    const int table[][48][2] = {
-        { // Mode 0/4
-            // Positive part
-            { 0, 0 }, { 3, 1 }, { 2, 0 }, { 1, 2 }, { 0, 0 }, { 3, 1 },
-            { 2, 2 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 3 }, { 1, 0 },
-            // Negative part
-            { 0, 0 }, { 1, 1 }, { 2, 1 }, { 3, 2 }, { 0, 2 }, { 1, 2 },
-            { 2, 0 }, { 3, 3 }, { 0, 3 }, { 1, 1 }, { 2, 3 }, { 3, 2 },
-        },
-        { // Mode 1
-            // Positive part
-            { 0, 3 }, { 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 2 }, { 3, 2 },
-            { 2, 1 }, { 1, 0 }, { 0, 2 }, { 3, 2 }, { 2, 3 }, { 1, 3 },
-            { 0, 0 }, { 3, 2 }, { 2, 1 }, { 1, 3 }, { 0, 3 }, { 3, 3 },
-            { 2, 3 }, { 1, 0 }, { 0, 3 }, { 3, 0 }, { 2, 1 }, { 1, 1 },
-            // Negative part
-            { 0, 1 }, { 1, 2 }, { 2, 0 }, { 3, 1 }, { 0, 3 }, { 1, 2 },
-            { 2, 2 }, { 3, 3 }, { 0, 2 }, { 1, 1 }, { 2, 2 }, { 3, 3 },
-            { 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 3 }, { 0, 2 }, { 1, 2 },
-            { 2, 2 }, { 3, 1 }, { 0, 1 }, { 1, 3 }, { 2, 1 }, { 3, 2 },
-        },
-        { // Mode 2
-            // Positive part
-            { 2, 0 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 0 }, { 1, 3 },
-            // Negative part
-            { 0, 2 }, { 1, 3 }, { 2, 2 }, { 3, 2 }, { 0, 1 }, { 1, 2 },
-        },
-        { // Mode 3
-            // Positive part
-            { 3, 2 }, { 2, 2 }, { 1, 2 },
-            // Negative part
-            { 0, 2 }, { 1, 3 }, { 2, 0 },
-        },
+    const complexfix value[] = {
+        complexfix(one, zero),
+        complexfix(zero, one),
+        complexfix(-one, zero),
+        complexfix(zero, -one),
     };
+    return value[data % 4];
+}
 
-    if (d_dabmode > 3) {
-        throw std::runtime_error(
-                "PhaseReference::fillData invalid DAB mode!");
-    }
-
-    if (d_dataIn.size() != d_carriers) {
+template <typename T>
+void PhaseRefGen<T>::fillData(unsigned int dabmode, size_t carriers)
+{
+    dataIn.resize(carriers);
+    if (dataIn.size() != carriers) {
         throw std::runtime_error(
-                "PhaseReference::fillData d_dataIn has incorrect size!");
+                "PhaseReference::fillData dataIn has incorrect size!");
     }
 
     for (size_t index = 0,
                 offset = 0;
-                index < d_dataIn.size();
+                index < dataIn.size();
                 ++offset) {
         for (size_t k = 0; k < 32; ++k) {
-            d_dataIn[index++] = convert(
-                    d_h[ table[d_dabmode][offset][0] ][k] +
-                    table[d_dabmode][offset][1] );
+            dataIn[index++] = convert(
+                    d_h[ table[dabmode][offset][0] ][k] +
+                    table[dabmode][offset][1] );
         }
     }
 }
@@ -163,7 +175,12 @@ int PhaseReference::process(Buffer* dataOut)
 {
     PDEBUG("PhaseReference::process(dataOut: %p)\n", dataOut);
 
-    dataOut->setData(&d_dataIn[0], d_carriers * sizeof(complexf));
+    if (d_fixedPoint) {
+        dataOut->setData(d_phaseRefFixed.dataIn.data(), d_carriers * sizeof(complexfix));
+    }
+    else {
+        dataOut->setData(d_phaseRefCF32.dataIn.data(), d_carriers * sizeof(complexf));
+    }
 
     return 1;
 }
diff --git a/src/PhaseReference.h b/src/PhaseReference.h
index 6ecdc4e..735009c 100644
--- a/src/PhaseReference.h
+++ b/src/PhaseReference.h
@@ -32,25 +32,33 @@
 
 #include "ModPlugin.h"
 
-#include <cstddef>
-#include <complex>
 #include <vector>
+#include <cstddef>
+
+template <typename T>
+struct PhaseRefGen {
+    std::vector<T> dataIn;
+    void fillData(unsigned int dabmode, size_t carriers);
+
+    private:
+    T convert(uint8_t data);
+};
+
 
 class PhaseReference : public ModInput
 {
     public:
-        PhaseReference(unsigned int dabmode);
+        PhaseReference(unsigned int dabmode, bool fixedPoint);
 
         int process(Buffer* dataOut) override;
         const char* name() override { return "PhaseReference"; }
 
     protected:
         unsigned int d_dabmode;
+        bool d_fixedPoint;
         size_t d_carriers;
-        size_t d_num;
-        const static uint8_t d_h[4][32];
-        std::vector<std::complex<float> > d_dataIn;
 
-        void fillData();
+        PhaseRefGen<complexf> d_phaseRefCF32;
+        PhaseRefGen<complexfix> d_phaseRefFixed;
 };
 
diff --git a/src/QpskSymbolMapper.cpp b/src/QpskSymbolMapper.cpp
index e26853a..c12ad80 100644
--- a/src/QpskSymbolMapper.cpp
+++ b/src/QpskSymbolMapper.cpp
@@ -23,7 +23,6 @@
 #include <cstdio>
 #include <cstring>
 #include <stdexcept>
-#include <complex>
 #include <cmath>
 #ifdef __SSE__
 #   include <xmmintrin.h>
@@ -32,12 +31,10 @@
 #include "QpskSymbolMapper.h"
 #include "PcDebug.h"
 
-
-typedef std::complex<float> complexf;
-
-QpskSymbolMapper::QpskSymbolMapper(size_t carriers) :
+QpskSymbolMapper::QpskSymbolMapper(size_t carriers, bool fixedPoint) :
     ModCodec(),
-    d_carriers(carriers) { }
+    m_fixedPoint(fixedPoint),
+    m_carriers(carriers) { }
 
 int QpskSymbolMapper::process(Buffer* const dataIn, Buffer* dataOut)
 {
@@ -45,112 +42,172 @@ int QpskSymbolMapper::process(Buffer* const dataIn, Buffer* dataOut)
             "(dataIn: %p, dataOut: %p)\n",
             dataIn, dataOut);
 
-    dataOut->setLength(dataIn->getLength() * 4 * 2 * sizeof(float));   // 4 output complex symbols per input byte
-#ifdef __SSE__
-    const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData());
-    __m128* out = reinterpret_cast<__m128*>(dataOut->getData());
-
-    if (dataIn->getLength() % (d_carriers / 4) != 0) {
-        throw std::runtime_error(
-                "QpskSymbolMapper::process input size not valid: " +
-                std::to_string(dataIn->getLength()) +
-                "(input size) % (" + std::to_string(d_carriers) +
-                " (carriers) / 4) != 0");
-    }
+    // 4 output complex symbols per input byte
+
+    if (m_fixedPoint) {
+        dataOut->setLength(dataIn->getLength() * 4 * sizeof(complexfix));
+
+        using fixed_t = complexfix::value_type;
 
-    const static __m128 symbols[16] = {
-        _mm_setr_ps( M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2),
-        _mm_setr_ps( M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2),
-        _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2),
-        _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2),
-        _mm_setr_ps( M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2),
-        _mm_setr_ps( M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2),
-        _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2),
-        _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2),
-        _mm_setr_ps(-M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2),
-        _mm_setr_ps(-M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2),
-        _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2),
-        _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2),
-        _mm_setr_ps(-M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2),
-        _mm_setr_ps(-M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2),
-        _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2),
-        _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2)
-    };
-    size_t inOffset = 0;
-    size_t outOffset = 0;
-    uint8_t tmp = 0;
-    for (size_t i = 0; i < dataIn->getLength(); i += d_carriers / 4) {
-        for (size_t j = 0; j < d_carriers / 8; ++j) {
-            tmp =  (in[inOffset] & 0xc0) >> 4;
-            tmp |= (in[inOffset + (d_carriers / 8)] & 0xc0) >> 6;
-            out[outOffset] = symbols[tmp];
-            tmp =  (in[inOffset] & 0x30) >> 2;
-            tmp |= (in[inOffset + (d_carriers / 8)] & 0x30) >> 4;
-            out[outOffset + 1] = symbols[tmp];
-            tmp =  (in[inOffset] & 0x0c);
-            tmp |= (in[inOffset + (d_carriers / 8)] & 0x0c) >> 2;
-            out[outOffset + 2] = symbols[tmp];
-            tmp =  (in[inOffset] & 0x03) << 2;
-            tmp |= (in[inOffset + (d_carriers / 8)] & 0x03);
-            out[outOffset + 3] = symbols[tmp];
-            ++inOffset;
-            outOffset += 4;
+        const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData());
+        fixed_t* out = reinterpret_cast<fixed_t*>(dataOut->getData());
+
+        if (dataIn->getLength() % (m_carriers / 4) != 0) {
+            throw std::runtime_error(
+                    "QpskSymbolMapper::process input size not valid!");
+        }
+
+        constexpr fixed_t v = static_cast<fixed_t>(M_SQRT1_2);
+
+        const static fixed_t symbols[16][4] = {
+            { v,  v,  v,  v},
+            { v,  v,  v, -v},
+            { v, -v,  v,  v},
+            { v, -v,  v, -v},
+            { v,  v, -v,  v},
+            { v,  v, -v, -v},
+            { v, -v, -v,  v},
+            { v, -v, -v, -v},
+            {-v,  v,  v,  v},
+            {-v,  v,  v, -v},
+            {-v, -v,  v,  v},
+            {-v, -v,  v, -v},
+            {-v,  v, -v,  v},
+            {-v,  v, -v, -v},
+            {-v, -v, -v,  v},
+            {-v, -v, -v, -v}
+        };
+        size_t inOffset = 0;
+        size_t outOffset = 0;
+        uint8_t tmp;
+        for (size_t i = 0; i < dataIn->getLength(); i += m_carriers / 4) {
+            for (size_t j = 0; j < m_carriers / 8; ++j) {
+                tmp =  (in[inOffset] & 0xc0) >> 4;
+                tmp |= (in[inOffset + (m_carriers / 8)] & 0xc0) >> 6;
+                memcpy(&out[outOffset], symbols[tmp], sizeof(fixed_t) * 4);
+                tmp =  (in[inOffset] & 0x30) >> 2;
+                tmp |= (in[inOffset + (m_carriers / 8)] & 0x30) >> 4;
+                memcpy(&out[outOffset + 4], symbols[tmp], sizeof(fixed_t) * 4);
+                tmp =  (in[inOffset] & 0x0c);
+                tmp |= (in[inOffset + (m_carriers / 8)] & 0x0c) >> 2;
+                memcpy(&out[outOffset + 8], symbols[tmp], sizeof(fixed_t) * 4);
+                tmp =  (in[inOffset] & 0x03) << 2;
+                tmp |= (in[inOffset + (m_carriers / 8)] & 0x03);
+                memcpy(&out[outOffset + 12], symbols[tmp], sizeof(fixed_t) * 4);
+                ++inOffset;
+                outOffset += 4*4;
+            }
+            inOffset += m_carriers / 8;
         }
-        inOffset += d_carriers / 8;
     }
+    else {
+        dataOut->setLength(dataIn->getLength() * 4 * sizeof(complexf));
+#ifdef __SSE__
+        const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData());
+        __m128* out = reinterpret_cast<__m128*>(dataOut->getData());
+
+        if (dataIn->getLength() % (m_carriers / 4) != 0) {
+            throw std::runtime_error(
+                    "QpskSymbolMapper::process input size not valid: " +
+                    std::to_string(dataIn->getLength()) +
+                    "(input size) % (" + std::to_string(m_carriers) +
+                    " (carriers) / 4) != 0");
+        }
+
+        const static __m128 symbols[16] = {
+            _mm_setr_ps( M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2),
+            _mm_setr_ps( M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2),
+            _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2),
+            _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2),
+            _mm_setr_ps( M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2),
+            _mm_setr_ps( M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2),
+            _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2),
+            _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2),
+            _mm_setr_ps(-M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2),
+            _mm_setr_ps(-M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2),
+            _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2),
+            _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2),
+            _mm_setr_ps(-M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2),
+            _mm_setr_ps(-M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2),
+            _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2),
+            _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2)
+        };
+        size_t inOffset = 0;
+        size_t outOffset = 0;
+        uint8_t tmp = 0;
+        for (size_t i = 0; i < dataIn->getLength(); i += m_carriers / 4) {
+            for (size_t j = 0; j < m_carriers / 8; ++j) {
+                tmp =  (in[inOffset] & 0xc0) >> 4;
+                tmp |= (in[inOffset + (m_carriers / 8)] & 0xc0) >> 6;
+                out[outOffset] = symbols[tmp];
+                tmp =  (in[inOffset] & 0x30) >> 2;
+                tmp |= (in[inOffset + (m_carriers / 8)] & 0x30) >> 4;
+                out[outOffset + 1] = symbols[tmp];
+                tmp =  (in[inOffset] & 0x0c);
+                tmp |= (in[inOffset + (m_carriers / 8)] & 0x0c) >> 2;
+                out[outOffset + 2] = symbols[tmp];
+                tmp =  (in[inOffset] & 0x03) << 2;
+                tmp |= (in[inOffset + (m_carriers / 8)] & 0x03);
+                out[outOffset + 3] = symbols[tmp];
+                ++inOffset;
+                outOffset += 4;
+            }
+            inOffset += m_carriers / 8;
+        }
 #else // !__SSE__
-    const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData());
-    float* out = reinterpret_cast<float*>(dataOut->getData());
-    if (dataIn->getLength() % (d_carriers / 4) != 0) {
-        throw std::runtime_error(
-                "QpskSymbolMapper::process input size not valid!");
-    }
-    if (dataOut->getLength() / sizeof(float) != dataIn->getLength() * 4 * 2) {    // 4 output complex symbols per input byte
-        throw std::runtime_error(
-                "QpskSymbolMapper::process output size not valid!");
-    }
+        const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData());
+        float* out = reinterpret_cast<float*>(dataOut->getData());
+        if (dataIn->getLength() % (m_carriers / 4) != 0) {
+            throw std::runtime_error(
+                    "QpskSymbolMapper::process input size not valid!");
+        }
+        if (dataOut->getLength() / sizeof(float) != dataIn->getLength() * 4 * 2) {    // 4 output complex symbols per input byte
+            throw std::runtime_error(
+                    "QpskSymbolMapper::process output size not valid!");
+        }
 
-    const static float symbols[16][4] = {
-        { M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2},
-        { M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2},
-        { M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2},
-        { M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2},
-        { M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2},
-        { M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2},
-        { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2},
-        { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2},
-        {-M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2},
-        {-M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2},
-        {-M_SQRT1_2,- M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2},
-        {-M_SQRT1_2,- M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2},
-        {-M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2},
-        {-M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2},
-        {-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2},
-        {-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}
-    };
-    size_t inOffset = 0;
-    size_t outOffset = 0;
-    uint8_t tmp;
-    for (size_t i = 0; i < dataIn->getLength(); i += d_carriers / 4) {
-        for (size_t j = 0; j < d_carriers / 8; ++j) {
-            tmp =  (in[inOffset] & 0xc0) >> 4;
-            tmp |= (in[inOffset + (d_carriers / 8)] & 0xc0) >> 6;
-            memcpy(&out[outOffset], symbols[tmp], sizeof(float) * 4);
-            tmp =  (in[inOffset] & 0x30) >> 2;
-            tmp |= (in[inOffset + (d_carriers / 8)] & 0x30) >> 4;
-            memcpy(&out[outOffset + 4], symbols[tmp], sizeof(float) * 4);
-            tmp =  (in[inOffset] & 0x0c);
-            tmp |= (in[inOffset + (d_carriers / 8)] & 0x0c) >> 2;
-            memcpy(&out[outOffset + 8], symbols[tmp], sizeof(float) * 4);
-            tmp =  (in[inOffset] & 0x03) << 2;
-            tmp |= (in[inOffset + (d_carriers / 8)] & 0x03);
-            memcpy(&out[outOffset + 12], symbols[tmp], sizeof(float) * 4);
-            ++inOffset;
-            outOffset += 4*4;
+        const static float symbols[16][4] = {
+            { M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2},
+            { M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2},
+            { M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2},
+            { M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2},
+            { M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2},
+            { M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2},
+            { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2},
+            { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2},
+            {-M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2},
+            {-M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2},
+            {-M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2,  M_SQRT1_2},
+            {-M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2},
+            {-M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2},
+            {-M_SQRT1_2,  M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2},
+            {-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2,  M_SQRT1_2},
+            {-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}
+        };
+        size_t inOffset = 0;
+        size_t outOffset = 0;
+        uint8_t tmp;
+        for (size_t i = 0; i < dataIn->getLength(); i += m_carriers / 4) {
+            for (size_t j = 0; j < m_carriers / 8; ++j) {
+                tmp =  (in[inOffset] & 0xc0) >> 4;
+                tmp |= (in[inOffset + (m_carriers / 8)] & 0xc0) >> 6;
+                memcpy(&out[outOffset], symbols[tmp], sizeof(float) * 4);
+                tmp =  (in[inOffset] & 0x30) >> 2;
+                tmp |= (in[inOffset + (m_carriers / 8)] & 0x30) >> 4;
+                memcpy(&out[outOffset + 4], symbols[tmp], sizeof(float) * 4);
+                tmp =  (in[inOffset] & 0x0c);
+                tmp |= (in[inOffset + (m_carriers / 8)] & 0x0c) >> 2;
+                memcpy(&out[outOffset + 8], symbols[tmp], sizeof(float) * 4);
+                tmp =  (in[inOffset] & 0x03) << 2;
+                tmp |= (in[inOffset + (m_carriers / 8)] & 0x03);
+                memcpy(&out[outOffset + 12], symbols[tmp], sizeof(float) * 4);
+                ++inOffset;
+                outOffset += 4*4;
+            }
+            inOffset += m_carriers / 8;
         }
-        inOffset += d_carriers / 8;
-    }
 #endif // __SSE__
+    }
 
     return 1;
 }
diff --git a/src/QpskSymbolMapper.h b/src/QpskSymbolMapper.h
index dbcf4dd..6cf7a2e 100644
--- a/src/QpskSymbolMapper.h
+++ b/src/QpskSymbolMapper.h
@@ -31,12 +31,13 @@
 class QpskSymbolMapper : public ModCodec
 {
 public:
-    QpskSymbolMapper(size_t carriers);
+    QpskSymbolMapper(size_t carriers, bool fixedPoint);
 
     int process(Buffer* const dataIn, Buffer* dataOut);
     const char* name() { return "QpskSymbolMapper"; }
 
 protected:
-    size_t d_carriers;
+    bool m_fixedPoint;
+    size_t m_carriers;
 };
 
diff --git a/src/Resampler.h b/src/Resampler.h
index d1a9f7a..2c810f6 100644
--- a/src/Resampler.h
+++ b/src/Resampler.h
@@ -37,9 +37,6 @@
 #define FFT_TYPE fftwf_complex
 #define FFT_PLAN fftwf_plan
 
-#include <complex>
-typedef std::complex<float> complexf;
-
 
 class Resampler : public ModCodec
 {
diff --git a/src/SignalMultiplexer.cpp b/src/SignalMultiplexer.cpp
index 1d95bdd..d4955d0 100644
--- a/src/SignalMultiplexer.cpp
+++ b/src/SignalMultiplexer.cpp
@@ -22,25 +22,20 @@
 #include "SignalMultiplexer.h"
 #include "PcDebug.h"
 
-#include <stdio.h>
-#include <stdexcept>
+#include <cstdio>
 #include <assert.h>
-#include <string.h>
 
 
-SignalMultiplexer::SignalMultiplexer(size_t framesize) :
-    ModMux(),
-    d_frameSize(framesize)
+SignalMultiplexer::SignalMultiplexer() :
+    ModMux()
 {
-    PDEBUG("SignalMultiplexer::SignalMultiplexer(%zu) @ %p\n", framesize, this);
-
+    PDEBUG("SignalMultiplexer::SignalMultiplexer() @ %p\n", this);
 }
 
 
 SignalMultiplexer::~SignalMultiplexer()
 {
     PDEBUG("SignalMultiplexer::~SignalMultiplexer() @ %p\n", this);
-
 }
 
 
diff --git a/src/SignalMultiplexer.h b/src/SignalMultiplexer.h
index 5186a8d..1f6bc12 100644
--- a/src/SignalMultiplexer.h
+++ b/src/SignalMultiplexer.h
@@ -36,7 +36,7 @@
 class SignalMultiplexer : public ModMux
 {
 public:
-    SignalMultiplexer(size_t frameSize);
+    SignalMultiplexer();
     virtual ~SignalMultiplexer();
     SignalMultiplexer(const SignalMultiplexer&);
     SignalMultiplexer& operator=(const SignalMultiplexer&);
@@ -44,8 +44,5 @@ public:
 
     int process(std::vector<Buffer*> dataIn, Buffer* dataOut);
     const char* name() { return "SignalMultiplexer"; }
-
-protected:
-    size_t d_frameSize;
 };
 
diff --git a/src/TII.cpp b/src/TII.cpp
index 2656cbf..bce15aa 100644
--- a/src/TII.cpp
+++ b/src/TII.cpp
@@ -2,7 +2,7 @@
    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty
    the Queen in Right of Canada (Communications Research Center Canada)
 
-   Copyright (C) 2023
+   Copyright (C) 2024
    Matthias P. Braendli, matthias.braendli@mpb.li
 
     http://opendigitalradio.org
@@ -27,11 +27,8 @@
 #include "TII.h"
 #include "PcDebug.h"
 
-#include <stdio.h>
-#include <stdexcept>
-#include <string.h>
-
-typedef std::complex<float> complexf;
+#include <cstdio>
+#include <cstring>
 
 /* TII pattern for TM I, II, IV */
 const int pattern_tm1_2_4[][8] = { // {{{
@@ -106,11 +103,12 @@ const int pattern_tm1_2_4[][8] = { // {{{
     {1,1,1,0,1,0,0,0},
     {1,1,1,1,0,0,0,0} }; // }}}
 
-TII::TII(unsigned int dabmode, tii_config_t& tii_config) :
+TII::TII(unsigned int dabmode, tii_config_t& tii_config, bool fixedPoint) :
     ModCodec(),
     RemoteControllable("tii"),
     m_dabmode(dabmode),
-    m_conf(tii_config)
+    m_conf(tii_config),
+    m_fixedPoint(fixedPoint)
 {
     PDEBUG("TII::TII(%u) @ %p\n", dabmode, this);
 
@@ -171,56 +169,72 @@ const char* TII::name()
     return m_name.c_str();
 }
 
+template<typename T>
+void do_process(size_t carriers, bool old_variant, const std::vector<bool>& Acp, Buffer* dataIn, Buffer* dataOut)
+{
+    const T* in = reinterpret_cast<const T*>(dataIn->getData());
+    T* out = reinterpret_cast<T*>(dataOut->getData());
+
+    /* Normalise the TII carrier power according to ETSI TR 101 496-3
+     * Clause 5.4.2.2 Paragraph 7:
+     *
+     * > The ratio of carriers in a TII symbol to a normal DAB symbol
+     * > is 1:48 for all Modes, so that the signal power in a TII symbol is
+     * > 16 dB below the signal power of the other symbols.
+     *
+     * This is because we only enable 32 out of 1536 carriers, not because
+     * every carrier is lower power.
+     */
+    for (size_t i = 0; i < Acp.size(); i++) {
+        /* See header file for an explanation of the old variant.
+         *
+         * A_{c,p}(k) and A_{c,p}(k-1) are never both simultaneously true,
+         * so instead of doing the sum inside z_{m,0,k}, we could do
+         *
+         * if (m_Acp[i]) out[i] = in[i];
+         * if (m_Acp[i-1]) out[i] = in[i-1]
+         *
+         * (Considering only the new variant)
+         *
+         * To avoid messing with indices, we substitute j = i-1
+         *
+         * if (m_Acp[i]) out[i] = in[i];
+         * if (m_Acp[j]) out[j+1] = in[j]
+         *
+         * and fuse the two conditionals together:
+         */
+        if (Acp[i]) {
+            out[i] = in[i];
+            out[i+1] = (old_variant ? in[i+1] : in[i]);
+        }
+    }
+}
 
 int TII::process(Buffer* dataIn, Buffer* dataOut)
 {
+    const size_t sizeof_samples = m_fixedPoint ? sizeof(complexfix) : sizeof(complexf);
+
     PDEBUG("TII::process(dataOut: %p)\n",
             dataOut);
     if (    (dataIn == NULL) or
-            (dataIn->getLength() != m_carriers * sizeof(complexf))) {
+            (dataIn->getLength() != m_carriers * sizeof_samples)) {
         throw TIIError("TII::process input size not valid!");
     }
 
-    dataOut->setLength(m_carriers * sizeof(complexf));
-    memset(dataOut->getData(), 0,  dataOut->getLength());
+    dataOut->setLength(m_carriers * sizeof_samples);
+    memset(dataOut->getData(), 0, dataOut->getLength());
 
     if (m_conf.enable and m_insert) {
         std::lock_guard<std::mutex> lock(m_enabled_carriers_mutex);
-        complexf* in = reinterpret_cast<complexf*>(dataIn->getData());
-        complexf* out = reinterpret_cast<complexf*>(dataOut->getData());
-
-        /* Normalise the TII carrier power according to ETSI TR 101 496-3
-         * Clause 5.4.2.2 Paragraph 7:
-         *
-         * > The ratio of carriers in a TII symbol to a normal DAB symbol
-         * > is 1:48 for all Modes, so that the signal power in a TII symbol is
-         * > 16 dB below the signal power of the other symbols.
-         *
-         * This is because we only enable 32 out of 1536 carriers, not because
-         * every carrier is lower power.
-         */
-        for (size_t i = 0; i < m_Acp.size(); i++) {
-            /* See header file for an explanation of the old variant.
-             *
-             * A_{c,p}(k) and A_{c,p}(k-1) are never both simultaneously true,
-             * so instead of doing the sum inside z_{m,0,k}, we could do
-             *
-             * if (m_Acp[i]) out[i] = in[i];
-             * if (m_Acp[i-1]) out[i] = in[i-1]
-             *
-             * (Considering only the new variant)
-             *
-             * To avoid messing with indices, we substitute j = i-1
-             *
-             * if (m_Acp[i]) out[i] = in[i];
-             * if (m_Acp[j]) out[j+1] = in[j]
-             *
-             * and fuse the two conditionals together:
-             */
-            if (m_Acp[i]) {
-                out[i] = in[i];
-                out[i+1] = (m_conf.old_variant ? in[i+1] : in[i]);
-            }
+        if (m_fixedPoint) {
+            do_process<complexfix>(
+                    m_carriers, m_conf.old_variant, m_Acp,
+                    dataIn, dataOut);
+        }
+        else {
+            do_process<complexf>(
+                    m_carriers, m_conf.old_variant, m_Acp,
+                    dataIn, dataOut);
         }
     }
 
diff --git a/src/TII.h b/src/TII.h
index f6de70b..6fe4d4f 100644
--- a/src/TII.h
+++ b/src/TII.h
@@ -2,7 +2,7 @@
    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty
    the Queen in Right of Canada (Communications Research Center Canada)
 
-   Copyright (C) 2023
+   Copyright (C) 2024
    Matthias P. Braendli, matthias.braendli@mpb.li
 
     http://opendigitalradio.org
@@ -36,8 +36,6 @@
 #include "RemoteControl.h"
 
 #include <cstddef>
-#include <thread>
-#include <complex>
 #include <vector>
 #include <string>
 
@@ -81,7 +79,7 @@ class TIIError : public std::runtime_error {
 class TII : public ModCodec, public RemoteControllable
 {
     public:
-        TII(unsigned int dabmode, tii_config_t& tii_config);
+        TII(unsigned int dabmode, tii_config_t& tii_config, bool fixedPoint);
         virtual ~TII() {}
 
         int process(Buffer* dataIn, Buffer* dataOut) override;
@@ -106,6 +104,8 @@ class TII : public ModCodec, public RemoteControllable
         // Remote-controllable settings
         tii_config_t& m_conf;
 
+        bool m_fixedPoint = false;
+
         // Internal flag when to insert TII
         bool m_insert = true;
 
diff --git a/src/Utils.cpp b/src/Utils.cpp
index fa2fd5d..f947acd 100644
--- a/src/Utils.cpp
+++ b/src/Utils.cpp
@@ -66,6 +66,9 @@ static void printHeader()
 #if defined(__SSE__)
         "SSE " <<
 #endif
+#if defined(__ARM_NEON)
+        "NEON " <<
+#endif
         "\n";
 }
 
diff --git a/src/output/Dexter.h b/src/output/Dexter.h
index d4f425f..f8a17ba 100644
--- a/src/output/Dexter.h
+++ b/src/output/Dexter.h
@@ -98,16 +98,16 @@ class Dexter : public Output::SDRDevice
 
         SDRDeviceConfig& m_conf;
 
-        struct iio_context* m_ctx = nullptr;
-        struct iio_device* m_dexter_dsp_tx = nullptr;
+        struct iio_context *m_ctx = nullptr;
+        struct iio_device *m_dexter_dsp_tx = nullptr;
 
-        struct iio_device* m_ad9957 = nullptr;
-        struct iio_device* m_ad9957_tx0 = nullptr;
-        struct iio_channel* m_tx_channel = nullptr;
+        struct iio_device *m_ad9957 = nullptr;
+        struct iio_device *m_ad9957_tx0 = nullptr;
+        struct iio_channel *m_tx_channel = nullptr;
         struct iio_buffer *m_buffer = nullptr;
 
         /* Underflows are counted in a separate thread */
-        struct iio_context* m_underflow_ctx = nullptr;
+        struct iio_context *m_underflow_ctx = nullptr;
         std::atomic<bool> m_running = ATOMIC_VAR_INIT(false);
         std::thread m_underflow_read_thread;
         void underflow_read_process();
diff --git a/src/output/SDR.cpp b/src/output/SDR.cpp
index 594171f..22398c7 100644
--- a/src/output/SDR.cpp
+++ b/src/output/SDR.cpp
@@ -34,6 +34,7 @@
 #include "RemoteControl.h"
 #include "Utils.h"
 
+#include <chrono>
 #include <cmath>
 #include <iostream>
 #include <assert.h>
diff --git a/src/output/SDR.h b/src/output/SDR.h
index 960de0c..86bf295 100644
--- a/src/output/SDR.h
+++ b/src/output/SDR.h
@@ -34,16 +34,12 @@ DESCRIPTION:
 #   include <config.h>
 #endif
 
-#include <chrono>
 #include "ModPlugin.h"
-#include "EtiReader.h"
 #include "output/SDRDevice.h"
 #include "output/Feedback.h"
 
 namespace Output {
 
-using complexf = std::complex<float>;
-
 class SDR : public ModOutput, public ModMetadata, public RemoteControllable {
     public:
         SDR(SDRDeviceConfig& config, std::shared_ptr<SDRDevice> device);
diff --git a/src/output/SDRDevice.h b/src/output/SDRDevice.h
index 378829c..ec9373d 100644
--- a/src/output/SDRDevice.h
+++ b/src/output/SDRDevice.h
@@ -38,9 +38,7 @@ DESCRIPTION:
 #include <string>
 #include <vector>
 #include <complex>
-#include <variant>
 #include <optional>
-#include <unordered_map>
 
 #include "TimestampDecoder.h"
 
@@ -59,6 +57,8 @@ struct SDRDeviceConfig {
     std::string tx_antenna;
     std::string rx_antenna;
 
+    bool fixedPoint = false;
+
     long masterClockRate = 32768000;
     unsigned sampleRate = 2048000;
     double frequency = 0.0;
diff --git a/src/output/UHD.cpp b/src/output/UHD.cpp
index e097692..b30f9e1 100644
--- a/src/output/UHD.cpp
+++ b/src/output/UHD.cpp
@@ -31,10 +31,7 @@
 //#define MDEBUG(fmt, args...) fprintf(LOG, fmt , ## args)
 #define MDEBUG(fmt, args...)
 
-#include "PcDebug.h"
 #include "Log.h"
-#include "RemoteControl.h"
-#include "Utils.h"
 
 #include <thread>
 #include <iomanip>
@@ -52,14 +49,12 @@
 # include <uhd/utils/thread_priority.hpp>
 #endif
 
-
-#include <cmath>
 #include <iostream>
-#include <assert.h>
+#include <cmath>
+#include <cassert>
 #include <stdexcept>
-#include <stdio.h>
+#include <cstdio>
 #include <time.h>
-#include <errno.h>
 #include <unistd.h>
 #include <pthread.h>
 
@@ -235,7 +230,8 @@ UHD::UHD(SDRDeviceConfig& config) :
     m_usrp->set_rx_gain(m_conf.rxgain);
     etiLog.log(debug, "OutputUHD:Actual RX Gain: %f", m_usrp->get_rx_gain());
 
-    const uhd::stream_args_t stream_args("fc32"); //complex floats
+    const uhd::stream_args_t stream_args(
+            m_conf.fixedPoint ? "sc16" : "fc32");
     m_rx_stream = m_usrp->get_rx_stream(stream_args);
     m_tx_stream = m_usrp->get_tx_stream(stream_args);
 
@@ -319,8 +315,9 @@ double UHD::get_bandwidth(void) const
 void UHD::transmit_frame(struct FrameData&& frame)
 {
     const double tx_timeout = 20.0;
-    const size_t sizeIn = frame.buf.size() / sizeof(complexf);
-    const complexf* in_data = reinterpret_cast<const complexf*>(&frame.buf[0]);
+
+    const size_t sample_size = m_conf.fixedPoint ? (2 * sizeof(int16_t)) : sizeof(complexf);
+    const size_t sizeIn = frame.buf.size() / sample_size;
 
     uhd::tx_metadata_t md_tx;
 
@@ -353,9 +350,9 @@ void UHD::transmit_frame(struct FrameData&& frame)
                 samps_to_send <= usrp_max_num_samps );
         m_require_timestamp_refresh = false;
 
-        //send a single packet
+        // send a single packet
         size_t num_tx_samps = m_tx_stream->send(
-                &in_data[num_acc_samps],
+                frame.buf.data() + sample_size * num_acc_samps,
                 samps_to_send, md_tx, tx_timeout);
         etiLog.log(trace, "UHD,sent %zu of %zu", num_tx_samps, samps_to_send);
 
diff --git a/src/output/UHD.h b/src/output/UHD.h
index 9891c7a..c4f1a45 100644
--- a/src/output/UHD.h
+++ b/src/output/UHD.h
@@ -45,12 +45,9 @@ DESCRIPTION:
 #include <atomic>
 #include <thread>
 
-#include "Log.h"
 #include "output/SDR.h"
 #include "output/USRPTime.h"
 #include "TimestampDecoder.h"
-#include "RemoteControl.h"
-#include "ThreadsafeQueue.h"
 
 #include <stdio.h>
 #include <sys/types.h>