aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile.am21
-rw-r--r--doc/example.ini2
-rw-r--r--fpm/LICENSE21
-rw-r--r--fpm/README.md48
-rw-r--r--fpm/fixed.hpp490
-rw-r--r--fpm/ios.hpp740
-rw-r--r--fpm/math.hpp684
-rw-r--r--kiss/CHANGELOG123
-rw-r--r--kiss/COPYING11
-rw-r--r--kiss/README.md245
-rw-r--r--kiss/_kiss_fft_guts.h167
-rw-r--r--kiss/kfc.c109
-rw-r--r--kiss/kfc.h54
-rw-r--r--kiss/kiss_fft.c420
-rw-r--r--kiss/kiss_fft.h160
-rw-r--r--kiss/kiss_fft_log.h36
-rw-r--r--kiss/kiss_fftnd.c188
-rw-r--r--kiss/kiss_fftnd.h26
-rw-r--r--kiss/kiss_fftndr.c120
-rw-r--r--kiss/kiss_fftndr.h55
-rw-r--r--kiss/kiss_fftr.c155
-rw-r--r--kiss/kiss_fftr.h54
-rw-r--r--m4/ax_cxx_compile_stdcxx.m489
-rw-r--r--src/Buffer.h10
-rw-r--r--src/CicEqualizer.h8
-rw-r--r--src/ConfigParser.cpp24
-rw-r--r--src/ConfigParser.h8
-rw-r--r--src/DabMod.cpp31
-rw-r--r--src/DabModulator.cpp97
-rw-r--r--src/DabModulator.h4
-rw-r--r--src/DifferentialModulator.cpp77
-rw-r--r--src/DifferentialModulator.h5
-rw-r--r--src/FIRFilter.h7
-rw-r--r--src/Flowgraph.cpp10
-rw-r--r--src/FormatConverter.cpp170
-rw-r--r--src/FormatConverter.h12
-rw-r--r--src/FrameMultiplexer.cpp12
-rw-r--r--src/FrequencyInterleaver.cpp90
-rw-r--r--src/FrequencyInterleaver.h9
-rw-r--r--src/GainControl.h4
-rw-r--r--src/GuardIntervalInserter.cpp324
-rw-r--r--src/GuardIntervalInserter.h32
-rw-r--r--src/MemlessPoly.h5
-rw-r--r--src/ModPlugin.h2
-rw-r--r--src/NullSymbol.cpp18
-rw-r--r--src/NullSymbol.h6
-rw-r--r--src/OfdmGenerator.cpp387
-rw-r--r--src/OfdmGenerator.h95
-rw-r--r--src/OutputMemory.cpp26
-rw-r--r--src/OutputMemory.h6
-rw-r--r--src/PAPRStats.cpp1
-rw-r--r--src/PAPRStats.h5
-rw-r--r--src/PhaseReference.cpp135
-rw-r--r--src/PhaseReference.h22
-rw-r--r--src/QpskSymbolMapper.cpp267
-rw-r--r--src/QpskSymbolMapper.h5
-rw-r--r--src/Resampler.h3
-rw-r--r--src/SignalMultiplexer.cpp13
-rw-r--r--src/SignalMultiplexer.h5
-rw-r--r--src/TII.cpp106
-rw-r--r--src/TII.h8
-rw-r--r--src/Utils.cpp3
-rw-r--r--src/output/Dexter.h12
-rw-r--r--src/output/SDR.cpp1
-rw-r--r--src/output/SDR.h4
-rw-r--r--src/output/SDRDevice.h4
-rw-r--r--src/output/UHD.cpp23
-rw-r--r--src/output/UHD.h3
68 files changed, 5370 insertions, 747 deletions
diff --git a/Makefile.am b/Makefile.am
index d29b530..87d553a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -35,10 +35,11 @@ endif
bin_PROGRAMS = odr-dabmod
-odr_dabmod_CFLAGS = -Wall -Isrc -Ilib \
- $(GITVERSION_FLAGS)
-odr_dabmod_CXXFLAGS = -Wall -Isrc -Ilib \
- $(GITVERSION_FLAGS) $(BOOST_CPPFLAGS)
+KISS_FLAGS=-DFIXED_POINT=16
+odr_dabmod_CFLAGS = -Wall -Isrc -Ilib -Ikiss \
+ $(GITVERSION_FLAGS) $(KISS_FLAGS)
+odr_dabmod_CXXFLAGS = -Wall -Isrc -Ilib -Ikiss \
+ $(GITVERSION_FLAGS) $(BOOST_CPPFLAGS) $(KISS_FLAGS)
odr_dabmod_LDADD = $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB) $(UHD_LIBS) $(LIMESDR_LIBS) $(ADDITIONAL_UHD_LIBS)
odr_dabmod_SOURCES = src/DabMod.cpp \
src/PcDebug.h \
@@ -175,7 +176,17 @@ odr_dabmod_SOURCES = src/DabMod.cpp \
src/PAPRStats.cpp \
src/PAPRStats.h \
src/TII.cpp \
- src/TII.h
+ src/TII.h \
+ kiss/kfc.h \
+ kiss/kfc.c \
+ kiss/kiss_fft.c \
+ kiss/kiss_fft.h \
+ kiss/kiss_fftnd.c \
+ kiss/kiss_fftnd.h \
+ kiss/kiss_fftndr.c \
+ kiss/kiss_fftndr.h \
+ kiss/kiss_fftr.c \
+ kiss/kiss_fftr.h
man_MANS = man/odr-dabmod.1
diff --git a/doc/example.ini b/doc/example.ini
index eda50a5..0d0f8e3 100644
--- a/doc/example.ini
+++ b/doc/example.ini
@@ -103,6 +103,8 @@ gainmode=var
; If not defined, use Transmission Mode 1
;mode=1
+fixed_point=1
+
; The digital gain is a value that is multiplied to each sample. It is used
; to tune the chain to make sure that no non-linearities appear up to the
; USRP daughterboard programmable gain amplifier (PGA).
diff --git a/fpm/LICENSE b/fpm/LICENSE
new file mode 100644
index 0000000..bb86b71
--- /dev/null
+++ b/fpm/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Mike Lankamp
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/fpm/README.md b/fpm/README.md
new file mode 100644
index 0000000..38ee444
--- /dev/null
+++ b/fpm/README.md
@@ -0,0 +1,48 @@
+# fpm
+A C++ header-only fixed-point math library. "fpm" stands for "fixed-point math".
+
+It is designed to serve as a drop-in replacement for floating-point types and aims to provide as much of the standard library's functionality as possible with exclusively integers. `fpm` requires C++11 or higher.
+
+[![Build Status](https://travis-ci.org/MikeLankamp/fpm.svg?branch=master)](https://travis-ci.org/MikeLankamp/fpm)
+[![Build status](https://ci.appveyor.com/api/projects/status/0velpwqk38spu412?svg=true)](https://ci.appveyor.com/project/MikeLankamp/fpm)
+
+`fpm` is designed to guard against accidental conversion to and from floats and supports many of the standard C++ maths functions, including trigonometry, power and logarithmic functions, with performance and accuracy generally comparable to alternative libraries.
+
+## Why use fixed-point math?
+There are several reasons why you cannot, or choose not to, use floating-point math but still want a similar type:
+* Your target platform lacks an FPU, does not support floating-point operations or its floating-point operations are
+ considerably slower than fixed-point integer operations.
+* You require deterministic calculations.
+
+If any of these reasons applies to you, and your problem domain has a clearly outlined range and required resolution,
+then fixed-point numbers might be a solution for you.
+
+## Quick Start
+To use `fpm`, include its header `<fpm/fixed.hpp>` and use the `fpm::fixed_16_16`, `fpm::fixed_24_8` or `fpm::fixed_8_24`
+types as if they were native floating-point types:
+```c++
+#include <fpm/fixed.hpp> // For fpm::fixed_16_16
+#include <fpm/math.hpp> // For fpm::cos
+#include <fpm/ios.hpp> // For fpm::operator<<
+#include <iostream> // For std::cin, std::cout
+
+int main() {
+ std::cout << "Please input a number: ";
+ fpm::fixed_16_16 x;
+ std::cin >> x;
+ std::cout << "The cosine of " << x << " radians is: " << cos(x) << std::endl;
+ return 0;
+}
+```
+
+To use the fixed-point equivalents of the `<math.h>` functions such as `sqrt`, `sin` and `log`, include the header `<fpm/math.hpp>`.
+To stream fixed-point values to or from streams, include the header `<fpm/ios.hpp>`.
+
+## Documentation
+Please refer to the [documentation](docs/index.md) for detailed information on how to use `fpm`, or skip straight to the [performance](docs/performance.md) or [accuracy](docs/accuracy.md) results.
+
+## Contributions
+This library is a work-in-progress. We welcome any contributions that improve the functional coverage or the performance or accuracy of the mathematical functions.
+
+## License
+See the [LICENSE](LICENSE) file
diff --git a/fpm/fixed.hpp b/fpm/fixed.hpp
new file mode 100644
index 0000000..e2e71bf
--- /dev/null
+++ b/fpm/fixed.hpp
@@ -0,0 +1,490 @@
+#ifndef FPM_FIXED_HPP
+#define FPM_FIXED_HPP
+
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <type_traits>
+
+namespace fpm
+{
+
+//! Fixed-point number type
+//! \tparam BaseType the base integer type used to store the fixed-point number. This can be a signed or unsigned type.
+//! \tparam IntermediateType the integer type used to store intermediate results during calculations.
+//! \tparam FractionBits the number of bits of the BaseType used to store the fraction
+//! \tparam EnableRounding enable rounding of LSB for multiplication, division, and type conversion
+template <typename BaseType, typename IntermediateType, unsigned int FractionBits, bool EnableRounding = true>
+class fixed
+{
+ static_assert(std::is_integral<BaseType>::value, "BaseType must be an integral type");
+ static_assert(FractionBits > 0, "FractionBits must be greater than zero");
+ static_assert(FractionBits <= sizeof(BaseType) * 8 - 1, "BaseType must at least be able to contain entire fraction, with space for at least one integral bit");
+ static_assert(sizeof(IntermediateType) > sizeof(BaseType), "IntermediateType must be larger than BaseType");
+ static_assert(std::is_signed<IntermediateType>::value == std::is_signed<BaseType>::value, "IntermediateType must have same signedness as BaseType");
+
+ // Although this value fits in the BaseType in terms of bits, if there's only one integral bit, this value
+ // is incorrect (flips from positive to negative), so we must extend the size to IntermediateType.
+ static constexpr IntermediateType FRACTION_MULT = IntermediateType(1) << FractionBits;
+
+ struct raw_construct_tag {};
+ constexpr inline fixed(BaseType val, raw_construct_tag) noexcept : m_value(val) {}
+
+public:
+ inline fixed() noexcept = default;
+
+ // Converts an integral number to the fixed-point type.
+ // Like static_cast, this truncates bits that don't fit.
+ template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+ constexpr inline explicit fixed(T val) noexcept
+ : m_value(static_cast<BaseType>(val * FRACTION_MULT))
+ {}
+
+ // Converts a floating-point number to the fixed-point type.
+ // Like static_cast, this truncates bits that don't fit.
+ template <typename T, typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr>
+ constexpr inline explicit fixed(T val) noexcept
+ : m_value(static_cast<BaseType>((EnableRounding) ?
+ (val >= 0.0) ? (val * FRACTION_MULT + T{0.5}) : (val * FRACTION_MULT - T{0.5})
+ : (val * FRACTION_MULT)))
+ {}
+
+ // Constructs from another fixed-point type with possibly different underlying representation.
+ // Like static_cast, this truncates bits that don't fit.
+ template <typename B, typename I, unsigned int F, bool R>
+ constexpr inline explicit fixed(fixed<B,I,F,R> val) noexcept
+ : m_value(from_fixed_point<F>(val.raw_value()).raw_value())
+ {}
+
+ // Explicit conversion to a floating-point type
+ template <typename T, typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr>
+ constexpr inline explicit operator T() const noexcept
+ {
+ return static_cast<T>(m_value) / FRACTION_MULT;
+ }
+
+ // Explicit conversion to an integral type
+ template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+ constexpr inline explicit operator T() const noexcept
+ {
+ return static_cast<T>(m_value / FRACTION_MULT);
+ }
+
+ // Returns the raw underlying value of this type.
+ // Do not use this unless you know what you're doing.
+ constexpr inline BaseType raw_value() const noexcept
+ {
+ return m_value;
+ }
+
+ //! Constructs a fixed-point number from another fixed-point number.
+ //! \tparam NumFractionBits the number of bits used by the fraction in \a value.
+ //! \param value the integer fixed-point number
+ template <unsigned int NumFractionBits, typename T, typename std::enable_if<(NumFractionBits > FractionBits)>::type* = nullptr>
+ static constexpr inline fixed from_fixed_point(T value) noexcept
+ {
+ // To correctly round the last bit in the result, we need one more bit of information.
+ // We do this by multiplying by two before dividing and adding the LSB to the real result.
+ return (EnableRounding) ? fixed(static_cast<BaseType>(
+ value / (T(1) << (NumFractionBits - FractionBits)) +
+ (value / (T(1) << (NumFractionBits - FractionBits - 1)) % 2)),
+ raw_construct_tag{}) :
+ fixed(static_cast<BaseType>(value / (T(1) << (NumFractionBits - FractionBits))),
+ raw_construct_tag{});
+ }
+
+ template <unsigned int NumFractionBits, typename T, typename std::enable_if<(NumFractionBits <= FractionBits)>::type* = nullptr>
+ static constexpr inline fixed from_fixed_point(T value) noexcept
+ {
+ return fixed(static_cast<BaseType>(
+ value * (T(1) << (FractionBits - NumFractionBits))),
+ raw_construct_tag{});
+ }
+
+ // Constructs a fixed-point number from its raw underlying value.
+ // Do not use this unless you know what you're doing.
+ static constexpr inline fixed from_raw_value(BaseType value) noexcept
+ {
+ return fixed(value, raw_construct_tag{});
+ }
+
+ //
+ // Constants
+ //
+ static constexpr fixed e() { return from_fixed_point<61>(6267931151224907085ll); }
+ static constexpr fixed pi() { return from_fixed_point<61>(7244019458077122842ll); }
+ static constexpr fixed half_pi() { return from_fixed_point<62>(7244019458077122842ll); }
+ static constexpr fixed two_pi() { return from_fixed_point<60>(7244019458077122842ll); }
+
+ //
+ // Arithmetic member operators
+ //
+
+ constexpr inline fixed operator-() const noexcept
+ {
+ return fixed::from_raw_value(-m_value);
+ }
+
+ inline fixed& operator+=(const fixed& y) noexcept
+ {
+ m_value += y.m_value;
+ return *this;
+ }
+
+ template <typename I, typename std::enable_if<std::is_integral<I>::value>::type* = nullptr>
+ inline fixed& operator+=(I y) noexcept
+ {
+ m_value += y * FRACTION_MULT;
+ return *this;
+ }
+
+ inline fixed& operator-=(const fixed& y) noexcept
+ {
+ m_value -= y.m_value;
+ return *this;
+ }
+
+ template <typename I, typename std::enable_if<std::is_integral<I>::value>::type* = nullptr>
+ inline fixed& operator-=(I y) noexcept
+ {
+ m_value -= y * FRACTION_MULT;
+ return *this;
+ }
+
+ inline fixed& operator*=(const fixed& y) noexcept
+ {
+ if (EnableRounding){
+ // Normal fixed-point multiplication is: x * y / 2**FractionBits.
+ // To correctly round the last bit in the result, we need one more bit of information.
+ // We do this by multiplying by two before dividing and adding the LSB to the real result.
+ auto value = (static_cast<IntermediateType>(m_value) * y.m_value) / (FRACTION_MULT / 2);
+ m_value = static_cast<BaseType>((value / 2) + (value % 2));
+ } else {
+ auto value = (static_cast<IntermediateType>(m_value) * y.m_value) / FRACTION_MULT;
+ m_value = static_cast<BaseType>(value);
+ }
+ return *this;
+ }
+
+ template <typename I, typename std::enable_if<std::is_integral<I>::value>::type* = nullptr>
+ inline fixed& operator*=(I y) noexcept
+ {
+ m_value *= y;
+ return *this;
+ }
+
+ inline fixed& operator/=(const fixed& y) noexcept
+ {
+ assert(y.m_value != 0);
+ if (EnableRounding){
+ // Normal fixed-point division is: x * 2**FractionBits / y.
+ // To correctly round the last bit in the result, we need one more bit of information.
+ // We do this by multiplying by two before dividing and adding the LSB to the real result.
+ auto value = (static_cast<IntermediateType>(m_value) * FRACTION_MULT * 2) / y.m_value;
+ m_value = static_cast<BaseType>((value / 2) + (value % 2));
+ } else {
+ auto value = (static_cast<IntermediateType>(m_value) * FRACTION_MULT) / y.m_value;
+ m_value = static_cast<BaseType>(value);
+ }
+ return *this;
+ }
+
+ template <typename I, typename std::enable_if<std::is_integral<I>::value>::type* = nullptr>
+ inline fixed& operator/=(I y) noexcept
+ {
+ m_value /= y;
+ return *this;
+ }
+
+private:
+ BaseType m_value;
+};
+
+//
+// Convenience typedefs
+//
+
+using fixed_16_16 = fixed<std::int32_t, std::int64_t, 16>;
+using fixed_24_8 = fixed<std::int32_t, std::int64_t, 8>;
+using fixed_8_24 = fixed<std::int32_t, std::int64_t, 24>;
+
+//
+// Addition
+//
+
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline fixed<B, I, F, R> operator+(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept
+{
+ return fixed<B, I, F, R>(x) += y;
+}
+
+template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+constexpr inline fixed<B, I, F, R> operator+(const fixed<B, I, F, R>& x, T y) noexcept
+{
+ return fixed<B, I, F, R>(x) += y;
+}
+
+template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+constexpr inline fixed<B, I, F, R> operator+(T x, const fixed<B, I, F, R>& y) noexcept
+{
+ return fixed<B, I, F, R>(y) += x;
+}
+
+//
+// Subtraction
+//
+
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline fixed<B, I, F, R> operator-(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept
+{
+ return fixed<B, I, F, R>(x) -= y;
+}
+
+template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+constexpr inline fixed<B, I, F, R> operator-(const fixed<B, I, F, R>& x, T y) noexcept
+{
+ return fixed<B, I, F, R>(x) -= y;
+}
+
+template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+constexpr inline fixed<B, I, F, R> operator-(T x, const fixed<B, I, F, R>& y) noexcept
+{
+ return fixed<B, I, F, R>(x) -= y;
+}
+
+//
+// Multiplication
+//
+
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline fixed<B, I, F, R> operator*(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept
+{
+ return fixed<B, I, F, R>(x) *= y;
+}
+
+template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+constexpr inline fixed<B, I, F, R> operator*(const fixed<B, I, F, R>& x, T y) noexcept
+{
+ return fixed<B, I, F, R>(x) *= y;
+}
+
+template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+constexpr inline fixed<B, I, F, R> operator*(T x, const fixed<B, I, F, R>& y) noexcept
+{
+ return fixed<B, I, F, R>(y) *= x;
+}
+
+//
+// Division
+//
+
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline fixed<B, I, F, R> operator/(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept
+{
+ return fixed<B, I, F, R>(x) /= y;
+}
+
+template <typename B, typename I, unsigned int F, typename T, bool R, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+constexpr inline fixed<B, I, F, R> operator/(const fixed<B, I, F, R>& x, T y) noexcept
+{
+ return fixed<B, I, F, R>(x) /= y;
+}
+
+template <typename B, typename I, unsigned int F, typename T, bool R, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+constexpr inline fixed<B, I, F, R> operator/(T x, const fixed<B, I, F, R>& y) noexcept
+{
+ return fixed<B, I, F, R>(x) /= y;
+}
+
+//
+// Comparison operators
+//
+
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool operator==(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept
+{
+ return x.raw_value() == y.raw_value();
+}
+
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool operator!=(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept
+{
+ return x.raw_value() != y.raw_value();
+}
+
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool operator<(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept
+{
+ return x.raw_value() < y.raw_value();
+}
+
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool operator>(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept
+{
+ return x.raw_value() > y.raw_value();
+}
+
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool operator<=(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept
+{
+ return x.raw_value() <= y.raw_value();
+}
+
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool operator>=(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept
+{
+ return x.raw_value() >= y.raw_value();
+}
+
+namespace detail
+{
+// Number of base-10 digits required to fully represent a number of bits
+static constexpr int max_digits10(int bits)
+{
+ // 8.24 fixed-point equivalent of (int)ceil(bits * std::log10(2));
+ using T = long long;
+ return static_cast<int>((T{bits} * 5050445 + (T{1} << 24) - 1) >> 24);
+}
+
+// Number of base-10 digits that can be fully represented by a number of bits
+static constexpr int digits10(int bits)
+{
+ // 8.24 fixed-point equivalent of (int)(bits * std::log10(2));
+ using T = long long;
+ return static_cast<int>((T{bits} * 5050445) >> 24);
+}
+
+} // namespace detail
+} // namespace fpm
+
+// Specializations for customization points
+namespace std
+{
+
+template <typename B, typename I, unsigned int F, bool R>
+struct hash<fpm::fixed<B,I,F,R>>
+{
+ using argument_type = fpm::fixed<B, I, F, R>;
+ using result_type = std::size_t;
+
+ result_type operator()(argument_type arg) const noexcept(noexcept(std::declval<std::hash<B>>()(arg.raw_value()))) {
+ return m_hash(arg.raw_value());
+ }
+
+private:
+ std::hash<B> m_hash;
+};
+
+template <typename B, typename I, unsigned int F, bool R>
+struct numeric_limits<fpm::fixed<B,I,F,R>>
+{
+ static constexpr bool is_specialized = true;
+ static constexpr bool is_signed = std::numeric_limits<B>::is_signed;
+ static constexpr bool is_integer = false;
+ static constexpr bool is_exact = true;
+ static constexpr bool has_infinity = false;
+ static constexpr bool has_quiet_NaN = false;
+ static constexpr bool has_signaling_NaN = false;
+ static constexpr std::float_denorm_style has_denorm = std::denorm_absent;
+ static constexpr bool has_denorm_loss = false;
+ static constexpr std::float_round_style round_style = std::round_to_nearest;
+ static constexpr bool is_iec_559 = false;
+ static constexpr bool is_bounded = true;
+ static constexpr bool is_modulo = std::numeric_limits<B>::is_modulo;
+ static constexpr int digits = std::numeric_limits<B>::digits;
+
+ // Any number with `digits10` significant base-10 digits (that fits in
+ // the range of the type) is guaranteed to be convertible from text and
+ // back without change. Worst case, this is 0.000...001, so we can only
+ // guarantee this case. Nothing more.
+ static constexpr int digits10 = 1;
+
+ // This is equal to max_digits10 for the integer and fractional part together.
+ static constexpr int max_digits10 =
+ fpm::detail::max_digits10(std::numeric_limits<B>::digits - F) + fpm::detail::max_digits10(F);
+
+ static constexpr int radix = 2;
+ static constexpr int min_exponent = 1 - F;
+ static constexpr int min_exponent10 = -fpm::detail::digits10(F);
+ static constexpr int max_exponent = std::numeric_limits<B>::digits - F;
+ static constexpr int max_exponent10 = fpm::detail::digits10(std::numeric_limits<B>::digits - F);
+ static constexpr bool traps = true;
+ static constexpr bool tinyness_before = false;
+
+ static constexpr fpm::fixed<B,I,F,R> lowest() noexcept {
+ return fpm::fixed<B,I,F,R>::from_raw_value(std::numeric_limits<B>::lowest());
+ };
+
+ static constexpr fpm::fixed<B,I,F,R> min() noexcept {
+ return lowest();
+ }
+
+ static constexpr fpm::fixed<B,I,F,R> max() noexcept {
+ return fpm::fixed<B,I,F,R>::from_raw_value(std::numeric_limits<B>::max());
+ };
+
+ static constexpr fpm::fixed<B,I,F,R> epsilon() noexcept {
+ return fpm::fixed<B,I,F,R>::from_raw_value(1);
+ };
+
+ static constexpr fpm::fixed<B,I,F,R> round_error() noexcept {
+ return fpm::fixed<B,I,F,R>(1) / 2;
+ };
+
+ static constexpr fpm::fixed<B,I,F,R> denorm_min() noexcept {
+ return min();
+ }
+};
+
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_specialized;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_signed;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_integer;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_exact;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::has_infinity;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::has_quiet_NaN;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::has_signaling_NaN;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr std::float_denorm_style numeric_limits<fpm::fixed<B,I,F,R>>::has_denorm;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::has_denorm_loss;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr std::float_round_style numeric_limits<fpm::fixed<B,I,F,R>>::round_style;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_iec_559;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_bounded;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_modulo;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::digits;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::digits10;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::max_digits10;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::radix;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::min_exponent;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::min_exponent10;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::max_exponent;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::max_exponent10;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::traps;
+template <typename B, typename I, unsigned int F, bool R>
+constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::tinyness_before;
+
+}
+
+#endif
diff --git a/fpm/ios.hpp b/fpm/ios.hpp
new file mode 100644
index 0000000..69581fb
--- /dev/null
+++ b/fpm/ios.hpp
@@ -0,0 +1,740 @@
+#ifndef FPM_IOS_HPP
+#define FPM_IOS_HPP
+
+#include "fixed.hpp"
+#include "math.hpp"
+#include <array>
+#include <algorithm>
+#include <cctype>
+#include <climits>
+#include <limits>
+#include <ios>
+#include <vector>
+
+namespace fpm
+{
+
+/// Writes the fixed-point number `x` to `os`, formatted like a floating-point value.
+///
+/// Honours the stream's floatfield (fixed, scientific, hexfloat or the default
+/// "auto" mode), precision, width, fill character, adjustfield, showpos, showpoint
+/// and uppercase flags, as well as the decimal point and digit grouping of the
+/// stream locale's std::numpunct facet. The stream width is reset to 0 afterwards.
+///
+/// Not declared noexcept: std::use_facet throws std::bad_cast when the locale lacks
+/// the requested facet and the stream buffer may throw while writing; with noexcept
+/// either would end in std::terminate.
+///
+/// @param os output stream to write to
+/// @param x  value to print
+/// @return   `os`
+template <typename CharT, class Traits, typename B, typename I, unsigned int F, bool R>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os, fixed<B, I, F, R> x)
+{
+    const auto uppercase = ((os.flags() & std::ios_base::uppercase) != 0);
+    const auto showpoint = ((os.flags() & std::ios_base::showpoint) != 0);
+    const auto adjustfield = (os.flags() & std::ios_base::adjustfield);
+    const auto width = os.width();
+    const auto& ctype = std::use_facet<std::ctype<CharT>>(os.getloc());
+    const auto& numpunct = std::use_facet<std::numpunct<CharT>>(os.getloc());
+
+    auto floatfield = (os.flags() & std::ios_base::floatfield);
+    auto precision = os.precision();
+    auto show_trailing_zeros = true;
+    auto use_significant_digits = false;
+
+    // Invalid precision? Reset to the default
+    if (precision < 0)
+    {
+        precision = 6;
+    }
+
+    // Output buffer. Needs to be big enough for the formatted number without padding.
+    // Optional prefixes (i.e. "+"/"-", decimal separator, exponent "e+/-" and/or "0x").
+    constexpr auto worst_case_constant_size = 6;
+    // Maximum number of digits from the base type (covers integral + fractional digits)
+    constexpr auto worst_case_digit_count = std::numeric_limits<B>::digits10 + 2;
+    // Exponent suffixes (i.e. maximum digits based on log of the base type size).
+    // Needs a log10, but that isn't constexpr, so we're over-allocating on the stack. Can't hurt.
+    constexpr auto worst_case_suffix_size = std::numeric_limits<B>::digits;
+    // Double the digit count: in the worst case the thousands grouping add a character per digit.
+    using buffer_t = std::array<CharT, worst_case_constant_size + worst_case_digit_count * 2 + worst_case_suffix_size>;
+    buffer_t buffer;
+
+    // Output cursor
+    auto end = buffer.begin();
+
+    // Keep track of the start of "internal" padding
+    typename buffer_t::iterator internal_pad = buffer.end();
+
+    // Representation of a number.
+    // The value of the number is: raw / divisor * (10|2) ^ exponent
+    // The base of the exponent is 2 in hexfloat mode, or 10 otherwise.
+    struct number_t {
+        I raw;          // raw fixed-point value
+        I divisor;      // the divisor indicating the place of the decimal point
+        int exponent;   // the exponent applied
+    };
+
+    // Convert a value without exponent to scientific representation
+    // where the part before the decimal point is less than 10.
+    const auto as_scientific = [](number_t value) {
+        assert(value.exponent == 0);
+        if (value.raw > 0)
+        {
+            while (value.raw / 10 >= value.divisor) {
+                value.divisor *= 10;
+                ++value.exponent;
+            }
+            while (value.raw < value.divisor) {
+                value.raw *= 10;
+                --value.exponent;
+            }
+        }
+        return value;
+    };
+
+    number_t value = { x.raw_value(), I{1} << F, 0};
+
+    auto base = B{10};
+
+    // First write the sign
+    if (value.raw < 0)
+    {
+        *end++ = ctype.widen('-');
+        value.raw = -value.raw;
+        internal_pad = end;
+    }
+    else if (os.flags() & std::ios_base::showpos)
+    {
+        *end++ = ctype.widen('+');
+        internal_pad = end;
+    }
+    assert(value.raw >= 0);
+
+    switch (floatfield)
+    {
+    case std::ios_base::fixed | std::ios_base::scientific:
+        // Hexadecimal mode: figure out the hexadecimal exponent and write "0x"
+        if (value.raw > 0)
+        {
+            auto bit = detail::find_highest_bit(value.raw);
+            value.exponent = bit - F;       // exponent is applied to base 2
+            value.divisor = I{1} << bit;    // divisor is at the highest bit, ensuring it starts with "1."
+            precision = (bit + 3) / 4;      // precision is number of nibbles, so we show all of them
+        }
+        base = 16;
+        show_trailing_zeros = false; // Always strip trailing zeros in hexfloat mode
+
+        *end++ = ctype.widen('0');
+        *end++ = ctype.widen(uppercase ? 'X' : 'x');
+        break;
+
+    case std::ios_base::scientific:
+        // Scientific mode, normalize value to scientific notation
+        value = as_scientific(value);
+        break;
+
+    case std::ios_base::fixed:
+        // Fixed mode. Nothing to do.
+        break;
+
+    default:
+        {
+            // "auto" mode: figure out the exponent
+            const number_t sci_value = as_scientific(value);
+
+            // Now `precision` indicates the number of *significant digits* (not fractional digits).
+            use_significant_digits = true;
+            precision = std::max<std::streamsize>(precision, 1);
+
+            if (sci_value.exponent >= precision || sci_value.exponent < -4) {
+                // Display as scientific format
+                floatfield = std::ios_base::scientific;
+                value = sci_value;
+            } else {
+                // Display as fixed format.
+                // "showpoint" indicates whether or not we show trailing zeros
+                floatfield = std::ios_base::fixed;
+                show_trailing_zeros = showpoint;
+            }
+            break;
+        }
+    }
+
+    // If we didn't write a sign, any internal padding starts here
+    // (after a potential "0x" for hexfloats).
+    if (internal_pad == buffer.end()) {
+        internal_pad = end;
+    }
+
+    // Separate out the integral part of the number
+    I integral = value.raw / value.divisor;
+    value.raw %= value.divisor;
+
+    // Here we start printing the number itself
+    const char* const digits = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
+    const auto digits_start = end;
+
+    // Are we already printing significant digits? (yes if we're not counting significant digits)
+    bool significant_digits = !use_significant_digits;
+
+    // Print the integral part
+    int last_digit = 0;
+    if (integral == 0) {
+        *end++ = ctype.widen('0');
+        if (value.raw == 0) {
+            // If the fraction is zero too, all zeros including the integral count
+            // as significant digits.
+            significant_digits = true;
+        }
+    } else {
+        while (integral > 0) {
+            last_digit = integral % base;
+            *end++ = ctype.widen(digits[last_digit]);
+            integral /= base;
+        }
+        std::reverse(digits_start, end);
+        significant_digits = true;
+    }
+
+    if (use_significant_digits && significant_digits)
+    {
+        // Apparently the integral part was significant; subtract its
+        // length from the remaining significant digits.
+        precision -= (end - digits_start);
+    }
+
+    // At this point, `value` contains only the fraction and
+    // `precision` holds the number of digits to print.
+    assert(value.raw < value.divisor);
+    assert(precision >= 0);
+
+    // Location of decimal point
+    typename buffer_t::iterator point = buffer.end();
+
+    // Start (and length) of the trailing zeros to insert while printing
+    // By tracking this to print them later instead of actually printing them now,
+    // we can support large precisions with a small printing buffer.
+    typename buffer_t::iterator trailing_zeros_start = buffer.end();
+    std::streamsize trailing_zeros_count = 0;
+
+    if (precision > 0)
+    {
+        // Print the fractional part
+        *(point = end++) = numpunct.decimal_point();
+
+        for (int i = 0; i < precision; ++i)
+        {
+            if (value.raw == 0)
+            {
+                // The rest of the digits are all zeros, mark them
+                // to be printed in this spot.
+                trailing_zeros_start = end;
+                trailing_zeros_count = precision - i;
+                break;
+            }
+
+            // Shift the divisor if we can to avoid overflow on the value
+            if (value.divisor % base == 0) {
+                value.divisor /= base;
+            } else {
+                value.raw *= base;
+            }
+            assert(value.divisor > 0);
+            assert(value.raw >= 0);
+            last_digit = (value.raw / value.divisor) % base;
+            value.raw %= value.divisor;
+            *end++ = ctype.widen(digits[last_digit]);
+
+            if (!significant_digits) {
+                // We're still finding the first significant digit
+                if (last_digit != 0) {
+                    // Found it
+                    significant_digits = true;
+                } else {
+                    // Not yet; increment number of digits to print
+                    ++precision;
+                }
+            }
+        }
+    }
+    else if (showpoint)
+    {
+        // No fractional part to print, but we still want the point
+        *(point = end++) = numpunct.decimal_point();
+    }
+
+    // Insert `ch` into the output at `position`, updating all references accordingly
+    const auto insert_character = [&](typename buffer_t::iterator position, CharT ch) {
+        assert(position >= buffer.begin() && position < end);
+        std::move_backward(position, end, end + 1);
+        if (point != buffer.end() && position < point) {
+            ++point;
+        }
+        if (trailing_zeros_start != buffer.end() && position < trailing_zeros_start) {
+            ++trailing_zeros_start;
+        }
+        ++end;
+        *position = ch;
+    };
+
+    // Round the number: round to nearest
+    bool increment = false;
+    if (value.raw > value.divisor / 2) {
+        // Round up
+        increment = true;
+    } else if (value.raw == value.divisor / 2) {
+        // It's a tie (i.e. "xyzw.5"): round to even
+        increment = ((last_digit % 2) == 1);
+    }
+
+    if (increment)
+    {
+        auto p = end - 1;
+        // Increment all digits backwards while we see "9"
+        while (p >= digits_start) {
+            if (p == point) {
+                // Skip over the decimal point
+                --p;
+            }
+            if ((*p)++ != ctype.widen('9')) {
+                break;
+            }
+            *p-- = ctype.widen('0');
+        }
+
+        if (p < digits_start) {
+            // We've incremented all the way to the start (all 9's), we need to insert the
+            // carried-over 1 from incrementing the last 9.
+            assert(p == digits_start - 1);
+            insert_character(++p, ctype.widen('1'));
+
+            if (floatfield == std::ios::scientific)
+            {
+                // We just made the integral part equal to 10, so we shift the decimal point
+                // back one place (if any) and tweak the exponent, so that we keep the integer part
+                // less than 10.
+                if (point != buffer.end()) {
+                    assert(p + 2 == point);
+                    std::swap(*(point - 1), *point);
+                    --point;
+                }
+                ++value.exponent;
+
+                // We've introduced an extra digit so we need to strip the last digit
+                // to maintain the same precision
+                --end;
+            }
+        }
+
+        if (use_significant_digits && *p == ctype.widen('1') && point != buffer.end()) {
+            // We've converted a leading zero to a 1 so we need to strip the last digit
+            // (behind the decimal point) to maintain the same significant digit count.
+            --end;
+        }
+    }
+
+    if (point != buffer.end())
+    {
+        if (!show_trailing_zeros)
+        {
+            // Remove trailing zeros
+            while (*(end - 1) == ctype.widen('0')) {
+                --end;
+            }
+
+            // Also clear the "trailing zeros to append during printing" range
+            trailing_zeros_start = buffer.end();
+            trailing_zeros_count = 0;
+        }
+
+        if (end - 1 == point && trailing_zeros_count == 0 && !showpoint) {
+            // Remove the decimal point, too
+            --end;
+        }
+    }
+
+    // Apply thousands grouping
+    const auto& grouping = numpunct.grouping();
+    if (!grouping.empty())
+    {
+        // Step backwards from the end or decimal point, inserting the
+        // thousands separator at every group interval.
+        const CharT thousands_sep = ctype.widen(numpunct.thousands_sep());
+        std::size_t group = 0;
+        auto p = point != buffer.end() ? point : end;
+        auto size = static_cast<int>(grouping[group]);
+        while (size > 0 && size < CHAR_MAX && p - digits_start > size) {
+            p -= size;
+            insert_character(p, thousands_sep);
+            if (group < grouping.size() - 1) {
+                size = static_cast<int>(grouping[++group]);
+            }
+        }
+    }
+
+    // Print the exponent if required
+    assert(floatfield != 0);
+    if (floatfield & std::ios_base::scientific)
+    {
+        // Hexadecimal (%a/%A) or decimal (%e/%E) scientific notation
+        if (floatfield & std::ios_base::fixed) {
+            *end++ = ctype.widen(uppercase ? 'P' : 'p');
+        } else {
+            *end++ = ctype.widen(uppercase ? 'E' : 'e');
+        }
+
+        if (value.exponent < 0) {
+            *end++ = ctype.widen('-');
+            value.exponent = -value.exponent;
+        } else {
+            *end++ = ctype.widen('+');
+        }
+
+        if (floatfield == std::ios_base::scientific) {
+            // In decimal scientific notation (%e/%E), the exponent is at least two digits
+            if (value.exponent < 10) {
+                *end++ = ctype.widen('0');
+            }
+        }
+
+        const auto exponent_start = end;
+        if (value.exponent == 0) {
+            *end++ = ctype.widen('0');
+        } else while (value.exponent > 0) {
+            *end++ = ctype.widen(digits[value.exponent % 10]);
+            value.exponent /= 10;
+        }
+        std::reverse(exponent_start, end);
+    }
+
+    // Write character `ch` `count` times to the stream
+    const auto sputcn = [&](CharT ch, std::streamsize count){
+        // Fill a buffer to output larger chunks
+        constexpr std::streamsize chunk_size = 64;
+        std::array<CharT, chunk_size> fill_buffer;
+        std::fill_n(fill_buffer.begin(), std::min(count, chunk_size), ch);
+
+        for (std::streamsize size, left = count; left > 0; left -= size) {
+            size = std::min(chunk_size, left);
+            os.rdbuf()->sputn(&fill_buffer[0], size);
+        }
+    };
+
+    // Outputs a range of characters, making sure to output the trailing zeros range
+    // if it lies in the specified range
+    const auto put_range = [&](typename buffer_t::const_iterator begin, typename buffer_t::const_iterator end) {
+        assert(end >= begin);
+        if (trailing_zeros_start >= begin && trailing_zeros_start <= end) {
+            // Print range with trailing zeros range in the middle
+            assert(trailing_zeros_count > 0);
+            os.rdbuf()->sputn(&*begin, trailing_zeros_start - begin);
+            sputcn(ctype.widen('0'), trailing_zeros_count);
+            os.rdbuf()->sputn(&*trailing_zeros_start, end - trailing_zeros_start);
+        } else {
+            // Print range as-is
+            os.rdbuf()->sputn(&*begin, end - begin);
+        }
+    };
+
+    // Pad the buffer if necessary.
+    // Note that the length of trailing zeros is counted towards the length of the content.
+    const auto content_size = end - buffer.begin() + trailing_zeros_count;
+    if (content_size >= width)
+    {
+        // Buffer needs no padding, output as-is
+        put_range(buffer.begin(), end);
+    }
+    else
+    {
+        const auto pad_size = width - content_size;
+        switch (adjustfield)
+        {
+        case std::ios_base::left:
+            // Content is left-aligned, so output the buffer, followed by the padding
+            put_range(buffer.begin(), end);
+            sputcn(os.fill(), pad_size);
+            break;
+        case std::ios_base::internal:
+            // Content is internally aligned, so output the buffer up to the "internal pad"
+            // point, followed by the padding, followed by the remainder of the buffer.
+            put_range(buffer.begin(), internal_pad);
+            sputcn(os.fill(), pad_size);
+            put_range(internal_pad, end);
+            break;
+        default:
+            // Content is right-aligned, so output the padding, followed by the buffer
+            sputcn(os.fill(), pad_size);
+            put_range(buffer.begin(), end);
+            break;
+        }
+    }
+
+    // Width is reset after every write
+    os.width(0);
+
+    return os;
+}
+
+
+/// Reads a fixed-point number from `is` into `x`.
+///
+/// Accepted input: an optional sign followed by either the literal "inf" / "infinity",
+/// a decimal number with optional fraction and 'e'/'E' exponent, or a hexadecimal
+/// float ("0x" prefix, 'p'/'P' binary exponent). The decimal point and thousands
+/// separator are taken from the stream locale's std::numpunct facet.
+///
+/// Values that do not fit (including "inf") saturate to std::numeric_limits<fixed>::min()
+/// or ::max(). failbit is set when there is no significand, when "inf"/"infinity" is
+/// only partially matched, or when an exponent character is not followed by digits.
+///
+/// @param is input stream to read from
+/// @param x  receives the parsed value; left untouched on failure
+/// @return   `is`
+template <typename CharT, class Traits, typename B, typename I, unsigned int F, bool R>
+std::basic_istream<CharT, Traits>& operator>>(std::basic_istream<CharT, Traits>& is, fixed<B, I, F, R>& x)
+{
+    typename std::basic_istream<CharT, Traits>::sentry sentry(is);
+    if (!sentry)
+    {
+        return is;
+    }
+
+    const auto& ctype = std::use_facet<std::ctype<CharT>>(is.getloc());
+    const auto& numpunct = std::use_facet<std::numpunct<CharT>>(is.getloc());
+
+    bool thousands_separator_allowed = false;
+    const bool supports_thousands_separators = !numpunct.grouping().empty();
+
+    const auto& is_valid_character = [](char ch) {
+        // Note: allowing ['p', 'i', 'n', 't', 'y'] is technically in violation of the spec (we are emulating std::num_get),
+        // but otherwise we cannot parse hexfloats and "infinity". This is a known issue with the spec (LWG #2381).
+        // The <cctype> classifiers are only defined for values representable as unsigned char,
+        // so a (possibly negative) char must be converted first.
+        return std::isxdigit(static_cast<unsigned char>(ch)) ||
+            ch == 'x' || ch == 'X' || ch == 'p' || ch == 'P' ||
+            ch == 'i' || ch == 'I' || ch == 'n' || ch == 'N' ||
+            ch == 't' || ch == 'T' || ch == 'y' || ch == 'Y' ||
+            ch == '-' || ch == '+';
+    };
+
+    // Looks at the next input character without consuming it. Returns it narrowed to
+    // char ('.' for the locale's decimal point), or '\0' when input ends or is invalid.
+    const auto& peek = [&]() {
+        for(;;) {
+            auto ch = is.rdbuf()->sgetc();
+            if (ch == Traits::eof()) {
+                is.setstate(std::ios::eofbit);
+                return '\0';
+            }
+            if (ch == numpunct.decimal_point()) {
+                return '.';
+            }
+            if (ch == numpunct.thousands_sep())
+            {
+                if (!supports_thousands_separators || !thousands_separator_allowed) {
+                    return '\0';
+                }
+                // Ignore valid thousands separators
+                is.rdbuf()->sbumpc();
+                continue;
+            }
+            auto res = ctype.narrow(ch, 0);
+            if (!is_valid_character(res)) {
+                // Invalid character: end input
+                return '\0';
+            }
+            return res;
+        }
+    };
+
+    const auto& bump = [&]() {
+        is.rdbuf()->sbumpc();
+    };
+
+    const auto& next = [&]() {
+        bump();
+        return peek();
+    };
+
+    bool negate = false;
+    auto ch = peek();
+    if (ch == '-') {
+        negate = true;
+        ch = next();
+    } else if (ch == '+') {
+        ch = next();
+    }
+
+    const char infinity[] = "infinity";
+    // Must be "inf" or "infinity"
+    int i = 0;
+    while (i < 8 && ch == infinity[i]) {
+        ++i;
+        ch = next();
+    }
+
+    if (i > 0) {
+        if (i == 3 || i == 8) {
+            x = negate ? std::numeric_limits<fixed<B, I, F, R>>::min() : std::numeric_limits<fixed<B, I, F, R>>::max();
+        } else {
+            is.setstate(std::ios::failbit);
+        }
+        return is;
+    }
+
+    char exponent_char = 'e';
+    int base = 10;
+
+    constexpr auto NoFraction = std::numeric_limits<std::size_t>::max();
+    std::size_t fraction_start = NoFraction;
+    std::vector<unsigned char> significand;
+
+    if (ch == '0') {
+        ch = next();
+        if (ch == 'x' || ch == 'X') {
+            // Hexfloat
+            exponent_char = 'p';
+            base = 16;
+            ch = next();
+        } else {
+            significand.push_back(0);
+        }
+    }
+
+    // Parse the significand
+    thousands_separator_allowed = true;
+    for (;; ch = next()) {
+        if (ch == '.') {
+            if (fraction_start != NoFraction) {
+                // Double decimal point. Stop parsing.
+                break;
+            }
+            fraction_start = significand.size();
+            thousands_separator_allowed = false;
+        } else {
+            // `base` doubles as the "not a digit" marker
+            unsigned char val = base;
+            if (ch >= '0' && ch <= '9') {
+                val = ch - '0';
+            } else if (ch >= 'a' && ch <= 'f') {
+                val = ch - 'a' + 10;
+            } else if (ch >= 'A' && ch <= 'F') {
+                val = ch - 'A' + 10;
+            }
+            if (val >= base) {
+                break;
+            }
+            significand.push_back(val);
+        }
+    }
+    if (significand.empty()) {
+        // We need a significand
+        is.setstate(std::ios::failbit);
+        return is;
+    }
+    thousands_separator_allowed = false;
+
+    if (fraction_start == NoFraction) {
+        // If we haven't seen a fraction yet, place it at the end of the significand
+        fraction_start = significand.size();
+    }
+
+    // Parse the exponent
+    bool exponent_overflow = false;
+    std::size_t exponent = 0;
+    bool exponent_negate = false;
+    if (std::tolower(static_cast<unsigned char>(ch)) == exponent_char)
+    {
+        ch = next();
+        if (ch == '-') {
+            exponent_negate = true;
+            ch = next();
+        } else if (ch == '+') {
+            ch = next();
+        }
+
+        bool parsed = false;
+        while (std::isdigit(static_cast<unsigned char>(ch))) {
+            if (exponent <= std::numeric_limits<int>::max() / 10) {
+                exponent = exponent * 10 + (ch - '0');
+            } else {
+                exponent_overflow = true;
+            }
+            parsed = true;
+            ch = next();
+        }
+        if (!parsed) {
+            // If the exponent character is given, the exponent value may not be empty
+            is.setstate(std::ios::failbit);
+            return is;
+        }
+    }
+
+    // We've parsed all we need. Construct the value.
+    if (exponent_overflow) {
+        // Absolute exponent is too large
+        if (std::all_of(significand.begin(), significand.end(), [](unsigned char x){ return x == 0; })) {
+            // Significand is zero. Exponent doesn't matter.
+            x = fixed<B, I, F, R>(0);
+        } else if (exponent_negate) {
+            // A huge negative exponent approaches 0.
+            x = fixed<B, I, F, R>::from_raw_value(0);
+        } else {
+            // A huge positive exponent approaches infinity.
+            x = std::numeric_limits<fixed<B, I, F, R>>::max();
+        }
+        return is;
+    }
+
+    // Shift the fraction offset according to exponent
+    {
+        const auto exponent_mult = (base == 10) ? 1: 4;
+        if (exponent_negate) {
+            const auto adjust = std::min(exponent / exponent_mult, fraction_start);
+            fraction_start -= adjust;
+            exponent -= adjust * exponent_mult;
+        } else {
+            const auto adjust = std::min(exponent / exponent_mult, significand.size() - fraction_start);
+            fraction_start += adjust;
+            exponent -= adjust * exponent_mult;
+        }
+    }
+
+    constexpr auto IsSigned = std::is_signed<B>::value;
+    constexpr auto IntBits = sizeof(B) * 8 - F - (IsSigned ? 1 : 0);
+    constexpr auto MaxInt = (I{1} << IntBits) - 1;
+    constexpr auto MaxFraction = (I{1} << F) - 1;
+    constexpr auto MaxValue = (I{1} << sizeof(B) * 8) - 1;
+
+    // Parse the integer part
+    I integer = 0;
+    for (std::size_t i = 0; i < fraction_start; ++i) {
+        if (integer > MaxInt / base) {
+            // Overflow
+            x = negate ? std::numeric_limits<fixed<B, I, F, R>>::min() : std::numeric_limits<fixed<B, I, F, R>>::max();
+            return is;
+        }
+        assert(significand[i] < base);
+        integer = integer * base + significand[i];
+    }
+
+    // Parse the fractional part
+    I fraction = 0;
+    I divisor = 1;
+    for (std::size_t i = fraction_start; i < significand.size(); ++i) {
+        assert(significand[i] < base);
+        if (divisor > MaxFraction / base) {
+            // We're done
+            break;
+        }
+        fraction = fraction * base + significand[i];
+        divisor *= base;
+    }
+
+    // Construct the value from the parsed parts
+    I raw_value = (integer << F) + (fraction << F) / divisor;
+
+    // Apply remaining exponent
+    if (exponent_char == 'p') {
+        // Base-2 exponent
+        if (exponent_negate) {
+            // Shifting by the width of the type (or more) is undefined; the result is zero anyway.
+            if (exponent < sizeof(I) * 8) {
+                raw_value >>= exponent;
+            } else {
+                raw_value = 0;
+            }
+        } else if (raw_value != 0) {
+            // Guard the shift: saturate like the decimal path instead of shifting bits out
+            if (exponent >= sizeof(B) * 8 || raw_value > (MaxValue >> exponent)) {
+                // Overflow
+                x = negate ? std::numeric_limits<fixed<B, I, F, R>>::min() : std::numeric_limits<fixed<B, I, F, R>>::max();
+                return is;
+            }
+            raw_value <<= exponent;
+        }
+    } else {
+        // Base-10 exponent
+        if (exponent_negate) {
+            I remainder = 0;
+            for (std::size_t e = 0; e < exponent; ++e) {
+                if (raw_value == 0) {
+                    // Nothing left to divide: every further step would yield zero
+                    remainder = 0;
+                    break;
+                }
+                remainder = raw_value % 10;
+                raw_value /= 10;
+            }
+            // Round on the last digit that was dropped
+            raw_value += remainder / 5;
+        } else {
+            for (std::size_t e = 0; e < exponent && raw_value != 0; ++e) {
+                if (raw_value > MaxValue / 10) {
+                    // Overflow
+                    x = negate ? std::numeric_limits<fixed<B, I, F, R>>::min() : std::numeric_limits<fixed<B, I, F, R>>::max();
+                    return is;
+                }
+                raw_value *= 10;
+            }
+        }
+    }
+    x = fixed<B, I, F, R>::from_raw_value(static_cast<B>(negate ? -raw_value : raw_value));
+    return is;
+}
+
+}
+
+#endif
diff --git a/fpm/math.hpp b/fpm/math.hpp
new file mode 100644
index 0000000..7a76349
--- /dev/null
+++ b/fpm/math.hpp
@@ -0,0 +1,684 @@
+#ifndef FPM_MATH_HPP
+#define FPM_MATH_HPP
+
+#include "fixed.hpp"
+#include <cmath>
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+namespace fpm
+{
+
+//
+// Helper functions
+//
+namespace detail
+{
+
+// Returns the zero-based index of the most-significant set bit of `value`.
+// `value` must be non-zero (asserted).
+inline long find_highest_bit(unsigned long long value) noexcept
+{
+    assert(value != 0);
+#if defined(_MSC_VER)
+    unsigned long index;
+#if defined(_WIN64)
+    _BitScanReverse64(&index, value);
+#else
+    // No 64-bit scan available: try the upper 32 bits first, then the lower 32 bits
+    if (_BitScanReverse(&index, static_cast<unsigned long>(value >> 32)) != 0) {
+        index += 32;
+    } else {
+        _BitScanReverse(&index, static_cast<unsigned long>(value & 0xfffffffflu));
+    }
+#endif
+    return static_cast<long>(index);
+#elif defined(__GNUC__) || defined(__clang__)
+    return sizeof(value) * 8 - 1 - __builtin_clzll(value);
+#else
+    // Portable fallback for compilers without a bit-scan intrinsic
+    long index = 0;
+    while ((value >>= 1) != 0) {
+        ++index;
+    }
+    return index;
+#endif
+}
+
+}
+
+//
+// Classification methods
+//
+
+/// Classifies `x`: FP_ZERO when its raw value is zero, FP_NORMAL otherwise.
+/// No other category is ever returned.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline int fpclassify(fixed<B, I, F, R> x) noexcept
+{
+    return (x.raw_value() != 0) ? FP_NORMAL : FP_ZERO;
+}
+
+// Always true: this implementation treats every fixed-point value as finite.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool isfinite(fixed<B, I, F, R>) noexcept
+{
+ return true;
+}
+
+// Always false: no fixed-point value is reported as infinite.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool isinf(fixed<B, I, F, R>) noexcept
+{
+ return false;
+}
+
+// Always false: no fixed-point value is reported as NaN.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool isnan(fixed<B, I, F, R>) noexcept
+{
+ return false;
+}
+
+// True for every value whose raw representation is non-zero
+// (the counterpart of fpclassify() returning FP_NORMAL).
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool isnormal(fixed<B, I, F, R> x) noexcept
+{
+ return x.raw_value() != 0;
+}
+
+// True when the raw value of `x` is negative. Zero is never reported as negative.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool signbit(fixed<B, I, F, R> x) noexcept
+{
+ return x.raw_value() < 0;
+}
+
+// Comparison helpers mirroring the <cmath> names. Each one simply forwards to the
+// corresponding comparison operator of fixed.
+
+// Returns x > y.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool isgreater(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept
+{
+ return x > y;
+}
+
+// Returns x >= y.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool isgreaterequal(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept
+{
+ return x >= y;
+}
+
+// Returns x < y.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool isless(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept
+{
+ return x < y;
+}
+
+// Returns x <= y.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool islessequal(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept
+{
+ return x <= y;
+}
+
+// Returns x != y (i.e. x is either less than or greater than y).
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool islessgreater(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept
+{
+ return x != y;
+}
+
+/// Always false: isnan() in this file never reports a NaN, so two fixed-point
+/// values can always be ordered. The arguments are intentionally unnamed
+/// because they are not inspected (avoids unused-parameter warnings).
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline bool isunordered(fixed<B, I, F, R>, fixed<B, I, F, R>) noexcept
+{
+    return false;
+}
+
+//
+// Nearest integer operations
+//
+/// Rounds `x` up to an integral value (towards positive infinity).
+template <typename B, typename I, unsigned int F, bool R>
+inline fixed<B, I, F, R> ceil(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    constexpr auto one = B(1) << F;     // raw representation of 1.0
+
+    auto raw = x.raw_value();
+    if (raw > 0) {
+        // Bias positive values so the truncating division below rounds up
+        raw += one - 1;
+    }
+    const auto whole = raw / one;
+    return Fixed::from_raw_value(whole * one);
+}
+
+/// Rounds `x` down to an integral value (towards negative infinity).
+template <typename B, typename I, unsigned int F, bool R>
+inline fixed<B, I, F, R> floor(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    constexpr auto one = B(1) << F;     // raw representation of 1.0
+
+    auto raw = x.raw_value();
+    if (raw < 0) {
+        // Bias negative values so the truncating division below rounds down
+        raw -= one - 1;
+    }
+    const auto whole = raw / one;
+    return Fixed::from_raw_value(whole * one);
+}
+
+/// Discards the fractional part of `x` (rounds towards zero).
+template <typename B, typename I, unsigned int F, bool R>
+inline fixed<B, I, F, R> trunc(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    constexpr auto one = B(1) << F;     // raw representation of 1.0
+
+    // Integer division truncates towards zero, dropping the fraction bits
+    const auto whole = x.raw_value() / one;
+    return Fixed::from_raw_value(whole * one);
+}
+
+/// Rounds `x` to the nearest integral value; halfway cases move away from zero.
+template <typename B, typename I, unsigned int F, bool R>
+inline fixed<B, I, F, R> round(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    constexpr auto one = B(1) << F;     // raw representation of 1.0
+
+    // Express the value as a (truncated) count of halves ...
+    const auto halves = x.raw_value() / (one / 2);
+    // ... so an odd count pushes the result one step further from zero
+    const auto rounded = (halves / 2) + (halves % 2);
+    return Fixed::from_raw_value(rounded * one);
+}
+
+/// Rounds `x` to the nearest integral value; exact halfway cases go to the even integer.
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> nearbyint(fixed<B, I, F, R> x) noexcept
+{
+    // Rounding mode is assumed to be FE_TONEAREST
+    using Fixed = fixed<B, I, F, R>;
+    constexpr auto one = B(1) << F;     // raw representation of 1.0
+
+    auto raw = x.raw_value();
+    // Is the fraction exactly one half?
+    const bool tie = std::abs(raw % one) == one / 2;
+
+    // Round half away from zero first ...
+    raw /= one / 2;
+    raw = (raw / 2) + (raw % 2);
+    if (tie) {
+        // ... then step an odd result back to the neighbouring even integer
+        raw -= raw % 2;
+    }
+    return Fixed::from_raw_value(raw * one);
+}
+
+// Rounds `x` to an integral value by forwarding to nearbyint().
+// NOTE(review): declared constexpr although nearbyint() in this file is not
+// constexpr, so this cannot actually be evaluated at compile time - confirm intent.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline fixed<B, I, F, R> rint(fixed<B, I, F, R> x) noexcept
+{
+ // Rounding mode is assumed to be FE_TONEAREST
+ return nearbyint(x);
+}
+
+//
+// Mathematical functions
+//
+// Returns `x` unchanged when it is >= 0 and the negation of `x` otherwise.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline fixed<B, I, F, R> abs(fixed<B, I, F, R> x) noexcept
+{
+ return (x >= fixed<B, I, F, R>{0}) ? x : -x;
+}
+
+// Remainder of x / y computed with the integer `%` operator on the raw values.
+// `y` must be non-zero (asserted). The assert is chained with the comma operator
+// so the body stays a single return statement.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline fixed<B, I, F, R> fmod(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept
+{
+ return
+ assert(y.raw_value() != 0),
+ fixed<B, I, F, R>::from_raw_value(x.raw_value() % y.raw_value());
+}
+
+// Returns x - n * y, where n is x / y rounded by nearbyint().
+// `y` must be non-zero (asserted).
+// NOTE(review): declared constexpr although nearbyint() in this file is not
+// constexpr - confirm intent.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline fixed<B, I, F, R> remainder(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept
+{
+ return
+ assert(y.raw_value() != 0),
+ x - nearbyint(x / y) * y;
+}
+
+/// Stores the quotient of the raw values (integer division, truncated towards zero)
+/// in `*quo` and returns the matching remainder, computed with `%` like fmod().
+///
+/// @param x   dividend
+/// @param y   divisor, must be non-zero (asserted)
+/// @param quo receives the quotient, must not be null (asserted)
+template <typename B, typename I, unsigned int F, bool R>
+inline fixed<B, I, F, R> remquo(fixed<B, I, F, R> x, fixed<B, I, F, R> y, int* quo) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+
+    assert(y.raw_value() != 0);
+    assert(quo != nullptr);
+
+    const auto dividend = x.raw_value();
+    const auto divisor = y.raw_value();
+    *quo = dividend / divisor;
+    return Fixed::from_raw_value(dividend % divisor);
+}
+
+//
+// Manipulation functions
+//
+
+// Returns a value with the magnitude of `x` and the sign of `y`.
+// `y` may be a different fixed-point type than `x`; only its comparison with zero
+// is used, and y == 0 counts as positive.
+template <typename B, typename I, unsigned int F, bool R, typename C, typename J, unsigned int G, bool S>
+constexpr inline fixed<B, I, F, R> copysign(fixed<B, I, F, R> x, fixed<C, J, G, S> y) noexcept
+{
+ return
+ x = abs(x),
+ (y >= fixed<C, J, G, S>{0}) ? x : -x;
+}
+
+// Returns the representable value next to `from` in the direction of `to`:
+// `to` itself when both are equal, otherwise `from` with its raw value moved
+// by one step (+1 or -1) towards `to`.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline fixed<B, I, F, R> nextafter(fixed<B, I, F, R> from, fixed<B, I, F, R> to) noexcept
+{
+ return from == to ? to :
+ to > from ? fixed<B, I, F, R>::from_raw_value(from.raw_value() + 1)
+ : fixed<B, I, F, R>::from_raw_value(from.raw_value() - 1);
+}
+
+// Same as nextafter(): both arguments have the same fixed-point type here,
+// so the call is simply forwarded.
+template <typename B, typename I, unsigned int F, bool R>
+constexpr inline fixed<B, I, F, R> nexttoward(fixed<B, I, F, R> from, fixed<B, I, F, R> to) noexcept
+{
+ return nextafter(from, to);
+}
+
+/// Splits `x` into integral and fractional parts; both carry the sign of `x`.
+///
+/// @param x    value to split
+/// @param iptr receives the integral part; it is dereferenced without a null check
+/// @return     the fractional part
+template <typename B, typename I, unsigned int F, bool R>
+inline fixed<B, I, F, R> modf(fixed<B, I, F, R> x, fixed<B, I, F, R>* iptr) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    constexpr auto one = B{1} << F;     // raw representation of 1.0
+
+    const auto bits = x.raw_value();
+    const auto whole = bits / one * one;    // fraction bits cleared (truncating division)
+    *iptr = Fixed::from_raw_value(whole);
+    return Fixed::from_raw_value(bits % one);
+}
+
+
+//
+// Power functions
+//
+
+/// Raises `base` to the integral power `exp` by repeated squaring.
+///
+/// For a negative `exp` the result is built by dividing instead of multiplying.
+/// A zero `base` yields zero and requires `exp` > 0 (asserted).
+template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+fixed<B, I, F, R> pow(fixed<B, I, F, R> base, T exp) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+
+    if (base == Fixed(0)) {
+        assert(exp > 0);
+        return Fixed(0);
+    }
+
+    const bool invert = (exp < 0);
+    Fixed result {1};
+    Fixed factor = base;    // base^(2^k) for the k-th exponent bit
+    while (exp != 0)
+    {
+        if ((exp % 2) != 0)
+        {
+            // This bit of the exponent is set: fold the current factor into the result
+            if (invert) {
+                result /= factor;
+            } else {
+                result *= factor;
+            }
+        }
+        exp /= 2;
+        factor *= factor;
+    }
+    return result;
+}
+
+/// Raises `base` to the fixed-point power `exp`.
+///
+/// - a zero `base` yields zero and requires `exp` > 0 (asserted);
+/// - a negative `exp` is computed as 1 / pow(base, -exp);
+/// - an `exp` without fraction bits is delegated to the integral overload;
+/// - otherwise `base` must be positive (asserted) and exp2(log2(base) * exp) is returned.
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> pow(fixed<B, I, F, R> base, fixed<B, I, F, R> exp) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    const Fixed zero(0);
+
+    if (base == zero) {
+        assert(exp > zero);
+        return zero;
+    }
+
+    if (exp < zero)
+    {
+        return 1 / pow(base, -exp);
+    }
+
+    constexpr auto one = B(1) << F;     // raw representation of 1.0
+    const bool integral_exponent = (exp.raw_value() % one == 0);
+    if (integral_exponent)
+    {
+        // Non-fractional exponents are easier to calculate
+        return pow(base, exp.raw_value() / one);
+    }
+
+    // For negative bases we do not support fractional exponents.
+    // Technically fractions with odd denominators could work,
+    // but that's too much work to figure out.
+    assert(base > zero);
+    return exp2(log2(base) * exp);
+}
+
+/// Computes e raised to the power `x`.
+///
+/// Negative arguments are handled as 1 / exp(-x). Otherwise `x` is split into an
+/// integral part (evaluated with pow(e, n)) and a fraction in [0, 1), for which a
+/// fifth-degree polynomial is evaluated.
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> exp(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    if (x < Fixed(0)) {
+        return 1 / exp(-x);
+    }
+
+    // Split off the integral part; `x` keeps the fraction
+    constexpr auto one = B(1) << F;
+    const B whole = x.raw_value() / one;
+    x -= whole;
+    assert(x >= Fixed(0) && x < Fixed(1));
+
+    // Polynomial coefficients, highest degree first
+    constexpr auto fA = Fixed::template from_fixed_point<63>( 128239257017632854ll); // 1.3903728105644451e-2
+    constexpr auto fB = Fixed::template from_fixed_point<63>( 320978614890280666ll); // 3.4800571158543038e-2
+    constexpr auto fC = Fixed::template from_fixed_point<63>(1571680799599592947ll); // 1.7040197373796334e-1
+    constexpr auto fD = Fixed::template from_fixed_point<63>(4603349000587966862ll); // 4.9909609871464493e-1
+    constexpr auto fE = Fixed::template from_fixed_point<62>(4612052447974689712ll); // 1.0000794567422495
+    constexpr auto fF = Fixed::template from_fixed_point<63>(9223361618412247875ll); // 9.9999887043019773e-1
+
+    // Horner evaluation of the fractional contribution
+    const Fixed fraction_part = ((((fA * x + fB) * x + fC) * x + fD) * x + fE) * x + fF;
+    return pow(Fixed::e(), whole) * fraction_part;
+}
+
+/// Computes 2 raised to the power `x`.
+///
+/// Negative arguments are handled as 1 / exp2(-x). Otherwise `x` is split into an
+/// integral part n (contributing 2^n) and a fraction in [0, 1), for which a
+/// fifth-degree polynomial is evaluated.
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> exp2(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    if (x < Fixed(0)) {
+        return 1 / exp2(-x);
+    }
+    constexpr auto FRAC = B(1) << F;
+    const B x_int = x.raw_value() / FRAC;
+    x -= x_int;
+    assert(x >= Fixed(0) && x < Fixed(1));
+
+    // Polynomial coefficients, highest degree first
+    constexpr auto fA = Fixed::template from_fixed_point<63>(  17491766697771214ll); // 1.8964611454333148e-3
+    constexpr auto fB = Fixed::template from_fixed_point<63>(  82483038782406547ll); // 8.9428289841091295e-3
+    constexpr auto fC = Fixed::template from_fixed_point<63>( 515275173969157690ll); // 5.5866246304520701e-2
+    constexpr auto fD = Fixed::template from_fixed_point<63>(2214897896212987987ll); // 2.4013971109076949e-1
+    constexpr auto fE = Fixed::template from_fixed_point<63>(6393224161192452326ll); // 6.9315475247516736e-1
+    constexpr auto fF = Fixed::template from_fixed_point<63>(9223371050976163566ll); // 9.9999989311082668e-1
+
+    // Shift in the base type rather than in `int`: with a 64-bit base type the
+    // integral part may legitimately be 31 or more, which overflows `1 << x_int`.
+    return Fixed(B(1) << x_int) * (((((fA * x + fB) * x + fC) * x + fD) * x + fE) * x + fF);
+}
+
+/// Computes exp(x) - 1 by evaluating exp() and subtracting one.
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> expm1(fixed<B, I, F, R> x) noexcept
+{
+    const auto power = exp(x);
+    return power - 1;
+}
+
+/// Computes the base-2 logarithm of `x`. `x` must be positive (asserted).
+///
+/// The input is normalized to [1, 2) by shifting its highest set bit onto the
+/// position of 1.0; the shift distance is the integral part of the result and a
+/// fifth-degree polynomial supplies the rest.
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> log2(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    assert(x > Fixed(0));
+
+    // Work in signed arithmetic: F is unsigned, and where `long` is no wider than
+    // `unsigned int` the expression `highest - F` would otherwise wrap around to a
+    // large unsigned value instead of becoming negative.
+    constexpr long frac_bits = static_cast<long>(F);
+
+    // Normalize input to the [1:2] domain
+    B value = x.raw_value();
+    const long highest = detail::find_highest_bit(value);
+    if (highest >= frac_bits) {
+        value >>= (highest - frac_bits);
+    } else {
+        value <<= (frac_bits - highest);
+    }
+    x = Fixed::from_raw_value(value);
+    assert(x >= Fixed(1) && x < Fixed(2));
+
+    // Polynomial coefficients, highest degree first
+    constexpr auto fA = Fixed::template from_fixed_point<63>(  413886001457275979ll); //  4.4873610194131727e-2
+    constexpr auto fB = Fixed::template from_fixed_point<63>(-3842121857793256941ll); // -4.1656368651734915e-1
+    constexpr auto fC = Fixed::template from_fixed_point<62>( 7522345947206307744ll); //  1.6311487636297217
+    constexpr auto fD = Fixed::template from_fixed_point<61>(-8187571043052183818ll); // -3.5507929249026341
+    constexpr auto fE = Fixed::template from_fixed_point<60>( 5870342889289496598ll); //  5.0917108110420042
+    constexpr auto fF = Fixed::template from_fixed_point<61>(-6457199832668582866ll); // -2.8003640347009253
+    return Fixed(highest - frac_bits) + (((((fA * x + fB) * x + fC) * x + fD) * x + fE) * x + fF);
+}
+
+// Natural logarithm, obtained by change of base: ln(x) = log2(x) / log2(e).
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> log(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    const auto log2_of_x = log2(x);
+    const auto log2_of_e = log2(Fixed::e());
+    return log2_of_x / log2_of_e;
+}
+
+// Base-10 logarithm, obtained by change of base: log10(x) = log2(x) / log2(10).
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> log10(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    const auto log2_of_x = log2(x);
+    const auto log2_of_ten = log2(Fixed(10));
+    return log2_of_x / log2_of_ten;
+}
+
+// Returns ln(1 + x). The sum is formed first and then passed to log(); no
+// separate small-argument path is used.
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> log1p(fixed<B, I, F, R> x) noexcept
+{
+    const auto shifted = 1 + x;
+    return log(shifted);
+}
+
+// Cube root of a fixed-point value.
+//
+// Zero is returned unchanged and negative inputs use cbrt(-x) = -cbrt(x). For
+// positive inputs the root is extracted digit by digit on the raw integer,
+// using the integer cube-root method from Hacker's Delight (itself based on
+// the digit-by-digit square-root algorithm).
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> cbrt(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+
+    if (x == Fixed(0))
+    {
+        return x;
+    }
+    if (x < Fixed(0))
+    {
+        return -cbrt(-x);
+    }
+    assert(x >= Fixed(0));
+
+    // Start at the greatest power of eight that's less than the argument.
+    int shift = ((detail::find_highest_bit(x.raw_value()) + 2*F) / 3 * 3);
+    I remainder = I{x.raw_value()};
+    I root = 0;
+
+    // Extracts result bits for every multiple-of-three shift from the current
+    // 'shift' down to zero, consuming 'remainder' as it goes.
+    const auto extract_digits = [&]
+    {
+        while (shift >= 0)
+        {
+            root += root;
+            const I trial = (3*root*(root + 1) + 1) << shift;
+            if (remainder >= trial)
+            {
+                remainder -= trial;
+                root++;
+            }
+            shift -= 3;
+        }
+    };
+
+    // The argument should be shifted by 2*F (there are two multiplications),
+    // but that could overflow even the intermediate type. The work is therefore
+    // split into two rounds of F bits each; a round depletes 'remainder' digit
+    // by digit, which makes room for the next shift.
+    remainder <<= F;
+    shift -= F;
+    extract_digits();
+
+    remainder <<= F;
+    shift += F;
+    extract_digits();
+
+    return Fixed::from_raw_value(static_cast<B>(root));
+}
+
+// Square root of a non-negative fixed-point value.
+//
+// Uses the binary digit-by-digit integer square root (see
+// https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Binary_numeral_system_.28base_2.29)
+// on the raw value, widened to the intermediate type I.
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> sqrt(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+
+    assert(x >= Fixed(0));
+    if (x == Fixed(0))
+    {
+        return x;
+    }
+
+    // Pre-shift by F so the integer root comes out with F fractional bits.
+    I remainder = I{x.raw_value()} << F;
+    I root = 0;
+
+    // "bit" starts at the greatest power of four that's less than the argument
+    // and walks down two bit positions per iteration.
+    I bit = I{1} << ((detail::find_highest_bit(x.raw_value()) + F) / 2 * 2);
+    while (bit != 0)
+    {
+        const I trial = root + bit;
+        root >>= 1;
+        if (remainder >= trial)
+        {
+            remainder -= trial;
+            root += bit;
+        }
+        bit >>= 2;
+    }
+
+    // Round the last digit up if necessary
+    if (remainder > root)
+    {
+        root++;
+    }
+
+    return Fixed::from_raw_value(static_cast<B>(root));
+}
+
+// Euclidean length sqrt(x^2 + y^2). The squares are formed directly in the
+// fixed-point type, so large inputs can exceed its range. Asserts that the two
+// arguments are not both zero.
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> hypot(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept
+{
+    assert(x != 0 || y != 0);
+    const auto sum_of_squares = x*x + y*y;
+    return sqrt(sum_of_squares);
+}
+
+//
+// Trigonometry functions
+//
+
+// Sine of an angle given in radians.
+//
+// Implements the fifth-order curve-fitting approximation originally described
+// by Jasper Vijn on coranac.com, which has a worst-case relative error of
+// 0.07% (over [-pi:pi]).
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> sin(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+
+    // Reduce to one rotation and rescale so that a quarter turn equals 1;
+    // the value is now in [-4..+4].
+    x = fmod(x, Fixed::two_pi());
+    x = x / Fixed::half_pi();
+
+    // Move negative angles into the positive range.
+    if (x < Fixed(0)) {
+        x += Fixed(4);
+    }
+
+    // The second half of the rotation mirrors the first with the sign flipped;
+    // fold it down to [0..2].
+    const bool second_half = x > Fixed(2);
+    if (second_half) {
+        x -= Fixed(2);
+    }
+    const int sign = second_half ? -1 : +1;
+
+    // Mirror the second quarter onto the first, leaving [0..1].
+    if (x > Fixed(1)) {
+        x = Fixed(2) - x;
+    }
+
+    const Fixed x2 = x*x;
+    const auto poly = Fixed::pi() - x2*(Fixed::two_pi() - 5 - x2*(Fixed::pi() - 3));
+    return sign * x * poly/2;
+}
+
+// Cosine, expressed through sin() with a quarter-turn phase shift.
+template <typename B, typename I, unsigned int F, bool R>
+inline fixed<B, I, F, R> cos(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    // For positive x, adding pi/2 could overflow, so subtract 3*pi/2 instead
+    // (the same angle modulo a full turn). Otherwise add pi/2 directly.
+    const Fixed shifted = (x > Fixed(0))
+        ? x - (Fixed::two_pi() - Fixed::half_pi())
+        : Fixed::half_pi() + x;
+    return sin(shifted);
+}
+
+// Tangent, computed as sin(x) / cos(x).
+template <typename B, typename I, unsigned int F, bool R>
+inline fixed<B, I, F, R> tan(fixed<B, I, F, R> x) noexcept
+{
+    const auto cosine = cos(x);
+
+    // Tangent goes to infinity at 90 and -90 degrees, which fixed-point maths
+    // cannot represent, so require the cosine to be more than one raw unit
+    // away from zero.
+    assert(abs(cosine).raw_value() > 1);
+
+    const auto sine = sin(x);
+    return sine / cosine;
+}
+
+namespace detail {
+
+// Calculates atan(x) for x in the range [0,1], using an odd polynomial
+// x*(C + x^2*(B + x^2*A)) evaluated in Horner form.
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> atan_sanitized(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    assert(x >= Fixed(0) && x <= Fixed(1));
+
+    constexpr auto fA = Fixed::template from_fixed_point<63>(  716203666280654660ll); //  0.0776509570923569
+    constexpr auto fB = Fixed::template from_fixed_point<63>(-2651115102768076601ll); // -0.287434475393028
+    constexpr auto fC = Fixed::template from_fixed_point<63>( 9178930894564541004ll); //  0.995181681698119 (PI/4 - A - B)
+
+    const auto x_squared = x * x;
+    auto acc = fA*x_squared + fB;
+    acc = acc*x_squared + fC;
+    return acc*x;
+}
+
+// Calculate atan(y / x), assuming x != 0.
+//
+// Forming y/x first can overflow the fixed-point range when x is very small.
+// For a quotient q > 1, atan(q) is evaluated through atan(1/q) anyway, so the
+// reciprocal quotient x/y is computed directly here. This avoids the overflow
+// and the loss of information, improving accuracy for very small x.
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> atan_div(fixed<B, I, F, R> y, fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    assert(x != Fixed(0));
+
+    // atan is odd, so work on magnitudes and negate the result when exactly
+    // one of y and x is negative (i.e. when y / x is negative).
+    const bool negate = (y < Fixed(0)) != (x < Fixed(0));
+    if (y < Fixed(0)) {
+        y = -y;
+    }
+    if (x < Fixed(0)) {
+        x = -x;
+    }
+    assert(y >= Fixed(0));
+    assert(x > Fixed(0));
+
+    // Always divide the smaller by the larger so the quotient stays in [0,1].
+    const Fixed angle = (y > x)
+        ? Fixed::half_pi() - detail::atan_sanitized(x / y)
+        : detail::atan_sanitized(y / x);
+    return negate ? -angle : angle;
+}
+
+}
+
+// Arctangent of x, in radians.
+//
+// Negative inputs use atan(-x) = -atan(x); inputs above 1 use
+// atan(x) = pi/2 - atan(1/x), so the polynomial core only sees [0,1].
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> atan(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+
+    const bool negative = x < Fixed(0);
+    if (negative)
+    {
+        x = -x;
+    }
+
+    const Fixed angle = (x > Fixed(1))
+        ? Fixed::half_pi() - detail::atan_sanitized(Fixed(1) / x)
+        : detail::atan_sanitized(x);
+
+    return negative ? -angle : angle;
+}
+
+// Arcsine of x for x in [-1, +1], computed as atan(x / sqrt(1 - x^2)).
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> asin(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    assert(x >= Fixed(-1) && x <= Fixed(+1));
+
+    const auto cos_squared = Fixed(1) - x * x;
+    if (cos_squared != Fixed(0))
+    {
+        return detail::atan_div(x, sqrt(cos_squared));
+    }
+
+    // 1 - x^2 == 0: the denominator vanishes, so return +/- pi/2 with the
+    // sign of x.
+    return copysign(Fixed::half_pi(), x);
+}
+
+// Arccosine of x for x in [-1, +1], computed with the half-angle form
+// acos(x) = 2 * atan(sqrt(1 - x^2) / (1 + x)).
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> acos(fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+    assert(x >= Fixed(-1) && x <= Fixed(+1));
+
+    // At x == -1 the denominator 1 + x is zero; the answer there is pi.
+    if (x == Fixed(-1))
+    {
+        return Fixed::pi();
+    }
+
+    const auto sine = sqrt(Fixed(1) - x * x);
+    const auto half_angle = detail::atan_div(sine, Fixed(1) + x);
+    return Fixed(2)*half_angle;
+}
+
+// Two-argument arctangent: the angle of the point (x, y), in radians.
+// Asserts that x and y are not both zero.
+template <typename B, typename I, unsigned int F, bool R>
+fixed<B, I, F, R> atan2(fixed<B, I, F, R> y, fixed<B, I, F, R> x) noexcept
+{
+    using Fixed = fixed<B, I, F, R>;
+
+    // On the y axis the quotient y/x is undefined; the angle is +/- pi/2.
+    if (x == Fixed(0))
+    {
+        assert(y != Fixed(0));
+        if (y > Fixed(0))
+        {
+            return Fixed::half_pi();
+        }
+        return -Fixed::half_pi();
+    }
+
+    const auto quotient_angle = detail::atan_div(y, x);
+
+    // Right half-plane: atan(y/x) is already the answer.
+    if (x >= Fixed(0))
+    {
+        return quotient_angle;
+    }
+
+    // Left half-plane: shift by pi towards the side y lies on.
+    if (y >= Fixed(0))
+    {
+        return quotient_angle + Fixed::pi();
+    }
+    return quotient_angle - Fixed::pi();
+}
+
+}
+
+#endif
diff --git a/kiss/CHANGELOG b/kiss/CHANGELOG
new file mode 100644
index 0000000..2dd3603
--- /dev/null
+++ b/kiss/CHANGELOG
@@ -0,0 +1,123 @@
+1.3.0 2012-07-18
+ removed non-standard malloc.h from kiss_fft.h
+
+ moved -lm to end of link line
+
+ checked various return values
+
+ converted python Numeric code to NumPy
+
+ fixed test of int32_t on 64 bit OS
+
+ added padding in a couple of places to allow SIMD alignment of structs
+
+1.2.9 2010-05-27
+ threadsafe ( including OpenMP )
+
+ first edition of kissfft.hh the C++ template fft engine
+
+1.2.8
+ Changed memory.h to string.h -- apparently more standard
+
+ Added openmp extensions. This can have fairly linear speedups for larger FFT sizes.
+
+1.2.7
+ Shrank the real-fft memory footprint. Thanks to Galen Seitz.
+
+1.2.6 (Nov 14, 2006) The "thanks to GenArts" release.
+ Added multi-dimensional real-optimized FFT, see tools/kiss_fftndr
+ Thanks go to GenArts, Inc. for sponsoring the development.
+
+1.2.5 (June 27, 2006) The "release for no good reason" release.
+ Changed some harmless code to make some compilers' warnings go away.
+ Added some more digits to pi -- why not.
+ Added kiss_fft_next_fast_size() function to help people decide how much to pad.
+ Changed multidimensional test from 8 dimensions to only 3 to avoid testing
+ problems with fixed point (sorry Buckaroo Banzai).
+
+1.2.4 (Oct 27, 2005) The "oops, inverse fixed point real fft was borked" release.
+ Fixed scaling bug for inverse fixed point real fft -- also fixed test code that should've been failing.
+ Thanks to Jean-Marc Valin for bug report.
+
+ Use sys/types.h for more portable types than short,int,long => int16_t,int32_t,int64_t
+ If your system does not have these, you may need to define them -- but at least it breaks in a
+ loud and easily fixable way -- unlike silently using the wrong size type.
+
+ Hopefully tools/psdpng.c is fixed -- thanks to Steve Kellog for pointing out the weirdness.
+
+1.2.3 (June 25, 2005) The "you want to use WHAT as a sample" release.
+ Added ability to use 32 bit fixed point samples -- requires a 64 bit intermediate result, a la 'long long'
+
+ Added ability to do 4 FFTs in parallel by using SSE SIMD instructions. This is accomplished by
+ using the __m128 (vector of 4 floats) as kiss_fft_scalar. Define USE_SIMD to use this.
+
+ I know, I know ... this is drifting a bit from the "kiss" principle, but the speed advantages
+ make it worth it for some. Also recent gcc makes it SOO easy to use vectors of 4 floats like a POD type.
+
+1.2.2 (May 6, 2005) The Matthew release
+ Replaced fixed point division with multiply&shift. Thanks to Jean-Marc Valin for
+ discussions regarding. Considerable speedup for fixed-point.
+
+ Corrected overflow protection in real fft routines when using fixed point.
+ Finder's Credit goes to Robert Oschler of robodance for pointing me at the bug.
+ This also led to the CHECK_OVERFLOW_OP macro.
+
+1.2.1 (April 4, 2004)
+ compiles cleanly with just about every -W warning flag under the sun
+
+ reorganized kiss_fft_state so it could be read-only/const. This may be useful for embedded systems
+ that are willing to predeclare twiddle factors, factorization.
+
+ Fixed C_MUL,S_MUL on 16-bit platforms.
+
+ tmpbuf will only be allocated if input & output buffers are same
+ scratchbuf will only be allocated for ffts that are not multiples of 2,3,5
+
+ NOTE: The tmpbuf,scratchbuf changes may require synchronization code for multi-threaded apps.
+
+
+1.2 (Feb 23, 2004)
+ interface change -- cfg object is forward declaration of struct instead of void*
+ This maintains type safety and lets the compiler warn/error about stupid mistakes.
+ (prompted by suggestion from Erik de Castro Lopo)
+
+ small speed improvements
+
+ added psdpng.c -- sample utility that will create png spectrum "waterfalls" from an input file
+ ( not terribly useful yet)
+
+1.1.1 (Feb 1, 2004 )
+ minor bug fix -- only affects odd rank, in-place, multi-dimensional FFTs
+
+1.1 : (Jan 30,2004)
+ split sample_code/ into test/ and tools/
+
+ Removed 2-D fft and added N-D fft (arbitrary)
+
+ modified fftutil.c to allow multi-d FFTs
+
+ Modified core fft routine to allow an input stride via kiss_fft_stride()
+ (eased support of multi-D ffts)
+
+ Added fast convolution filtering (FIR filtering using overlap-scrap method, with tail scrap)
+
+ Add kfc.[ch]: the KISS FFT Cache. It takes care of allocs for you ( suggested by Oscar Lesta ).
+
+1.0.1 (Dec 15, 2003)
+ fixed bug that occurred when nfft==1. Thanks to Steven Johnson.
+
+1.0 : (Dec 14, 2003)
+ changed kiss_fft function from using a single buffer, to two buffers.
+ If the same buffer pointer is supplied for both in and out, kiss will
+ manage the buffer copies.
+
+ added kiss_fft2d and kiss_fftr as separate source files (declarations in kiss_fft.h )
+
+0.4 :(Nov 4,2003) optimized for radix 2,3,4,5
+
+0.3 :(Oct 28, 2003) woops, version 2 didn't actually factor out any radices other than 2.
+ Thanks to Steven Johnson for finding this one.
+
+0.2 :(Oct 27, 2003) added mixed radix, only radix 2,4 optimized versions
+
+0.1 :(May 19 2003) initial release, radix 2 only
diff --git a/kiss/COPYING b/kiss/COPYING
new file mode 100644
index 0000000..6b4b622
--- /dev/null
+++ b/kiss/COPYING
@@ -0,0 +1,11 @@
+Copyright (c) 2003-2010 Mark Borgerding . All rights reserved.
+
+KISS FFT is provided under:
+
+ SPDX-License-Identifier: BSD-3-Clause
+
+Being under the terms of the BSD 3-clause "New" or "Revised" License,
+according with:
+
+ LICENSES/BSD-3-Clause
+
diff --git a/kiss/README.md b/kiss/README.md
new file mode 100644
index 0000000..1138a0c
--- /dev/null
+++ b/kiss/README.md
@@ -0,0 +1,245 @@
+# KISS FFT [![Build Status](https://travis-ci.com/mborgerding/kissfft.svg?branch=master)](https://travis-ci.com/mborgerding/kissfft)
+
+KISS FFT - A mixed-radix Fast Fourier Transform based upon the principle,
+"Keep It Simple, Stupid."
+
+There are many great fft libraries already around. Kiss FFT is not trying
+to be better than any of them. It only attempts to be a reasonably efficient,
+moderately useful FFT that can use fixed or floating data types and can be
+incorporated into someone's C program in a few minutes with trivial licensing.
+
+## USAGE:
+
+The basic usage for 1-d complex FFT is:
+
+```c
+ #include "kiss_fft.h"
+ kiss_fft_cfg cfg = kiss_fft_alloc( nfft ,is_inverse_fft ,0,0 );
+ while ...
+
+ ... // put kth sample in cx_in[k].r and cx_in[k].i
+
+ kiss_fft( cfg , cx_in , cx_out );
+
+ ... // transformed. DC is in cx_out[0].r and cx_out[0].i
+
+ kiss_fft_free(cfg);
+```
+ - **Note**: frequency-domain data is stored from dc up to 2pi.
+ so cx_out[0] is the dc bin of the FFT
+ and cx_out[nfft/2] is the Nyquist bin (if exists)
+
+Declarations are in "kiss_fft.h", along with a brief description of the
+functions you'll need to use.
+
+Code definitions for 1d complex FFTs are in kiss_fft.c.
+
+You can do other cool stuff with the extras you'll find in tools/
+> - multi-dimensional FFTs
+> - real-optimized FFTs (returns the positive half-spectrum:
+ (nfft/2+1) complex frequency bins)
+> - fast convolution FIR filtering (not available for fixed point)
+> - spectrum image creation
+
+The core fft and most tools/ code can be compiled to use float, double,
+ Q15 short or Q31 samples. The default is float.
+
+## BUILDING:
+
+There are two functionally-equivalent build systems supported by kissfft:
+
+ - Make (traditional Makefiles for Unix / Linux systems)
+ - CMake (more modern and feature-rich build system developed by Kitware)
+
+To build kissfft, the following build environment can be used:
+
+ - GNU build environment with GCC, Clang and GNU Make or CMake (>= 3.6)
+ - Microsoft Visual C++ (MSVC) with CMake (>= 3.6)
+
+Additional libraries required to build and test kissfft include:
+
+ - libpng for psdpng tool,
+ - libfftw3 to validate kissfft results against it,
+ - python 2/3 with Numpy to validate kissfft results against it.
+ - OpenMP supported by GCC, Clang or MSVC for multi-core FFT transformations
+
+Environments like Cygwin and MinGW can most likely be used to build kissfft
+for the Windows platform, but this has not been tested to date.
+
+Both Make and CMake builds are easily configurable:
+
+ - `KISSFFT_DATATYPE=<datatype>` (for Make) or `-DKISSFFT_DATATYPE=<datatype>`
+ (for CMake) denote the principal datatype used by kissfft. It can be one
+ of the following:
+
+ - float (default)
+ - double
+ - int16_t
+ - int32_t
+ - SIMD (requires SSE instruction set support on target CPU)
+
+ - `KISSFFT_OPENMP=1` (for Make) or `-DKISSFFT_OPENMP=ON` (for CMake) builds kissfft
+ with OpenMP support. Please note that a supported compiler is required and this
+ option is turned off by default.
+
+ - `KISSFFT_STATIC=1` (for Make) or `-DKISSFFT_STATIC=ON` (for CMake) instructs
+ the builder to create static library ('.lib' for Windows / '.a' for Unix or Linux).
+ By default, this option is turned off and the shared library is created
+ ('.dll' for Windows, '.so' for Linux or Unix, '.dylib' for Mac OSX)
+
+ - `-DKISSFFT_TEST=OFF` (for CMake) disables building tests for kissfft. On Make,
+ building tests is done separately by 'make testall' or 'make testsingle', so
+ no specific setting is required.
+
+ - `KISSFFT_TOOLS=0` (for Make) or `-DKISSFFT_TOOLS=OFF` (for CMake) builds kissfft
+ without command-line tools like 'fastconv'. By default the tools are built.
+
+ - `KISSFFT_USE_ALLOCA=1` (for Make) or `-DKISSFFT_USE_ALLOCA=ON` (for CMake)
+ build kissfft with 'alloca' usage instead of 'malloc' / 'free'.
+
+ - `PREFIX=/full/path/to/installation/prefix/directory` (for Make) or
+ `-DCMAKE_INSTALL_PREFIX=/full/path/to/installation/prefix/directory` (for CMake)
+ specifies the prefix directory to install kissfft into.
+
+For example, to build kissfft as a static library with 'int16_t' datatype and
+OpenMP support using Make, run the command from kissfft source tree:
+
+```
+make KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 all
+```
+
+The same configuration for CMake is:
+
+```
+mkdir build && cd build
+cmake -DKISSFFT_DATATYPE=int16_t -DKISSFFT_STATIC=ON -DKISSFFT_OPENMP=ON ..
+make all
+```
+
+To specify '/tmp/1234' as installation prefix directory, run:
+
+
+```
+make PREFIX=/tmp/1234 KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 install
+```
+
+or
+
+```
+mkdir build && cd build
+cmake -DCMAKE_INSTALL_PREFIX=/tmp/1234 -DKISSFFT_DATATYPE=int16_t -DKISSFFT_STATIC=ON -DKISSFFT_OPENMP=ON ..
+make all
+make install
+```
+
+## TESTING:
+
+To validate the build configured as an example above, run the following command from
+kissfft source tree:
+
+```
+make KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 testsingle
+```
+
+if using Make, or:
+
+```
+make test
+```
+
+if using CMake.
+
+To test all possible build configurations, please run an extended testsuite from
+kissfft source tree:
+
+```
+sh test/kissfft-testsuite.sh
+```
+
+Please note that the extended testsuite takes around 20-40 minutes depending on device
+it runs on. This testsuite is useful for reporting bugs or testing the pull requests.
+
+## BACKGROUND
+
+I started coding this because I couldn't find a fixed point FFT that didn't
+use assembly code. I started with floating point numbers so I could get the
+theory straight before working on fixed point issues. In the end, I had a
+little bit of code that could be recompiled easily to do ffts with short, float
+or double (other types should be easy too).
+
+Once I got my FFT working, I was curious about the speed compared to
+a well respected and highly optimized fft library. I don't want to criticize
+this great library, so let's call it FFT_BRANDX.
+During this process, I learned:
+
+> 1. FFT_BRANDX has more than 100K lines of code. The core of kiss_fft is about 500 lines (cpx 1-d).
+> 2. It took me an embarrassingly long time to get FFT_BRANDX working.
+> 3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB (without optimizing for size).
+> 4. FFT_BRANDX is roughly twice as fast as KISS FFT in default mode.
+
+It is wonderful that free, highly optimized libraries like FFT_BRANDX exist.
+But such libraries carry a huge burden of complexity necessary to extract every
+last bit of performance.
+
+**Sometimes simpler is better, even if it's not better.**
+
+## FREQUENTLY ASKED QUESTIONS:
+> Q: Can I use kissfft in a project with a ___ license?<br>
+> A: Yes. See LICENSE below.
+
+> Q: Why don't I get the output I expect?<br>
+> A: The two most common causes of this are
+> 1) scaling : is there a constant multiplier between what you got and what you want?
+> 2) mixed build environment -- all code must be compiled with same preprocessor
+> definitions for FIXED_POINT and kiss_fft_scalar
+
+> Q: Will you write/debug my code for me?<br>
+> A: Probably not unless you pay me. I am happy to answer pointed and topical questions, but
+> I may refer you to a book, a forum, or some other resource.
+
+
+## PERFORMANCE
+ (on Athlon XP 2100+, with gcc 2.96, float data type)
+
+Kiss performed 10000 1024-pt cpx ffts in .63 s of cpu time.
+For comparison, it took md5sum twice as long to process the same amount of data.
+Transforming 5 minutes of CD quality audio takes less than a second (nfft=1024).
+
+**DO NOT:**
+- use Kiss if you need the Fastest Fourier Transform in the World
+- ask me to add features that will bloat the code
+
+## UNDER THE HOOD
+
+Kiss FFT uses a time decimation, mixed-radix, out-of-place FFT. If you give it an input buffer
+and output buffer that are the same, a temporary buffer will be created to hold the data.
+
+No static data is used. The core routines of kiss_fft are thread-safe (but not all of the tools directory).
+
+No scaling is done for the floating point version (for speed).
+Scaling is done both ways for the fixed-point version (for overflow prevention).
+
+Optimized butterflies are used for factors 2,3,4, and 5.
+
+The real (i.e. not complex) optimization code only works for even length ffts. It does two half-length
+FFTs in parallel (packed into real&imag), and then combines them via twiddling. The result is
+nfft/2+1 complex frequency bins from DC to Nyquist. If you don't know what this means, search the web.
+
+The fast convolution filtering uses the overlap-scrap method, slightly
+modified to put the scrap at the tail.
+
+## LICENSE
+ Revised BSD License, see COPYING for verbiage.
+ Basically, "free to use&change, give credit where due, no guarantees"
+ Note this license is compatible with GPL at one end of the spectrum and closed, commercial software at
+ the other end. See http://www.fsf.org/licensing/licenses
+
+## TODO
+ - Add real optimization for odd length FFTs
+ - Document/revisit the input/output fft scaling
+ - Make doc describing the overlap (tail) scrap fast convolution filtering in kiss_fastfir.c
+ - Test all the ./tools/ code with fixed point (kiss_fastfir.c doesn't work, maybe others)
+
+## AUTHOR
+ Mark Borgerding
+ Mark@Borgerding.net
diff --git a/kiss/_kiss_fft_guts.h b/kiss/_kiss_fft_guts.h
new file mode 100644
index 0000000..4bd8d1c
--- /dev/null
+++ b/kiss/_kiss_fft_guts.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+/* kiss_fft.h
+ defines kiss_fft_scalar as either short or a float type
+ and defines
+ typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
+
+#ifndef _kiss_fft_guts_h
+#define _kiss_fft_guts_h
+
+#include "kiss_fft.h"
+#include "kiss_fft_log.h"
+#include <limits.h>
+
+#define MAXFACTORS 32
+/* e.g. an fft of length 128 has 4 factors
+ as far as kissfft is concerned
+ 4*4*4*2
+ */
+
+struct kiss_fft_state{ /* per-transform configuration referenced through kiss_fft_cfg */
+ int nfft; /* transform length */
+ int inverse; /* non-zero selects the inverse-transform branch in the butterflies */
+ int factors[2*MAXFACTORS]; /* two ints per factor; presumably (radix, remaining length) pairs -- confirm in kiss_fft.c */
+ kiss_fft_cpx twiddles[1]; /* first twiddle factor; NOTE(review): declared with one element, presumably over-allocated by the allocator -- confirm */
+};
+
+/*
+ Explanation of macros dealing with complex math:
+
+ C_MUL(m,a,b) : m = a*b
+ C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise
+ C_SUB( res, a,b) : res = a - b
+ C_SUBFROM( res , a) : res -= a
+ C_ADDTO( res , a) : res += a
+ * */
+#ifdef FIXED_POINT
+#include <stdint.h>
+#if (FIXED_POINT==32)
+# define FRACBITS 31
+# define SAMPPROD int64_t
+#define SAMP_MAX INT32_MAX
+#define SAMP_MIN INT32_MIN
+#else
+# define FRACBITS 15
+# define SAMPPROD int32_t
+#define SAMP_MAX INT16_MAX
+#define SAMP_MIN INT16_MIN
+#endif
+
+#if defined(CHECK_OVERFLOW)
+/* Warns when `a op b`, evaluated in the wide SAMPPROD type, falls outside
+   [SAMP_MIN, SAMP_MAX]. The arguments are cast to long / long long before being
+   handed to the printf-style warning so that the conversion specifiers match
+   whatever the platform uses for the sample and SAMPPROD types. */
+# define CHECK_OVERFLOW_OP(a,op,b) \
+ if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \
+ KISS_FFT_WARNING("overflow (%ld " #op" %ld) = %lld", (long)(a),(long)(b),(long long)((SAMPPROD)(a) op (SAMPPROD)(b))); }
+#endif
+
+
+# define smul(a,b) ( (SAMPPROD)(a)*(b) ) /* product computed in the wider SAMPPROD type */
+# define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS ) /* add half an LSB, then drop FRACBITS fraction bits */
+
+# define S_MUL(a,b) sround( smul(a,b) ) /* scalar fixed-point multiply, rounded back to kiss_fft_scalar */
+
+# define C_MUL(m,a,b) \
+ do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
+ (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0) /* complex multiply m = a*b; m must not alias a or b because m.r is written before m.i is computed */
+
+# define DIVSCALAR(x,k) \
+ (x) = sround( smul( x, SAMP_MAX/k ) ) /* x /= k, done as a multiply by SAMP_MAX/k; k is not parenthesised, so pass a simple expression */
+
+# define C_FIXDIV(c,div) \
+ do { DIVSCALAR( (c).r , div); \
+ DIVSCALAR( (c).i , div); }while (0) /* divide both parts of c by div */
+
+# define C_MULBYSCALAR( c, s ) \
+ do{ (c).r = sround( smul( (c).r , s ) ) ;\
+ (c).i = sround( smul( (c).i , s ) ) ; }while(0) /* scale both parts of c by the fixed-point scalar s */
+
+#else /* not FIXED_POINT*/
+
+# define S_MUL(a,b) ( (a)*(b) ) /* plain product in the non-fixed-point build */
+#define C_MUL(m,a,b) \
+ do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
+ (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) /* complex multiply m = a*b; m must not alias a or b because m.r is written before m.i is computed */
+# define C_FIXDIV(c,div) /* NOOP: expands to nothing in the non-fixed-point build */
+# define C_MULBYSCALAR( c, s ) \
+ do{ (c).r *= (s);\
+ (c).i *= (s); }while(0) /* scale both parts of c by s */
+#endif
+
+#ifndef CHECK_OVERFLOW_OP
+# define CHECK_OVERFLOW_OP(a,op,b) /* noop */
+#endif
+
+#define C_ADD( res, a,b) /* res = a + b, component-wise */\
+ do { \
+ CHECK_OVERFLOW_OP((a).r,+,(b).r)\
+ CHECK_OVERFLOW_OP((a).i,+,(b).i)\
+ (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \
+ }while(0)
+#define C_SUB( res, a,b) /* res = a - b, component-wise */\
+ do { \
+ CHECK_OVERFLOW_OP((a).r,-,(b).r)\
+ CHECK_OVERFLOW_OP((a).i,-,(b).i)\
+ (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \
+ }while(0)
+#define C_ADDTO( res , a) /* res += a, component-wise */\
+ do { \
+ CHECK_OVERFLOW_OP((res).r,+,(a).r)\
+ CHECK_OVERFLOW_OP((res).i,+,(a).i)\
+ (res).r += (a).r; (res).i += (a).i;\
+ }while(0)
+
+#define C_SUBFROM( res , a) /* res -= a, component-wise */\
+ do {\
+ CHECK_OVERFLOW_OP((res).r,-,(a).r)\
+ CHECK_OVERFLOW_OP((res).i,-,(a).i)\
+ (res).r -= (a).r; (res).i -= (a).i; \
+ }while(0)
+
+
+#ifdef FIXED_POINT
+# define KISS_FFT_COS(phase) floor(.5+SAMP_MAX * cos (phase)) /* cosine scaled by SAMP_MAX and rounded to nearest */
+# define KISS_FFT_SIN(phase) floor(.5+SAMP_MAX * sin (phase)) /* sine scaled by SAMP_MAX and rounded to nearest */
+# define HALF_OF(x) ((x)>>1) /* halve by a right shift */
+#elif defined(USE_SIMD)
+# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) /* same cosine value in every vector lane */
+# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) /* same sine value in every vector lane */
+# define HALF_OF(x) ((x)*_mm_set1_ps(.5)) /* halve every lane */
+#else
+# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) /* cosine converted to the scalar type */
+# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) /* sine converted to the scalar type */
+# define HALF_OF(x) ((x)*((kiss_fft_scalar).5)) /* halve by multiplying with 0.5 */
+#endif
+
+#define kf_cexp(x,phase) \
+ do{ \
+ (x)->r = KISS_FFT_COS(phase);\
+ (x)->i = KISS_FFT_SIN(phase);\
+ }while(0) /* stores cos(phase) in x->r and sin(phase) in x->i, i.e. the complex exponential of phase */
+
+
+/* a debugging function */
+#define pcpx(c)\
+ KISS_FFT_DEBUG("%g + %gi\n",(double)((c)->r),(double)((c)->i))
+
+
+#ifdef KISS_FFT_USE_ALLOCA
+// define this to allow use of alloca instead of malloc for temporary buffers
+// Temporary buffers are used in two case:
+// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
+// 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform.
+#include <alloca.h>
+#define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
+#define KISS_FFT_TMP_FREE(ptr)
+#else
+#define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
+#define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
+#endif
+
+#endif /* _kiss_fft_guts_h */
+
diff --git a/kiss/kfc.c b/kiss/kfc.c
new file mode 100644
index 0000000..a405d9b
--- /dev/null
+++ b/kiss/kfc.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#include "kfc.h"
+
+typedef struct cached_fft *kfc_cfg; /* handle to one cache node */
+
+struct cached_fft /* one entry of the singly linked cache list */
+{
+ int nfft; /* FFT size this entry was created for */
+ int inverse; /* direction flag this entry was created with */
+ kiss_fft_cfg cfg; /* configuration; points into the same allocation, just past this header */
+ kfc_cfg next; /* next entry, or NULL at the end of the list */
+};
+
+static kfc_cfg cache_root=NULL; /* head of the list; NULL while the cache is empty */
+static int ncached=0; /* number of entries currently in the list */
+
+/* Returns the cached configuration for (nfft, inverse). On a miss a new node is
+ * allocated, initialised and appended to the list. Returns NULL when the node
+ * cannot be allocated. */
+static kiss_fft_cfg find_cached_fft(int nfft,int inverse)
+{
+    size_t len;
+    kfc_cfg node;
+    kfc_cfg tail = NULL;
+
+    /* Linear scan; on a miss 'tail' is left at the last node so the new one
+       can be appended after it. */
+    for (node = cache_root; node != NULL; node = node->next) {
+        if ( node->nfft == nfft && node->inverse == inverse )
+            return node->cfg;
+        tail = node;
+    }
+
+    /* Cache miss. The first call passes no buffer and its result is discarded;
+       presumably it only reports the required size through 'len'. */
+    kiss_fft_alloc(nfft,inverse,0,&len);
+#ifdef USE_SIMD
+    int padding = (16-sizeof(struct cached_fft)) & 15;
+    /* extra room so the cfg can be placed on a 16 byte boundary */
+    len += padding;
+#endif
+    node = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len ));
+    if (node == NULL)
+        return NULL;
+
+    /* The configuration lives directly behind the node header. */
+#ifdef USE_SIMD
+    node->cfg = (kiss_fft_cfg) ((char*)(node+1)+padding);
+#else
+    node->cfg = (kiss_fft_cfg)(node+1);
+#endif
+    kiss_fft_alloc(nfft,inverse,node->cfg,&len);
+    node->nfft = nfft;
+    node->inverse = inverse;
+    node->next = NULL;
+
+    if ( tail != NULL )
+        tail->next = node;
+    else
+        cache_root = node;
+    ++ncached;
+
+    return node->cfg;
+}
+
+/* Frees every cached node and resets the cache to its empty state. */
+void kfc_cleanup(void)
+{
+    kfc_cfg cur=cache_root;
+    while (cur != NULL) {
+        kfc_cfg next = cur->next;
+        /* Nodes are obtained with KISS_FFT_MALLOC in find_cached_fft, so they
+           must be released with the matching KISS_FFT_FREE. Plain free() is
+           only correct when that macro happens to map to malloc(). */
+        KISS_FFT_FREE(cur);
+        cur = next;
+    }
+    ncached=0;
+    cache_root = NULL;
+}
+/* Forward complex FFT of size nfft, using the cached configuration for that
+   size (created on first use). */
+void kfc_fft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout)
+{
+    kiss_fft_cfg forward_cfg = find_cached_fft(nfft,0);
+    kiss_fft( forward_cfg,fin,fout );
+}
+
+/* Inverse complex FFT of size nfft, using the cached configuration for that
+   size (created on first use). */
+void kfc_ifft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout)
+{
+    kiss_fft_cfg inverse_cfg = find_cached_fft(nfft,1);
+    kiss_fft( inverse_cfg,fin,fout );
+}
+
+#ifdef KFC_TEST
+/* Test helper: terminates the program with status 1 unless the cache
+   currently holds exactly nc entries. */
+static void check(int nc)
+{
+    if (ncached == nc)
+        return;
+
+    fprintf(stderr,"ncached should be %d,but it is %d\n",nc,ncached);
+    exit(1);
+}
+
+int main(void) /* self-test, built only when KFC_TEST is defined: checks the cache entry count after each step */
+{
+ kiss_fft_cpx buf1[1024],buf2[1024];
+ memset(buf1,0,sizeof(buf1));
+ check(0); /* cache starts empty */
+ kfc_fft(512,buf1,buf2);
+ check(1); /* first forward 512-point FFT adds one entry */
+ kfc_fft(512,buf1,buf2);
+ check(1); /* same size and direction reuses that entry */
+ kfc_ifft(512,buf1,buf2);
+ check(2); /* the inverse direction gets its own entry */
+ kfc_cleanup();
+ check(0); /* cleanup empties the cache */
+ return 0;
+}
+#endif
diff --git a/kiss/kfc.h b/kiss/kfc.h
new file mode 100644
index 0000000..d7d8c1b
--- /dev/null
+++ b/kiss/kfc.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#ifndef KFC_H
+#define KFC_H
+#include "kiss_fft.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+KFC -- Kiss FFT Cache
+
+Not needing to deal with kiss_fft_alloc and a config
+object may be handy for a lot of programs.
+
+KFC uses the underlying KISS FFT functions, but caches the config object.
+The first time kfc_fft or kfc_ifft is called for a given FFT size, the cfg
+object is created for it. All subsequent calls with that size and direction
+use the cached configuration object.
+
+NOTE:
+You should probably not use this if your program will be using a lot
+of various sizes of FFTs. There is a linear search through the
+cached objects. If you are only using one or two FFT sizes, this
+will be negligible. Otherwise, you may want to use another method
+of managing the cfg objects.
+
+ There is no automated cleanup of the cached objects. This could lead
+to large memory usage in a program that uses a lot of *DIFFERENT*
+sized FFTs. If you want to force all cached cfg objects to be freed,
+call kfc_cleanup.
+
+ */
+
+/*forward complex FFT */
+void KISS_FFT_API kfc_fft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout);
+/*reverse complex FFT */
+void KISS_FFT_API kfc_ifft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout);
+
+/*free all cached objects*/
+void KISS_FFT_API kfc_cleanup(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/kiss/kiss_fft.c b/kiss/kiss_fft.c
new file mode 100644
index 0000000..58c24a0
--- /dev/null
+++ b/kiss/kiss_fft.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+
+#include "_kiss_fft_guts.h"
+/* The guts header contains all the multiplication and addition macros that are defined for
+ fixed or floating point complex numbers. It also declares the kf_ internal functions.
+ */
+
+static void kf_bfly2(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ kiss_fft_cpx * Fout2;
+ kiss_fft_cpx * tw1 = st->twiddles;
+ kiss_fft_cpx t;
+ Fout2 = Fout + m;
+ do{
+ C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
+
+ C_MUL (t, *Fout2 , *tw1);
+ tw1 += fstride;
+ C_SUB( *Fout2 , *Fout , t );
+ C_ADDTO( *Fout , t );
+ ++Fout2;
+ ++Fout;
+ }while (--m);
+}
+
+static void kf_bfly4(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ const size_t m
+ )
+{
+ kiss_fft_cpx *tw1,*tw2,*tw3;
+ kiss_fft_cpx scratch[6];
+ size_t k=m;
+ const size_t m2=2*m;
+ const size_t m3=3*m;
+
+
+ tw3 = tw2 = tw1 = st->twiddles;
+
+ do {
+ C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
+
+ C_MUL(scratch[0],Fout[m] , *tw1 );
+ C_MUL(scratch[1],Fout[m2] , *tw2 );
+ C_MUL(scratch[2],Fout[m3] , *tw3 );
+
+ C_SUB( scratch[5] , *Fout, scratch[1] );
+ C_ADDTO(*Fout, scratch[1]);
+ C_ADD( scratch[3] , scratch[0] , scratch[2] );
+ C_SUB( scratch[4] , scratch[0] , scratch[2] );
+ C_SUB( Fout[m2], *Fout, scratch[3] );
+ tw1 += fstride;
+ tw2 += fstride*2;
+ tw3 += fstride*3;
+ C_ADDTO( *Fout , scratch[3] );
+
+ if(st->inverse) {
+ Fout[m].r = scratch[5].r - scratch[4].i;
+ Fout[m].i = scratch[5].i + scratch[4].r;
+ Fout[m3].r = scratch[5].r + scratch[4].i;
+ Fout[m3].i = scratch[5].i - scratch[4].r;
+ }else{
+ Fout[m].r = scratch[5].r + scratch[4].i;
+ Fout[m].i = scratch[5].i - scratch[4].r;
+ Fout[m3].r = scratch[5].r - scratch[4].i;
+ Fout[m3].i = scratch[5].i + scratch[4].r;
+ }
+ ++Fout;
+ }while(--k);
+}
+
+static void kf_bfly3(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ size_t m
+ )
+{
+ size_t k=m;
+ const size_t m2 = 2*m;
+ kiss_fft_cpx *tw1,*tw2;
+ kiss_fft_cpx scratch[5];
+ kiss_fft_cpx epi3;
+ epi3 = st->twiddles[fstride*m];
+
+ tw1=tw2=st->twiddles;
+
+ do{
+ C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
+
+ C_MUL(scratch[1],Fout[m] , *tw1);
+ C_MUL(scratch[2],Fout[m2] , *tw2);
+
+ C_ADD(scratch[3],scratch[1],scratch[2]);
+ C_SUB(scratch[0],scratch[1],scratch[2]);
+ tw1 += fstride;
+ tw2 += fstride*2;
+
+ Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+ Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+ C_MULBYSCALAR( scratch[0] , epi3.i );
+
+ C_ADDTO(*Fout,scratch[3]);
+
+ Fout[m2].r = Fout[m].r + scratch[0].i;
+ Fout[m2].i = Fout[m].i - scratch[0].r;
+
+ Fout[m].r -= scratch[0].i;
+ Fout[m].i += scratch[0].r;
+
+ ++Fout;
+ }while(--k);
+}
+
+static void kf_bfly5(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+ int u;
+ kiss_fft_cpx scratch[13];
+ kiss_fft_cpx * twiddles = st->twiddles;
+ kiss_fft_cpx *tw;
+ kiss_fft_cpx ya,yb;
+ ya = twiddles[fstride*m];
+ yb = twiddles[fstride*2*m];
+
+ Fout0=Fout;
+ Fout1=Fout0+m;
+ Fout2=Fout0+2*m;
+ Fout3=Fout0+3*m;
+ Fout4=Fout0+4*m;
+
+ tw=st->twiddles;
+ for ( u=0; u<m; ++u ) {
+ C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
+ scratch[0] = *Fout0;
+
+ C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+ C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+ C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+ C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+ C_ADD( scratch[7],scratch[1],scratch[4]);
+ C_SUB( scratch[10],scratch[1],scratch[4]);
+ C_ADD( scratch[8],scratch[2],scratch[3]);
+ C_SUB( scratch[9],scratch[2],scratch[3]);
+
+ Fout0->r += scratch[7].r + scratch[8].r;
+ Fout0->i += scratch[7].i + scratch[8].i;
+
+ scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+ scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+
+ scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
+ scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
+
+ C_SUB(*Fout1,scratch[5],scratch[6]);
+ C_ADD(*Fout4,scratch[5],scratch[6]);
+
+ scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+ scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+ scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
+ scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
+
+ C_ADD(*Fout2,scratch[11],scratch[12]);
+ C_SUB(*Fout3,scratch[11],scratch[12]);
+
+ ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+ }
+}
+
+/* perform the butterfly for one stage of a mixed radix FFT */
+static void kf_bfly_generic(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m,
+ int p
+ )
+{
+ int u,k,q1,q;
+ kiss_fft_cpx * twiddles = st->twiddles;
+ kiss_fft_cpx t;
+ int Norig = st->nfft;
+
+ kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
+ if (scratch == NULL){
+ KISS_FFT_ERROR("Memory allocation failed.");
+ return;
+ }
+
+ for ( u=0; u<m; ++u ) {
+ k=u;
+ for ( q1=0 ; q1<p ; ++q1 ) {
+ scratch[q1] = Fout[ k ];
+ C_FIXDIV(scratch[q1],p);
+ k += m;
+ }
+
+ k=u;
+ for ( q1=0 ; q1<p ; ++q1 ) {
+ int twidx=0;
+ Fout[ k ] = scratch[0];
+ for (q=1;q<p;++q ) {
+ twidx += fstride * k;
+ if (twidx>=Norig) twidx-=Norig;
+ C_MUL(t,scratch[q] , twiddles[twidx] );
+ C_ADDTO( Fout[ k ] ,t);
+ }
+ k += m;
+ }
+ }
+ KISS_FFT_TMP_FREE(scratch);
+}
+
+static
+void kf_work(
+ kiss_fft_cpx * Fout,
+ const kiss_fft_cpx * f,
+ const size_t fstride,
+ int in_stride,
+ int * factors,
+ const kiss_fft_cfg st
+ )
+{
+ kiss_fft_cpx * Fout_beg=Fout;
+ const int p=*factors++; /* the radix */
+ const int m=*factors++; /* stage's fft length/p */
+ const kiss_fft_cpx * Fout_end = Fout + p*m;
+
+#ifdef _OPENMP
+ // use openmp extensions at the
+ // top-level (not recursive)
+ if (fstride==1 && p<=5 && m!=1)
+ {
+ int k;
+
+ // execute the p different work units in different threads
+# pragma omp parallel for
+ for (k=0;k<p;++k)
+ kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
+ // all threads have joined by this point
+
+ switch (p) {
+ case 2: kf_bfly2(Fout,fstride,st,m); break;
+ case 3: kf_bfly3(Fout,fstride,st,m); break;
+ case 4: kf_bfly4(Fout,fstride,st,m); break;
+ case 5: kf_bfly5(Fout,fstride,st,m); break;
+ default: kf_bfly_generic(Fout,fstride,st,m,p); break;
+ }
+ return;
+ }
+#endif
+
+ if (m==1) {
+ do{
+ *Fout = *f;
+ f += fstride*in_stride;
+ }while(++Fout != Fout_end );
+ }else{
+ do{
+ // recursive call:
+ // DFT of size m*p performed by doing
+ // p instances of smaller DFTs of size m,
+ // each one takes a decimated version of the input
+ kf_work( Fout , f, fstride*p, in_stride, factors,st);
+ f += fstride*in_stride;
+ }while( (Fout += m) != Fout_end );
+ }
+
+ Fout=Fout_beg;
+
+ // recombine the p smaller DFTs
+ switch (p) {
+ case 2: kf_bfly2(Fout,fstride,st,m); break;
+ case 3: kf_bfly3(Fout,fstride,st,m); break;
+ case 4: kf_bfly4(Fout,fstride,st,m); break;
+ case 5: kf_bfly5(Fout,fstride,st,m); break;
+ default: kf_bfly_generic(Fout,fstride,st,m,p); break;
+ }
+}
+
+/* facbuf is populated by p1,m1,p2,m2, ...
+ where
+ p[i] * m[i] = m[i-1]
+ m0 = n */
+static
+void kf_factor(int n,int * facbuf)
+{
+ int p=4;
+ double floor_sqrt;
+ floor_sqrt = floor( sqrt((double)n) );
+
+ /*factor out powers of 4, powers of 2, then any remaining primes */
+ do {
+ while (n % p) {
+ switch (p) {
+ case 4: p = 2; break;
+ case 2: p = 3; break;
+ default: p += 2; break;
+ }
+ if (p > floor_sqrt)
+ p = n; /* no more factors, skip to end */
+ }
+ n /= p;
+ *facbuf++ = p;
+ *facbuf++ = n;
+ } while (n > 1);
+}
+
+/*
+ *
+ * User-callable function to allocate all necessary storage space for the fft.
+ *
+ * The return value is a contiguous block of memory, allocated with KISS_FFT_MALLOC
+ * (malloc by default), so it can be released with one kiss_fft_free() call (free() by default).
+ * */
+kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
+{
+ KISS_FFT_ALIGN_CHECK(mem)
+
+ kiss_fft_cfg st=NULL;
+ size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fft_state)
+ + sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/
+
+ if ( lenmem==NULL ) {
+ st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
+ }else{
+ if (mem != NULL && *lenmem >= memneeded)
+ st = (kiss_fft_cfg)mem;
+ *lenmem = memneeded;
+ }
+ if (st) {
+ int i;
+ st->nfft=nfft;
+ st->inverse = inverse_fft;
+
+ for (i=0;i<nfft;++i) {
+ const double pi=3.141592653589793238462643383279502884197169399375105820974944;
+ double phase = -2*pi*i / nfft;
+ if (st->inverse)
+ phase *= -1;
+ kf_cexp(st->twiddles+i, phase );
+ }
+
+ kf_factor(nfft,st->factors);
+ }
+ return st;
+}
+
+
+void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
+{
+ if (fin == fout) {
+ //NOTE: this is not really an in-place FFT algorithm.
+ //It just performs an out-of-place FFT into a temp buffer
+ if (fout == NULL){
+ KISS_FFT_ERROR("fout buffer NULL.");
+ return;
+ }
+
+ kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
+ if (tmpbuf == NULL){
+ KISS_FFT_ERROR("Memory allocation error.");
+ return;
+ }
+
+
+
+ kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
+ memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
+ KISS_FFT_TMP_FREE(tmpbuf);
+ }else{
+ kf_work( fout, fin, 1,in_stride, st->factors,st );
+ }
+}
+
+void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+ kiss_fft_stride(cfg,fin,fout,1);
+}
+
+
+void kiss_fft_cleanup(void)
+{
+ // nothing needed any more
+}
+
+int kiss_fft_next_fast_size(int n)
+{
+ while(1) {
+ int m=n;
+ while ( (m%2) == 0 ) m/=2;
+ while ( (m%3) == 0 ) m/=3;
+ while ( (m%5) == 0 ) m/=5;
+ if (m<=1)
+ break; /* n is completely factorable by twos, threes, and fives */
+ n++;
+ }
+ return n;
+}
diff --git a/kiss/kiss_fft.h b/kiss/kiss_fft.h
new file mode 100644
index 0000000..dce1034
--- /dev/null
+++ b/kiss/kiss_fft.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#ifndef KISS_FFT_H
+#define KISS_FFT_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+
+// Define KISS_FFT_SHARED macro to properly export symbols
+#ifdef KISS_FFT_SHARED
+# ifdef _WIN32
+# ifdef KISS_FFT_BUILD
+# define KISS_FFT_API __declspec(dllexport)
+# else
+# define KISS_FFT_API __declspec(dllimport)
+# endif
+# else
+# define KISS_FFT_API __attribute__ ((visibility ("default")))
+# endif
+#else
+# define KISS_FFT_API
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ ATTENTION!
+ If you would like a :
+ -- a utility that will handle the caching of fft objects
+ -- real-only (no imaginary time component ) FFT
+ -- a multi-dimensional FFT
+ -- a command-line utility to perform ffts
+ -- a command-line utility to perform fast-convolution filtering
+
+ Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c
+ in the tools/ directory.
+*/
+
+/* User may override KISS_FFT_MALLOC and/or KISS_FFT_FREE. */
+#ifdef USE_SIMD
+# include <xmmintrin.h>
+# define kiss_fft_scalar __m128
+# ifndef KISS_FFT_MALLOC
+# define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
+# define KISS_FFT_ALIGN_CHECK(ptr)
+# define KISS_FFT_ALIGN_SIZE_UP(size) ((size + 15UL) & ~0xFUL)
+# endif
+# ifndef KISS_FFT_FREE
+# define KISS_FFT_FREE _mm_free
+# endif
+#else
+# define KISS_FFT_ALIGN_CHECK(ptr)
+# define KISS_FFT_ALIGN_SIZE_UP(size) (size)
+# ifndef KISS_FFT_MALLOC
+# define KISS_FFT_MALLOC malloc
+# endif
+# ifndef KISS_FFT_FREE
+# define KISS_FFT_FREE free
+# endif
+#endif
+
+
+#ifdef FIXED_POINT
+#include <stdint.h>
+# if (FIXED_POINT == 32)
+# define kiss_fft_scalar int32_t
+# else
+# define kiss_fft_scalar int16_t
+# endif
+#else
+# ifndef kiss_fft_scalar
+/* default is float */
+# define kiss_fft_scalar float
+# endif
+#endif
+
+typedef struct {
+ kiss_fft_scalar r;
+ kiss_fft_scalar i;
+}kiss_fft_cpx;
+
+typedef struct kiss_fft_state* kiss_fft_cfg;
+
+/*
+ * kiss_fft_alloc
+ *
+ * Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
+ *
+ * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
+ *
+ * The return value from fft_alloc is a cfg buffer used internally
+ * by the fft routine or NULL.
+ *
+ * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc.
+ * The returned value should be free()d when done to avoid memory leaks.
+ *
+ * The state can be placed in a user supplied buffer 'mem':
+ * If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
+ * then the function places the cfg in mem and the size used in *lenmem
+ * and returns mem.
+ *
+ * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
+ * then the function returns NULL and places the minimum cfg
+ * buffer size in *lenmem.
+ * */
+
+kiss_fft_cfg KISS_FFT_API kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem);
+
+/*
+ * kiss_fft(cfg,fin,fout)
+ *
+ * Perform an FFT on a complex input buffer.
+ * for a forward FFT,
+ * fin should be f[0] , f[1] , ... ,f[nfft-1]
+ * fout will be F[0] , F[1] , ... ,F[nfft-1]
+ * Note that each element is complex and can be accessed like
+ f[k].r and f[k].i
+ * */
+void KISS_FFT_API kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+
+/*
+ A more generic version of the above function. It reads its input from every Nth sample.
+ * */
+void KISS_FFT_API kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride);
+
+/* If kiss_fft_alloc allocated a buffer, it is one contiguous
+ buffer and can be simply free()d when no longer needed*/
+#define kiss_fft_free KISS_FFT_FREE
+
+/*
+ Cleans up memory that used to be managed internally. Nothing needs cleaning up any more,
+ so the current implementation does nothing; calling it before you exit is optional.
+*/
+void KISS_FFT_API kiss_fft_cleanup(void);
+
+
+/*
+ * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5)
+ */
+int KISS_FFT_API kiss_fft_next_fast_size(int n);
+
+/* for real ffts, we need an even size */
+#define kiss_fftr_next_fast_size_real(n) \
+ (kiss_fft_next_fast_size( ((n)+1)>>1)<<1)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/kiss/kiss_fft_log.h b/kiss/kiss_fft_log.h
new file mode 100644
index 0000000..b5b631a
--- /dev/null
+++ b/kiss/kiss_fft_log.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#ifndef kiss_fft_log_h
+#define kiss_fft_log_h
+
+#define ERROR 1
+#define WARNING 2
+#define INFO 3
+#define DEBUG 4
+
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+
+#if defined(NDEBUG)
+# define KISS_FFT_LOG_MSG(severity, ...) ((void)0)
+#else
+# define KISS_FFT_LOG_MSG(severity, ...) \
+ fprintf(stderr, "[" #severity "] " __FILE__ ":" TOSTRING(__LINE__) " "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, "\n")
+#endif
+
+#define KISS_FFT_ERROR(...) KISS_FFT_LOG_MSG(ERROR, __VA_ARGS__)
+#define KISS_FFT_WARNING(...) KISS_FFT_LOG_MSG(WARNING, __VA_ARGS__)
+#define KISS_FFT_INFO(...) KISS_FFT_LOG_MSG(INFO, __VA_ARGS__)
+#define KISS_FFT_DEBUG(...) KISS_FFT_LOG_MSG(DEBUG, __VA_ARGS__)
+
+
+
+#endif /* kiss_fft_log_h */ \ No newline at end of file
diff --git a/kiss/kiss_fftnd.c b/kiss/kiss_fftnd.c
new file mode 100644
index 0000000..5d5b089
--- /dev/null
+++ b/kiss/kiss_fftnd.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#include "kiss_fftnd.h"
+#include "_kiss_fft_guts.h"
+
+struct kiss_fftnd_state{
+ int dimprod; /* dimsum would be mighty tasty right now */
+ int ndims;
+ int *dims;
+ kiss_fft_cfg *states; /* cfg states for each dimension */
+ kiss_fft_cpx * tmpbuf; /*buffer capable of hold the entire input */
+};
+
+kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem)
+{
+ KISS_FFT_ALIGN_CHECK(mem)
+
+ kiss_fftnd_cfg st = NULL;
+ int i;
+ int dimprod=1;
+ size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftnd_state));
+ char * ptr = NULL;
+
+ for (i=0;i<ndims;++i) {
+ size_t sublen=0;
+ kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen);
+ memneeded += sublen; /* st->states[i] */
+ dimprod *= dims[i];
+ }
+ memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(int) * ndims);/* st->dims */
+ memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(void*) * ndims);/* st->states */
+ memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(kiss_fft_cpx) * dimprod); /* st->tmpbuf */
+
+ if (lenmem == NULL) {/* allocate for the caller*/
+ ptr = (char *) malloc (memneeded);
+ } else { /* initialize supplied buffer if big enough */
+ if (*lenmem >= memneeded)
+ ptr = (char *) mem;
+ *lenmem = memneeded; /*tell caller how big struct is (or would be) */
+ }
+ if (!ptr)
+ return NULL; /*malloc failed or buffer too small */
+
+ st = (kiss_fftnd_cfg) ptr;
+ st->dimprod = dimprod;
+ st->ndims = ndims;
+ ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftnd_state));
+
+ st->states = (kiss_fft_cfg *)ptr;
+ ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(void*) * ndims);
+
+ st->dims = (int*)ptr;
+ ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(int) * ndims);
+
+ st->tmpbuf = (kiss_fft_cpx*)ptr;
+ ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(kiss_fft_cpx) * dimprod);
+
+ for (i=0;i<ndims;++i) {
+ size_t len;
+ st->dims[i] = dims[i];
+ kiss_fft_alloc (st->dims[i], inverse_fft, NULL, &len);
+ st->states[i] = kiss_fft_alloc (st->dims[i], inverse_fft, ptr,&len);
+ ptr += len;
+ }
+ /*
+Hi there!
+
+If you're looking at this particular code, it probably means you've got a brain-dead bounds checker
+that thinks the above code overwrites the end of the array.
+
+It doesn't.
+
+-- Mark
+
+P.S.
+The below code might give you some warm fuzzies and help convince you.
+ */
+ if ( ptr - (char*)st != (int)memneeded ) {
+ fprintf(stderr,
+ "################################################################################\n"
+ "Internal error! Memory allocation miscalculation\n"
+ "################################################################################\n"
+ );
+ }
+ return st;
+}
+
+/*
+ This works by tackling one dimension at a time.
+
+ In effect,
+ Each stage starts out by reshaping the matrix into a DixSi 2d matrix.
+ A Di-sized fft is taken of each column, transposing the matrix as it goes.
+
+Here's a 3-d example:
+Take a 2x3x4 matrix, laid out in memory as a contiguous buffer
+ [ [ [ a b c d ] [ e f g h ] [ i j k l ] ]
+ [ [ m n o p ] [ q r s t ] [ u v w x ] ] ]
+
+Stage 0 ( D=2): treat the buffer as a 2x12 matrix
+ [ [a b ... k l]
+ [m n ... w x] ]
+
+ FFT each column with size 2.
+ Transpose the matrix at the same time using kiss_fft_stride.
+
+ [ [ a+m a-m ]
+ [ b+n b-n]
+ ...
+ [ k+w k-w ]
+ [ l+x l-x ] ]
+
+ Note fft([x y]) == [x+y x-y]
+
+Stage 1 ( D=3) treats the buffer (the output of stage 0) as a 3x8 matrix,
+ [ [ a+m a-m b+n b-n c+o c-o d+p d-p ]
+ [ e+q e-q f+r f-r g+s g-s h+t h-t ]
+ [ i+u i-u j+v j-v k+w k-w l+x l-x ] ]
+
+ And perform FFTs (size=3) on each of the columns as above, transposing
+ the matrix as it goes. The output of stage 1 is
+ (Legend: ap = [ a+m e+q i+u ]
+ am = [ a-m e-q i-u ] )
+
+ [ [ sum(ap) fft(ap)[0] fft(ap)[1] ]
+ [ sum(am) fft(am)[0] fft(am)[1] ]
+ [ sum(bp) fft(bp)[0] fft(bp)[1] ]
+ [ sum(bm) fft(bm)[0] fft(bm)[1] ]
+ [ sum(cp) fft(cp)[0] fft(cp)[1] ]
+ [ sum(cm) fft(cm)[0] fft(cm)[1] ]
+ [ sum(dp) fft(dp)[0] fft(dp)[1] ]
+ [ sum(dm) fft(dm)[0] fft(dm)[1] ] ]
+
+Stage 2 ( D=4) treats this buffer as a 4*6 matrix,
+ [ [ sum(ap) fft(ap)[0] fft(ap)[1] sum(am) fft(am)[0] fft(am)[1] ]
+ [ sum(bp) fft(bp)[0] fft(bp)[1] sum(bm) fft(bm)[0] fft(bm)[1] ]
+ [ sum(cp) fft(cp)[0] fft(cp)[1] sum(cm) fft(cm)[0] fft(cm)[1] ]
+ [ sum(dp) fft(dp)[0] fft(dp)[1] sum(dm) fft(dm)[0] fft(dm)[1] ] ]
+
+ Then FFTs each column, transposing as it goes.
+
+ The resulting matrix is the 3d FFT of the 2x3x4 input matrix.
+
+ Note as a sanity check that the first element of the final
+ stage's output (DC term) is
+ sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] )
+ , i.e. the summation of all 24 input elements.
+
+*/
+void kiss_fftnd(kiss_fftnd_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+ int i,k;
+ const kiss_fft_cpx * bufin=fin;
+ kiss_fft_cpx * bufout;
+
+ /*arrange it so the last bufout == fout*/
+ if ( st->ndims & 1 ) {
+ bufout = fout;
+ if (fin==fout) {
+ memcpy( st->tmpbuf, fin, sizeof(kiss_fft_cpx) * st->dimprod );
+ bufin = st->tmpbuf;
+ }
+ }else
+ bufout = st->tmpbuf;
+
+ for ( k=0; k < st->ndims; ++k) {
+ int curdim = st->dims[k];
+ int stride = st->dimprod / curdim;
+
+ for ( i=0 ; i<stride ; ++i )
+ kiss_fft_stride( st->states[k], bufin+i , bufout+i*curdim, stride );
+
+ /*toggle back and forth between the two buffers*/
+ if (bufout == st->tmpbuf){
+ bufout = fout;
+ bufin = st->tmpbuf;
+ }else{
+ bufout = st->tmpbuf;
+ bufin = fout;
+ }
+ }
+}
diff --git a/kiss/kiss_fftnd.h b/kiss/kiss_fftnd.h
new file mode 100644
index 0000000..956ba94
--- /dev/null
+++ b/kiss/kiss_fftnd.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#ifndef KISS_FFTND_H
+#define KISS_FFTND_H
+
+#include "kiss_fft.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct kiss_fftnd_state * kiss_fftnd_cfg;
+
+kiss_fftnd_cfg KISS_FFT_API kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem);
+void KISS_FFT_API kiss_fftnd(kiss_fftnd_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/kiss/kiss_fftndr.c b/kiss/kiss_fftndr.c
new file mode 100644
index 0000000..e979d03
--- /dev/null
+++ b/kiss/kiss_fftndr.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#include "kiss_fftndr.h"
+#include "_kiss_fft_guts.h"
+#define MAX(x,y) ( ( (x)<(y) )?(y):(x) )
+
+struct kiss_fftndr_state
+{
+ int dimReal;
+ int dimOther;
+ kiss_fftr_cfg cfg_r;
+ kiss_fftnd_cfg cfg_nd;
+ void * tmpbuf;
+};
+
+static int prod(const int *dims, int ndims)
+{
+ int x=1;
+ while (ndims--)
+ x *= *dims++;
+ return x;
+}
+
+kiss_fftndr_cfg kiss_fftndr_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem)
+{
+ KISS_FFT_ALIGN_CHECK(mem)
+
+ kiss_fftndr_cfg st = NULL;
+ size_t nr=0 , nd=0,ntmp=0;
+ int dimReal = dims[ndims-1];
+ int dimOther = prod(dims,ndims-1);
+ size_t memneeded;
+ char * ptr = NULL;
+
+ (void)kiss_fftr_alloc(dimReal,inverse_fft,NULL,&nr);
+ (void)kiss_fftnd_alloc(dims,ndims-1,inverse_fft,NULL,&nd);
+ ntmp =
+ MAX( 2*dimOther , dimReal+2) * sizeof(kiss_fft_scalar) // freq buffer for one pass
+ + dimOther*(dimReal+2) * sizeof(kiss_fft_scalar); // large enough to hold entire input in case of in-place
+
+ memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof( struct kiss_fftndr_state )) + KISS_FFT_ALIGN_SIZE_UP(nr) + KISS_FFT_ALIGN_SIZE_UP(nd) + KISS_FFT_ALIGN_SIZE_UP(ntmp);
+
+ if (lenmem==NULL) {
+ ptr = (char*) malloc(memneeded);
+ }else{
+ if (*lenmem >= memneeded)
+ ptr = (char *)mem;
+ *lenmem = memneeded;
+ }
+ if (ptr==NULL)
+ return NULL;
+
+ st = (kiss_fftndr_cfg) ptr;
+ memset( st , 0 , memneeded);
+ ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftndr_state));
+
+ st->dimReal = dimReal;
+ st->dimOther = dimOther;
+ st->cfg_r = kiss_fftr_alloc( dimReal,inverse_fft,ptr,&nr);
+ ptr += KISS_FFT_ALIGN_SIZE_UP(nr);
+ st->cfg_nd = kiss_fftnd_alloc(dims,ndims-1,inverse_fft, ptr,&nd);
+ ptr += KISS_FFT_ALIGN_SIZE_UP(nd);
+ st->tmpbuf = ptr;
+
+ return st;
+}
+
+void kiss_fftndr(kiss_fftndr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
+{
+ int k1,k2;
+ int dimReal = st->dimReal;
+ int dimOther = st->dimOther;
+ int nrbins = dimReal/2+1;
+
+ kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf;
+ kiss_fft_cpx * tmp2 = tmp1 + MAX(nrbins,dimOther);
+
+ // timedata is N0 x N1 x ... x Nk real
+
+ // take a real chunk of data, fft it and place the output at correct intervals
+ for (k1=0;k1<dimOther;++k1) {
+ kiss_fftr( st->cfg_r, timedata + k1*dimReal , tmp1 ); // tmp1 now holds nrbins complex points
+ for (k2=0;k2<nrbins;++k2)
+ tmp2[ k2*dimOther+k1 ] = tmp1[k2];
+ }
+
+ for (k2=0;k2<nrbins;++k2) {
+ kiss_fftnd(st->cfg_nd, tmp2+k2*dimOther, tmp1); // tmp1 now holds dimOther complex points
+ for (k1=0;k1<dimOther;++k1)
+ freqdata[ k1*(nrbins) + k2] = tmp1[k1];
+ }
+}
+
+void kiss_fftndri(kiss_fftndr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
+{
+ int k1,k2;
+ int dimReal = st->dimReal;
+ int dimOther = st->dimOther;
+ int nrbins = dimReal/2+1;
+ kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf;
+ kiss_fft_cpx * tmp2 = tmp1 + MAX(nrbins,dimOther);
+
+ for (k2=0;k2<nrbins;++k2) {
+ for (k1=0;k1<dimOther;++k1)
+ tmp1[k1] = freqdata[ k1*(nrbins) + k2 ];
+ kiss_fftnd(st->cfg_nd, tmp1, tmp2+k2*dimOther);
+ }
+
+ for (k1=0;k1<dimOther;++k1) {
+ for (k2=0;k2<nrbins;++k2)
+ tmp1[k2] = tmp2[ k2*dimOther+k1 ];
+ kiss_fftri( st->cfg_r,tmp1,timedata + k1*dimReal);
+ }
+}
diff --git a/kiss/kiss_fftndr.h b/kiss/kiss_fftndr.h
new file mode 100644
index 0000000..0d56a1f
--- /dev/null
+++ b/kiss/kiss_fftndr.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#ifndef KISS_NDR_H
+#define KISS_NDR_H
+
+#include "kiss_fft.h"
+#include "kiss_fftr.h"
+#include "kiss_fftnd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct kiss_fftndr_state *kiss_fftndr_cfg;
+
+
+kiss_fftndr_cfg KISS_FFT_API kiss_fftndr_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem);
+/*
+ dims[ndims-1] must be even
+
+ If you don't care to allocate space, use mem = lenmem = NULL
+*/
+
+
+void KISS_FFT_API kiss_fftndr(
+ kiss_fftndr_cfg cfg,
+ const kiss_fft_scalar *timedata,
+ kiss_fft_cpx *freqdata);
+/*
+ input timedata has dims[0] X dims[1] X ... X dims[ndims-1] scalar points
+ output freqdata has dims[0] X dims[1] X ... X dims[ndims-1]/2+1 complex points
+*/
+
+void KISS_FFT_API kiss_fftndri(
+ kiss_fftndr_cfg cfg,
+ const kiss_fft_cpx *freqdata,
+ kiss_fft_scalar *timedata);
+/*
+ input and output dimensions are the exact opposite of kiss_fftndr
+*/
+
+
+#define kiss_fftndr_free free
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/kiss/kiss_fftr.c b/kiss/kiss_fftr.c
new file mode 100644
index 0000000..778a9a6
--- /dev/null
+++ b/kiss/kiss_fftr.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#include "kiss_fftr.h"
+#include "_kiss_fft_guts.h"
+
+struct kiss_fftr_state{
+ kiss_fft_cfg substate;
+ kiss_fft_cpx * tmpbuf;
+ kiss_fft_cpx * super_twiddles;
+#ifdef USE_SIMD
+ void * pad;
+#endif
+};
+
+kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
+{
+ KISS_FFT_ALIGN_CHECK(mem)
+
+ int i;
+ kiss_fftr_cfg st = NULL;
+ size_t subsize = 0, memneeded;
+
+ if (nfft & 1) {
+ KISS_FFT_ERROR("Real FFT optimization must be even.");
+ return NULL;
+ }
+ nfft >>= 1;
+
+ kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
+ memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2);
+
+ if (lenmem == NULL) {
+ st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded);
+ } else {
+ if (*lenmem >= memneeded)
+ st = (kiss_fftr_cfg) mem;
+ *lenmem = memneeded;
+ }
+ if (!st)
+ return NULL;
+
+ st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
+ st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
+ st->super_twiddles = st->tmpbuf + nfft;
+ kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
+
+ for (i = 0; i < nfft/2; ++i) {
+ double phase =
+ -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5);
+ if (inverse_fft)
+ phase *= -1;
+ kf_cexp (st->super_twiddles+i,phase);
+ }
+ return st;
+}
+
+void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
+{
+ /* input buffer timedata is stored row-wise */
+ int k,ncfft;
+ kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
+
+ if ( st->substate->inverse) {
+ KISS_FFT_ERROR("kiss fft usage error: improper alloc");
+ return;/* The caller did not call the correct function */
+ }
+
+ ncfft = st->substate->nfft;
+
+ /*perform the parallel fft of two real signals packed in real,imag*/
+ kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
+ /* The real part of the DC element of the frequency spectrum in st->tmpbuf
+ * contains the sum of the even-numbered elements of the input time sequence
+ * The imag part is the sum of the odd-numbered elements
+ *
+ * The sum of tdc.r and tdc.i is the sum of the input time sequence.
+ * yielding DC of input time sequence
+ * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
+ * yielding Nyquist bin of input time sequence
+ */
+
+ tdc.r = st->tmpbuf[0].r;
+ tdc.i = st->tmpbuf[0].i;
+ C_FIXDIV(tdc,2);
+ CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
+ CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
+ freqdata[0].r = tdc.r + tdc.i;
+ freqdata[ncfft].r = tdc.r - tdc.i;
+#ifdef USE_SIMD
+ freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
+#else
+ freqdata[ncfft].i = freqdata[0].i = 0;
+#endif
+
+ for ( k=1;k <= ncfft/2 ; ++k ) {
+ fpk = st->tmpbuf[k];
+ fpnk.r = st->tmpbuf[ncfft-k].r;
+ fpnk.i = - st->tmpbuf[ncfft-k].i;
+ C_FIXDIV(fpk,2);
+ C_FIXDIV(fpnk,2);
+
+ C_ADD( f1k, fpk , fpnk );
+ C_SUB( f2k, fpk , fpnk );
+ C_MUL( tw , f2k , st->super_twiddles[k-1]);
+
+ freqdata[k].r = HALF_OF(f1k.r + tw.r);
+ freqdata[k].i = HALF_OF(f1k.i + tw.i);
+ freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
+ freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
+ }
+}
+
+void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
+{
+ /* output buffer timedata is stored row-wise */
+ int k, ncfft;
+
+ if (st->substate->inverse == 0) {
+ KISS_FFT_ERROR("kiss fft usage error: improper alloc");
+ return;/* The caller did not call the correct function */
+ }
+
+ ncfft = st->substate->nfft;
+
+ st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
+ st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
+ C_FIXDIV(st->tmpbuf[0],2);
+
+ for (k = 1; k <= ncfft / 2; ++k) {
+ kiss_fft_cpx fk, fnkc, fek, fok, tmp;
+ fk = freqdata[k];
+ fnkc.r = freqdata[ncfft - k].r;
+ fnkc.i = -freqdata[ncfft - k].i;
+ C_FIXDIV( fk , 2 );
+ C_FIXDIV( fnkc , 2 );
+
+ C_ADD (fek, fk, fnkc);
+ C_SUB (tmp, fk, fnkc);
+ C_MUL (fok, tmp, st->super_twiddles[k-1]);
+ C_ADD (st->tmpbuf[k], fek, fok);
+ C_SUB (st->tmpbuf[ncfft - k], fek, fok);
+#ifdef USE_SIMD
+ st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
+#else
+ st->tmpbuf[ncfft - k].i *= -1;
+#endif
+ }
+ kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
+}
diff --git a/kiss/kiss_fftr.h b/kiss/kiss_fftr.h
new file mode 100644
index 0000000..7fd73d2
--- /dev/null
+++ b/kiss/kiss_fftr.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#ifndef KISS_FTR_H
+#define KISS_FTR_H
+
+#include "kiss_fft.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+
+ Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
+
+
+
+ */
+
+typedef struct kiss_fftr_state *kiss_fftr_cfg;
+
+
+kiss_fftr_cfg KISS_FFT_API kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
+/*
+ Creates the configuration handle taken by kiss_fftr() and kiss_fftri().
+
+ nfft must be even.
+
+ If you don't care to allocate space, use mem = lenmem = NULL.
+
+ NOTE(review): inverse_fft presumably selects which of the two transforms
+ the handle may be used with -- confirm against kiss_fftr.c.
+*/
+
+
+void KISS_FFT_API kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata);
+/*
+ Real-input transform.
+ input timedata has nfft scalar points
+ output freqdata has nfft/2+1 complex points
+*/
+
+void KISS_FFT_API kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata);
+/*
+ Counterpart of kiss_fftr() going from spectrum to real samples.
+ input freqdata has nfft/2+1 complex points
+ output timedata has nfft scalar points
+
+ If cfg is not flagged as an inverse configuration, the call reports a
+ usage error and returns without writing timedata.
+*/
+
+#define kiss_fftr_free KISS_FFT_FREE
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/m4/ax_cxx_compile_stdcxx.m4 b/m4/ax_cxx_compile_stdcxx.m4
index 43087b2..8edf515 100644
--- a/m4/ax_cxx_compile_stdcxx.m4
+++ b/m4/ax_cxx_compile_stdcxx.m4
@@ -10,13 +10,13 @@
#
# Check for baseline language coverage in the compiler for the specified
# version of the C++ standard. If necessary, add switches to CXX and
-# CXXCPP to enable support. VERSION may be '11' (for the C++11 standard)
-# or '14' (for the C++14 standard).
+# CXXCPP to enable support. VERSION may be '11', '14', '17', or '20' for
+# the respective C++ standard version.
#
# The second argument, if specified, indicates whether you insist on an
# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g.
# -std=c++11). If neither is specified, you get whatever works, with
-# preference for an extended mode.
+# preference for no added switch, and then for an extended mode.
#
# The third argument, if specified 'mandatory' or if left unspecified,
# indicates that baseline support for the specified C++ standard is
@@ -35,13 +35,15 @@
# Copyright (c) 2015 Moritz Klammler <moritz@klammler.eu>
# Copyright (c) 2016, 2018 Krzesimir Nowak <qdlacz@gmail.com>
# Copyright (c) 2019 Enji Cooper <yaneurabeya@gmail.com>
+# Copyright (c) 2020 Jason Merrill <jason@redhat.com>
+# Copyright (c) 2021 Jörn Heusipp <osmanx@problemloesungsmaschine.de>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
-#serial 11
+#serial 18
dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro
dnl (serial version number 13).
@@ -50,6 +52,7 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl
m4_if([$1], [11], [ax_cxx_compile_alternatives="11 0x"],
[$1], [14], [ax_cxx_compile_alternatives="14 1y"],
[$1], [17], [ax_cxx_compile_alternatives="17 1z"],
+ [$1], [20], [ax_cxx_compile_alternatives="20"],
[m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl
m4_if([$2], [], [],
[$2], [ext], [],
@@ -62,6 +65,16 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl
AC_LANG_PUSH([C++])dnl
ac_success=no
+ m4_if([$2], [], [dnl
+ AC_CACHE_CHECK(whether $CXX supports C++$1 features by default,
+ ax_cv_cxx_compile_cxx$1,
+ [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])],
+ [ax_cv_cxx_compile_cxx$1=yes],
+ [ax_cv_cxx_compile_cxx$1=no])])
+ if test x$ax_cv_cxx_compile_cxx$1 = xyes; then
+ ac_success=yes
+ fi])
+
m4_if([$2], [noext], [], [dnl
if test x$ac_success = xno; then
for alternative in ${ax_cxx_compile_alternatives}; do
@@ -91,9 +104,18 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl
dnl HP's aCC needs +std=c++11 according to:
dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf
dnl Cray's crayCC needs "-h std=c++11"
+ dnl MSVC needs -std:c++NN for C++17 and later (default is C++14)
for alternative in ${ax_cxx_compile_alternatives}; do
- for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}"; do
- cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch])
+ for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}" MSVC; do
+ if test x"$switch" = xMSVC; then
+ dnl AS_TR_SH maps both `:` and `=` to `_` so -std:c++17 would collide
+ dnl with -std=c++17. We suffix the cache variable name with _MSVC to
+ dnl avoid this.
+ switch=-std:c++${alternative}
+ cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_${switch}_MSVC])
+ else
+ cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch])
+ fi
AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch,
$cachevar,
[ac_save_CXX="$CXX"
@@ -140,7 +162,6 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11],
_AX_CXX_COMPILE_STDCXX_testbody_new_in_11
)
-
dnl Test body for checking C++14 support
m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14],
@@ -148,12 +169,24 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14],
_AX_CXX_COMPILE_STDCXX_testbody_new_in_14
)
+dnl Test body for checking C++17 support
+
m4_define([_AX_CXX_COMPILE_STDCXX_testbody_17],
_AX_CXX_COMPILE_STDCXX_testbody_new_in_11
_AX_CXX_COMPILE_STDCXX_testbody_new_in_14
_AX_CXX_COMPILE_STDCXX_testbody_new_in_17
)
+dnl Test body for checking C++20 support
+
+m4_define([_AX_CXX_COMPILE_STDCXX_testbody_20],
+ _AX_CXX_COMPILE_STDCXX_testbody_new_in_11
+ _AX_CXX_COMPILE_STDCXX_testbody_new_in_14
+ _AX_CXX_COMPILE_STDCXX_testbody_new_in_17
+ _AX_CXX_COMPILE_STDCXX_testbody_new_in_20
+)
+
+
dnl Tests for new features in C++11
m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[
@@ -165,7 +198,11 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[
#error "This is not a C++ compiler"
-#elif __cplusplus < 201103L
+// MSVC always sets __cplusplus to 199711L in older versions; newer versions
+// only set it correctly if /Zc:__cplusplus is specified as well as a
+// /std:c++NN switch:
+// https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/
+#elif __cplusplus < 201103L && !defined _MSC_VER
#error "This is not a C++11 compiler"
@@ -456,7 +493,7 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[
#error "This is not a C++ compiler"
-#elif __cplusplus < 201402L
+#elif __cplusplus < 201402L && !defined _MSC_VER
#error "This is not a C++14 compiler"
@@ -580,7 +617,7 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[
#error "This is not a C++ compiler"
-#elif __cplusplus < 201703L
+#elif __cplusplus < 201703L && !defined _MSC_VER
#error "This is not a C++17 compiler"
@@ -946,6 +983,36 @@ namespace cxx17
} // namespace cxx17
-#endif // __cplusplus < 201703L
+#endif // __cplusplus < 201703L && !defined _MSC_VER
+
+]])
+
+
+dnl Tests for new features in C++20
+
+m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_20], [[
+
+#ifndef __cplusplus
+
+#error "This is not a C++ compiler"
+
+#elif __cplusplus < 202002L && !defined _MSC_VER
+
+#error "This is not a C++20 compiler"
+
+#else
+
+#include <version>
+
+namespace cxx20
+{
+
+// As C++20 supports feature test macros in the standard, there is no
+// immediate need to actually test for feature availability on the
+// Autoconf side.
+
+} // namespace cxx20
+
+#endif // __cplusplus < 202002L && !defined _MSC_VER
]])
diff --git a/src/Buffer.h b/src/Buffer.h
index af52e93..2c2a65e 100644
--- a/src/Buffer.h
+++ b/src/Buffer.h
@@ -33,9 +33,17 @@
#include <vector>
#include <memory>
+#include <complex>
+#include "fpm/fixed.hpp"
+
+// Sample type of the floating-point signal path.
+using complexf = std::complex<float>;
+
+// Fixed-point sample types: fixed_16 is stored in an int16_t and uses an
+// int32_t for intermediate results. The trailing 14 is presumably the number
+// of fractional bits -- see fpm/fixed.hpp.
+using fixed_16 = fpm::fixed<std::int16_t, std::int32_t, 14>;
+using complexfix = std::complex<fixed_16>;
+using complexfix_wide = std::complex<fpm::fixed_16_16>;
/* Buffer is a container for a byte array, which is memory-aligned
- * to 32 bytes for SSE performance.
+ * to 32 bytes for SIMD performance.
*
* The allocation/freeing of the data is handled internally.
*/
diff --git a/src/CicEqualizer.h b/src/CicEqualizer.h
index 792da02..4510d0c 100644
--- a/src/CicEqualizer.h
+++ b/src/CicEqualizer.h
@@ -25,18 +25,10 @@
# include <config.h>
#endif
-
#include "ModPlugin.h"
#include <vector>
#include <sys/types.h>
-#include <complex>
-#ifdef __SSE__
-# include <xmmintrin.h>
-#endif
-
-
-typedef std::complex<float> complexf;
class CicEqualizer : public ModCodec
{
diff --git a/src/ConfigParser.cpp b/src/ConfigParser.cpp
index fb2c1a1..c92a520 100644
--- a/src/ConfigParser.cpp
+++ b/src/ConfigParser.cpp
@@ -63,6 +63,27 @@ static GainMode parse_gainmode(const std::string &gainMode_setting)
throw std::runtime_error("Configuration error");
}
+/* Maps the textual modulator.fft_engine setting onto an FFTEngine value.
+ *
+ * The comparison is case-insensitive. Accepted values are "fftw", "kiss"
+ * and "dexter". Any other value is reported on stderr and rejected with a
+ * std::runtime_error("Configuration error").
+ */
+static FFTEngine parse_fft_engine(const std::string &fft_engine_setting)
+{
+    string fft_engine_minuscule(fft_engine_setting);
+    // tolower() has undefined behaviour when handed a negative value, which
+    // is what a plain char holds for any non-ASCII byte on platforms where
+    // char is signed. Going through unsigned char keeps the call defined.
+    std::transform(fft_engine_minuscule.begin(), fft_engine_minuscule.end(),
+            fft_engine_minuscule.begin(),
+            [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+
+    if (fft_engine_minuscule == "fftw") {
+        return FFTEngine::FFTW;
+    }
+    else if (fft_engine_minuscule == "kiss") {
+        return FFTEngine::KISS;
+    }
+    else if (fft_engine_minuscule == "dexter") {
+        return FFTEngine::DEXTER;
+    }
+
+    cerr << "Modulator fft_engine setting '" << fft_engine_setting <<
+        "' not recognised." << endl;
+    throw std::runtime_error("Configuration error");
+}
+
static void parse_configfile(
const std::string& configuration_file,
mod_settings_t& mod_settings)
@@ -156,6 +177,9 @@ static void parse_configfile(
mod_settings.showProcessTime);
// modulator parameters:
+ const string fft_engine_setting = pt.Get("modulator.fft_engine", "fftw");
+ mod_settings.fftEngine = parse_fft_engine(fft_engine_setting);
+
const string gainMode_setting = pt.Get("modulator.gainmode", "var");
mod_settings.gainMode = parse_gainmode(gainMode_setting);
mod_settings.gainmodeVariance = pt.GetReal("modulator.normalise_variance",
diff --git a/src/ConfigParser.h b/src/ConfigParser.h
index ae76dee..3bacfdd 100644
--- a/src/ConfigParser.h
+++ b/src/ConfigParser.h
@@ -36,6 +36,12 @@
#include "TII.h"
#include "output/SDRDevice.h"
+enum class FFTEngine { // FFT implementation selected by the modulator.fft_engine setting
+ FFTW, // floating point in software; the default when the setting is absent
+ KISS, // fixed-point in software
+ DEXTER // fixed-point in FPGA; only usable in builds with HAVE_DEXTER defined
+};
+
struct mod_settings_t {
std::string startupCheck;
@@ -51,6 +57,8 @@ struct mod_settings_t {
bool useLimeOutput = false;
bool useBladeRFOutput = false;
+ FFTEngine fftEngine = FFTEngine::FFTW;
+
size_t outputRate = 2048000;
size_t clockRate = 0;
unsigned dabMode = 1;
diff --git a/src/DabMod.cpp b/src/DabMod.cpp
index 3b072c1..7866818 100644
--- a/src/DabMod.cpp
+++ b/src/DabMod.cpp
@@ -31,10 +31,8 @@
#endif
#include <memory>
-#include <complex>
#include <string>
#include <iostream>
-#include <iomanip>
#include <cstdlib>
#include <stdexcept>
#include <cstdio>
@@ -51,7 +49,6 @@
#include "Utils.h"
#include "Log.h"
#include "DabModulator.h"
-#include "InputMemory.h"
#include "OutputFile.h"
#include "FormatConverter.h"
#include "FrameMultiplexer.h"
@@ -75,16 +72,16 @@
* samples can have peaks up to about 48000. The value of 50000
* should guarantee that with a digital gain of 1.0, UHD never clips
* our samples.
+ *
+ * This only applies when fixed_point == false.
*/
static const float normalise_factor = 50000.0f;
-//Empirical normalisation factors used to normalise the samples to amplitude 1.
+// Empirical normalisation factors used to normalise the samples to amplitude 1.
static const float normalise_factor_file_fix = 81000.0f;
static const float normalise_factor_file_var = 46000.0f;
static const float normalise_factor_file_max = 46000.0f;
-typedef std::complex<float> complexf;
-
using namespace std;
volatile sig_atomic_t running = 1;
@@ -255,7 +252,11 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s)
shared_ptr<ModOutput> output;
if (s.useFileOutput) {
- if (s.fileOutputFormat == "complexf") {
+ if (s.fftEngine != FFTEngine::FFTW) {
+ // Intentionally ignore fileOutputFormat, it is always sc16
+ output = make_shared<OutputFile>(s.outputName, s.fileOutputShowMetadata);
+ }
+ else if (s.fileOutputFormat == "complexf") {
output = make_shared<OutputFile>(s.outputName, s.fileOutputShowMetadata);
}
else if (s.fileOutputFormat == "complexf_normalised") {
@@ -291,6 +292,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s)
else if (s.useUHDOutput) {
s.normalise = 1.0f / normalise_factor;
s.sdr_device_config.sampleRate = s.outputRate;
+ s.sdr_device_config.fixedPoint = (s.fftEngine != FFTEngine::FFTW);
auto uhddevice = make_shared<Output::UHD>(s.sdr_device_config);
output = make_shared<Output::SDR>(s.sdr_device_config, uhddevice);
rcs.enrol((Output::SDR*)output.get());
@@ -301,6 +303,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s)
/* We normalise the same way as for the UHD output */
s.normalise = 1.0f / normalise_factor;
s.sdr_device_config.sampleRate = s.outputRate;
+ if (s.fftEngine != FFTEngine::FFTW) throw runtime_error("soapy fixed_point unsupported");
auto soapydevice = make_shared<Output::Soapy>(s.sdr_device_config);
output = make_shared<Output::SDR>(s.sdr_device_config, soapydevice);
rcs.enrol((Output::SDR*)output.get());
@@ -320,6 +323,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s)
else if (s.useLimeOutput) {
/* We normalise the same way as for the UHD output */
s.normalise = 1.0f / normalise_factor;
+ if (s.fftEngine != FFTEngine::FFTW) throw runtime_error("limesdr fixed_point unsupported");
s.sdr_device_config.sampleRate = s.outputRate;
auto limedevice = make_shared<Output::Lime>(s.sdr_device_config);
output = make_shared<Output::SDR>(s.sdr_device_config, limedevice);
@@ -330,6 +334,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s)
else if (s.useBladeRFOutput) {
/* We normalise specifically for the BladeRF output : range [-2048; 2047] */
s.normalise = 2047.0f / normalise_factor;
+ if (s.fftEngine != FFTEngine::FFTW) throw runtime_error("bladerf fixed_point unsupported");
s.sdr_device_config.sampleRate = s.outputRate;
auto bladerfdevice = make_shared<Output::BladeRF>(s.sdr_device_config);
output = make_shared<Output::SDR>(s.sdr_device_config, bladerfdevice);
@@ -420,7 +425,8 @@ int launch_modulator(int argc, char* argv[])
ModulatorData m;
rcs.enrol(&m);
- {
+ // Neither KISS FFT used for fixedpoint nor the FFT Accelerator used for DEXTER need planning.
+ if (mod_settings.fftEngine == FFTEngine::FFTW) {
// This is mostly useful on ARM systems where FFTW planning takes some time. If we do it here
// it will be done before the modulator starts up
etiLog.level(debug) << "Running FFTW planning...";
@@ -442,7 +448,14 @@ int launch_modulator(int argc, char* argv[])
}
std::string output_format;
- if (mod_settings.useFileOutput and
+ if (mod_settings.fftEngine == FFTEngine::KISS) {
+ output_format = ""; //fixed point is native sc16, no converter needed
+ }
+ else if (mod_settings.fftEngine == FFTEngine::DEXTER) {
+ output_format = "s16"; // FPGA FFT Engine outputs s32
+ }
+ // else FFTW, i.e. floating point
+ else if (mod_settings.useFileOutput and
(mod_settings.fileOutputFormat == "s8" or
mod_settings.fileOutputFormat == "u8" or
mod_settings.fileOutputFormat == "s16")) {
diff --git a/src/DabModulator.cpp b/src/DabModulator.cpp
index 4a29132..5f7aaf6 100644
--- a/src/DabModulator.cpp
+++ b/src/DabModulator.cpp
@@ -3,7 +3,7 @@
Her Majesty the Queen in Right of Canada (Communications Research
Center Canada)
- Copyright (C) 2023
+ Copyright (C) 2024
Matthias P. Braendli, matthias.braendli@mpb.li
http://opendigitalradio.org
@@ -54,7 +54,6 @@
#include "SignalMultiplexer.h"
#include "TII.h"
#include "TimeInterleaver.h"
-#include "TimestampDecoder.h"
using namespace std;
@@ -142,14 +141,15 @@ int DabModulator::process(Buffer* dataOut)
auto cifMux = make_shared<FrameMultiplexer>(m_etiSource);
auto cifPart = make_shared<BlockPartitioner>(mode);
- auto cifMap = make_shared<QpskSymbolMapper>(m_nbCarriers);
- auto cifRef = make_shared<PhaseReference>(mode);
- auto cifFreq = make_shared<FrequencyInterleaver>(mode);
- auto cifDiff = make_shared<DifferentialModulator>(m_nbCarriers);
+ const bool fixedPoint = m_settings.fftEngine != FFTEngine::FFTW;
+ auto cifMap = make_shared<QpskSymbolMapper>(m_nbCarriers, fixedPoint);
+ auto cifRef = make_shared<PhaseReference>(mode, fixedPoint);
+ auto cifFreq = make_shared<FrequencyInterleaver>(mode, fixedPoint);
+ auto cifDiff = make_shared<DifferentialModulator>(m_nbCarriers, fixedPoint);
- auto cifNull = make_shared<NullSymbol>(m_nbCarriers);
- auto cifSig = make_shared<SignalMultiplexer>(
- (1 + m_nbSymbols) * m_nbCarriers * sizeof(complexf));
+ auto cifNull = make_shared<NullSymbol>(m_nbCarriers,
+ fixedPoint ? sizeof(complexfix) : sizeof(complexf));
+ auto cifSig = make_shared<SignalMultiplexer>();
// TODO this needs a review
bool useCicEq = false;
@@ -180,46 +180,79 @@ int DabModulator::process(Buffer* dataOut)
try {
tii = make_shared<TII>(
m_settings.dabMode,
- m_settings.tiiConfig);
+ m_settings.tiiConfig,
+ fixedPoint);
rcs.enrol(tii.get());
- tiiRef = make_shared<PhaseReference>(mode);
+ tiiRef = make_shared<PhaseReference>(mode, fixedPoint);
}
catch (const TIIError& e) {
etiLog.level(error) << "Could not initialise TII: " << e.what();
}
- auto cifOfdm = make_shared<OfdmGenerator>(
- (1 + m_nbSymbols),
- m_nbCarriers,
- m_spacing,
- m_settings.enableCfr,
- m_settings.cfrClip,
- m_settings.cfrErrorClip);
+ shared_ptr<ModPlugin> cifOfdm;
+
+ switch (m_settings.fftEngine) {
+ case FFTEngine::FFTW:
+ {
+ auto ofdm = make_shared<OfdmGeneratorCF32>(
+ (1 + m_nbSymbols),
+ m_nbCarriers,
+ m_spacing,
+ m_settings.enableCfr,
+ m_settings.cfrClip,
+ m_settings.cfrErrorClip);
+ rcs.enrol(ofdm.get());
+ cifOfdm = ofdm;
+ }
+ break;
+ case FFTEngine::KISS:
+ cifOfdm = make_shared<OfdmGeneratorFixed>(
+ (1 + m_nbSymbols),
+ m_nbCarriers,
+ m_spacing);
+ break;
+ case FFTEngine::DEXTER:
+#if defined(HAVE_DEXTER)
+ cifOfdm = make_shared<OfdmGeneratorDEXTER>(
+ (1 + m_nbSymbols),
+ m_nbCarriers,
+ m_spacing);
+#else
+ throw std::runtime_error("Cannot use DEXTER fft engine without --enable-dexter");
+#endif
+ break;
+ }
- rcs.enrol(cifOfdm.get());
+ shared_ptr<GainControl> cifGain;
- auto cifGain = make_shared<GainControl>(
- m_spacing,
- m_settings.gainMode,
- m_settings.digitalgain,
- m_settings.normalise,
- m_settings.gainmodeVariance);
+ if (not fixedPoint) {
+ cifGain = make_shared<GainControl>(
+ m_spacing,
+ m_settings.gainMode,
+ m_settings.digitalgain,
+ m_settings.normalise,
+ m_settings.gainmodeVariance);
- rcs.enrol(cifGain.get());
+ rcs.enrol(cifGain.get());
+ }
auto cifGuard = make_shared<GuardIntervalInserter>(
m_nbSymbols, m_spacing, m_nullSize, m_symSize,
- m_settings.ofdmWindowOverlap);
+ m_settings.ofdmWindowOverlap, m_settings.fftEngine);
rcs.enrol(cifGuard.get());
shared_ptr<FIRFilter> cifFilter;
if (not m_settings.filterTapsFilename.empty()) {
+ if (fixedPoint) throw std::runtime_error("fixed point doesn't support fir filter");
+
cifFilter = make_shared<FIRFilter>(m_settings.filterTapsFilename);
rcs.enrol(cifFilter.get());
}
shared_ptr<MemlessPoly> cifPoly;
if (not m_settings.polyCoefFilename.empty()) {
+ if (fixedPoint) throw std::runtime_error("fixed point doesn't support predistortion");
+
cifPoly = make_shared<MemlessPoly>(m_settings.polyCoefFilename,
m_settings.polyNumThreads);
rcs.enrol(cifPoly.get());
@@ -227,15 +260,21 @@ int DabModulator::process(Buffer* dataOut)
shared_ptr<Resampler> cifRes;
if (m_settings.outputRate != 2048000) {
+ if (fixedPoint) throw std::runtime_error("fixed point doesn't support resampler");
+
cifRes = make_shared<Resampler>(
2048000,
m_settings.outputRate,
m_spacing);
}
- if (not m_format.empty()) {
- m_formatConverter = make_shared<FormatConverter>(m_format);
+ if (m_settings.fftEngine == FFTEngine::FFTW and not m_format.empty()) {
+ m_formatConverter = make_shared<FormatConverter>(false, m_format);
+ }
+ else if (m_settings.fftEngine == FFTEngine::DEXTER) {
+ m_formatConverter = make_shared<FormatConverter>(true, m_format);
}
+ // KISS is already in s16
m_output = make_shared<OutputMemory>(dataOut);
diff --git a/src/DabModulator.h b/src/DabModulator.h
index 093a782..82782cd 100644
--- a/src/DabModulator.h
+++ b/src/DabModulator.h
@@ -40,12 +40,8 @@
#include "EtiReader.h"
#include "Flowgraph.h"
#include "FormatConverter.h"
-#include "GainControl.h"
#include "OutputMemory.h"
#include "RemoteControl.h"
-#include "Log.h"
-#include "TII.h"
-
class DabModulator : public ModInput, public ModMetadata, public RemoteControllable
{
diff --git a/src/DifferentialModulator.cpp b/src/DifferentialModulator.cpp
index 97a7998..21b4c3e 100644
--- a/src/DifferentialModulator.cpp
+++ b/src/DifferentialModulator.cpp
@@ -22,17 +22,14 @@
#include "DifferentialModulator.h"
#include "PcDebug.h"
-#include <stdio.h>
+#include <cstdio>
#include <stdexcept>
-#include <complex>
-#include <string.h>
+#include <cstring>
-typedef std::complex<float> complexf;
-
-
-DifferentialModulator::DifferentialModulator(size_t carriers) :
+DifferentialModulator::DifferentialModulator(size_t carriers, bool fixedPoint) :
ModMux(),
- d_carriers(carriers)
+ m_carriers(carriers),
+ m_fixedPoint(fixedPoint)
{
PDEBUG("DifferentialModulator::DifferentialModulator(%zu)\n", carriers);
@@ -42,10 +39,42 @@ DifferentialModulator::DifferentialModulator(size_t carriers) :
DifferentialModulator::~DifferentialModulator()
{
PDEBUG("DifferentialModulator::~DifferentialModulator()\n");
-
}
+/* Differential modulation shared by the floating-point and fixed-point paths.
+ *
+ * dataIn[0] holds the phase reference symbol (exactly `carriers` samples of
+ * type T) and dataIn[1] holds the data symbols (a whole multiple of
+ * `carriers` samples). The output is the phase reference followed by the
+ * modulated symbols, where every output symbol is the previous output symbol
+ * multiplied sample by sample with the corresponding input symbol.
+ *
+ * Throws std::runtime_error when either input has an unexpected size; in
+ * that case dataOut is left untouched.
+ *
+ * The function has internal linkage: it is an implementation detail of this
+ * file and its generic name must not clash with same-named helpers in other
+ * translation units.
+ */
+template<typename T>
+static void do_process(size_t carriers, const std::vector<Buffer*>& dataIn, Buffer* dataOut)
+{
+    const size_t phaseSize = dataIn[0]->getLength() / sizeof(T);
+    const size_t dataSize = dataIn[1]->getLength() / sizeof(T);
+
+    // Validate first, so that a rejected call does not resize the output.
+    if (phaseSize != carriers) {
+        throw std::runtime_error(
+                "DifferentialModulator::process input phase size not valid!");
+    }
+    if (dataSize % carriers != 0) {
+        throw std::runtime_error(
+                "DifferentialModulator::process input data size not valid!");
+    }
+
+    dataOut->setLength((phaseSize + dataSize) * sizeof(T));
+
+    const T* phase = reinterpret_cast<const T*>(dataIn[0]->getData());
+    const T* in = reinterpret_cast<const T*>(dataIn[1]->getData());
+    T* out = reinterpret_cast<T*>(dataOut->getData());
+
+    // The first output symbol is the phase reference itself.
+    memcpy(dataOut->getData(), phase, phaseSize * sizeof(T));
+
+    for (size_t i = 0; i < dataSize; i += carriers) {
+        // One sample per iteration: unlike a hand-unrolled stride of four,
+        // this stays in bounds for any carrier count, and the compiler is
+        // free to unroll it.
+        for (size_t j = 0; j < carriers; j++) {
+            out[carriers + j] = out[j] * in[j];
+        }
+        in += carriers;
+        out += carriers;
+    }
+}
+
// dataIn[0] -> phase reference
// dataIn[1] -> data symbols
int DifferentialModulator::process(std::vector<Buffer*> dataIn, Buffer* dataOut)
@@ -67,33 +96,11 @@ int DifferentialModulator::process(std::vector<Buffer*> dataIn, Buffer* dataOut)
"DifferentialModulator::process nb of input streams not 2!");
}
- size_t phaseSize = dataIn[0]->getLength() / sizeof(complexf);
- size_t dataSize = dataIn[1]->getLength() / sizeof(complexf);
- dataOut->setLength((phaseSize + dataSize) * sizeof(complexf));
-
- const complexf* phase = reinterpret_cast<const complexf*>(dataIn[0]->getData());
- const complexf* in = reinterpret_cast<const complexf*>(dataIn[1]->getData());
- complexf* out = reinterpret_cast<complexf*>(dataOut->getData());
-
- if (phaseSize != d_carriers) {
- throw std::runtime_error(
- "DifferentialModulator::process input phase size not valid!");
- }
- if (dataSize % d_carriers != 0) {
- throw std::runtime_error(
- "DifferentialModulator::process input data size not valid!");
+ if (m_fixedPoint) {
+ do_process<complexfix>(m_carriers, dataIn, dataOut);
}
-
- memcpy(dataOut->getData(), phase, phaseSize * sizeof(complexf));
- for (size_t i = 0; i < dataSize; i += d_carriers) {
- for (size_t j = 0; j < d_carriers; j += 4) {
- out[d_carriers + j] = out[j] * in[j];
- out[d_carriers + j + 1] = out[j + 1] * in[j + 1];
- out[d_carriers + j + 2] = out[j + 2] * in[j + 2];
- out[d_carriers + j + 3] = out[j + 3] * in[j + 3];
- }
- in += d_carriers;
- out += d_carriers;
+ else {
+ do_process<complexf>(m_carriers, dataIn, dataOut);
}
return dataOut->getLength();
diff --git a/src/DifferentialModulator.h b/src/DifferentialModulator.h
index b26ea8b..9cc5081 100644
--- a/src/DifferentialModulator.h
+++ b/src/DifferentialModulator.h
@@ -35,7 +35,7 @@
class DifferentialModulator : public ModMux
{
public:
- DifferentialModulator(size_t carriers);
+ DifferentialModulator(size_t carriers, bool fixedPoint);
virtual ~DifferentialModulator();
DifferentialModulator(const DifferentialModulator&);
DifferentialModulator& operator=(const DifferentialModulator&);
@@ -45,6 +45,7 @@ public:
const char* name() { return "DifferentialModulator"; }
protected:
- size_t d_carriers;
+ size_t m_carriers;
+    bool m_fixedPoint; // true: process complexfix samples, false: complexf
};
diff --git a/src/FIRFilter.h b/src/FIRFilter.h
index a4effa1..2d8fba9 100644
--- a/src/FIRFilter.h
+++ b/src/FIRFilter.h
@@ -33,21 +33,14 @@
#include "RemoteControl.h"
#include "ModPlugin.h"
-#include "PcDebug.h"
#include <sys/types.h>
-#include <complex>
-#include <thread>
#include <vector>
-#include <time.h>
#include <cstdio>
#include <string>
-#include <memory>
#define FIRFILTER_PIPELINE_DELAY 1
-typedef std::complex<float> complexf;
-
class FIRFilter : public PipelinedModCodec, public RemoteControllable
{
public:
diff --git a/src/Flowgraph.cpp b/src/Flowgraph.cpp
index 3d4cdcc..339e326 100644
--- a/src/Flowgraph.cpp
+++ b/src/Flowgraph.cpp
@@ -27,12 +27,10 @@
#include "Flowgraph.h"
#include "PcDebug.h"
#include "Log.h"
-#include <string>
#include <memory>
#include <algorithm>
#include <sstream>
#include <sys/types.h>
-#include <stdexcept>
#include <assert.h>
#include <sys/time.h>
@@ -254,15 +252,15 @@ Flowgraph::~Flowgraph()
char node_time_sz[1024] = {};
for (const auto &node : nodes) {
- snprintf(node_time_sz, 1023, " %30s: %10lu us (%2.2f %%)\n",
+ snprintf(node_time_sz, 1023, " %30s: %10lld us (%2.2f %%)\n",
node->plugin()->name(),
- node->processTime(),
+ (long long)node->processTime(),
node->processTime() * 100.0 / myProcessTime);
ss << node_time_sz;
}
- snprintf(node_time_sz, 1023, " %30s: %10lu us (100.00 %%)\n", "total",
- myProcessTime);
+ snprintf(node_time_sz, 1023, " %30s: %10lld us (100.00 %%)\n", "total",
+ (long long)myProcessTime);
ss << node_time_sz;
etiLog.level(debug) << ss.str();
diff --git a/src/FormatConverter.cpp b/src/FormatConverter.cpp
index e8e76ed..517f26e 100644
--- a/src/FormatConverter.cpp
+++ b/src/FormatConverter.cpp
@@ -28,17 +28,37 @@
#include "FormatConverter.h"
#include "PcDebug.h"
+#include "Log.h"
-#include <sys/types.h>
-#include <string.h>
#include <stdexcept>
+#include <cstring>
#include <assert.h>
+#include <sys/types.h>
+#if defined(__ARM_NEON)
+#include <arm_neon.h>
+#endif
-FormatConverter::FormatConverter(const std::string& format) :
+FormatConverter::FormatConverter(bool input_is_complexfix_wide, const std::string& format_out) :
ModCodec(),
- m_format(format)
+ m_input_complexfix_wide(input_is_complexfix_wide),
+ m_format_out(format_out)
{ }
+FormatConverter::~FormatConverter()
+{
+    // On NEON builds the complexfix_wide path saturates inside the narrowing
+    // intrinsic and never increments the clipping counter, so there is
+    // nothing meaningful to report for it. All other paths count clipped
+    // samples themselves.
+#if defined(__ARM_NEON)
+    const bool counter_is_valid = not m_input_complexfix_wide;
+#else
+    const bool counter_is_valid = true;
+#endif
+
+    if (not counter_is_valid) {
+        return;
+    }
+
+    etiLog.level(debug) << "FormatConverter: " <<
+        m_num_clipped_samples.load() << " clipped";
+}
+
+
/* Expect the input samples to be in the correct range for the required format */
int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut)
{
@@ -47,71 +67,113 @@ int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut)
size_t num_clipped_samples = 0;
- size_t sizeIn = dataIn->getLength() / sizeof(float);
- float* in = reinterpret_cast<float*>(dataIn->getData());
+ if (m_input_complexfix_wide) {
+ size_t sizeIn = dataIn->getLength() / sizeof(int32_t);
+ if (m_format_out == "s16") {
+ dataOut->setLength(sizeIn * sizeof(int16_t));
+ const int32_t *in = reinterpret_cast<int32_t*>(dataIn->getData());
+ int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData());
- if (m_format == "s16") {
- dataOut->setLength(sizeIn * sizeof(int16_t));
- int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData());
+ constexpr int shift = 6;
- for (size_t i = 0; i < sizeIn; i++) {
- if (in[i] < INT16_MIN) {
- out[i] = INT16_MIN;
- num_clipped_samples++;
+#if defined(__ARM_NEON)
+ if (sizeIn % 4 != 0) {
+ throw std::logic_error("Unexpected length not multiple of 4");
}
- else if (in[i] > INT16_MAX) {
- out[i] = INT16_MAX;
- num_clipped_samples++;
+
+ for (size_t i = 0; i < sizeIn; i += 4) {
+ int32x4_t input_vec = vld1q_s32(&in[i]);
+ // Apply shift right, saturate on conversion to int16_t
+ int16x4_t output_vec = vqshrn_n_s32(input_vec, shift);
+ vst1_s16(&out[i], output_vec);
}
- else {
- out[i] = in[i];
+#else
+ for (size_t i = 0; i < sizeIn; i++) {
+ const int32_t val = in[i] >> shift;
+ if (val < INT16_MIN) {
+ out[i] = INT16_MIN;
+ num_clipped_samples++;
+ }
+ else if (val > INT16_MAX) {
+ out[i] = INT16_MAX;
+ num_clipped_samples++;
+ }
+ else {
+ out[i] = val;
+ }
}
+#endif
}
- }
- else if (m_format == "u8") {
- dataOut->setLength(sizeIn * sizeof(int8_t));
- uint8_t* out = reinterpret_cast<uint8_t*>(dataOut->getData());
-
- for (size_t i = 0; i < sizeIn; i++) {
- const auto samp = in[i] + 128.0f;
- if (samp < 0) {
- out[i] = 0;
- num_clipped_samples++;
- }
- else if (samp > UINT8_MAX) {
- out[i] = UINT8_MAX;
- num_clipped_samples++;
- }
- else {
- out[i] = samp;
- }
-
+ else {
+ throw std::runtime_error("FormatConverter: Invalid fix format " + m_format_out);
}
}
- else if (m_format == "s8") {
- dataOut->setLength(sizeIn * sizeof(int8_t));
- int8_t* out = reinterpret_cast<int8_t*>(dataOut->getData());
-
- for (size_t i = 0; i < sizeIn; i++) {
- if (in[i] < INT8_MIN) {
- out[i] = INT8_MIN;
- num_clipped_samples++;
+ else {
+ size_t sizeIn = dataIn->getLength() / sizeof(float);
+ const float* in = reinterpret_cast<float*>(dataIn->getData());
+
+ if (m_format_out == "s16") {
+ dataOut->setLength(sizeIn * sizeof(int16_t));
+ int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData());
+
+ for (size_t i = 0; i < sizeIn; i++) {
+ if (in[i] < INT16_MIN) {
+ out[i] = INT16_MIN;
+ num_clipped_samples++;
+ }
+ else if (in[i] > INT16_MAX) {
+ out[i] = INT16_MAX;
+ num_clipped_samples++;
+ }
+ else {
+ out[i] = in[i];
+ }
}
- else if (in[i] > INT8_MAX) {
- out[i] = INT8_MAX;
- num_clipped_samples++;
+ }
+ else if (m_format_out == "u8") {
+ dataOut->setLength(sizeIn * sizeof(int8_t));
+ uint8_t* out = reinterpret_cast<uint8_t*>(dataOut->getData());
+
+ for (size_t i = 0; i < sizeIn; i++) {
+ const auto samp = in[i] + 128.0f;
+ if (samp < 0) {
+ out[i] = 0;
+ num_clipped_samples++;
+ }
+ else if (samp > UINT8_MAX) {
+ out[i] = UINT8_MAX;
+ num_clipped_samples++;
+ }
+ else {
+ out[i] = samp;
+ }
+
}
- else {
- out[i] = in[i];
+ }
+ else if (m_format_out == "s8") {
+ dataOut->setLength(sizeIn * sizeof(int8_t));
+ int8_t* out = reinterpret_cast<int8_t*>(dataOut->getData());
+
+ for (size_t i = 0; i < sizeIn; i++) {
+ if (in[i] < INT8_MIN) {
+ out[i] = INT8_MIN;
+ num_clipped_samples++;
+ }
+ else if (in[i] > INT8_MAX) {
+ out[i] = INT8_MAX;
+ num_clipped_samples++;
+ }
+ else {
+ out[i] = in[i];
+ }
}
}
- }
- else {
- throw std::runtime_error("FormatConverter: Invalid format " + m_format);
+ else {
+ throw std::runtime_error("FormatConverter: Invalid format " + m_format_out);
+ }
}
m_num_clipped_samples.store(num_clipped_samples);
-
return dataOut->getLength();
}
diff --git a/src/FormatConverter.h b/src/FormatConverter.h
index 05511c0..1ed2283 100644
--- a/src/FormatConverter.h
+++ b/src/FormatConverter.h
@@ -33,18 +33,19 @@
#endif
#include "ModPlugin.h"
-#include <complex>
#include <atomic>
#include <string>
-#include <cstdint>
class FormatConverter : public ModCodec
{
public:
static size_t get_format_size(const std::string& format);
- // Allowed formats: s8, u8 and s16
- FormatConverter(const std::string& format);
+ // floating-point input allows output formats: s8, u8 and s16
+ // complexfix_wide input allows output formats: s16
+ // complexfix input is already in s16, and needs no converter
+ FormatConverter(bool input_is_complexfix_wide, const std::string& format_out);
+ virtual ~FormatConverter();
int process(Buffer* const dataIn, Buffer* dataOut);
const char* name();
@@ -52,7 +53,8 @@ class FormatConverter : public ModCodec
size_t get_num_clipped_samples() const;
private:
- std::string m_format;
+ bool m_input_complexfix_wide;
+ std::string m_format_out;
std::atomic<size_t> m_num_clipped_samples = 0;
};
diff --git a/src/FrameMultiplexer.cpp b/src/FrameMultiplexer.cpp
index e893120..ebd8b76 100644
--- a/src/FrameMultiplexer.cpp
+++ b/src/FrameMultiplexer.cpp
@@ -25,17 +25,11 @@
*/
#include "FrameMultiplexer.h"
-#include "PcDebug.h"
-#include <stdio.h>
#include <string>
-#include <stdexcept>
-#include <complex>
-#include <memory>
-#include <assert.h>
-#include <string.h>
-
-typedef std::complex<float> complexf;
+#include <cstdio>
+#include <cassert>
+#include <cstring>
FrameMultiplexer::FrameMultiplexer(
const EtiSource& etiSource) :
diff --git a/src/FrequencyInterleaver.cpp b/src/FrequencyInterleaver.cpp
index e76d525..6f36dcb 100644
--- a/src/FrequencyInterleaver.cpp
+++ b/src/FrequencyInterleaver.cpp
@@ -22,17 +22,15 @@
#include "FrequencyInterleaver.h"
#include "PcDebug.h"
-#include <stdio.h>
#include <stdexcept>
#include <string>
-#include <stdlib.h>
-#include <complex>
+#include <cstdio>
+#include <cstdlib>
-typedef std::complex<float> complexf;
-
-FrequencyInterleaver::FrequencyInterleaver(size_t mode) :
- ModCodec()
+FrequencyInterleaver::FrequencyInterleaver(size_t mode, bool fixedPoint) :
+ ModCodec(),
+ m_fixedPoint(fixedPoint)
{
PDEBUG("FrequencyInterleaver::FrequencyInterleaver(%zu) @ %p\n",
mode, this);
@@ -42,54 +40,53 @@ FrequencyInterleaver::FrequencyInterleaver(size_t mode) :
size_t beta;
switch (mode) {
case 1:
- d_carriers = 1536;
+ m_carriers = 1536;
num = 2048;
beta = 511;
break;
case 2:
- d_carriers = 384;
+ m_carriers = 384;
num = 512;
beta = 127;
break;
case 3:
- d_carriers = 192;
+ m_carriers = 192;
num = 256;
beta = 63;
break;
case 0:
case 4:
- d_carriers = 768;
+ m_carriers = 768;
num = 1024;
beta = 255;
break;
default:
- PDEBUG("Carriers: %zu\n", (d_carriers >> 1) << 1);
- throw std::runtime_error("FrequencyInterleaver::FrequencyInterleaver "
- "nb of carriers invalid!");
- break;
+ PDEBUG("Carriers: %zu\n", (m_carriers >> 1) << 1);
+ throw std::runtime_error("FrequencyInterleaver: invalid dab mode");
}
- const int ret = posix_memalign((void**)(&d_indexes), 16, d_carriers * sizeof(size_t));
+ const int ret = posix_memalign((void**)(&m_indices), 16, m_carriers * sizeof(size_t));
if (ret != 0) {
throw std::runtime_error("memory allocation failed: " + std::to_string(ret));
}
- size_t* index = d_indexes;
+ size_t *index = m_indices;
size_t perm = 0;
PDEBUG("i: %4u, R: %4u\n", 0, 0);
for (size_t j = 1; j < num; ++j) {
perm = (alpha * perm + beta) & (num - 1);
- if (perm >= ((num - d_carriers) / 2)
- && perm <= (num - (num - d_carriers) / 2)
+ if (perm >= ((num - m_carriers) / 2)
+ && perm <= (num - (num - m_carriers) / 2)
&& perm != (num / 2)) {
PDEBUG("i: %4zu, R: %4zu, d: %4zu, n: %4zu, k: %5zi, index: %zu\n",
- j, perm, perm, index - d_indexes, perm - num / 2,
+ j, perm, perm, index - m_indices, perm - num / 2,
perm > num / 2
? perm - (1 + (num / 2))
- : perm + (d_carriers - (num / 2)));
+ : perm + (m_carriers - (num / 2)));
*(index++) = perm > num / 2 ?
- perm - (1 + (num / 2)) : perm + (d_carriers - (num / 2));
- } else {
+ perm - (1 + (num / 2)) : perm + (m_carriers - (num / 2));
+ }
+ else {
PDEBUG("i: %4zu, R: %4zu\n", j, perm);
}
}
@@ -100,9 +97,33 @@ FrequencyInterleaver::~FrequencyInterleaver()
{
PDEBUG("FrequencyInterleaver::~FrequencyInterleaver() @ %p\n", this);
- free(d_indexes);
+ free(m_indices);
}
+/* Frequency-interleave the carriers of every symbol in dataIn.
+ *
+ * The input is read as consecutive groups of `carriers` elements of type T.
+ * Within each group, input element j is stored at position indices[j] of
+ * the matching output group. `indices` therefore has to provide at least
+ * `carriers` entries; an entry that is not smaller than `carriers` would
+ * make the write land outside the current output group.
+ *
+ * dataOut is not resized here, its length has to be set by the caller.
+ *
+ * Throws std::runtime_error if `carriers` is zero or if the input does not
+ * contain a whole number of groups.
+ */
+template<typename T>
+void do_process(Buffer* const dataIn, Buffer* dataOut,
+        size_t carriers, const size_t * const indices)
+{
+    // Guard the modulo below, which is undefined for a zero divisor.
+    if (carriers == 0) {
+        throw std::runtime_error(
+                "FrequencyInterleaver::process number of carriers is zero!");
+    }
+
+    const T* in = reinterpret_cast<const T*>(dataIn->getData());
+    T* out = reinterpret_cast<T*>(dataOut->getData());
+    const size_t sizeIn = dataIn->getLength() / sizeof(T);
+
+    if (sizeIn % carriers != 0) {
+        throw std::runtime_error(
+                "FrequencyInterleaver::process input size not valid!");
+    }
+
+    // One pass per symbol. The loop is deliberately not unrolled by hand,
+    // so that it stays in bounds for any carrier count, not only for
+    // multiples of four.
+    const size_t numSymbols = sizeIn / carriers;
+    for (size_t sym = 0; sym < numSymbols; sym++) {
+        for (size_t j = 0; j < carriers; j++) {
+            out[indices[j]] = in[j];
+        }
+        in += carriers;
+        out += carriers;
+    }
+}
int FrequencyInterleaver::process(Buffer* const dataIn, Buffer* dataOut)
{
@@ -112,24 +133,11 @@ int FrequencyInterleaver::process(Buffer* const dataIn, Buffer* dataOut)
dataOut->setLength(dataIn->getLength());
- const complexf* in = reinterpret_cast<const complexf*>(dataIn->getData());
- complexf* out = reinterpret_cast<complexf*>(dataOut->getData());
- size_t sizeIn = dataIn->getLength() / sizeof(complexf);
-
- if (sizeIn % d_carriers != 0) {
- throw std::runtime_error(
- "FrequencyInterleaver::process input size not valid!");
+ if (m_fixedPoint) {
+ do_process<complexfix>(dataIn, dataOut, m_carriers, m_indices);
}
-
- for (size_t i = 0; i < sizeIn;) {
-// memset(out, 0, d_carriers * sizeof(complexf));
- for (size_t j = 0; j < d_carriers; i += 4, j += 4) {
- out[d_indexes[j]] = in[i];
- out[d_indexes[j + 1]] = in[i + 1];
- out[d_indexes[j + 2]] = in[i + 2];
- out[d_indexes[j + 3]] = in[i + 3];
- }
- out += d_carriers;
+ else {
+ do_process<complexf>(dataIn, dataOut, m_carriers, m_indices);
}
return 1;
diff --git a/src/FrequencyInterleaver.h b/src/FrequencyInterleaver.h
index 43ca21a..b31b968 100644
--- a/src/FrequencyInterleaver.h
+++ b/src/FrequencyInterleaver.h
@@ -25,16 +25,14 @@
# include <config.h>
#endif
-
#include "ModPlugin.h"
#include <sys/types.h>
-
class FrequencyInterleaver : public ModCodec
{
public:
- FrequencyInterleaver(size_t mode);
+ FrequencyInterleaver(size_t mode, bool fixedPoint);
virtual ~FrequencyInterleaver();
FrequencyInterleaver(const FrequencyInterleaver&) = delete;
FrequencyInterleaver& operator=(const FrequencyInterleaver&) = delete;
@@ -43,7 +41,8 @@ public:
const char* name() override { return "FrequencyInterleaver"; }
protected:
- size_t d_carriers;
- size_t* d_indexes;
+ bool m_fixedPoint;
+ size_t m_carriers;
+ size_t *m_indices;
};
diff --git a/src/GainControl.h b/src/GainControl.h
index 04f6b58..d40a7d7 100644
--- a/src/GainControl.h
+++ b/src/GainControl.h
@@ -35,7 +35,6 @@
#include "RemoteControl.h"
#include <sys/types.h>
-#include <complex>
#include <string>
#include <mutex>
@@ -43,9 +42,6 @@
# include <xmmintrin.h>
#endif
-
-typedef std::complex<float> complexf;
-
enum class GainMode { GAIN_FIX = 0, GAIN_MAX = 1, GAIN_VAR = 2 };
class GainControl : public PipelinedModCodec, public RemoteControllable
diff --git a/src/GuardIntervalInserter.cpp b/src/GuardIntervalInserter.cpp
index 3c2db14..26d4fd1 100644
--- a/src/GuardIntervalInserter.cpp
+++ b/src/GuardIntervalInserter.cpp
@@ -29,39 +29,47 @@
#include <cstring>
#include <cassert>
#include <stdexcept>
-#include <complex>
#include <mutex>
-typedef std::complex<float> complexf;
+// Copies the four symbol dimensions and binds windowOverlap to the variable
+// passed in by the caller. windowMutex and window are default-constructed.
+GuardIntervalInserter::Params::Params(
+        size_t nbSymbols,
+        size_t spacing,
+        size_t nullSize,
+        size_t symSize,
+        size_t& windowOverlap)
+    : nbSymbols{nbSymbols}
+    , spacing{spacing}
+    , nullSize{nullSize}
+    , symSize{symSize}
+    , windowOverlap{windowOverlap}
+{
+}
GuardIntervalInserter::GuardIntervalInserter(
size_t nbSymbols,
size_t spacing,
size_t nullSize,
size_t symSize,
- size_t& windowOverlap) :
+ size_t& windowOverlap,
+ FFTEngine fftEngine) :
ModCodec(),
RemoteControllable("guardinterval"),
- d_nbSymbols(nbSymbols),
- d_spacing(spacing),
- d_nullSize(nullSize),
- d_symSize(symSize),
- d_windowOverlap(windowOverlap)
+ m_fftEngine(fftEngine),
+ m_params(nbSymbols, spacing, nullSize, symSize, windowOverlap)
{
- if (d_nullSize == 0) {
+ if (nullSize == 0) {
throw std::logic_error("NULL symbol must be present");
}
+
RC_ADD_PARAMETER(windowlen, "Window length for OFDM windowng [0 to disable]");
/* We use a raised-cosine window for the OFDM windowing.
- * Each symbol is extended on both sides by d_windowOverlap samples.
+ * Each symbol is extended on both sides by windowOverlap samples.
*
*
* Sym n |####################|
* Sym n+1 |####################|
*
- * We now extend the symbols by d_windowOverlap (one dash)
+ * We now extend the symbols by windowOverlap (one dash)
*
* Sym n extended -|####################|-
* Sym n+1 extended -|####################|-
@@ -75,7 +83,7 @@ GuardIntervalInserter::GuardIntervalInserter(
* / \
* ... ________________/ \__ ...
*
- * The window length is 2*d_windowOverlap.
+ * The window length is 2*windowOverlap.
*/
update_window(windowOverlap);
@@ -87,44 +95,43 @@ GuardIntervalInserter::GuardIntervalInserter(
void GuardIntervalInserter::update_window(size_t new_window_overlap)
{
- std::lock_guard<std::mutex> lock(d_windowMutex);
+ std::lock_guard<std::mutex> lock(m_params.windowMutex);
- d_windowOverlap = new_window_overlap;
+ m_params.windowOverlap = new_window_overlap;
- // d_window only contains the rising window edge.
- d_window.resize(2*d_windowOverlap);
- for (size_t i = 0; i < 2*d_windowOverlap; i++) {
- d_window[i] = (float)(0.5 * (1.0 - cos(M_PI * i / (2*d_windowOverlap - 1))));
+ // m_params.window only contains the rising window edge.
+ m_params.window.resize(2*m_params.windowOverlap);
+ for (size_t i = 0; i < 2*m_params.windowOverlap; i++) {
+ m_params.window[i] = (float)(0.5 * (1.0 - cos(M_PI * i / (2*m_params.windowOverlap - 1))));
}
}
-int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut)
+template<typename T>
+int do_process(const GuardIntervalInserter::Params& p, Buffer* const dataIn, Buffer* dataOut)
{
- PDEBUG("GuardIntervalInserter::process(dataIn: %p, dataOut: %p)\n",
+ PDEBUG("GuardIntervalInserter do_process(dataIn: %p, dataOut: %p)\n",
dataIn, dataOut);
- std::lock_guard<std::mutex> lock(d_windowMutex);
-
- // Every symbol overlaps over a length of d_windowOverlap with
+ // Every symbol overlaps over a length of windowOverlap with
// the previous symbol, and with the next symbol. First symbol
// receives no prefix window, because we don't remember the
// last symbol from the previous TF (yet). Last symbol also
// receives no suffix window, for the same reason.
// Overall output buffer length must stay independent of the windowing.
- dataOut->setLength((d_nullSize + (d_nbSymbols * d_symSize)) * sizeof(complexf));
+ dataOut->setLength((p.nullSize + (p.nbSymbols * p.symSize)) * sizeof(T));
- const complexf* in = reinterpret_cast<const complexf*>(dataIn->getData());
- complexf* out = reinterpret_cast<complexf*>(dataOut->getData());
- size_t sizeIn = dataIn->getLength() / sizeof(complexf);
+ const T* in = reinterpret_cast<const T*>(dataIn->getData());
+ T* out = reinterpret_cast<T*>(dataOut->getData());
+ size_t sizeIn = dataIn->getLength() / sizeof(T);
- const size_t num_symbols = d_nbSymbols + 1;
- if (sizeIn != num_symbols * d_spacing)
+ const size_t num_symbols = p.nbSymbols + 1;
+ if (sizeIn != num_symbols * p.spacing)
{
- PDEBUG("Nb symbols: %zu\n", d_nbSymbols);
- PDEBUG("Spacing: %zu\n", d_spacing);
- PDEBUG("Null size: %zu\n", d_nullSize);
- PDEBUG("Sym size: %zu\n", d_symSize);
- PDEBUG("\n%zu != %zu\n", sizeIn, (d_nbSymbols + 1) * d_spacing);
+ PDEBUG("Nb symbols: %zu\n", p.nbSymbols);
+ PDEBUG("Spacing: %zu\n", p.spacing);
+ PDEBUG("Null size: %zu\n", p.nullSize);
+ PDEBUG("Sym size: %zu\n", p.symSize);
+ PDEBUG("\n%zu != %zu\n", sizeIn, (p.nbSymbols + 1) * p.spacing);
throw std::runtime_error(
"GuardIntervalInserter::process input size not valid!");
}
@@ -132,139 +139,162 @@ int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut)
// TODO remember the end of the last TF so that we can do some
// windowing too.
- if (d_windowOverlap) {
- {
- // Handle Null symbol separately because it is longer
- const size_t prefixlength = d_nullSize - d_spacing;
-
- // end = spacing
- memcpy(out, &in[d_spacing - prefixlength],
- prefixlength * sizeof(complexf));
-
- memcpy(&out[prefixlength], in, (d_spacing - d_windowOverlap) * sizeof(complexf));
+ std::lock_guard<std::mutex> lock(p.windowMutex);
+ if (p.windowOverlap) {
+ if constexpr (std::is_same_v<complexf, T>) {
+ {
+ // Handle Null symbol separately because it is longer
+ const size_t prefixlength = p.nullSize - p.spacing;
+
+ // end = spacing
+ memcpy(out, &in[p.spacing - prefixlength],
+ prefixlength * sizeof(T));
+
+ memcpy(&out[prefixlength], in, (p.spacing - p.windowOverlap) * sizeof(T));
+
+ // The remaining part of the symbol must have half of the window applied,
+ // sloping down from 1 to 0.5
+ for (size_t i = 0; i < p.windowOverlap; i++) {
+ const size_t out_ix = prefixlength + p.spacing - p.windowOverlap + i;
+ const size_t in_ix = p.spacing - p.windowOverlap + i;
+ out[out_ix] = in[in_ix] * p.window[2*p.windowOverlap - (i+1)];
+ }
- // The remaining part of the symbol must have half of the window applied,
- // sloping down from 1 to 0.5
- for (size_t i = 0; i < d_windowOverlap; i++) {
- const size_t out_ix = prefixlength + d_spacing - d_windowOverlap + i;
- const size_t in_ix = d_spacing - d_windowOverlap + i;
- out[out_ix] = in[in_ix] * d_window[2*d_windowOverlap - (i+1)];
- }
+ // Suffix is taken from the beginning of the symbol, and sees the other
+ // half of the window applied.
+ for (size_t i = 0; i < p.windowOverlap; i++) {
+ const size_t out_ix = prefixlength + p.spacing + i;
+ out[out_ix] = in[i] * p.window[p.windowOverlap - (i+1)];
+ }
- // Suffix is taken from the beginning of the symbol, and sees the other
- // half of the window applied.
- for (size_t i = 0; i < d_windowOverlap; i++) {
- const size_t out_ix = prefixlength + d_spacing + i;
- out[out_ix] = in[i] * d_window[d_windowOverlap - (i+1)];
+ in += p.spacing;
+ out += p.nullSize;
+ // out is now pointing to the proper end of symbol. There are
+ // windowOverlap samples ahead that were already written.
}
- in += d_spacing;
- out += d_nullSize;
- // out is now pointing to the proper end of symbol. There are
- // d_windowOverlap samples ahead that were already written.
- }
-
- // Data symbols
- for (size_t sym_ix = 0; sym_ix < d_nbSymbols; sym_ix++) {
- /* _ix variables are indices into in[], _ox variables are
- * indices for out[] */
- const ssize_t start_rise_ox = -d_windowOverlap;
- const size_t start_rise_ix = 2 * d_spacing - d_symSize - d_windowOverlap;
- /*
- const size_t start_real_symbol_ox = 0;
- const size_t start_real_symbol_ix = 2 * d_spacing - d_symSize;
- */
- const ssize_t end_rise_ox = d_windowOverlap;
- const size_t end_rise_ix = 2 * d_spacing - d_symSize + d_windowOverlap;
- const ssize_t end_cyclic_prefix_ox = d_symSize - d_spacing;
- /* end_cyclic_prefix_ix = end of symbol
- const size_t begin_fall_ox = d_symSize - d_windowOverlap;
- const size_t begin_fall_ix = d_spacing - d_windowOverlap;
- const size_t end_real_symbol_ox = d_symSize;
- end_real_symbol_ix = end of symbol
- const size_t end_fall_ox = d_symSize + d_windowOverlap;
- const size_t end_fall_ix = d_spacing + d_windowOverlap;
- */
-
- ssize_t ox = start_rise_ox;
- size_t ix = start_rise_ix;
-
- for (size_t i = 0; ix < end_rise_ix; i++) {
- out[ox] += in[ix] * d_window.at(i);
- ix++;
- ox++;
- }
- assert(ox == end_rise_ox);
-
- const size_t remaining_prefix_length = end_cyclic_prefix_ox - end_rise_ox;
- memcpy( &out[ox], &in[ix],
- remaining_prefix_length * sizeof(complexf));
- ox += remaining_prefix_length;
- assert(ox == end_cyclic_prefix_ox);
- ix = 0;
-
- const bool last_symbol = (sym_ix + 1 >= d_nbSymbols);
- if (last_symbol) {
- // No windowing at all at end
- memcpy(&out[ox], &in[ix], d_spacing * sizeof(complexf));
- ox += d_spacing;
- }
- else {
- // Copy the middle part of the symbol, d_windowOverlap samples
- // short of the end.
- memcpy( &out[ox],
- &in[ix],
- (d_spacing - d_windowOverlap) * sizeof(complexf));
- ox += d_spacing - d_windowOverlap;
- ix += d_spacing - d_windowOverlap;
- assert(ox == (ssize_t)(d_symSize - d_windowOverlap));
-
- // Apply window from 1 to 0.5 for the end of the symbol
- for (size_t i = 0; ox < (ssize_t)d_symSize; i++) {
- out[ox] = in[ix] * d_window[2*d_windowOverlap - (i+1)];
- ox++;
+ // Data symbols
+ for (size_t sym_ix = 0; sym_ix < p.nbSymbols; sym_ix++) {
+ /* _ix variables are indices into in[], _ox variables are
+ * indices for out[] */
+ const ssize_t start_rise_ox = -p.windowOverlap;
+ const size_t start_rise_ix = 2 * p.spacing - p.symSize - p.windowOverlap;
+ /*
+ const size_t start_real_symbol_ox = 0;
+ const size_t start_real_symbol_ix = 2 * p.spacing - p.symSize;
+ */
+ const ssize_t end_rise_ox = p.windowOverlap;
+ const size_t end_rise_ix = 2 * p.spacing - p.symSize + p.windowOverlap;
+ const ssize_t end_cyclic_prefix_ox = p.symSize - p.spacing;
+ /* end_cyclic_prefix_ix = end of symbol
+ const size_t begin_fall_ox = p.symSize - p.windowOverlap;
+ const size_t begin_fall_ix = p.spacing - p.windowOverlap;
+ const size_t end_real_symbol_ox = p.symSize;
+ end_real_symbol_ix = end of symbol
+ const size_t end_fall_ox = p.symSize + p.windowOverlap;
+ const size_t end_fall_ix = p.spacing + p.windowOverlap;
+ */
+
+ ssize_t ox = start_rise_ox;
+ size_t ix = start_rise_ix;
+
+ for (size_t i = 0; ix < end_rise_ix; i++) {
+ out[ox] += in[ix] * p.window.at(i);
ix++;
+ ox++;
}
- assert(ix == d_spacing);
+ assert(ox == end_rise_ox);
+ const size_t remaining_prefix_length = end_cyclic_prefix_ox - end_rise_ox;
+ memcpy( &out[ox], &in[ix],
+ remaining_prefix_length * sizeof(T));
+ ox += remaining_prefix_length;
+ assert(ox == end_cyclic_prefix_ox);
ix = 0;
- // Cyclic suffix, with window from 0.5 to 0
- for (size_t i = 0; ox < (ssize_t)(d_symSize + d_windowOverlap); i++) {
- out[ox] = in[ix] * d_window[d_windowOverlap - (i+1)];
- ox++;
- ix++;
+
+ const bool last_symbol = (sym_ix + 1 >= p.nbSymbols);
+ if (last_symbol) {
+ // No windowing at all at end
+ memcpy(&out[ox], &in[ix], p.spacing * sizeof(T));
+ ox += p.spacing;
+ }
+ else {
+ // Copy the middle part of the symbol, p.windowOverlap samples
+ // short of the end.
+ memcpy( &out[ox],
+ &in[ix],
+ (p.spacing - p.windowOverlap) * sizeof(T));
+ ox += p.spacing - p.windowOverlap;
+ ix += p.spacing - p.windowOverlap;
+ assert(ox == (ssize_t)(p.symSize - p.windowOverlap));
+
+ // Apply window from 1 to 0.5 for the end of the symbol
+ for (size_t i = 0; ox < (ssize_t)p.symSize; i++) {
+ out[ox] = in[ix] * p.window[2*p.windowOverlap - (i+1)];
+ ox++;
+ ix++;
+ }
+ assert(ix == p.spacing);
+
+ ix = 0;
+ // Cyclic suffix, with window from 0.5 to 0
+ for (size_t i = 0; ox < (ssize_t)(p.symSize + p.windowOverlap); i++) {
+ out[ox] = in[ix] * p.window[p.windowOverlap - (i+1)];
+ ox++;
+ ix++;
+ }
+
+ assert(ix == p.windowOverlap);
}
- assert(ix == d_windowOverlap);
+ out += p.symSize;
+ in += p.spacing;
+ // out is now pointing to the proper end of symbol. There are
+ // windowOverlap samples ahead that were already written.
}
-
- out += d_symSize;
- in += d_spacing;
- // out is now pointing to the proper end of symbol. There are
- // d_windowOverlap samples ahead that were already written.
+ }
+ else {
+ throw std::runtime_error("fixed-point doesn't support window overlap");
}
}
else {
// Handle Null symbol separately because it is longer
// end - (nullSize - spacing) = 2 * spacing - nullSize
- memcpy(out, &in[2 * d_spacing - d_nullSize],
- (d_nullSize - d_spacing) * sizeof(complexf));
- memcpy(&out[d_nullSize - d_spacing], in, d_spacing * sizeof(complexf));
- in += d_spacing;
- out += d_nullSize;
+ memcpy(out, &in[2 * p.spacing - p.nullSize],
+ (p.nullSize - p.spacing) * sizeof(T));
+ memcpy(&out[p.nullSize - p.spacing], in, p.spacing * sizeof(T));
+ in += p.spacing;
+ out += p.nullSize;
// Data symbols
- for (size_t i = 0; i < d_nbSymbols; ++i) {
+ for (size_t i = 0; i < p.nbSymbols; ++i) {
// end - (symSize - spacing) = 2 * spacing - symSize
- memcpy(out, &in[2 * d_spacing - d_symSize],
- (d_symSize - d_spacing) * sizeof(complexf));
- memcpy(&out[d_symSize - d_spacing], in, d_spacing * sizeof(complexf));
- in += d_spacing;
- out += d_symSize;
+ memcpy(out, &in[2 * p.spacing - p.symSize],
+ (p.symSize - p.spacing) * sizeof(T));
+ memcpy(&out[p.symSize - p.spacing], in, p.spacing * sizeof(T));
+ in += p.spacing;
+ out += p.symSize;
}
}
- return sizeIn;
+ const auto sizeOut = dataOut->getLength();
+ return sizeOut;
+}
+
+// Runs the guard interval insertion with the sample type that belongs to
+// the configured FFT engine: complexf for FFTW, complexfix for KISS and
+// complexfix_wide for DEXTER. Returns whatever do_process returns.
+int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut)
+{
+    switch (m_fftEngine) {
+        case FFTEngine::FFTW:
+            {
+                return do_process<complexf>(m_params, dataIn, dataOut);
+            }
+        case FFTEngine::KISS:
+            {
+                // The KISS path is only taken when windowing is disabled.
+                if (m_params.windowOverlap == 0) {
+                    return do_process<complexfix>(m_params, dataIn, dataOut);
+                }
+                throw std::runtime_error("fixed point and ofdm windowing not supported");
+            }
+        case FFTEngine::DEXTER:
+            {
+                return do_process<complexfix_wide>(m_params, dataIn, dataOut);
+            }
+    }
+
+    // Reached only when m_fftEngine matches none of the cases above.
+    throw std::logic_error("Unhandled fftEngine variant");
}
void GuardIntervalInserter::set_parameter(
@@ -293,7 +323,7 @@ const std::string GuardIntervalInserter::get_parameter(const std::string& parame
using namespace std;
stringstream ss;
if (parameter == "windowlen") {
- ss << d_windowOverlap;
+ ss << m_params.windowOverlap;
}
else {
ss << "Parameter '" << parameter <<
@@ -306,6 +336,6 @@ const std::string GuardIntervalInserter::get_parameter(const std::string& parame
const json::map_t GuardIntervalInserter::get_all_values() const
{
json::map_t map;
- map["windowlen"].v = d_windowOverlap;
+ map["windowlen"].v = m_params.windowOverlap;
return map;
}
diff --git a/src/GuardIntervalInserter.h b/src/GuardIntervalInserter.h
index f78ac91..8d329ff 100644
--- a/src/GuardIntervalInserter.h
+++ b/src/GuardIntervalInserter.h
@@ -30,6 +30,7 @@
# include <config.h>
#endif
+#include "ConfigParser.h"
#include "ModPlugin.h"
#include "RemoteControl.h"
#include <stdint.h>
@@ -50,7 +51,8 @@ class GuardIntervalInserter : public ModCodec, public RemoteControllable
size_t spacing,
size_t nullSize,
size_t symSize,
- size_t& windowOverlap);
+ size_t& windowOverlap,
+ FFTEngine fftEngine);
virtual ~GuardIntervalInserter() {}
@@ -62,16 +64,30 @@ class GuardIntervalInserter : public ModCodec, public RemoteControllable
virtual const std::string get_parameter(const std::string& parameter) const override;
virtual const json::map_t get_all_values() const override;
+ struct Params { // symbol dimensions and window state, passed as one object to do_process() in GuardIntervalInserter.cpp
+ Params(
+ size_t nbSymbols,
+ size_t spacing,
+ size_t nullSize,
+ size_t symSize,
+ size_t& windowOverlap);
+
+ size_t nbSymbols; // number of data symbols; the input holds one symbol more than this
+ size_t spacing; // length in samples of every symbol in the input
+ size_t nullSize; // length in samples of the null symbol in the output
+ size_t symSize; // length in samples of each data symbol in the output
+ size_t& windowOverlap; // refers to a variable owned by the caller; update_window() writes through it
+
+ mutable std::mutex windowMutex; // locked by update_window() and by do_process(); mutable so it can be locked through a const Params
+ std::vector<float> window; // raised-cosine coefficients; update_window() resizes it to 2*windowOverlap entries
+ };
+
protected:
void update_window(size_t new_window_overlap);
- size_t d_nbSymbols;
- size_t d_spacing;
- size_t d_nullSize;
- size_t d_symSize;
+ FFTEngine m_fftEngine;
+
+ Params m_params;
- mutable std::mutex d_windowMutex;
- size_t& d_windowOverlap;
- std::vector<float> d_window;
};
diff --git a/src/MemlessPoly.h b/src/MemlessPoly.h
index 91e6860..72de62c 100644
--- a/src/MemlessPoly.h
+++ b/src/MemlessPoly.h
@@ -32,13 +32,10 @@
#include "RemoteControl.h"
#include "ModPlugin.h"
-#include "PcDebug.h"
#include "ThreadsafeQueue.h"
#include <sys/types.h>
#include <array>
-#include <complex>
-#include <memory>
#include <string>
#include <thread>
#include <vector>
@@ -47,8 +44,6 @@
#define MEMLESSPOLY_PIPELINE_DELAY 1
-typedef std::complex<float> complexf;
-
enum class dpd_type_t {
odd_only_poly,
lookup_table
diff --git a/src/ModPlugin.h b/src/ModPlugin.h
index 470508f..bb3ee2c 100644
--- a/src/ModPlugin.h
+++ b/src/ModPlugin.h
@@ -33,9 +33,7 @@
#include "Buffer.h"
#include "ThreadsafeQueue.h"
#include "TimestampDecoder.h"
-#include <cstddef>
#include <vector>
-#include <memory>
#include <thread>
#include <atomic>
diff --git a/src/NullSymbol.cpp b/src/NullSymbol.cpp
index 4684dfe..526e662 100644
--- a/src/NullSymbol.cpp
+++ b/src/NullSymbol.cpp
@@ -27,18 +27,16 @@
#include "NullSymbol.h"
#include "PcDebug.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <complex>
-#include <string.h>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
-typedef std::complex<float> complexf;
-
-NullSymbol::NullSymbol(size_t nbCarriers) :
+NullSymbol::NullSymbol(size_t numCarriers, size_t typeSize) :
ModInput(),
- myNbCarriers(nbCarriers)
+ m_numCarriers(numCarriers),
+ m_typeSize(typeSize)
{
- PDEBUG("NullSymbol::NullSymbol(%zu) @ %p\n", nbCarriers, this);
+ PDEBUG("NullSymbol::NullSymbol(%zu) @ %p\n", numCarriers, this);
}
@@ -52,7 +50,7 @@ int NullSymbol::process(Buffer* dataOut)
{
PDEBUG("NullSymbol::process(dataOut: %p)\n", dataOut);
- dataOut->setLength(myNbCarriers * 2 * sizeof(float));
+ dataOut->setLength(m_numCarriers * m_typeSize);
memset(dataOut->getData(), 0, dataOut->getLength());
return dataOut->getLength();
diff --git a/src/NullSymbol.h b/src/NullSymbol.h
index 814e434..6ba9e63 100644
--- a/src/NullSymbol.h
+++ b/src/NullSymbol.h
@@ -39,14 +39,14 @@
+ // Flowgraph input that produces an all-zero symbol: process() sets the
+ // output buffer to numCarriers * typeSize bytes and fills it with zeros.
class NullSymbol : public ModInput
{
public:
- NullSymbol(size_t nbCarriers);
+ // numCarriers: number of elements to generate
+ // typeSize: size in bytes of one element
+ NullSymbol(size_t numCarriers, size_t typeSize);
virtual ~NullSymbol();
int process(Buffer* dataOut);
const char* name() { return "NullSymbol"; }
private:
- size_t myNbCarriers;
-
+ size_t m_numCarriers;
+ size_t m_typeSize;
};
diff --git a/src/OfdmGenerator.cpp b/src/OfdmGenerator.cpp
index cb799d3..38648c9 100644
--- a/src/OfdmGenerator.cpp
+++ b/src/OfdmGenerator.cpp
@@ -2,7 +2,7 @@
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty
the Queen in Right of Canada (Communications Research Center Canada)
- Copyright (C) 2023
+ Copyright (C) 2024
Matthias P. Braendli, matthias.braendli@mpb.li
http://opendigitalradio.org
@@ -27,17 +27,19 @@
#include "OfdmGenerator.h"
#include "PcDebug.h"
-#define FFT_TYPE fftwf_complex
-
-#include <string.h>
#include <stdexcept>
#include <assert.h>
#include <string>
#include <numeric>
+#include <vector>
+#include <cstring>
+#include <complex>
static const size_t MAX_CLIP_STATS = 10;
-OfdmGenerator::OfdmGenerator(size_t nbSymbols,
+using FFTW_TYPE = fftwf_complex;
+
+OfdmGeneratorCF32::OfdmGeneratorCF32(size_t nbSymbols,
size_t nbCarriers,
size_t spacing,
bool& enableCfr,
@@ -62,8 +64,7 @@ OfdmGenerator::OfdmGenerator(size_t nbSymbols,
nbSymbols, nbCarriers, spacing, inverse ? "true" : "false", this);
if (nbCarriers > spacing) {
- throw std::runtime_error(
- "OfdmGenerator::OfdmGenerator nbCarriers > spacing!");
+ throw std::runtime_error("OfdmGenerator nbCarriers > spacing!");
}
/* register the parameters that can be remote controlled */
@@ -102,29 +103,29 @@ OfdmGenerator::OfdmGenerator(size_t nbSymbols,
PDEBUG(" myZeroSize: %u\n", myZeroSize);
const int N = mySpacing; // The size of the FFT
- myFftIn = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N);
- myFftOut = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N);
+ myFftIn = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N);
+ myFftOut = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N);
fftwf_set_timelimit(2);
myFftPlan = fftwf_plan_dft_1d(N,
myFftIn, myFftOut,
FFTW_BACKWARD, FFTW_MEASURE);
- myCfrPostClip = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N);
- myCfrPostFft = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N);
+ myCfrPostClip = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N);
+ myCfrPostFft = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N);
myCfrFft = fftwf_plan_dft_1d(N,
myCfrPostClip, myCfrPostFft,
FFTW_FORWARD, FFTW_MEASURE);
- if (sizeof(complexf) != sizeof(FFT_TYPE)) {
+ if (sizeof(complexf) != sizeof(FFTW_TYPE)) {
printf("sizeof(complexf) %zu\n", sizeof(complexf));
- printf("sizeof(FFT_TYPE) %zu\n", sizeof(FFT_TYPE));
+ printf("sizeof(FFT_TYPE) %zu\n", sizeof(FFTW_TYPE));
throw std::runtime_error(
"OfdmGenerator::process complexf size is not FFT_TYPE size!");
}
}
-OfdmGenerator::~OfdmGenerator()
+OfdmGeneratorCF32::~OfdmGeneratorCF32()
{
PDEBUG("OfdmGenerator::~OfdmGenerator() @ %p\n", this);
@@ -153,15 +154,15 @@ OfdmGenerator::~OfdmGenerator()
}
}
-int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut)
+int OfdmGeneratorCF32::process(Buffer* const dataIn, Buffer* dataOut)
{
PDEBUG("OfdmGenerator::process(dataIn: %p, dataOut: %p)\n",
dataIn, dataOut);
dataOut->setLength(myNbSymbols * mySpacing * sizeof(complexf));
- FFT_TYPE* in = reinterpret_cast<FFT_TYPE*>(dataIn->getData());
- FFT_TYPE* out = reinterpret_cast<FFT_TYPE*>(dataOut->getData());
+ FFTW_TYPE *in = reinterpret_cast<FFTW_TYPE*>(dataIn->getData());
+ FFTW_TYPE *out = reinterpret_cast<FFTW_TYPE*>(dataOut->getData());
size_t sizeIn = dataIn->getLength() / sizeof(complexf);
size_t sizeOut = dataOut->getLength() / sizeof(complexf);
@@ -203,7 +204,7 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut)
myPaprAfterCFR.clear();
}
- for (size_t i = 0; i < myNbSymbols; ++i) {
+ for (size_t i = 0; i < myNbSymbols; i++) {
myFftIn[0][0] = 0;
myFftIn[0][1] = 0;
@@ -212,22 +213,20 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut)
* PosSrc=0 PosDst=1 PosSize=768
* NegSrc=768 NegDst=1280 NegSize=768
*/
- memset(&myFftIn[myZeroDst], 0, myZeroSize * sizeof(FFT_TYPE));
+ memset(&myFftIn[myZeroDst], 0, myZeroSize * sizeof(FFTW_TYPE));
memcpy(&myFftIn[myPosDst], &in[myPosSrc],
- myPosSize * sizeof(FFT_TYPE));
+ myPosSize * sizeof(FFTW_TYPE));
memcpy(&myFftIn[myNegDst], &in[myNegSrc],
- myNegSize * sizeof(FFT_TYPE));
-
+ myNegSize * sizeof(FFTW_TYPE));
if (myCfr) {
reference.resize(mySpacing);
memcpy(reinterpret_cast<fftwf_complex*>(reference.data()),
- myFftIn, mySpacing * sizeof(FFT_TYPE));
+ myFftIn, mySpacing * sizeof(FFTW_TYPE));
}
fftwf_execute(myFftPlan); // IFFT from myFftIn to myFftOut
-
if (myCfr) {
complexf *symbol = reinterpret_cast<complexf*>(myFftOut);
myPaprBeforeCFR.process_block(symbol, mySpacing);
@@ -235,7 +234,7 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut)
if (myMERCalcIndex == i) {
before_cfr.resize(mySpacing);
memcpy(reinterpret_cast<fftwf_complex*>(before_cfr.data()),
- myFftOut, mySpacing * sizeof(FFT_TYPE));
+ myFftOut, mySpacing * sizeof(FFTW_TYPE));
}
/* cfr_one_iteration runs the myFftPlan again at the end, and
@@ -277,7 +276,7 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut)
num_error_clip += stat.errclip_count;
}
- memcpy(out, myFftOut, mySpacing * sizeof(FFT_TYPE));
+ memcpy(out, myFftOut, mySpacing * sizeof(FFTW_TYPE));
in += myNbCarriers;
out += mySpacing;
@@ -308,14 +307,14 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut)
return sizeOut;
}
-OfdmGenerator::cfr_iter_stat_t OfdmGenerator::cfr_one_iteration(
+OfdmGeneratorCF32::cfr_iter_stat_t OfdmGeneratorCF32::cfr_one_iteration(
complexf *symbol, const complexf *reference)
{
// use std::norm instead of std::abs to avoid calculating the
// square roots
const float clip_squared = myCfrClip * myCfrClip;
- OfdmGenerator::cfr_iter_stat_t ret;
+ OfdmGeneratorCF32::cfr_iter_stat_t ret;
// Clip
for (size_t i = 0; i < mySpacing; i++) {
@@ -331,7 +330,7 @@ OfdmGenerator::cfr_iter_stat_t OfdmGenerator::cfr_one_iteration(
}
// Take FFT of our clipped signal
- memcpy(myCfrPostClip, symbol, mySpacing * sizeof(FFT_TYPE));
+ memcpy(myCfrPostClip, symbol, mySpacing * sizeof(FFTW_TYPE));
fftwf_execute(myCfrFft); // FFT from myCfrPostClip to myCfrPostFft
// Calculate the error in frequency domain by subtracting our reference
@@ -374,7 +373,7 @@ OfdmGenerator::cfr_iter_stat_t OfdmGenerator::cfr_one_iteration(
}
-void OfdmGenerator::set_parameter(const std::string& parameter,
+void OfdmGeneratorCF32::set_parameter(const std::string& parameter,
const std::string& value)
{
using namespace std;
@@ -404,7 +403,7 @@ void OfdmGenerator::set_parameter(const std::string& parameter,
}
}
-const std::string OfdmGenerator::get_parameter(const std::string& parameter) const
+const std::string OfdmGeneratorCF32::get_parameter(const std::string& parameter) const
{
using namespace std;
stringstream ss;
@@ -458,9 +457,333 @@ const std::string OfdmGenerator::get_parameter(const std::string& parameter) con
return ss.str();
}
-const json::map_t OfdmGenerator::get_all_values() const
+const json::map_t OfdmGeneratorCF32::get_all_values() const
{
json::map_t map;
// TODO needs rework of the values
return map;
}
+
+/* Set up the fixed-point OFDM generator, which runs its transform through
+ * KISS FFT.
+ *
+ * nbSymbols:  number of symbols handled by one process() call
+ * nbCarriers: number of input values per symbol
+ * spacing:    FFT size, i.e. number of output values per symbol
+ * inverse:    forwarded to kiss_fft_alloc(); also selects which part of the
+ *             input is copied to the low and to the high FFT bins
+ *
+ * Throws std::runtime_error if nbCarriers > spacing or if one of the FFT
+ * allocations fails.
+ */
+OfdmGeneratorFixed::OfdmGeneratorFixed(size_t nbSymbols,
+        size_t nbCarriers,
+        size_t spacing,
+        bool inverse) :
+    ModCodec(),
+    myNbSymbols(nbSymbols),
+    myNbCarriers(nbCarriers),
+    mySpacing(spacing)
+{
+    PDEBUG("OfdmGenerator::OfdmGenerator(%zu, %zu, %zu, %s) @ %p\n",
+            nbSymbols, nbCarriers, spacing, inverse ? "true" : "false", this);
+
+    etiLog.level(info) << "Using KISS FFT by Mark Borgerding for fixed-point transform";
+
+    if (nbCarriers > spacing) {
+        throw std::runtime_error("OfdmGenerator nbCarriers > spacing!");
+    }
+
+    // Bin 0 stays empty when the number of carriers is even
+    if (inverse) {
+        myPosDst = (nbCarriers & 1 ? 0 : 1);
+        myPosSrc = 0;
+        myPosSize = (nbCarriers + 1) / 2;
+        myNegDst = spacing - (nbCarriers / 2);
+        myNegSrc = (nbCarriers + 1) / 2;
+        myNegSize = nbCarriers / 2;
+    }
+    else {
+        myPosDst = (nbCarriers & 1 ? 0 : 1);
+        myPosSrc = nbCarriers / 2;
+        myPosSize = (nbCarriers + 1) / 2;
+        myNegDst = spacing - (nbCarriers / 2);
+        myNegSrc = 0;
+        myNegSize = nbCarriers / 2;
+    }
+    // Everything between the two copied ranges is zero-filled in process()
+    myZeroDst = myPosDst + myPosSize;
+    myZeroSize = myNegDst - myZeroDst;
+
+    PDEBUG("  myPosDst: %u\n", myPosDst);
+    PDEBUG("  myPosSrc: %u\n", myPosSrc);
+    PDEBUG("  myPosSize: %u\n", myPosSize);
+    PDEBUG("  myNegDst: %u\n", myNegDst);
+    PDEBUG("  myNegSrc: %u\n", myNegSrc);
+    PDEBUG("  myNegSize: %u\n", myNegSize);
+    PDEBUG("  myZeroDst: %u\n", myZeroDst);
+    PDEBUG("  myZeroSize: %u\n", myZeroSize);
+
+    const int N = mySpacing; // The size of the FFT
+
+    const size_t nbytes = N * sizeof(kiss_fft_cpx);
+    myFftIn = (kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes);
+    myFftOut = (kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes);
+    myKissCfg = kiss_fft_alloc(N, inverse, nullptr, nullptr);
+
+    // A failed allocation shows up as a null pointer. The destructor does
+    // not run when a constructor throws, so whatever was obtained has to be
+    // released here before bailing out.
+    if (!myFftIn || !myFftOut || !myKissCfg) {
+        if (myKissCfg) KISS_FFT_FREE(myKissCfg);
+        if (myFftIn) KISS_FFT_FREE(myFftIn);
+        if (myFftOut) KISS_FFT_FREE(myFftOut);
+        throw std::runtime_error("OfdmGeneratorFixed: FFT allocation failed");
+    }
+
+    memset(myFftIn, 0, nbytes);
+}
+
+/* Release the KISS FFT configuration and both work buffers; null pointers
+ * are skipped.
+ */
+OfdmGeneratorFixed::~OfdmGeneratorFixed()
+{
+    if (myKissCfg != nullptr) {
+        KISS_FFT_FREE(myKissCfg);
+    }
+
+    if (myFftIn != nullptr) {
+        KISS_FFT_FREE(myFftIn);
+    }
+
+    if (myFftOut != nullptr) {
+        KISS_FFT_FREE(myFftOut);
+    }
+}
+
+/* Transform myNbSymbols symbols with KISS FFT.
+ *
+ * dataIn must hold myNbSymbols * myNbCarriers kiss_fft_cpx values. dataOut
+ * is resized to myNbSymbols * mySpacing kiss_fft_cpx values and receives
+ * mySpacing FFT output values per symbol.
+ *
+ * Returns the number of kiss_fft_cpx values in dataOut.
+ * Throws std::runtime_error when a buffer length does not match.
+ */
+int OfdmGeneratorFixed::process(Buffer* const dataIn, Buffer* dataOut)
+{
+    dataOut->setLength(myNbSymbols * mySpacing * sizeof(kiss_fft_cpx));
+
+    kiss_fft_cpx *in = reinterpret_cast<kiss_fft_cpx*>(dataIn->getData());
+    kiss_fft_cpx *out = reinterpret_cast<kiss_fft_cpx*>(dataOut->getData());
+
+    size_t sizeIn = dataIn->getLength() / sizeof(kiss_fft_cpx);
+    size_t sizeOut = dataOut->getLength() / sizeof(kiss_fft_cpx);
+
+    if (sizeIn != myNbSymbols * myNbCarriers) {
+        PDEBUG("Nb symbols: %zu\n", myNbSymbols);
+        PDEBUG("Nb carriers: %zu\n", myNbCarriers);
+        PDEBUG("Spacing: %zu\n", mySpacing);
+        PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * myNbCarriers);
+        throw std::runtime_error(
+                "OfdmGenerator::process input size not valid!");
+    }
+    if (sizeOut != myNbSymbols * mySpacing) {
+        PDEBUG("Nb symbols: %zu\n", myNbSymbols);
+        PDEBUG("Nb carriers: %zu\n", myNbCarriers);
+        PDEBUG("Spacing: %zu\n", mySpacing);
+        // Print the output length: that is the value which failed this check
+        PDEBUG("\n%zu != %zu\n", sizeOut, myNbSymbols * mySpacing);
+        throw std::runtime_error(
+                "OfdmGenerator::process output size not valid!");
+    }
+
+    for (size_t i = 0; i < myNbSymbols; i++) {
+        // Bin 0 is not covered by the copies below when myPosDst is 1
+        myFftIn[0].r = 0;
+        myFftIn[0].i = 0;
+
+        /* For TM I this is:
+         * ZeroDst=769 ZeroSize=511
+         * PosSrc=0 PosDst=1 PosSize=768
+         * NegSrc=768 NegDst=1280 NegSize=768
+         */
+        memset(&myFftIn[myZeroDst], 0, myZeroSize * sizeof(kiss_fft_cpx));
+        memcpy(&myFftIn[myPosDst], &in[myPosSrc], myPosSize * sizeof(kiss_fft_cpx));
+        memcpy(&myFftIn[myNegDst], &in[myNegSrc], myNegSize * sizeof(kiss_fft_cpx));
+
+        kiss_fft(myKissCfg, myFftIn, myFftOut);
+
+        memcpy(out, myFftOut, mySpacing * sizeof(kiss_fft_cpx));
+
+        // Advance to the next symbol on both sides
+        in += myNbCarriers;
+        out += mySpacing;
+    }
+
+    return sizeOut;
+}
+
+#ifdef HAVE_DEXTER
+/* Open the DEXTER FFT accelerator through libiio and create one input and
+ * one output buffer for it.
+ *
+ * nbSymbols:  number of symbols handled by one process() call
+ * nbCarriers: number of input values per symbol
+ * spacing:    number of values per symbol sent to and read from the
+ *             accelerator
+ *
+ * Throws std::runtime_error if nbCarriers > spacing or if any IIO object
+ * cannot be obtained. IIO resources acquired before a failure are released
+ * again before the exception leaves the constructor.
+ */
+OfdmGeneratorDEXTER::OfdmGeneratorDEXTER(size_t nbSymbols,
+        size_t nbCarriers,
+        size_t spacing) :
+    ModCodec(),
+    myNbSymbols(nbSymbols),
+    myNbCarriers(nbCarriers),
+    mySpacing(spacing)
+{
+    PDEBUG("OfdmGeneratorDEXTER::OfdmGeneratorDEXTER(%zu, %zu, %zu) @ %p\n",
+            nbSymbols, nbCarriers, spacing, this);
+
+    etiLog.level(info) << "Using DEXTER FFT Accelerator for fixed-point transform";
+
+    if (nbCarriers > spacing) {
+        throw std::runtime_error("OfdmGenerator nbCarriers > spacing!");
+    }
+
+    // Bin 0 stays empty when the number of carriers is even
+    myPosDst = (nbCarriers & 1 ? 0 : 1);
+    myPosSrc = 0;
+    myPosSize = (nbCarriers + 1) / 2;
+    myNegDst = spacing - (nbCarriers / 2);
+    myNegSrc = (nbCarriers + 1) / 2;
+    myNegSize = nbCarriers / 2;
+
+    // Everything between the two copied ranges is zero-filled in process()
+    myZeroDst = myPosDst + myPosSize;
+    myZeroSize = myNegDst - myZeroDst;
+
+    PDEBUG("  myPosDst: %u\n", myPosDst);
+    PDEBUG("  myPosSrc: %u\n", myPosSrc);
+    PDEBUG("  myPosSize: %u\n", myPosSize);
+    PDEBUG("  myNegDst: %u\n", myNegDst);
+    PDEBUG("  myNegSrc: %u\n", myNegSrc);
+    PDEBUG("  myNegSize: %u\n", myNegSize);
+    PDEBUG("  myZeroDst: %u\n", myZeroDst);
+    PDEBUG("  myZeroSize: %u\n", myZeroSize);
+
+    const size_t nbytes_in = mySpacing * sizeof(complexfix);
+    const size_t nbytes_out = mySpacing * sizeof(complexfix_wide);
+
+    // The destructor is not run for an object whose constructor throws, so
+    // on failure everything acquired so far is released by hand, in the same
+    // order the destructor uses.
+    const auto release_iio = [this]() {
+        if (m_buf_in) {
+            iio_buffer_destroy(m_buf_in);
+            m_buf_in = nullptr;
+        }
+        if (m_buf_out) {
+            iio_buffer_destroy(m_buf_out);
+            m_buf_out = nullptr;
+        }
+        if (m_channel_in) {
+            iio_channel_disable(m_channel_in);
+            m_channel_in = nullptr;
+        }
+        if (m_channel_out) {
+            iio_channel_disable(m_channel_out);
+            m_channel_out = nullptr;
+        }
+        if (m_ctx) {
+            iio_context_destroy(m_ctx);
+            m_ctx = nullptr;
+        }
+    };
+
+#define IIO_ENSURE(expr, err) { \
+    if (!(expr)) { \
+        etiLog.log(error, "%s (%s:%d)\n", err, __FILE__, __LINE__); \
+        throw std::runtime_error("Failed to set FFT for OfdmGeneratorDEXTER"); \
+    } \
+}
+    try {
+        IIO_ENSURE((m_ctx = iio_create_default_context()), "No context");
+        IIO_ENSURE(m_dev_in = iio_context_find_device(m_ctx, "fft-accelerator-in"), "no dev");
+        IIO_ENSURE(m_dev_out = iio_context_find_device(m_ctx, "fft-accelerator-out"), "no dev");
+        IIO_ENSURE(m_channel_in = iio_device_find_channel(m_dev_in, "voltage0", true), "no channel");
+        IIO_ENSURE(m_channel_out = iio_device_find_channel(m_dev_out, "voltage0", false), "no channel");
+
+        iio_channel_enable(m_channel_in);
+        iio_channel_enable(m_channel_out);
+
+        // process() verifies that these buffers span exactly nbytes_in and
+        // nbytes_out bytes
+        m_buf_in = iio_device_create_buffer(m_dev_in, nbytes_in, false);
+        if (!m_buf_in) {
+            throw std::runtime_error("OfdmGeneratorDEXTER could not create in buffer");
+        }
+
+        m_buf_out = iio_device_create_buffer(m_dev_out, nbytes_out, false);
+        if (!m_buf_out) {
+            throw std::runtime_error("OfdmGeneratorDEXTER could not create out buffer");
+        }
+    }
+    catch (...) {
+        release_iio();
+        throw;
+    }
+    // Keep the helper macro local to this constructor
+#undef IIO_ENSURE
+}
+
+/* Tear down the IIO objects: buffers first, then the channels, and the
+ * context last. Handles that are null are skipped, and every released
+ * handle is reset to null.
+ */
+OfdmGeneratorDEXTER::~OfdmGeneratorDEXTER()
+{
+    // Buffers
+    if (m_buf_in != nullptr) {
+        iio_buffer_destroy(m_buf_in);
+        m_buf_in = nullptr;
+    }
+    if (m_buf_out != nullptr) {
+        iio_buffer_destroy(m_buf_out);
+        m_buf_out = nullptr;
+    }
+
+    // Channels
+    if (m_channel_in != nullptr) {
+        iio_channel_disable(m_channel_in);
+        m_channel_in = nullptr;
+    }
+    if (m_channel_out != nullptr) {
+        iio_channel_disable(m_channel_out);
+        m_channel_out = nullptr;
+    }
+
+    // Context
+    if (m_ctx != nullptr) {
+        iio_context_destroy(m_ctx);
+        m_ctx = nullptr;
+    }
+}
+
+/* Transform myNbSymbols symbols on the DEXTER FFT accelerator.
+ *
+ * dataIn must hold myNbSymbols * myNbCarriers complexfix values. dataOut is
+ * resized to myNbSymbols * mySpacing complexfix_wide values and receives
+ * mySpacing accelerator output values per symbol.
+ *
+ * Symbols are pushed one at a time; the result of a symbol is only fetched
+ * after the next one has been pushed, and the last result is fetched after
+ * the loop.
+ *
+ * Returns the number of complexfix_wide values in dataOut.
+ * Throws std::runtime_error on a length mismatch or on any IIO error.
+ */
+int OfdmGeneratorDEXTER::process(Buffer* const dataIn, Buffer* dataOut)
+{
+    dataOut->setLength(myNbSymbols * mySpacing * sizeof(complexfix_wide));
+
+    complexfix *in = reinterpret_cast<complexfix*>(dataIn->getData());
+    complexfix_wide *out = reinterpret_cast<complexfix_wide*>(dataOut->getData());
+
+    size_t sizeIn = dataIn->getLength() / sizeof(complexfix);
+    size_t sizeOut = dataOut->getLength() / sizeof(complexfix_wide);
+
+    if (sizeIn != myNbSymbols * myNbCarriers) {
+        PDEBUG("Nb symbols: %zu\n", myNbSymbols);
+        PDEBUG("Nb carriers: %zu\n", myNbCarriers);
+        PDEBUG("Spacing: %zu\n", mySpacing);
+        PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * myNbCarriers);
+        throw std::runtime_error(
+                "OfdmGenerator::process input size not valid!");
+    }
+    if (sizeOut != myNbSymbols * mySpacing) {
+        PDEBUG("Nb symbols: %zu\n", myNbSymbols);
+        PDEBUG("Nb carriers: %zu\n", myNbCarriers);
+        PDEBUG("Spacing: %zu\n", mySpacing);
+        // Print the output length: that is the value which failed this check
+        PDEBUG("\n%zu != %zu\n", sizeOut, myNbSymbols * mySpacing);
+        throw std::runtime_error("OfdmGenerator::process output size not valid!");
+    }
+
+    if (myNbSymbols == 0) {
+        // Nothing is pushed to the accelerator in this case, so there is no
+        // result to fetch after the loop either.
+        return sizeOut;
+    }
+
+    ptrdiff_t iio_buf_size = (uint8_t*)iio_buffer_end(m_buf_in) - (uint8_t*)iio_buffer_start(m_buf_in);
+    if (iio_buf_size != (ssize_t)(mySpacing * sizeof(complexfix))) {
+        throw std::runtime_error("OfdmGenerator::process incorrect iio buffer size!");
+    }
+
+    // Fetch one transformed symbol from the accelerator, validate its layout
+    // and append it to the output.
+    const auto receive_symbol = [&]() {
+        ssize_t nbytes_rx = iio_buffer_refill(m_buf_out);
+        if (nbytes_rx < 0) {
+            throw std::runtime_error("OfdmGenerator::process error refilling IIO buffer!");
+        }
+
+        ptrdiff_t p_inc = iio_buffer_step(m_buf_out);
+        if (p_inc != 1) {
+            throw std::runtime_error("OfdmGenerator::process Wrong p_inc");
+        }
+
+        // The FFT Accelerator takes 16-bit I + 16-bit Q, and outputs 32-bit I and 32-bit Q.
+        // The formatconvert will take care of this
+        const uint8_t *fft_out = (const uint8_t*)iio_buffer_first(m_buf_out, m_channel_out);
+        const uint8_t *fft_out_end = (const uint8_t*)iio_buffer_end(m_buf_out);
+        constexpr size_t sizeof_out_iq = sizeof(complexfix_wide);
+        const ptrdiff_t fft_out_len = fft_out_end - fft_out;
+        if (fft_out_len != (ssize_t)(mySpacing * sizeof_out_iq)) {
+            // %p expects a void pointer and a pointer difference needs %td
+            fprintf(stderr, "FFT_OUT: %p %p %td %zu\n",
+                    static_cast<const void*>(fft_out),
+                    static_cast<const void*>(fft_out_end),
+                    fft_out_len,
+                    mySpacing * sizeof_out_iq);
+            throw std::runtime_error("OfdmGenerator::process fft_out length invalid!");
+        }
+
+        memcpy(out, fft_out, mySpacing * sizeof_out_iq);
+
+        out += mySpacing;
+    };
+
+    for (size_t i = 0; i < myNbSymbols; i++) {
+        complexfix *fft_in = reinterpret_cast<complexfix*>(iio_buffer_start(m_buf_in));
+
+        /* For TM I this is:
+         * ZeroDst=769 ZeroSize=511
+         * PosSrc=0 PosDst=1 PosSize=768
+         * NegSrc=768 NegDst=1280 NegSize=768
+         */
+
+        fft_in[0] = static_cast<complexfix::value_type>(0);
+        for (size_t z = 0; z < myZeroSize; z++) {
+            fft_in[myZeroDst + z] = static_cast<complexfix::value_type>(0);
+        }
+
+        memcpy(&fft_in[myPosDst], &in[myPosSrc], myPosSize * sizeof(complexfix));
+        memcpy(&fft_in[myNegDst], &in[myNegSrc], myNegSize * sizeof(complexfix));
+
+        ssize_t nbytes_tx = iio_buffer_push(m_buf_in);
+        if (nbytes_tx < 0) {
+            throw std::runtime_error("OfdmGenerator::process error pushing IIO buffer!");
+        }
+
+        in += myNbCarriers;
+
+        // Keep one buffer in flight while we're doing shuffling data around here,
+        // this improves performance.
+        // I believe that, by default, IIO allocates four buffers in total.
+        if (i > 0) {
+            receive_symbol();
+        }
+    }
+
+    // Collect the result of the last symbol that was pushed
+    receive_symbol();
+
+    return sizeOut;
+}
+
+#endif // HAVE_DEXTER
diff --git a/src/OfdmGenerator.h b/src/OfdmGenerator.h
index dc1ad46..475b2a4 100644
--- a/src/OfdmGenerator.h
+++ b/src/OfdmGenerator.h
@@ -2,7 +2,7 @@
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty
the Queen in Right of Canada (Communications Research Center Canada)
- Copyright (C) 2023
+ Copyright (C) 2024
Matthias P. Braendli, matthias.braendli@mpb.li
http://opendigitalradio.org
@@ -33,27 +33,30 @@
#include "ModPlugin.h"
#include "RemoteControl.h"
#include "PAPRStats.h"
-#include "fftw3.h"
+#include "kiss_fft.h"
+
#include <cstddef>
-#include <vector>
-#include <complex>
#include <atomic>
+#include <fftw3.h>
-typedef std::complex<float> complexf;
+#ifdef HAVE_DEXTER
+# include <iio.h>
+#endif
-class OfdmGenerator : public ModCodec, public RemoteControllable
+// Complex Float uses FFTW
+class OfdmGeneratorCF32 : public ModCodec, public RemoteControllable
{
public:
- OfdmGenerator(size_t nbSymbols,
+ OfdmGeneratorCF32(size_t nbSymbols,
size_t nbCarriers,
size_t spacing,
bool& enableCfr,
float& cfrClip,
float& cfrErrorClip,
bool inverse = true);
- virtual ~OfdmGenerator();
- OfdmGenerator(const OfdmGenerator&) = delete;
- OfdmGenerator& operator=(const OfdmGenerator&) = delete;
+ virtual ~OfdmGeneratorCF32();
+ OfdmGeneratorCF32(const OfdmGeneratorCF32&) = delete;
+ OfdmGeneratorCF32& operator=(const OfdmGeneratorCF32&) = delete;
int process(Buffer* const dataIn, Buffer* dataOut) override;
const char* name() override { return "OfdmGenerator"; }
@@ -107,4 +110,76 @@ class OfdmGenerator : public ModCodec, public RemoteControllable
std::deque<double> myMERs;
};
+// Fixed point implementation uses KISS FFT with -DFIXED_POINT=32
+//
+// The object holds raw KISS FFT allocations (configuration plus two work
+// buffers); copying is disabled.
+class OfdmGeneratorFixed : public ModCodec
+{
+    public:
+        // nbCarriers input values and spacing output values per symbol,
+        // nbSymbols symbols per process() call
+        OfdmGeneratorFixed(size_t nbSymbols,
+                      size_t nbCarriers,
+                      size_t spacing,
+                      bool inverse = true);
+        virtual ~OfdmGeneratorFixed();
+        OfdmGeneratorFixed(const OfdmGeneratorFixed&) = delete;
+        OfdmGeneratorFixed& operator=(const OfdmGeneratorFixed&) = delete;
+
+        int process(Buffer* const dataIn, Buffer* dataOut) override;
+        const char* name() override { return "OfdmGenerator"; }
+
+    private:
+        kiss_fft_cfg myKissCfg = nullptr;
+        // Start out null like myKissCfg, so that the destructor's null
+        // checks never read an indeterminate pointer
+        kiss_fft_cpx *myFftIn = nullptr;
+        kiss_fft_cpx *myFftOut = nullptr;
+
+        const size_t myNbSymbols;
+        const size_t myNbCarriers;
+        const size_t mySpacing;
+
+        // Source offset in the input symbol, destination offset in the FFT
+        // input and length of the two copied ranges
+        unsigned myPosSrc;
+        unsigned myPosDst;
+        unsigned myPosSize;
+        unsigned myNegSrc;
+        unsigned myNegDst;
+        unsigned myNegSize;
+        // Range of the FFT input that is zero-filled
+        unsigned myZeroDst;
+        unsigned myZeroSize;
+};
+
+#ifdef HAVE_DEXTER
+// OFDM generator backed by the FFT accelerator found in the FPGA of the
+// PrecisionWave DEXTER device. The accelerator only does inverse FFTs.
+class OfdmGeneratorDEXTER : public ModCodec
+{
+    public:
+        OfdmGeneratorDEXTER(const OfdmGeneratorDEXTER&) = delete;
+        OfdmGeneratorDEXTER& operator=(const OfdmGeneratorDEXTER&) = delete;
+
+        OfdmGeneratorDEXTER(size_t nbSymbols,
+                      size_t nbCarriers,
+                      size_t spacing);
+        virtual ~OfdmGeneratorDEXTER();
+
+        const char* name() override { return "OfdmGenerator"; }
+        int process(Buffer* const dataIn, Buffer* dataOut) override;
+
+    private:
+        struct iio_context *m_ctx{nullptr};
+
+        // Naming follows the FFT Accelerator block's point of view:
+        // "in" feeds the block, "out" carries its results
+        struct iio_device *m_dev_in{nullptr};
+        struct iio_channel *m_channel_in{nullptr};
+        struct iio_buffer *m_buf_in{nullptr};
+
+        struct iio_device *m_dev_out{nullptr};
+        struct iio_channel *m_channel_out{nullptr};
+        struct iio_buffer *m_buf_out{nullptr};
+
+        const size_t myNbSymbols;
+        const size_t myNbCarriers;
+        const size_t mySpacing;
+
+        // Offsets and lengths used to lay out one symbol in the FFT input
+        unsigned myPosSrc;
+        unsigned myPosDst;
+        unsigned myPosSize;
+        unsigned myNegSrc;
+        unsigned myNegDst;
+        unsigned myNegSize;
+        unsigned myZeroDst;
+        unsigned myZeroSize;
+};
+#endif // HAVE_DEXTER
diff --git a/src/OutputMemory.cpp b/src/OutputMemory.cpp
index d6ef917..f673555 100644
--- a/src/OutputMemory.cpp
+++ b/src/OutputMemory.cpp
@@ -26,20 +26,14 @@
#include "OutputMemory.h"
#include "PcDebug.h"
-#include "Log.h"
-#include "TimestampDecoder.h"
-
-#include <stdexcept>
-#include <string.h>
-#include <math.h>
-
+#include <cmath>
OutputMemory::OutputMemory(Buffer* dataOut)
: ModOutput()
{
PDEBUG("OutputMemory::OutputMemory(%p) @ %p\n", dataOut, this);
- setOutput(dataOut);
+ m_dataOut = dataOut;
#if OUTPUT_MEM_HISTOGRAM
myMax = 0.0f;
@@ -49,7 +43,6 @@ OutputMemory::OutputMemory(Buffer* dataOut)
#endif
}
-
OutputMemory::~OutputMemory()
{
#if OUTPUT_MEM_HISTOGRAM
@@ -66,19 +59,12 @@ OutputMemory::~OutputMemory()
PDEBUG("OutputMemory::~OutputMemory() @ %p\n", this);
}
-
-void OutputMemory::setOutput(Buffer* dataOut)
-{
- myDataOut = dataOut;
-}
-
-
int OutputMemory::process(Buffer* dataIn)
{
PDEBUG("OutputMemory::process(dataIn: %p)\n",
dataIn);
- *myDataOut = *dataIn;
+ *m_dataOut = *dataIn;
#if OUTPUT_MEM_HISTOGRAM
const float* in = (const float*)dataIn->getData();
@@ -93,17 +79,17 @@ int OutputMemory::process(Buffer* dataIn)
}
#endif
- return myDataOut->getLength();
+ return m_dataOut->getLength();
}
meta_vec_t OutputMemory::process_metadata(const meta_vec_t& metadataIn)
{
- myMetadata = metadataIn;
+ m_metadata = metadataIn; // Keep a copy; get_latest_metadata() hands it out
return {};
}
meta_vec_t OutputMemory::get_latest_metadata()
{
- return myMetadata;
+ return m_metadata; // Copy of what process_metadata() stored last
}
diff --git a/src/OutputMemory.h b/src/OutputMemory.h
index f0a5fbb..299d31d 100644
--- a/src/OutputMemory.h
+++ b/src/OutputMemory.h
@@ -61,11 +61,9 @@ public:
meta_vec_t get_latest_metadata(void);
- void setOutput(Buffer* dataOut);
-
protected:
- Buffer* myDataOut;
- meta_vec_t myMetadata;
+ Buffer* m_dataOut;
+ meta_vec_t m_metadata;
#if OUTPUT_MEM_HISTOGRAM
// keep track of max value
diff --git a/src/PAPRStats.cpp b/src/PAPRStats.cpp
index 0c9764a..103f02f 100644
--- a/src/PAPRStats.cpp
+++ b/src/PAPRStats.cpp
@@ -33,7 +33,6 @@
# include <iostream>
#endif
-
PAPRStats::PAPRStats(size_t num_blocks_to_accumulate) :
m_num_blocks_to_accumulate(num_blocks_to_accumulate)
{
diff --git a/src/PAPRStats.h b/src/PAPRStats.h
index 86ad8b0..a4ded86 100644
--- a/src/PAPRStats.h
+++ b/src/PAPRStats.h
@@ -31,12 +31,9 @@
#endif
#include <cstddef>
-#include <vector>
#include <deque>
#include <complex>
-typedef std::complex<float> complexf;
-
/* Helper class to calculate Peak-to-average-power ratio.
* Definition of PAPR:
*
@@ -53,6 +50,8 @@ typedef std::complex<float> complexf;
*/
class PAPRStats
{
+ typedef std::complex<float> complexf;
+
public:
PAPRStats(size_t num_blocks_to_accumulate);
diff --git a/src/PhaseReference.cpp b/src/PhaseReference.cpp
index 568e15e..71dec87 100644
--- a/src/PhaseReference.cpp
+++ b/src/PhaseReference.cpp
@@ -29,12 +29,10 @@
#include <stdexcept>
-using complexf = std::complex<float>;
-
/* ETSI EN 300 401 Table 43 (Clause 14.3.2)
* Contains h_{i,k} values
*/
-const uint8_t PhaseReference::d_h[4][32] = {
+static const uint8_t d_h[4][32] = {
/* h0 */ { 0, 2, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 2, 2, 1, 1,
0, 2, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 2, 2, 1, 1 },
/* h1 */ { 0, 3, 2, 3, 0, 1, 3, 0, 2, 1, 2, 3, 2, 3, 3, 0,
@@ -54,41 +52,80 @@ const uint8_t PhaseReference::d_h[4][32] = {
* Tables 44 to 47 describe the frequency interleaving done in
* FrequencyInterleaver.
*/
-PhaseReference::PhaseReference(unsigned int dabmode) :
+PhaseReference::PhaseReference(unsigned int dabmode, bool fixedPoint) : // fixedPoint selects complexfix instead of complexf phase reference data
ModInput(),
- d_dabmode(dabmode)
+ d_dabmode(dabmode),
+ d_fixedPoint(fixedPoint)
{
PDEBUG("PhaseReference::PhaseReference(%u) @ %p\n", dabmode, this);
switch (d_dabmode) {
case 1:
d_carriers = 1536;
- d_num = 2048;
break;
case 2:
d_carriers = 384;
- d_num = 512;
break;
case 3:
d_carriers = 192;
- d_num = 256;
break;
case 4:
d_dabmode = 0;
case 0:
d_carriers = 768;
- d_num = 1024;
break;
default:
throw std::runtime_error(
"PhaseReference::PhaseReference DAB mode not valid!");
}
- d_dataIn.resize(d_carriers);
- fillData();
+
+ if (d_fixedPoint) { // Only the generator for the selected sample type is filled
+ d_phaseRefFixed.fillData(d_dabmode, d_carriers);
+ }
+ else {
+ d_phaseRefCF32.fillData(d_dabmode, d_carriers);
+ }
}
-complexf convert(uint8_t data) {
+static const int table[][48][2] = { // Per mode: { row of d_h, offset added to the d_h value before convert() }, one pair per run of 32 values in fillData()
+ { // Mode 0/4
+ // Positive part
+ { 0, 0 }, { 3, 1 }, { 2, 0 }, { 1, 2 }, { 0, 0 }, { 3, 1 },
+ { 2, 2 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 3 }, { 1, 0 },
+ // Negative part
+ { 0, 0 }, { 1, 1 }, { 2, 1 }, { 3, 2 }, { 0, 2 }, { 1, 2 },
+ { 2, 0 }, { 3, 3 }, { 0, 3 }, { 1, 1 }, { 2, 3 }, { 3, 2 },
+ },
+ { // Mode 1
+ // Positive part
+ { 0, 3 }, { 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 2 }, { 3, 2 },
+ { 2, 1 }, { 1, 0 }, { 0, 2 }, { 3, 2 }, { 2, 3 }, { 1, 3 },
+ { 0, 0 }, { 3, 2 }, { 2, 1 }, { 1, 3 }, { 0, 3 }, { 3, 3 },
+ { 2, 3 }, { 1, 0 }, { 0, 3 }, { 3, 0 }, { 2, 1 }, { 1, 1 },
+ // Negative part
+ { 0, 1 }, { 1, 2 }, { 2, 0 }, { 3, 1 }, { 0, 3 }, { 1, 2 },
+ { 2, 2 }, { 3, 3 }, { 0, 2 }, { 1, 1 }, { 2, 2 }, { 3, 3 },
+ { 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 3 }, { 0, 2 }, { 1, 2 },
+ { 2, 2 }, { 3, 1 }, { 0, 1 }, { 1, 3 }, { 2, 1 }, { 3, 2 },
+ },
+ { // Mode 2
+ // Positive part
+ { 2, 0 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 0 }, { 1, 3 },
+ // Negative part
+ { 0, 2 }, { 1, 3 }, { 2, 2 }, { 3, 2 }, { 0, 1 }, { 1, 2 },
+ },
+ { // Mode 3
+ // Positive part
+ { 3, 2 }, { 2, 2 }, { 1, 2 },
+ // Negative part
+ { 0, 2 }, { 1, 3 }, { 2, 0 },
+ },
+};
+
+
+template <>
+complexf PhaseRefGen<complexf>::convert(uint8_t data) {
const complexf value[] = {
complexf(1, 0),
complexf(0, 1),
@@ -98,62 +135,37 @@ complexf convert(uint8_t data) {
return value[data % 4];
}
+template <>
+complexfix PhaseRefGen<complexfix>::convert(uint8_t data) { // Selects one of four unit values by data % 4; assuming complexfix(re, im) these are 1, j, -1, -j
+ constexpr auto one = fixed_16{1};
+ constexpr auto zero = fixed_16{0};
-void PhaseReference::fillData()
-{
- const int table[][48][2] = {
- { // Mode 0/4
- // Positive part
- { 0, 0 }, { 3, 1 }, { 2, 0 }, { 1, 2 }, { 0, 0 }, { 3, 1 },
- { 2, 2 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 3 }, { 1, 0 },
- // Negative part
- { 0, 0 }, { 1, 1 }, { 2, 1 }, { 3, 2 }, { 0, 2 }, { 1, 2 },
- { 2, 0 }, { 3, 3 }, { 0, 3 }, { 1, 1 }, { 2, 3 }, { 3, 2 },
- },
- { // Mode 1
- // Positive part
- { 0, 3 }, { 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 2 }, { 3, 2 },
- { 2, 1 }, { 1, 0 }, { 0, 2 }, { 3, 2 }, { 2, 3 }, { 1, 3 },
- { 0, 0 }, { 3, 2 }, { 2, 1 }, { 1, 3 }, { 0, 3 }, { 3, 3 },
- { 2, 3 }, { 1, 0 }, { 0, 3 }, { 3, 0 }, { 2, 1 }, { 1, 1 },
- // Negative part
- { 0, 1 }, { 1, 2 }, { 2, 0 }, { 3, 1 }, { 0, 3 }, { 1, 2 },
- { 2, 2 }, { 3, 3 }, { 0, 2 }, { 1, 1 }, { 2, 2 }, { 3, 3 },
- { 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 3 }, { 0, 2 }, { 1, 2 },
- { 2, 2 }, { 3, 1 }, { 0, 1 }, { 1, 3 }, { 2, 1 }, { 3, 2 },
- },
- { // Mode 2
- // Positive part
- { 2, 0 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 0 }, { 1, 3 },
- // Negative part
- { 0, 2 }, { 1, 3 }, { 2, 2 }, { 3, 2 }, { 0, 1 }, { 1, 2 },
- },
- { // Mode 3
- // Positive part
- { 3, 2 }, { 2, 2 }, { 1, 2 },
- // Negative part
- { 0, 2 }, { 1, 3 }, { 2, 0 },
- },
+ const complexfix value[] = {
+ complexfix(one, zero),
+ complexfix(zero, one),
+ complexfix(-one, zero),
+ complexfix(zero, -one),
};
+ return value[data % 4]; // The modulo keeps any uint8_t input inside the four-entry array
+}
- if (d_dabmode > 3) {
- throw std::runtime_error(
- "PhaseReference::fillData invalid DAB mode!");
- }
-
- if (d_dataIn.size() != d_carriers) {
+template <typename T>
+void PhaseRefGen<T>::fillData(unsigned int dabmode, size_t carriers) // Resizes dataIn to `carriers` entries and fills them from d_h and table[dabmode]
+{
+ dataIn.resize(carriers);
+ if (dataIn.size() != carriers) {
throw std::runtime_error(
- "PhaseReference::fillData d_dataIn has incorrect size!");
+ "PhaseReference::fillData dataIn has incorrect size!");
}
for (size_t index = 0,
offset = 0;
- index < d_dataIn.size();
+ index < dataIn.size();
++offset) {
for (size_t k = 0; k < 32; ++k) {
- d_dataIn[index++] = convert(
- d_h[ table[d_dabmode][offset][0] ][k] +
- table[d_dabmode][offset][1] );
+ dataIn[index++] = convert( // Each table entry (offset) yields 32 consecutive values
+ d_h[ table[dabmode][offset][0] ][k] + // NOTE(review): dabmode and offset index table[] without a range check here; the PhaseReference constructor passes modes 0..3 and carrier counts that are multiples of 32 - confirm for any other caller
+ table[dabmode][offset][1] );
}
}
}
@@ -163,7 +175,12 @@ int PhaseReference::process(Buffer* dataOut)
{
PDEBUG("PhaseReference::process(dataOut: %p)\n", dataOut);
- dataOut->setData(&d_dataIn[0], d_carriers * sizeof(complexf));
+ if (d_fixedPoint) {
+ dataOut->setData(d_phaseRefFixed.dataIn.data(), d_carriers * sizeof(complexfix));
+ }
+ else {
+ dataOut->setData(d_phaseRefCF32.dataIn.data(), d_carriers * sizeof(complexf));
+ }
return 1;
}
diff --git a/src/PhaseReference.h b/src/PhaseReference.h
index 6ecdc4e..735009c 100644
--- a/src/PhaseReference.h
+++ b/src/PhaseReference.h
@@ -32,25 +32,33 @@
#include "ModPlugin.h"
-#include <cstddef>
-#include <complex>
#include <vector>
+#include <cstddef>
+
+template <typename T>
+struct PhaseRefGen { // Holds phase reference values of sample type T
+ std::vector<T> dataIn; // Filled by fillData()
+ void fillData(unsigned int dabmode, size_t carriers); // Resizes dataIn to `carriers` entries and computes them
+
+ private:
+ T convert(uint8_t data); // Specialised per sample type in PhaseReference.cpp
+};
+
class PhaseReference : public ModInput
{
public:
- PhaseReference(unsigned int dabmode);
+ PhaseReference(unsigned int dabmode, bool fixedPoint);
int process(Buffer* dataOut) override;
const char* name() override { return "PhaseReference"; }
protected:
unsigned int d_dabmode;
+ bool d_fixedPoint; // Chooses which of the two generators below is filled and used by process()
size_t d_carriers;
- size_t d_num;
- const static uint8_t d_h[4][32];
- std::vector<std::complex<float> > d_dataIn;
- void fillData();
+ PhaseRefGen<complexf> d_phaseRefCF32; // Used when d_fixedPoint is false
+ PhaseRefGen<complexfix> d_phaseRefFixed; // Used when d_fixedPoint is true
};
diff --git a/src/QpskSymbolMapper.cpp b/src/QpskSymbolMapper.cpp
index e26853a..c12ad80 100644
--- a/src/QpskSymbolMapper.cpp
+++ b/src/QpskSymbolMapper.cpp
@@ -23,7 +23,6 @@
#include <cstdio>
#include <cstring>
#include <stdexcept>
-#include <complex>
#include <cmath>
#ifdef __SSE__
# include <xmmintrin.h>
@@ -32,12 +31,10 @@
#include "QpskSymbolMapper.h"
#include "PcDebug.h"
-
-typedef std::complex<float> complexf;
-
-QpskSymbolMapper::QpskSymbolMapper(size_t carriers) :
+QpskSymbolMapper::QpskSymbolMapper(size_t carriers, bool fixedPoint) : // fixedPoint makes process() emit complexfix instead of complexf symbols
ModCodec(),
- d_carriers(carriers) { }
+ m_fixedPoint(fixedPoint),
+ m_carriers(carriers) { }
int QpskSymbolMapper::process(Buffer* const dataIn, Buffer* dataOut)
{
@@ -45,112 +42,172 @@ int QpskSymbolMapper::process(Buffer* const dataIn, Buffer* dataOut)
"(dataIn: %p, dataOut: %p)\n",
dataIn, dataOut);
- dataOut->setLength(dataIn->getLength() * 4 * 2 * sizeof(float)); // 4 output complex symbols per input byte
-#ifdef __SSE__
- const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData());
- __m128* out = reinterpret_cast<__m128*>(dataOut->getData());
-
- if (dataIn->getLength() % (d_carriers / 4) != 0) {
- throw std::runtime_error(
- "QpskSymbolMapper::process input size not valid: " +
- std::to_string(dataIn->getLength()) +
- "(input size) % (" + std::to_string(d_carriers) +
- " (carriers) / 4) != 0");
- }
+ // 4 output complex symbols per input byte
+
+ if (m_fixedPoint) {
+ dataOut->setLength(dataIn->getLength() * 4 * sizeof(complexfix));
+
+ using fixed_t = complexfix::value_type;
- const static __m128 symbols[16] = {
- _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2),
- _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2),
- _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2),
- _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2),
- _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2),
- _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2),
- _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2),
- _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2),
- _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2),
- _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2),
- _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, M_SQRT1_2, M_SQRT1_2),
- _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2),
- _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2),
- _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2),
- _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2),
- _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2)
- };
- size_t inOffset = 0;
- size_t outOffset = 0;
- uint8_t tmp = 0;
- for (size_t i = 0; i < dataIn->getLength(); i += d_carriers / 4) {
- for (size_t j = 0; j < d_carriers / 8; ++j) {
- tmp = (in[inOffset] & 0xc0) >> 4;
- tmp |= (in[inOffset + (d_carriers / 8)] & 0xc0) >> 6;
- out[outOffset] = symbols[tmp];
- tmp = (in[inOffset] & 0x30) >> 2;
- tmp |= (in[inOffset + (d_carriers / 8)] & 0x30) >> 4;
- out[outOffset + 1] = symbols[tmp];
- tmp = (in[inOffset] & 0x0c);
- tmp |= (in[inOffset + (d_carriers / 8)] & 0x0c) >> 2;
- out[outOffset + 2] = symbols[tmp];
- tmp = (in[inOffset] & 0x03) << 2;
- tmp |= (in[inOffset + (d_carriers / 8)] & 0x03);
- out[outOffset + 3] = symbols[tmp];
- ++inOffset;
- outOffset += 4;
+ const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData());
+ fixed_t* out = reinterpret_cast<fixed_t*>(dataOut->getData());
+
+ if (dataIn->getLength() % (m_carriers / 4) != 0) {
+ throw std::runtime_error(
+ "QpskSymbolMapper::process input size not valid!");
+ }
+
+ constexpr fixed_t v = static_cast<fixed_t>(M_SQRT1_2);
+
+ const static fixed_t symbols[16][4] = {
+ { v, v, v, v},
+ { v, v, v, -v},
+ { v, -v, v, v},
+ { v, -v, v, -v},
+ { v, v, -v, v},
+ { v, v, -v, -v},
+ { v, -v, -v, v},
+ { v, -v, -v, -v},
+ {-v, v, v, v},
+ {-v, v, v, -v},
+ {-v, -v, v, v},
+ {-v, -v, v, -v},
+ {-v, v, -v, v},
+ {-v, v, -v, -v},
+ {-v, -v, -v, v},
+ {-v, -v, -v, -v}
+ };
+ size_t inOffset = 0;
+ size_t outOffset = 0;
+ uint8_t tmp;
+ for (size_t i = 0; i < dataIn->getLength(); i += m_carriers / 4) {
+ for (size_t j = 0; j < m_carriers / 8; ++j) {
+ tmp = (in[inOffset] & 0xc0) >> 4;
+ tmp |= (in[inOffset + (m_carriers / 8)] & 0xc0) >> 6;
+ memcpy(&out[outOffset], symbols[tmp], sizeof(fixed_t) * 4);
+ tmp = (in[inOffset] & 0x30) >> 2;
+ tmp |= (in[inOffset + (m_carriers / 8)] & 0x30) >> 4;
+ memcpy(&out[outOffset + 4], symbols[tmp], sizeof(fixed_t) * 4);
+ tmp = (in[inOffset] & 0x0c);
+ tmp |= (in[inOffset + (m_carriers / 8)] & 0x0c) >> 2;
+ memcpy(&out[outOffset + 8], symbols[tmp], sizeof(fixed_t) * 4);
+ tmp = (in[inOffset] & 0x03) << 2;
+ tmp |= (in[inOffset + (m_carriers / 8)] & 0x03);
+ memcpy(&out[outOffset + 12], symbols[tmp], sizeof(fixed_t) * 4);
+ ++inOffset;
+ outOffset += 4*4;
+ }
+ inOffset += m_carriers / 8;
}
- inOffset += d_carriers / 8;
}
+ else {
+ dataOut->setLength(dataIn->getLength() * 4 * sizeof(complexf));
+#ifdef __SSE__
+ const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData());
+ __m128* out = reinterpret_cast<__m128*>(dataOut->getData());
+
+ if (dataIn->getLength() % (m_carriers / 4) != 0) {
+ throw std::runtime_error(
+ "QpskSymbolMapper::process input size not valid: " +
+ std::to_string(dataIn->getLength()) +
+ "(input size) % (" + std::to_string(m_carriers) +
+ " (carriers) / 4) != 0");
+ }
+
+ const static __m128 symbols[16] = {
+ _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2),
+ _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2),
+ _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2),
+ _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2),
+ _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2),
+ _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2),
+ _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2),
+ _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2),
+ _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2),
+ _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2),
+ _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2),
+ _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2),
+ _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2),
+ _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2),
+ _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2),
+ _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2)
+ };
+ size_t inOffset = 0;
+ size_t outOffset = 0;
+ uint8_t tmp = 0;
+ for (size_t i = 0; i < dataIn->getLength(); i += m_carriers / 4) {
+ for (size_t j = 0; j < m_carriers / 8; ++j) {
+ tmp = (in[inOffset] & 0xc0) >> 4;
+ tmp |= (in[inOffset + (m_carriers / 8)] & 0xc0) >> 6;
+ out[outOffset] = symbols[tmp];
+ tmp = (in[inOffset] & 0x30) >> 2;
+ tmp |= (in[inOffset + (m_carriers / 8)] & 0x30) >> 4;
+ out[outOffset + 1] = symbols[tmp];
+ tmp = (in[inOffset] & 0x0c);
+ tmp |= (in[inOffset + (m_carriers / 8)] & 0x0c) >> 2;
+ out[outOffset + 2] = symbols[tmp];
+ tmp = (in[inOffset] & 0x03) << 2;
+ tmp |= (in[inOffset + (m_carriers / 8)] & 0x03);
+ out[outOffset + 3] = symbols[tmp];
+ ++inOffset;
+ outOffset += 4;
+ }
+ inOffset += m_carriers / 8;
+ }
#else // !__SSE__
- const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData());
- float* out = reinterpret_cast<float*>(dataOut->getData());
- if (dataIn->getLength() % (d_carriers / 4) != 0) {
- throw std::runtime_error(
- "QpskSymbolMapper::process input size not valid!");
- }
- if (dataOut->getLength() / sizeof(float) != dataIn->getLength() * 4 * 2) { // 4 output complex symbols per input byte
- throw std::runtime_error(
- "QpskSymbolMapper::process output size not valid!");
- }
+ const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData());
+ float* out = reinterpret_cast<float*>(dataOut->getData());
+ if (dataIn->getLength() % (m_carriers / 4) != 0) {
+ throw std::runtime_error(
+ "QpskSymbolMapper::process input size not valid!");
+ }
+ if (dataOut->getLength() / sizeof(float) != dataIn->getLength() * 4 * 2) { // 4 output complex symbols per input byte
+ throw std::runtime_error(
+ "QpskSymbolMapper::process output size not valid!");
+ }
- const static float symbols[16][4] = {
- { M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2},
- { M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2},
- { M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2},
- { M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2},
- { M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2},
- { M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2},
- { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2},
- { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2},
- {-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2},
- {-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2},
- {-M_SQRT1_2,- M_SQRT1_2, M_SQRT1_2, M_SQRT1_2},
- {-M_SQRT1_2,- M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2},
- {-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2},
- {-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2},
- {-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2},
- {-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}
- };
- size_t inOffset = 0;
- size_t outOffset = 0;
- uint8_t tmp;
- for (size_t i = 0; i < dataIn->getLength(); i += d_carriers / 4) {
- for (size_t j = 0; j < d_carriers / 8; ++j) {
- tmp = (in[inOffset] & 0xc0) >> 4;
- tmp |= (in[inOffset + (d_carriers / 8)] & 0xc0) >> 6;
- memcpy(&out[outOffset], symbols[tmp], sizeof(float) * 4);
- tmp = (in[inOffset] & 0x30) >> 2;
- tmp |= (in[inOffset + (d_carriers / 8)] & 0x30) >> 4;
- memcpy(&out[outOffset + 4], symbols[tmp], sizeof(float) * 4);
- tmp = (in[inOffset] & 0x0c);
- tmp |= (in[inOffset + (d_carriers / 8)] & 0x0c) >> 2;
- memcpy(&out[outOffset + 8], symbols[tmp], sizeof(float) * 4);
- tmp = (in[inOffset] & 0x03) << 2;
- tmp |= (in[inOffset + (d_carriers / 8)] & 0x03);
- memcpy(&out[outOffset + 12], symbols[tmp], sizeof(float) * 4);
- ++inOffset;
- outOffset += 4*4;
+ const static float symbols[16][4] = {
+ { M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2},
+ { M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2},
+ { M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2},
+ { M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2},
+ { M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2},
+ { M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2},
+ { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2},
+ { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2},
+ {-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2},
+ {-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2},
+ {-M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2},
+ {-M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2},
+ {-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2},
+ {-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2},
+ {-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2},
+ {-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}
+ };
+ size_t inOffset = 0;
+ size_t outOffset = 0;
+ uint8_t tmp;
+ for (size_t i = 0; i < dataIn->getLength(); i += m_carriers / 4) {
+ for (size_t j = 0; j < m_carriers / 8; ++j) {
+ tmp = (in[inOffset] & 0xc0) >> 4;
+ tmp |= (in[inOffset + (m_carriers / 8)] & 0xc0) >> 6;
+ memcpy(&out[outOffset], symbols[tmp], sizeof(float) * 4);
+ tmp = (in[inOffset] & 0x30) >> 2;
+ tmp |= (in[inOffset + (m_carriers / 8)] & 0x30) >> 4;
+ memcpy(&out[outOffset + 4], symbols[tmp], sizeof(float) * 4);
+ tmp = (in[inOffset] & 0x0c);
+ tmp |= (in[inOffset + (m_carriers / 8)] & 0x0c) >> 2;
+ memcpy(&out[outOffset + 8], symbols[tmp], sizeof(float) * 4);
+ tmp = (in[inOffset] & 0x03) << 2;
+ tmp |= (in[inOffset + (m_carriers / 8)] & 0x03);
+ memcpy(&out[outOffset + 12], symbols[tmp], sizeof(float) * 4);
+ ++inOffset;
+ outOffset += 4*4;
+ }
+ inOffset += m_carriers / 8;
}
- inOffset += d_carriers / 8;
- }
#endif // __SSE__
+ }
return 1;
}
diff --git a/src/QpskSymbolMapper.h b/src/QpskSymbolMapper.h
index dbcf4dd..6cf7a2e 100644
--- a/src/QpskSymbolMapper.h
+++ b/src/QpskSymbolMapper.h
@@ -31,12 +31,13 @@
class QpskSymbolMapper : public ModCodec
{
public:
- QpskSymbolMapper(size_t carriers);
+ QpskSymbolMapper(size_t carriers, bool fixedPoint);
int process(Buffer* const dataIn, Buffer* dataOut);
const char* name() { return "QpskSymbolMapper"; }
protected:
- size_t d_carriers;
+ bool m_fixedPoint;
+ size_t m_carriers;
};
diff --git a/src/Resampler.h b/src/Resampler.h
index d1a9f7a..2c810f6 100644
--- a/src/Resampler.h
+++ b/src/Resampler.h
@@ -37,9 +37,6 @@
#define FFT_TYPE fftwf_complex
#define FFT_PLAN fftwf_plan
-#include <complex>
-typedef std::complex<float> complexf;
-
class Resampler : public ModCodec
{
diff --git a/src/SignalMultiplexer.cpp b/src/SignalMultiplexer.cpp
index 1d95bdd..d4955d0 100644
--- a/src/SignalMultiplexer.cpp
+++ b/src/SignalMultiplexer.cpp
@@ -22,25 +22,20 @@
#include "SignalMultiplexer.h"
#include "PcDebug.h"
-#include <stdio.h>
-#include <stdexcept>
+#include <cstdio>
#include <assert.h>
-#include <string.h>
-SignalMultiplexer::SignalMultiplexer(size_t framesize) :
- ModMux(),
- d_frameSize(framesize)
+SignalMultiplexer::SignalMultiplexer() :
+ ModMux()
{
- PDEBUG("SignalMultiplexer::SignalMultiplexer(%zu) @ %p\n", framesize, this);
-
+ PDEBUG("SignalMultiplexer::SignalMultiplexer() @ %p\n", this);
}
SignalMultiplexer::~SignalMultiplexer()
{
PDEBUG("SignalMultiplexer::~SignalMultiplexer() @ %p\n", this);
-
}
diff --git a/src/SignalMultiplexer.h b/src/SignalMultiplexer.h
index 5186a8d..1f6bc12 100644
--- a/src/SignalMultiplexer.h
+++ b/src/SignalMultiplexer.h
@@ -36,7 +36,7 @@
class SignalMultiplexer : public ModMux
{
public:
- SignalMultiplexer(size_t frameSize);
+ SignalMultiplexer();
virtual ~SignalMultiplexer();
SignalMultiplexer(const SignalMultiplexer&);
SignalMultiplexer& operator=(const SignalMultiplexer&);
@@ -44,8 +44,5 @@ public:
int process(std::vector<Buffer*> dataIn, Buffer* dataOut);
const char* name() { return "SignalMultiplexer"; }
-
-protected:
- size_t d_frameSize;
};
diff --git a/src/TII.cpp b/src/TII.cpp
index 2656cbf..bce15aa 100644
--- a/src/TII.cpp
+++ b/src/TII.cpp
@@ -2,7 +2,7 @@
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty
the Queen in Right of Canada (Communications Research Center Canada)
- Copyright (C) 2023
+ Copyright (C) 2024
Matthias P. Braendli, matthias.braendli@mpb.li
http://opendigitalradio.org
@@ -27,11 +27,8 @@
#include "TII.h"
#include "PcDebug.h"
-#include <stdio.h>
-#include <stdexcept>
-#include <string.h>
-
-typedef std::complex<float> complexf;
+#include <cstdio>
+#include <cstring>
/* TII pattern for TM I, II, IV */
const int pattern_tm1_2_4[][8] = { // {{{
@@ -106,11 +103,12 @@ const int pattern_tm1_2_4[][8] = { // {{{
{1,1,1,0,1,0,0,0},
{1,1,1,1,0,0,0,0} }; // }}}
-TII::TII(unsigned int dabmode, tii_config_t& tii_config) :
+TII::TII(unsigned int dabmode, tii_config_t& tii_config, bool fixedPoint) :
ModCodec(),
RemoteControllable("tii"),
m_dabmode(dabmode),
- m_conf(tii_config)
+ m_conf(tii_config),
+ m_fixedPoint(fixedPoint)
{
PDEBUG("TII::TII(%u) @ %p\n", dabmode, this);
@@ -171,56 +169,72 @@ const char* TII::name()
return m_name.c_str();
}
+template<typename T>
+void do_process(size_t carriers, bool old_variant, const std::vector<bool>& Acp, Buffer* dataIn, Buffer* dataOut)
+{
+ const T* in = reinterpret_cast<const T*>(dataIn->getData());
+ T* out = reinterpret_cast<T*>(dataOut->getData());
+
+ /* Normalise the TII carrier power according to ETSI TR 101 496-3
+ * Clause 5.4.2.2 Paragraph 7:
+ *
+ * > The ratio of carriers in a TII symbol to a normal DAB symbol
+ * > is 1:48 for all Modes, so that the signal power in a TII symbol is
+ * > 16 dB below the signal power of the other symbols.
+ *
+ * This is because we only enable 32 out of 1536 carriers, not because
+ * every carrier is lower power.
+ */
+ for (size_t i = 0; i < Acp.size(); i++) {
+ /* See header file for an explanation of the old variant.
+ *
+ * A_{c,p}(k) and A_{c,p}(k-1) are never both simultaneously true,
+ * so instead of doing the sum inside z_{m,0,k}, we could do
+ *
+ * if (m_Acp[i]) out[i] = in[i];
+ * if (m_Acp[i-1]) out[i] = in[i-1]
+ *
+ * (Considering only the new variant)
+ *
+ * To avoid messing with indices, we substitute j = i-1
+ *
+ * if (m_Acp[i]) out[i] = in[i];
+ * if (m_Acp[j]) out[j+1] = in[j]
+ *
+ * and fuse the two conditionals together:
+ */
+ if (Acp[i]) {
+ out[i] = in[i];
+ out[i+1] = (old_variant ? in[i+1] : in[i]);
+ }
+ }
+}
int TII::process(Buffer* dataIn, Buffer* dataOut)
{
+ const size_t sizeof_samples = m_fixedPoint ? sizeof(complexfix) : sizeof(complexf);
+
PDEBUG("TII::process(dataOut: %p)\n",
dataOut);
if ( (dataIn == NULL) or
- (dataIn->getLength() != m_carriers * sizeof(complexf))) {
+ (dataIn->getLength() != m_carriers * sizeof_samples)) {
throw TIIError("TII::process input size not valid!");
}
- dataOut->setLength(m_carriers * sizeof(complexf));
- memset(dataOut->getData(), 0, dataOut->getLength());
+ dataOut->setLength(m_carriers * sizeof_samples);
+ memset(dataOut->getData(), 0, dataOut->getLength());
if (m_conf.enable and m_insert) {
std::lock_guard<std::mutex> lock(m_enabled_carriers_mutex);
- complexf* in = reinterpret_cast<complexf*>(dataIn->getData());
- complexf* out = reinterpret_cast<complexf*>(dataOut->getData());
-
- /* Normalise the TII carrier power according to ETSI TR 101 496-3
- * Clause 5.4.2.2 Paragraph 7:
- *
- * > The ratio of carriers in a TII symbol to a normal DAB symbol
- * > is 1:48 for all Modes, so that the signal power in a TII symbol is
- * > 16 dB below the signal power of the other symbols.
- *
- * This is because we only enable 32 out of 1536 carriers, not because
- * every carrier is lower power.
- */
- for (size_t i = 0; i < m_Acp.size(); i++) {
- /* See header file for an explanation of the old variant.
- *
- * A_{c,p}(k) and A_{c,p}(k-1) are never both simultaneously true,
- * so instead of doing the sum inside z_{m,0,k}, we could do
- *
- * if (m_Acp[i]) out[i] = in[i];
- * if (m_Acp[i-1]) out[i] = in[i-1]
- *
- * (Considering only the new variant)
- *
- * To avoid messing with indices, we substitute j = i-1
- *
- * if (m_Acp[i]) out[i] = in[i];
- * if (m_Acp[j]) out[j+1] = in[j]
- *
- * and fuse the two conditionals together:
- */
- if (m_Acp[i]) {
- out[i] = in[i];
- out[i+1] = (m_conf.old_variant ? in[i+1] : in[i]);
- }
+ if (m_fixedPoint) {
+ do_process<complexfix>(
+ m_carriers, m_conf.old_variant, m_Acp,
+ dataIn, dataOut);
+ }
+ else {
+ do_process<complexf>(
+ m_carriers, m_conf.old_variant, m_Acp,
+ dataIn, dataOut);
}
}
diff --git a/src/TII.h b/src/TII.h
index f6de70b..6fe4d4f 100644
--- a/src/TII.h
+++ b/src/TII.h
@@ -2,7 +2,7 @@
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty
the Queen in Right of Canada (Communications Research Center Canada)
- Copyright (C) 2023
+ Copyright (C) 2024
Matthias P. Braendli, matthias.braendli@mpb.li
http://opendigitalradio.org
@@ -36,8 +36,6 @@
#include "RemoteControl.h"
#include <cstddef>
-#include <thread>
-#include <complex>
#include <vector>
#include <string>
@@ -81,7 +79,7 @@ class TIIError : public std::runtime_error {
class TII : public ModCodec, public RemoteControllable
{
public:
- TII(unsigned int dabmode, tii_config_t& tii_config);
+ TII(unsigned int dabmode, tii_config_t& tii_config, bool fixedPoint);
virtual ~TII() {}
int process(Buffer* dataIn, Buffer* dataOut) override;
@@ -106,6 +104,8 @@ class TII : public ModCodec, public RemoteControllable
// Remote-controllable settings
tii_config_t& m_conf;
+ bool m_fixedPoint = false;
+
// Internal flag when to insert TII
bool m_insert = true;
diff --git a/src/Utils.cpp b/src/Utils.cpp
index fa2fd5d..f947acd 100644
--- a/src/Utils.cpp
+++ b/src/Utils.cpp
@@ -66,6 +66,9 @@ static void printHeader()
#if defined(__SSE__)
"SSE " <<
#endif
+#if defined(__ARM_NEON)
+ "NEON " <<
+#endif
"\n";
}
diff --git a/src/output/Dexter.h b/src/output/Dexter.h
index d4f425f..f8a17ba 100644
--- a/src/output/Dexter.h
+++ b/src/output/Dexter.h
@@ -98,16 +98,16 @@ class Dexter : public Output::SDRDevice
SDRDeviceConfig& m_conf;
- struct iio_context* m_ctx = nullptr;
- struct iio_device* m_dexter_dsp_tx = nullptr;
+ struct iio_context *m_ctx = nullptr;
+ struct iio_device *m_dexter_dsp_tx = nullptr;
- struct iio_device* m_ad9957 = nullptr;
- struct iio_device* m_ad9957_tx0 = nullptr;
- struct iio_channel* m_tx_channel = nullptr;
+ struct iio_device *m_ad9957 = nullptr;
+ struct iio_device *m_ad9957_tx0 = nullptr;
+ struct iio_channel *m_tx_channel = nullptr;
struct iio_buffer *m_buffer = nullptr;
/* Underflows are counted in a separate thread */
- struct iio_context* m_underflow_ctx = nullptr;
+ struct iio_context *m_underflow_ctx = nullptr;
std::atomic<bool> m_running = ATOMIC_VAR_INIT(false);
std::thread m_underflow_read_thread;
void underflow_read_process();
diff --git a/src/output/SDR.cpp b/src/output/SDR.cpp
index 594171f..22398c7 100644
--- a/src/output/SDR.cpp
+++ b/src/output/SDR.cpp
@@ -34,6 +34,7 @@
#include "RemoteControl.h"
#include "Utils.h"
+#include <chrono>
#include <cmath>
#include <iostream>
#include <assert.h>
diff --git a/src/output/SDR.h b/src/output/SDR.h
index 960de0c..86bf295 100644
--- a/src/output/SDR.h
+++ b/src/output/SDR.h
@@ -34,16 +34,12 @@ DESCRIPTION:
# include <config.h>
#endif
-#include <chrono>
#include "ModPlugin.h"
-#include "EtiReader.h"
#include "output/SDRDevice.h"
#include "output/Feedback.h"
namespace Output {
-using complexf = std::complex<float>;
-
class SDR : public ModOutput, public ModMetadata, public RemoteControllable {
public:
SDR(SDRDeviceConfig& config, std::shared_ptr<SDRDevice> device);
diff --git a/src/output/SDRDevice.h b/src/output/SDRDevice.h
index 378829c..ec9373d 100644
--- a/src/output/SDRDevice.h
+++ b/src/output/SDRDevice.h
@@ -38,9 +38,7 @@ DESCRIPTION:
#include <string>
#include <vector>
#include <complex>
-#include <variant>
#include <optional>
-#include <unordered_map>
#include "TimestampDecoder.h"
@@ -59,6 +57,8 @@ struct SDRDeviceConfig {
std::string tx_antenna;
std::string rx_antenna;
+ bool fixedPoint = false;
+
long masterClockRate = 32768000;
unsigned sampleRate = 2048000;
double frequency = 0.0;
diff --git a/src/output/UHD.cpp b/src/output/UHD.cpp
index e097692..b30f9e1 100644
--- a/src/output/UHD.cpp
+++ b/src/output/UHD.cpp
@@ -31,10 +31,7 @@
//#define MDEBUG(fmt, args...) fprintf(LOG, fmt , ## args)
#define MDEBUG(fmt, args...)
-#include "PcDebug.h"
#include "Log.h"
-#include "RemoteControl.h"
-#include "Utils.h"
#include <thread>
#include <iomanip>
@@ -52,14 +49,12 @@
# include <uhd/utils/thread_priority.hpp>
#endif
-
-#include <cmath>
#include <iostream>
-#include <assert.h>
+#include <cmath>
+#include <cassert>
#include <stdexcept>
-#include <stdio.h>
+#include <cstdio>
#include <time.h>
-#include <errno.h>
#include <unistd.h>
#include <pthread.h>
@@ -235,7 +230,8 @@ UHD::UHD(SDRDeviceConfig& config) :
m_usrp->set_rx_gain(m_conf.rxgain);
etiLog.log(debug, "OutputUHD:Actual RX Gain: %f", m_usrp->get_rx_gain());
- const uhd::stream_args_t stream_args("fc32"); //complex floats
+ const uhd::stream_args_t stream_args(
+ m_conf.fixedPoint ? "sc16" : "fc32");
m_rx_stream = m_usrp->get_rx_stream(stream_args);
m_tx_stream = m_usrp->get_tx_stream(stream_args);
@@ -319,8 +315,9 @@ double UHD::get_bandwidth(void) const
void UHD::transmit_frame(struct FrameData&& frame)
{
const double tx_timeout = 20.0;
- const size_t sizeIn = frame.buf.size() / sizeof(complexf);
- const complexf* in_data = reinterpret_cast<const complexf*>(&frame.buf[0]);
+
+ const size_t sample_size = m_conf.fixedPoint ? (2 * sizeof(int16_t)) : sizeof(complexf);
+ const size_t sizeIn = frame.buf.size() / sample_size;
uhd::tx_metadata_t md_tx;
@@ -353,9 +350,9 @@ void UHD::transmit_frame(struct FrameData&& frame)
samps_to_send <= usrp_max_num_samps );
m_require_timestamp_refresh = false;
- //send a single packet
+ // send a single packet
size_t num_tx_samps = m_tx_stream->send(
- &in_data[num_acc_samps],
+ frame.buf.data() + sample_size * num_acc_samps,
samps_to_send, md_tx, tx_timeout);
etiLog.log(trace, "UHD,sent %zu of %zu", num_tx_samps, samps_to_send);
diff --git a/src/output/UHD.h b/src/output/UHD.h
index 9891c7a..c4f1a45 100644
--- a/src/output/UHD.h
+++ b/src/output/UHD.h
@@ -45,12 +45,9 @@ DESCRIPTION:
#include <atomic>
#include <thread>
-#include "Log.h"
#include "output/SDR.h"
#include "output/USRPTime.h"
#include "TimestampDecoder.h"
-#include "RemoteControl.h"
-#include "ThreadsafeQueue.h"
#include <stdio.h>
#include <sys/types.h>