diff options
author | Matthias P. Braendli <matthias.braendli@mpb.li> | 2024-11-25 21:02:36 +0100 |
---|---|---|
committer | Matthias P. Braendli <matthias.braendli@mpb.li> | 2024-11-25 21:02:36 +0100 |
commit | 2e9500d4854a3db9e0f407021934407155b82776 (patch) | |
tree | 72681993fb7ebdadb9b9bc41fe9a6a8130ab1da3 | |
parent | 23b5d884dbdb4ce6a20872cce6a48ea0eed39f39 (diff) | |
parent | d45cca6f447c9a72bc9eaeb9d861fa6fcff9e597 (diff) | |
download | dabmod-2e9500d4854a3db9e0f407021934407155b82776.tar.gz dabmod-2e9500d4854a3db9e0f407021934407155b82776.tar.bz2 dabmod-2e9500d4854a3db9e0f407021934407155b82776.zip |
Merge branch 'fixedpoint' into next
68 files changed, 5370 insertions, 747 deletions
diff --git a/Makefile.am b/Makefile.am index d29b530..87d553a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -35,10 +35,11 @@ endif bin_PROGRAMS = odr-dabmod -odr_dabmod_CFLAGS = -Wall -Isrc -Ilib \ - $(GITVERSION_FLAGS) -odr_dabmod_CXXFLAGS = -Wall -Isrc -Ilib \ - $(GITVERSION_FLAGS) $(BOOST_CPPFLAGS) +KISS_FLAGS=-DFIXED_POINT=16 +odr_dabmod_CFLAGS = -Wall -Isrc -Ilib -Ikiss \ + $(GITVERSION_FLAGS) $(KISS_FLAGS) +odr_dabmod_CXXFLAGS = -Wall -Isrc -Ilib -Ikiss \ + $(GITVERSION_FLAGS) $(BOOST_CPPFLAGS) $(KISS_FLAGS) odr_dabmod_LDADD = $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB) $(UHD_LIBS) $(LIMESDR_LIBS) $(ADDITIONAL_UHD_LIBS) odr_dabmod_SOURCES = src/DabMod.cpp \ src/PcDebug.h \ @@ -175,7 +176,17 @@ odr_dabmod_SOURCES = src/DabMod.cpp \ src/PAPRStats.cpp \ src/PAPRStats.h \ src/TII.cpp \ - src/TII.h + src/TII.h \ + kiss/kfc.h \ + kiss/kfc.c \ + kiss/kiss_fft.c \ + kiss/kiss_fft.h \ + kiss/kiss_fftnd.c \ + kiss/kiss_fftnd.h \ + kiss/kiss_fftndr.c \ + kiss/kiss_fftndr.h \ + kiss/kiss_fftr.c \ + kiss/kiss_fftr.h man_MANS = man/odr-dabmod.1 diff --git a/doc/example.ini b/doc/example.ini index eda50a5..0d0f8e3 100644 --- a/doc/example.ini +++ b/doc/example.ini @@ -103,6 +103,8 @@ gainmode=var ; If not defined, use Transmission Mode 1 ;mode=1 +fixed_point=1 + ; The digital gain is a value that is multiplied to each sample. It is used ; to tune the chain to make sure that no non-linearities appear up to the ; USRP daughterboard programmable gain amplifier (PGA). diff --git a/fpm/LICENSE b/fpm/LICENSE new file mode 100644 index 0000000..bb86b71 --- /dev/null +++ b/fpm/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Mike Lankamp + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/fpm/README.md b/fpm/README.md new file mode 100644 index 0000000..38ee444 --- /dev/null +++ b/fpm/README.md @@ -0,0 +1,48 @@ +# fpm +A C++ header-only fixed-point math library. "fpm" stands for "fixed-point math". + +It is designed to serve as a drop-in replacement for floating-point types and aims to provide as much of the standard library's functionality as possible with exclusively integers. `fpm` requires C++11 or higher. + +[![Build Status](https://travis-ci.org/MikeLankamp/fpm.svg?branch=master)](https://travis-ci.org/MikeLankamp/fpm) +[![Build status](https://ci.appveyor.com/api/projects/status/0velpwqk38spu412?svg=true)](https://ci.appveyor.com/project/MikeLankamp/fpm) + +`fpm` is designed to guard against accidental conversion to and from floats and supports many of the standard C++ maths functions, including trigonometry, power and logarithmic functions, with performance and accuracy generally comparable to alternative libraries. + +## Why use fixed-point math? +There are several reasons why you can not or choose not to use floating-point math, but still want a similar type: +* Your target platform lacks an FPU, does not support floating-point operations or its floating-point operations are + considerably slower than fixed-point integer operations. +* You require deterministic calculations. + +If any of these reasons apply for you, and your problem domain has a clearly outlined range and required resolution, +then fixed-point numbers might be a solution for you. + +## Quick Start +To use `fpm`, include its header `<fpm/fixed.hpp>` and use the `fpm::fixed_16_16`, `fpm::fixed_24_8` or `fpm::fixed_8_24` +types as if they were native floating-pointer types: +```c++ +#include <fpm/fixed.hpp> // For fpm::fixed_16_16 +#include <fpm/math.hpp> // For fpm::cos +#include <fpm/ios.hpp> // For fpm::operator<< +#include <iostream> // For std::cin, std::cout + +int main() { + std::cout << "Please input a number: "; + fpm::fixed_16_16 x; + std::cin >> x; + std::cout << "The cosine of " << x << " radians is: " << cos(x) << std::endl; + return 0; +} +``` + +To use the fixed-point equivalents of the `<math.h>` functions such as `sqrt`, `sin` and `log`, include the header `<fpm/math.hpp>`. +To stream fixed-point values to or from streams, include the header `<fpm/ios.hpp>`. + +## Documentation +Please refer to the [documentation](docs/index.md) for detailed information how to use `fpm`, or skip straight to the [performance](docs/performance.md) or [accuracy](docs/accuracy.md) results. + +## Contributions +This library is a work-in-progress. We welcome any contributions that improve the functional coverage or the performance or accuracy of the mathematical functions. + +## License +See the [LICENSE](LICENSE) file diff --git a/fpm/fixed.hpp b/fpm/fixed.hpp new file mode 100644 index 0000000..e2e71bf --- /dev/null +++ b/fpm/fixed.hpp @@ -0,0 +1,490 @@ +#ifndef FPM_FIXED_HPP +#define FPM_FIXED_HPP + +#include <cassert> +#include <cmath> +#include <cstdint> +#include <functional> +#include <limits> +#include <type_traits> + +namespace fpm +{ + +//! Fixed-point number type +//! \tparam BaseType the base integer type used to store the fixed-point number. This can be a signed or unsigned type. +//! \tparam IntermediateType the integer type used to store intermediate results during calculations. +//! \tparam FractionBits the number of bits of the BaseType used to store the fraction +//! \tparam EnableRounding enable rounding of LSB for multiplication, division, and type conversion +template <typename BaseType, typename IntermediateType, unsigned int FractionBits, bool EnableRounding = true> +class fixed +{ + static_assert(std::is_integral<BaseType>::value, "BaseType must be an integral type"); + static_assert(FractionBits > 0, "FractionBits must be greater than zero"); + static_assert(FractionBits <= sizeof(BaseType) * 8 - 1, "BaseType must at least be able to contain entire fraction, with space for at least one integral bit"); + static_assert(sizeof(IntermediateType) > sizeof(BaseType), "IntermediateType must be larger than BaseType"); + static_assert(std::is_signed<IntermediateType>::value == std::is_signed<BaseType>::value, "IntermediateType must have same signedness as BaseType"); + + // Although this value fits in the BaseType in terms of bits, if there's only one integral bit, this value + // is incorrect (flips from positive to negative), so we must extend the size to IntermediateType. + static constexpr IntermediateType FRACTION_MULT = IntermediateType(1) << FractionBits; + + struct raw_construct_tag {}; + constexpr inline fixed(BaseType val, raw_construct_tag) noexcept : m_value(val) {} + +public: + inline fixed() noexcept = default; + + // Converts an integral number to the fixed-point type. + // Like static_cast, this truncates bits that don't fit. + template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> + constexpr inline explicit fixed(T val) noexcept + : m_value(static_cast<BaseType>(val * FRACTION_MULT)) + {} + + // Converts an floating-point number to the fixed-point type. + // Like static_cast, this truncates bits that don't fit. + template <typename T, typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr> + constexpr inline explicit fixed(T val) noexcept + : m_value(static_cast<BaseType>((EnableRounding) ? + (val >= 0.0) ? (val * FRACTION_MULT + T{0.5}) : (val * FRACTION_MULT - T{0.5}) + : (val * FRACTION_MULT))) + {} + + // Constructs from another fixed-point type with possibly different underlying representation. + // Like static_cast, this truncates bits that don't fit. + template <typename B, typename I, unsigned int F, bool R> + constexpr inline explicit fixed(fixed<B,I,F,R> val) noexcept + : m_value(from_fixed_point<F>(val.raw_value()).raw_value()) + {} + + // Explicit conversion to a floating-point type + template <typename T, typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr> + constexpr inline explicit operator T() const noexcept + { + return static_cast<T>(m_value) / FRACTION_MULT; + } + + // Explicit conversion to an integral type + template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> + constexpr inline explicit operator T() const noexcept + { + return static_cast<T>(m_value / FRACTION_MULT); + } + + // Returns the raw underlying value of this type. + // Do not use this unless you know what you're doing. + constexpr inline BaseType raw_value() const noexcept + { + return m_value; + } + + //! Constructs a fixed-point number from another fixed-point number. + //! \tparam NumFractionBits the number of bits used by the fraction in \a value. + //! \param value the integer fixed-point number + template <unsigned int NumFractionBits, typename T, typename std::enable_if<(NumFractionBits > FractionBits)>::type* = nullptr> + static constexpr inline fixed from_fixed_point(T value) noexcept + { + // To correctly round the last bit in the result, we need one more bit of information. + // We do this by multiplying by two before dividing and adding the LSB to the real result. + return (EnableRounding) ? fixed(static_cast<BaseType>( + value / (T(1) << (NumFractionBits - FractionBits)) + + (value / (T(1) << (NumFractionBits - FractionBits - 1)) % 2)), + raw_construct_tag{}) : + fixed(static_cast<BaseType>(value / (T(1) << (NumFractionBits - FractionBits))), + raw_construct_tag{}); + } + + template <unsigned int NumFractionBits, typename T, typename std::enable_if<(NumFractionBits <= FractionBits)>::type* = nullptr> + static constexpr inline fixed from_fixed_point(T value) noexcept + { + return fixed(static_cast<BaseType>( + value * (T(1) << (FractionBits - NumFractionBits))), + raw_construct_tag{}); + } + + // Constructs a fixed-point number from its raw underlying value. + // Do not use this unless you know what you're doing. + static constexpr inline fixed from_raw_value(BaseType value) noexcept + { + return fixed(value, raw_construct_tag{}); + } + + // + // Constants + // + static constexpr fixed e() { return from_fixed_point<61>(6267931151224907085ll); } + static constexpr fixed pi() { return from_fixed_point<61>(7244019458077122842ll); } + static constexpr fixed half_pi() { return from_fixed_point<62>(7244019458077122842ll); } + static constexpr fixed two_pi() { return from_fixed_point<60>(7244019458077122842ll); } + + // + // Arithmetic member operators + // + + constexpr inline fixed operator-() const noexcept + { + return fixed::from_raw_value(-m_value); + } + + inline fixed& operator+=(const fixed& y) noexcept + { + m_value += y.m_value; + return *this; + } + + template <typename I, typename std::enable_if<std::is_integral<I>::value>::type* = nullptr> + inline fixed& operator+=(I y) noexcept + { + m_value += y * FRACTION_MULT; + return *this; + } + + inline fixed& operator-=(const fixed& y) noexcept + { + m_value -= y.m_value; + return *this; + } + + template <typename I, typename std::enable_if<std::is_integral<I>::value>::type* = nullptr> + inline fixed& operator-=(I y) noexcept + { + m_value -= y * FRACTION_MULT; + return *this; + } + + inline fixed& operator*=(const fixed& y) noexcept + { + if (EnableRounding){ + // Normal fixed-point multiplication is: x * y / 2**FractionBits. + // To correctly round the last bit in the result, we need one more bit of information. + // We do this by multiplying by two before dividing and adding the LSB to the real result. + auto value = (static_cast<IntermediateType>(m_value) * y.m_value) / (FRACTION_MULT / 2); + m_value = static_cast<BaseType>((value / 2) + (value % 2)); + } else { + auto value = (static_cast<IntermediateType>(m_value) * y.m_value) / FRACTION_MULT; + m_value = static_cast<BaseType>(value); + } + return *this; + } + + template <typename I, typename std::enable_if<std::is_integral<I>::value>::type* = nullptr> + inline fixed& operator*=(I y) noexcept + { + m_value *= y; + return *this; + } + + inline fixed& operator/=(const fixed& y) noexcept + { + assert(y.m_value != 0); + if (EnableRounding){ + // Normal fixed-point division is: x * 2**FractionBits / y. + // To correctly round the last bit in the result, we need one more bit of information. + // We do this by multiplying by two before dividing and adding the LSB to the real result. + auto value = (static_cast<IntermediateType>(m_value) * FRACTION_MULT * 2) / y.m_value; + m_value = static_cast<BaseType>((value / 2) + (value % 2)); + } else { + auto value = (static_cast<IntermediateType>(m_value) * FRACTION_MULT) / y.m_value; + m_value = static_cast<BaseType>(value); + } + return *this; + } + + template <typename I, typename std::enable_if<std::is_integral<I>::value>::type* = nullptr> + inline fixed& operator/=(I y) noexcept + { + m_value /= y; + return *this; + } + +private: + BaseType m_value; +}; + +// +// Convenience typedefs +// + +using fixed_16_16 = fixed<std::int32_t, std::int64_t, 16>; +using fixed_24_8 = fixed<std::int32_t, std::int64_t, 8>; +using fixed_8_24 = fixed<std::int32_t, std::int64_t, 24>; + +// +// Addition +// + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline fixed<B, I, F, R> operator+(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept +{ + return fixed<B, I, F, R>(x) += y; +} + +template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> +constexpr inline fixed<B, I, F, R> operator+(const fixed<B, I, F, R>& x, T y) noexcept +{ + return fixed<B, I, F, R>(x) += y; +} + +template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> +constexpr inline fixed<B, I, F, R> operator+(T x, const fixed<B, I, F, R>& y) noexcept +{ + return fixed<B, I, F, R>(y) += x; +} + +// +// Subtraction +// + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline fixed<B, I, F, R> operator-(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept +{ + return fixed<B, I, F, R>(x) -= y; +} + +template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> +constexpr inline fixed<B, I, F, R> operator-(const fixed<B, I, F, R>& x, T y) noexcept +{ + return fixed<B, I, F, R>(x) -= y; +} + +template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> +constexpr inline fixed<B, I, F, R> operator-(T x, const fixed<B, I, F, R>& y) noexcept +{ + return fixed<B, I, F, R>(x) -= y; +} + +// +// Multiplication +// + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline fixed<B, I, F, R> operator*(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept +{ + return fixed<B, I, F, R>(x) *= y; +} + +template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> +constexpr inline fixed<B, I, F, R> operator*(const fixed<B, I, F, R>& x, T y) noexcept +{ + return fixed<B, I, F, R>(x) *= y; +} + +template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> +constexpr inline fixed<B, I, F, R> operator*(T x, const fixed<B, I, F, R>& y) noexcept +{ + return fixed<B, I, F, R>(y) *= x; +} + +// +// Division +// + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline fixed<B, I, F, R> operator/(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept +{ + return fixed<B, I, F, R>(x) /= y; +} + +template <typename B, typename I, unsigned int F, typename T, bool R, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> +constexpr inline fixed<B, I, F, R> operator/(const fixed<B, I, F, R>& x, T y) noexcept +{ + return fixed<B, I, F, R>(x) /= y; +} + +template <typename B, typename I, unsigned int F, typename T, bool R, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> +constexpr inline fixed<B, I, F, R> operator/(T x, const fixed<B, I, F, R>& y) noexcept +{ + return fixed<B, I, F, R>(x) /= y; +} + +// +// Comparison operators +// + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool operator==(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept +{ + return x.raw_value() == y.raw_value(); +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool operator!=(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept +{ + return x.raw_value() != y.raw_value(); +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool operator<(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept +{ + return x.raw_value() < y.raw_value(); +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool operator>(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept +{ + return x.raw_value() > y.raw_value(); +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool operator<=(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept +{ + return x.raw_value() <= y.raw_value(); +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool operator>=(const fixed<B, I, F, R>& x, const fixed<B, I, F, R>& y) noexcept +{ + return x.raw_value() >= y.raw_value(); +} + +namespace detail +{ +// Number of base-10 digits required to fully represent a number of bits +static constexpr int max_digits10(int bits) +{ + // 8.24 fixed-point equivalent of (int)ceil(bits * std::log10(2)); + using T = long long; + return static_cast<int>((T{bits} * 5050445 + (T{1} << 24) - 1) >> 24); +} + +// Number of base-10 digits that can be fully represented by a number of bits +static constexpr int digits10(int bits) +{ + // 8.24 fixed-point equivalent of (int)(bits * std::log10(2)); + using T = long long; + return static_cast<int>((T{bits} * 5050445) >> 24); +} + +} // namespace detail +} // namespace fpm + +// Specializations for customization points +namespace std +{ + +template <typename B, typename I, unsigned int F, bool R> +struct hash<fpm::fixed<B,I,F,R>> +{ + using argument_type = fpm::fixed<B, I, F, R>; + using result_type = std::size_t; + + result_type operator()(argument_type arg) const noexcept(noexcept(std::declval<std::hash<B>>()(arg.raw_value()))) { + return m_hash(arg.raw_value()); + } + +private: + std::hash<B> m_hash; +}; + +template <typename B, typename I, unsigned int F, bool R> +struct numeric_limits<fpm::fixed<B,I,F,R>> +{ + static constexpr bool is_specialized = true; + static constexpr bool is_signed = std::numeric_limits<B>::is_signed; + static constexpr bool is_integer = false; + static constexpr bool is_exact = true; + static constexpr bool has_infinity = false; + static constexpr bool has_quiet_NaN = false; + static constexpr bool has_signaling_NaN = false; + static constexpr std::float_denorm_style has_denorm = std::denorm_absent; + static constexpr bool has_denorm_loss = false; + static constexpr std::float_round_style round_style = std::round_to_nearest; + static constexpr bool is_iec_559 = false; + static constexpr bool is_bounded = true; + static constexpr bool is_modulo = std::numeric_limits<B>::is_modulo; + static constexpr int digits = std::numeric_limits<B>::digits; + + // Any number with `digits10` significant base-10 digits (that fits in + // the range of the type) is guaranteed to be convertible from text and + // back without change. Worst case, this is 0.000...001, so we can only + // guarantee this case. Nothing more. + static constexpr int digits10 = 1; + + // This is equal to max_digits10 for the integer and fractional part together. + static constexpr int max_digits10 = + fpm::detail::max_digits10(std::numeric_limits<B>::digits - F) + fpm::detail::max_digits10(F); + + static constexpr int radix = 2; + static constexpr int min_exponent = 1 - F; + static constexpr int min_exponent10 = -fpm::detail::digits10(F); + static constexpr int max_exponent = std::numeric_limits<B>::digits - F; + static constexpr int max_exponent10 = fpm::detail::digits10(std::numeric_limits<B>::digits - F); + static constexpr bool traps = true; + static constexpr bool tinyness_before = false; + + static constexpr fpm::fixed<B,I,F,R> lowest() noexcept { + return fpm::fixed<B,I,F,R>::from_raw_value(std::numeric_limits<B>::lowest()); + }; + + static constexpr fpm::fixed<B,I,F,R> min() noexcept { + return lowest(); + } + + static constexpr fpm::fixed<B,I,F,R> max() noexcept { + return fpm::fixed<B,I,F,R>::from_raw_value(std::numeric_limits<B>::max()); + }; + + static constexpr fpm::fixed<B,I,F,R> epsilon() noexcept { + return fpm::fixed<B,I,F,R>::from_raw_value(1); + }; + + static constexpr fpm::fixed<B,I,F,R> round_error() noexcept { + return fpm::fixed<B,I,F,R>(1) / 2; + }; + + static constexpr fpm::fixed<B,I,F,R> denorm_min() noexcept { + return min(); + } +}; + +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_specialized; +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_signed; +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_integer; +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_exact; +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::has_infinity; +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::has_quiet_NaN; +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::has_signaling_NaN; +template <typename B, typename I, unsigned int F, bool R> +constexpr std::float_denorm_style numeric_limits<fpm::fixed<B,I,F,R>>::has_denorm; +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::has_denorm_loss; +template <typename B, typename I, unsigned int F, bool R> +constexpr std::float_round_style numeric_limits<fpm::fixed<B,I,F,R>>::round_style; +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_iec_559; +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_bounded; +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::is_modulo; +template <typename B, typename I, unsigned int F, bool R> +constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::digits; +template <typename B, typename I, unsigned int F, bool R> +constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::digits10; +template <typename B, typename I, unsigned int F, bool R> +constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::max_digits10; +template <typename B, typename I, unsigned int F, bool R> +constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::radix; +template <typename B, typename I, unsigned int F, bool R> +constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::min_exponent; +template <typename B, typename I, unsigned int F, bool R> +constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::min_exponent10; +template <typename B, typename I, unsigned int F, bool R> +constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::max_exponent; +template <typename B, typename I, unsigned int F, bool R> +constexpr int numeric_limits<fpm::fixed<B,I,F,R>>::max_exponent10; +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::traps; +template <typename B, typename I, unsigned int F, bool R> +constexpr bool numeric_limits<fpm::fixed<B,I,F,R>>::tinyness_before; + +} + +#endif diff --git a/fpm/ios.hpp b/fpm/ios.hpp new file mode 100644 index 0000000..69581fb --- /dev/null +++ b/fpm/ios.hpp @@ -0,0 +1,740 @@ +#ifndef FPM_IOS_HPP +#define FPM_IOS_HPP + +#include "fixed.hpp" +#include "math.hpp" +#include <array> +#include <algorithm> +#include <cctype> +#include <climits> +#include <limits> +#include <ios> +#include <vector> + +namespace fpm +{ + +template <typename CharT, typename B, typename I, unsigned int F, bool R> +std::basic_ostream<CharT>& operator<<(std::basic_ostream<CharT>& os, fixed<B, I, F, R> x) noexcept +{ + const auto uppercase = ((os.flags() & std::ios_base::uppercase) != 0); + const auto showpoint = ((os.flags() & std::ios_base::showpoint) != 0); + const auto adjustfield = (os.flags() & std::ios_base::adjustfield); + const auto width = os.width(); + const auto& ctype = std::use_facet<std::ctype<CharT>>(os.getloc()); + const auto& numpunct = std::use_facet<std::numpunct<CharT>>(os.getloc()); + + auto floatfield = (os.flags() & std::ios_base::floatfield); + auto precision = os.precision(); + auto show_trailing_zeros = true; + auto use_significant_digits = false; + + // Invalid precision? Reset to the default + if (precision < 0) + { + precision = 6; + } + + // Output buffer. Needs to be big enough for the formatted number without padding. + // Optional prefixes (i.e. "+"/"-", decimal separator, exponent "e+/-" and/or "0x"). + constexpr auto worst_case_constant_size = 6; + // Maximum number of digits from the base type (covers integral + fractional digits) + constexpr auto worst_case_digit_count = std::numeric_limits<B>::digits10 + 2; + // Exponent suffixes (i.e. maximum digits based on log of the base type size). + // Needs a log10, but that isn't constexpr, so we're over-allocating on the stack. Can't hurt. + constexpr auto worst_case_suffix_size = std::numeric_limits<B>::digits; + // Double the digit count: in the worst case the thousands grouping add a character per digit. + using buffer_t = std::array<CharT, worst_case_constant_size + worst_case_digit_count * 2 + worst_case_suffix_size>; + buffer_t buffer; + + // Output cursor + auto end = buffer.begin(); + + // Keep track of the start of "internal" padding + typename buffer_t::iterator internal_pad = buffer.end(); + + // Representation of a number. + // The value of the number is: raw / divisor * (10|2) ^ exponent + // The base of the exponent is 2 in hexfloat mode, or 10 otherwise. + struct number_t { + I raw; // raw fixed-point value + I divisor; // the divisor indicating the place of the decimal point + int exponent; // the exponent applied + }; + + // Convert a value without exponent to scientific representation + // where the part before the decimal point is less than 10. + const auto as_scientific = [](number_t value) { + assert(value.exponent == 0); + if (value.raw > 0) + { + while (value.raw / 10 >= value.divisor) { + value.divisor *= 10; + ++value.exponent; + } + while (value.raw < value.divisor) { + value.raw *= 10; + --value.exponent; + } + } + return value; + }; + + number_t value = { x.raw_value(), I{1} << F, 0}; + + auto base = B{10}; + + // First write the sign + if (value.raw < 0) + { + *end++ = ctype.widen('-'); + value.raw = -value.raw; + internal_pad = end; + } + else if (os.flags() & std::ios_base::showpos) + { + *end++ = ctype.widen('+'); + internal_pad = end; + } + assert(value.raw >= 0); + + switch (floatfield) + { + case std::ios_base::fixed | std::ios_base::scientific: + // Hexadecimal mode: figure out the hexadecimal exponent and write "0x" + if (value.raw > 0) + { + auto bit = detail::find_highest_bit(value.raw); + value.exponent = bit - F; // exponent is applied to base 2 + value.divisor = I{1} << bit; // divisor is at the highest bit, ensuring it starts with "1." + precision = (bit + 3) / 4; // precision is number of nibbles, so we show all of them + } + base = 16; + show_trailing_zeros = false; // Always strip trailing zeros in hexfloat mode + + *end++ = ctype.widen('0'); + *end++ = ctype.widen(uppercase ? 'X' : 'x'); + break; + + case std::ios_base::scientific: + // Scientific mode, normalize value to scientific notation + value = as_scientific(value); + break; + + case std::ios_base::fixed: + // Fixed mode. Nothing to do. + break; + + default: + { + // "auto" mode: figure out the exponent + const number_t sci_value = as_scientific(value); + + // Now `precision` indicates the number of *significant digits* (not fractional digits). + use_significant_digits = true; + precision = std::max<std::streamsize>(precision, 1); + + if (sci_value.exponent >= precision || sci_value.exponent < -4) { + // Display as scientific format + floatfield = std::ios_base::scientific; + value = sci_value; + } else { + // Display as fixed format. + // "showpoint" indicates whether or not we show trailing zeros + floatfield = std::ios_base::fixed; + show_trailing_zeros = showpoint; + } + break; + } + }; + + // If we didn't write a sign, any internal padding starts here + // (after a potential "0x" for hexfloats). + if (internal_pad == buffer.end()) { + internal_pad = end; + } + + // Separate out the integral part of the number + I integral = value.raw / value.divisor; + value.raw %= value.divisor; + + // Here we start printing the number itself + const char* const digits = uppercase ? "0123456789ABCDEF" : "0123456789abcdef"; + const auto digits_start = end; + + // Are we already printing significant digits? (yes if we're not counting significant digits) + bool significant_digits = !use_significant_digits; + + // Print the integral part + int last_digit = 0; + if (integral == 0) { + *end++ = ctype.widen('0'); + if (value.raw == 0) { + // If the fraction is zero too, all zeros including the integral count + // as significant digits. + significant_digits = true; + } + } else { + while (integral > 0) { + last_digit = integral % base; + *end++ = ctype.widen(digits[last_digit]); + integral /= base; + } + std::reverse(digits_start, end); + significant_digits = true; + } + + if (use_significant_digits && significant_digits) + { + // Apparently the integral part was significant; subtract its + // length from the remaining significant digits. + precision -= (end - digits_start); + } + + // At this point, `value` contains only the fraction and + // `precision` holds the number of digits to print. + assert(value.raw < value.divisor); + assert(precision >= 0); + + // Location of decimal point + typename buffer_t::iterator point = buffer.end(); + + // Start (and length) of the trailing zeros to insert while printing + // By tracking this to print them later instead of actually printing them now, + // we can support large precisions with a small printing buffer. + typename buffer_t::iterator trailing_zeros_start = buffer.end(); + std::streamsize trailing_zeros_count = 0; + + if (precision > 0) + { + // Print the fractional part + *(point = end++) = numpunct.decimal_point(); + + for (int i = 0; i < precision; ++i) + { + if (value.raw == 0) + { + // The rest of the digits are all zeros, mark them + // to be printed in this spot. + trailing_zeros_start = end; + trailing_zeros_count = precision - i; + break; + } + + // Shift the divisor if we can to avoid overflow on the value + if (value.divisor % base == 0) { + value.divisor /= base; + } else { + value.raw *= base; + } + assert(value.divisor > 0); + assert(value.raw >= 0); + last_digit = (value.raw / value.divisor) % base; + value.raw %= value.divisor; + *end++ = ctype.widen(digits[last_digit]); + + if (!significant_digits) { + // We're still finding the first significant digit + if (last_digit != 0) { + // Found it + significant_digits = true; + } else { + // Not yet; increment number of digits to print + ++precision; + } + } + } + } + else if (showpoint) + { + // No fractional part to print, but we still want the point + *(point = end++) = numpunct.decimal_point(); + } + + // Insert `ch` into the output at `position`, updating all references accordingly + const auto insert_character = [&](typename buffer_t::iterator position, CharT ch) { + assert(position >= buffer.begin() && position < end); + std::move_backward(position, end, end + 1); + if (point != buffer.end() && position < point) { + ++point; + } + if (trailing_zeros_start != buffer.end() && position < trailing_zeros_start) { + ++trailing_zeros_start; + } + ++end; + *position = ch; + }; + + // Round the number: round to nearest + bool increment = false; + if (value.raw > value.divisor / 2) { + // Round up + increment = true; + } else if (value.raw == value.divisor / 2) { + // It's a tie (i.e. "xyzw.5"): round to even + increment = ((last_digit % 2) == 1); + } + + if (increment) + { + auto p = end - 1; + // Increment all digits backwards while we see "9" + while (p >= digits_start) { + if (p == point) { + // Skip over the decimal point + --p; + } + if ((*p)++ != ctype.widen('9')) { + break; + } + *p-- = ctype.widen('0'); + } + + if (p < digits_start) { + // We've incremented all the way to the start (all 9's), we need to insert the + // carried-over 1 from incrementing the last 9. + assert(p == digits_start - 1); + insert_character(++p, ctype.widen('1')); + + if (floatfield == std::ios::scientific) + { + // We just made the integral part equal to 10, so we shift the decimal point + // back one place (if any) and tweak the exponent, so that we keep the integer part + // less than 10. + if (point != buffer.end()) { + assert(p + 2 == point); + std::swap(*(point - 1), *point); + --point; + } + ++value.exponent; + + // We've introduced an extra digit so we need to strip the last digit + // to maintain the same precision + --end; + } + } + + if (use_significant_digits && *p == ctype.widen('1') && point != buffer.end()) { + // We've converted a leading zero to a 1 so we need to strip the last digit + // (behind the decimal point) to maintain the same significant digit count. + --end; + } + } + + if (point != buffer.end()) + { + if (!show_trailing_zeros) + { + // Remove trailing zeros + while (*(end - 1) == ctype.widen('0')) { + --end; + } + + // Also clear the "trailing zeros to append during printing" range + trailing_zeros_start = buffer.end(); + trailing_zeros_count = 0; + } + + if (end - 1 == point && trailing_zeros_count == 0 && !showpoint) { + // Remove the decimal point, too + --end; + } + } + + // Apply thousands grouping + const auto& grouping = numpunct.grouping(); + if (!grouping.empty()) + { + // Step backwards from the end or decimal point, inserting the + // thousands separator at every group interval. + const CharT thousands_sep = ctype.widen(numpunct.thousands_sep()); + std::size_t group = 0; + auto p = point != buffer.end() ? point : end; + auto size = static_cast<int>(grouping[group]); + while (size > 0 && size < CHAR_MAX && p - digits_start > size) { + p -= size; + insert_character(p, thousands_sep); + if (group < grouping.size() - 1) { + size = static_cast<int>(grouping[++group]); + } + } + } + + // Print the exponent if required + assert(floatfield != 0); + if (floatfield & std::ios_base::scientific) + { + // Hexadecimal (%a/%A) or decimal (%e/%E) scientific notation + if (floatfield & std::ios_base::fixed) { + *end++ = ctype.widen(uppercase ? 'P' : 'p'); + } else { + *end++ = ctype.widen(uppercase ? 'E' : 'e'); + } + + if (value.exponent < 0) { + *end++ = ctype.widen('-'); + value.exponent = -value.exponent; + } else { + *end++ = ctype.widen('+'); + } + + if (floatfield == std::ios_base::scientific) { + // In decimal scientific notation (%e/%E), the exponent is at least two digits + if (value.exponent < 10) { + *end++ = ctype.widen('0'); + } + } + + const auto exponent_start = end; + if (value.exponent == 0) { + *end++ = ctype.widen('0'); + } else while (value.exponent > 0) { + *end++ = ctype.widen(digits[value.exponent % 10]); + value.exponent /= 10; + } + std::reverse(exponent_start, end); + } + + // Write character `ch` `count` times to the stream + const auto sputcn = [&](CharT ch, std::streamsize count){ + // Fill a buffer to output larger chunks + constexpr std::streamsize chunk_size = 64; + std::array<CharT, chunk_size> fill_buffer; + std::fill_n(fill_buffer.begin(), std::min(count, chunk_size), ch); + + for (std::streamsize size, left = count; left > 0; left -= size) { + size = std::min(chunk_size, left); + os.rdbuf()->sputn(&fill_buffer[0], size); + } + }; + + // Outputs a range of characters, making sure to output the trailing zeros range + // if it lies in the specified range + const auto put_range = [&](typename buffer_t::const_iterator begin, typename buffer_t::const_iterator end) { + assert(end >= begin); + if (trailing_zeros_start >= begin && trailing_zeros_start <= end) { + // Print range with trailing zeros range in the middle + assert(trailing_zeros_count > 0); + os.rdbuf()->sputn(&*begin, trailing_zeros_start - begin); + sputcn(ctype.widen('0'), trailing_zeros_count); + os.rdbuf()->sputn(&*trailing_zeros_start, end - trailing_zeros_start); + } else { + // Print range as-is + os.rdbuf()->sputn(&*begin, end - begin); + } + }; + + // Pad the buffer if necessary. + // Note that the length of trailing zeros is counted towards the length of the content. + const auto content_size = end - buffer.begin() + trailing_zeros_count; + if (content_size >= width) + { + // Buffer needs no padding, output as-is + put_range(buffer.begin(), end); + } + else + { + const auto pad_size = width - content_size; + switch (adjustfield) + { + case std::ios_base::left: + // Content is left-aligned, so output the buffer, followed by the padding + put_range(buffer.begin(), end); + sputcn(os.fill(), pad_size); + break; + case std::ios_base::internal: + // Content is internally aligned, so output the buffer up to the "internal pad" + // point, followed by the padding, followed by the remainder of the buffer. + put_range(buffer.begin(), internal_pad); + sputcn(os.fill(), pad_size); + put_range(internal_pad, end); + break; + default: + // Content is right-aligned, so output the padding, followed by the buffer + sputcn(os.fill(), pad_size); + put_range(buffer.begin(), end); + break; + } + } + + // Width is reset after every write + os.width(0); + + return os; +} + + +template <typename CharT, class Traits, typename B, typename I, unsigned int F, bool R> +std::basic_istream<CharT, Traits>& operator>>(std::basic_istream<CharT, Traits>& is, fixed<B, I, F, R>& x) +{ + typename std::basic_istream<CharT, Traits>::sentry sentry(is); + if (!sentry) + { + return is; + } + + const auto& ctype = std::use_facet<std::ctype<CharT>>(is.getloc()); + const auto& numpunct = std::use_facet<std::numpunct<CharT>>(is.getloc()); + + bool thousands_separator_allowed = false; + const bool supports_thousands_separators = !numpunct.grouping().empty(); + + const auto& is_valid_character = [](char ch) { + // Note: allowing ['p', 'i', 'n', 't', 'y'] is technically in violation of the spec (we are emulating std::num_get), + // but otherwise we cannot parse hexfloats and "infinity". This is a known issue with the spec (LWG #2381). + return std::isxdigit(ch) || + ch == 'x' || ch == 'X' || ch == 'p' || ch == 'P' || + ch == 'i' || ch == 'I' || ch == 'n' || ch == 'N' || + ch == 't' || ch == 'T' || ch == 'y' || ch == 'Y' || + ch == '-' || ch == '+'; + }; + + const auto& peek = [&]() { + for(;;) { + auto ch = is.rdbuf()->sgetc(); + if (ch == Traits::eof()) { + is.setstate(std::ios::eofbit); + return '\0'; + } + if (ch == numpunct.decimal_point()) { + return '.'; + } + if (ch == numpunct.thousands_sep()) + { + if (!supports_thousands_separators || !thousands_separator_allowed) { + return '\0'; + } + // Ignore valid thousands separators + is.rdbuf()->sbumpc(); + continue; + } + auto res = ctype.narrow(ch, 0); + if (!is_valid_character(res)) { + // Invalid character: end input + return '\0'; + } + return res; + } + }; + + const auto& bump = [&]() { + is.rdbuf()->sbumpc(); + }; + + const auto& next = [&]() { + bump(); + return peek(); + }; + + bool negate = false; + auto ch = peek(); + if (ch == '-') { + negate = true; + ch = next(); + } else if (ch == '+') { + ch = next(); + } + + const char infinity[] = "infinity"; + // Must be "inf" or "infinity" + int i = 0; + while (i < 8 && ch == infinity[i]) { + ++i; + ch = next(); + } + + if (i > 0) { + if (i == 3 || i == 8) { + x = negate ? std::numeric_limits<fixed<B, I, F, R>>::min() : std::numeric_limits<fixed<B, I, F, R>>::max(); + } else { + is.setstate(std::ios::failbit); + } + return is; + } + + char exponent_char = 'e'; + int base = 10; + + constexpr auto NoFraction = std::numeric_limits<std::size_t>::max(); + std::size_t fraction_start = NoFraction; + std::vector<unsigned char> significand; + + if (ch == '0') { + ch = next(); + if (ch == 'x' || ch == 'X') { + // Hexfloat + exponent_char = 'p'; + base = 16; + ch = next(); + } else { + significand.push_back(0); + } + } + + // Parse the significand + thousands_separator_allowed = true; + for (;; ch = next()) { + if (ch == '.') { + if (fraction_start != NoFraction) { + // Double decimal point. Stop parsing. + break; + } + fraction_start = significand.size(); + thousands_separator_allowed = false; + } else { + unsigned char val = base; + if (ch >= '0' && ch <= '9') { + val = ch - '0'; + } else if (ch >= 'a' && ch <= 'f') { + val = ch - 'a' + 10; + } else if (ch >= 'A' && ch <= 'F') { + val = ch - 'A' + 10; + } + if (val < 0 || val >= base) { + break; + } + significand.push_back(val); + } + } + if (significand.empty()) { + // We need a significand + is.setstate(std::ios::failbit); + return is; + } + thousands_separator_allowed = false; + + if (fraction_start == NoFraction) { + // If we haven't seen a fraction yet, place it at the end of the significand + fraction_start = significand.size(); + } + + // Parse the exponent + bool exponent_overflow = false; + std::size_t exponent = 0; + bool exponent_negate = false; + if (std::tolower(ch) == exponent_char) + { + ch = next(); + if (ch == '-') { + exponent_negate = true; + ch = next(); + } else if (ch == '+') { + ch = next(); + } + + bool parsed = false; + while (std::isdigit(ch)) { + if (exponent <= std::numeric_limits<int>::max() / 10) { + exponent = exponent * 10 + (ch - '0'); + } else { + exponent_overflow = true; + } + parsed = true; + ch = next(); + } + if (!parsed) { + // If the exponent character is given, the exponent value may not be empty + is.setstate(std::ios::failbit); + return is; + } + } + + // We've parsed all we need. Construct the value. + if (exponent_overflow) { + // Absolute exponent is too large + if (std::all_of(significand.begin(), significand.end(), [](unsigned char x){ return x == 0; })) { + // Significand is zero. Exponent doesn't matter. + x = fixed<B, I, F, R>(0); + } else if (exponent_negate) { + // A huge negative exponent approaches 0. + x = fixed<B, I, F, R>::from_raw_value(0); + } else { + // A huge positive exponent approaches infinity. + x = std::numeric_limits<fixed<B, I, F, R>>::max(); + } + return is; + } + + // Shift the fraction offset according to exponent + { + const auto exponent_mult = (base == 10) ? 1: 4; + if (exponent_negate) { + const auto adjust = std::min(exponent / exponent_mult, fraction_start); + fraction_start -= adjust; + exponent -= adjust * exponent_mult; + } else { + const auto adjust = std::min(exponent / exponent_mult, significand.size() - fraction_start); + fraction_start += adjust; + exponent -= adjust * exponent_mult; + } + } + + constexpr auto IsSigned = std::is_signed<B>::value; + constexpr auto IntBits = sizeof(B) * 8 - F - (IsSigned ? 1 : 0); + constexpr auto MaxInt = (I{1} << IntBits) - 1; + constexpr auto MaxFraction = (I{1} << F) - 1; + constexpr auto MaxValue = (I{1} << sizeof(B) * 8) - 1; + + // Parse the integer part + I integer = 0; + for (std::size_t i = 0; i < fraction_start; ++i) { + if (integer > MaxInt / base) { + // Overflow + x = negate ? std::numeric_limits<fixed<B, I, F, R>>::min() : std::numeric_limits<fixed<B, I, F, R>>::max(); + return is; + } + assert(significand[i] < base); + integer = integer * base + significand[i]; + } + + // Parse the fractional part + I fraction = 0; + I divisor = 1; + for (std::size_t i = fraction_start; i < significand.size(); ++i) { + assert(significand[i] < base); + if (divisor > MaxFraction / base) { + // We're done + break; + } + fraction = fraction * base + significand[i]; + divisor *= base; + } + + // Construct the value from the parsed parts + I raw_value = (integer << F) + (fraction << F) / divisor; + + // Apply remaining exponent + if (exponent_char == 'p') { + // Base-2 exponent + if (exponent_negate) { + raw_value >>= exponent; + } else { + raw_value <<= exponent; + } + } else { + // Base-10 exponent + if (exponent_negate) { + I remainder = 0; + for (std::size_t e = 0; e < exponent; ++e) { + remainder = raw_value % 10; + raw_value /= 10; + } + raw_value += remainder / 5; + } else { + for (std::size_t e = 0; e < exponent; ++e) { + if (raw_value > MaxValue / 10) { + // Overflow + x = negate ? std::numeric_limits<fixed<B, I, F, R>>::min() : std::numeric_limits<fixed<B, I, F, R>>::max(); + return is; + } + raw_value *= 10; + } + } + } + x = fixed<B, I, F, R>::from_raw_value(static_cast<B>(negate ? -raw_value : raw_value)); + return is; +} + +} + +#endif diff --git a/fpm/math.hpp b/fpm/math.hpp new file mode 100644 index 0000000..7a76349 --- /dev/null +++ b/fpm/math.hpp @@ -0,0 +1,684 @@ +#ifndef FPM_MATH_HPP +#define FPM_MATH_HPP + +#include "fixed.hpp" +#include <cmath> + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +namespace fpm +{ + +// +// Helper functions +// +namespace detail +{ + +// Returns the index of the most-signifcant set bit +inline long find_highest_bit(unsigned long long value) noexcept +{ + assert(value != 0); +#if defined(_MSC_VER) + unsigned long index; +#if defined(_WIN64) + _BitScanReverse64(&index, value); +#else + if (_BitScanReverse(&index, static_cast<unsigned long>(value >> 32)) != 0) { + index += 32; + } else { + _BitScanReverse(&index, static_cast<unsigned long>(value & 0xfffffffflu)); + } +#endif + return index; +#elif defined(__GNUC__) || defined(__clang__) + return sizeof(value) * 8 - 1 - __builtin_clzll(value); +#else +# error "your platform does not support find_highest_bit()" +#endif +} + +} + +// +// Classification methods +// + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline int fpclassify(fixed<B, I, F, R> x) noexcept +{ + return (x.raw_value() == 0) ? FP_ZERO : FP_NORMAL; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool isfinite(fixed<B, I, F, R>) noexcept +{ + return true; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool isinf(fixed<B, I, F, R>) noexcept +{ + return false; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool isnan(fixed<B, I, F, R>) noexcept +{ + return false; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool isnormal(fixed<B, I, F, R> x) noexcept +{ + return x.raw_value() != 0; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool signbit(fixed<B, I, F, R> x) noexcept +{ + return x.raw_value() < 0; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool isgreater(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept +{ + return x > y; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool isgreaterequal(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept +{ + return x >= y; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool isless(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept +{ + return x < y; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool islessequal(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept +{ + return x <= y; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool islessgreater(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept +{ + return x != y; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline bool isunordered(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept +{ + return false; +} + +// +// Nearest integer operations +// +template <typename B, typename I, unsigned int F, bool R> +inline fixed<B, I, F, R> ceil(fixed<B, I, F, R> x) noexcept +{ + constexpr auto FRAC = B(1) << F; + auto value = x.raw_value(); + if (value > 0) value += FRAC - 1; + return fixed<B, I, F, R>::from_raw_value(value / FRAC * FRAC); +} + +template <typename B, typename I, unsigned int F, bool R> +inline fixed<B, I, F, R> floor(fixed<B, I, F, R> x) noexcept +{ + constexpr auto FRAC = B(1) << F; + auto value = x.raw_value(); + if (value < 0) value -= FRAC - 1; + return fixed<B, I, F, R>::from_raw_value(value / FRAC * FRAC); +} + +template <typename B, typename I, unsigned int F, bool R> +inline fixed<B, I, F, R> trunc(fixed<B, I, F, R> x) noexcept +{ + constexpr auto FRAC = B(1) << F; + return fixed<B, I, F, R>::from_raw_value(x.raw_value() / FRAC * FRAC); +} + +template <typename B, typename I, unsigned int F, bool R> +inline fixed<B, I, F, R> round(fixed<B, I, F, R> x) noexcept +{ + constexpr auto FRAC = B(1) << F; + auto value = x.raw_value() / (FRAC / 2); + return fixed<B, I, F, R>::from_raw_value(((value / 2) + (value % 2)) * FRAC); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> nearbyint(fixed<B, I, F, R> x) noexcept +{ + // Rounding mode is assumed to be FE_TONEAREST + constexpr auto FRAC = B(1) << F; + auto value = x.raw_value(); + const bool is_half = std::abs(value % FRAC) == FRAC / 2; + value /= FRAC / 2; + value = (value / 2) + (value % 2); + value -= (value % 2) * is_half; + return fixed<B, I, F, R>::from_raw_value(value * FRAC); +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline fixed<B, I, F, R> rint(fixed<B, I, F, R> x) noexcept +{ + // Rounding mode is assumed to be FE_TONEAREST + return nearbyint(x); +} + +// +// Mathematical functions +// +template <typename B, typename I, unsigned int F, bool R> +constexpr inline fixed<B, I, F, R> abs(fixed<B, I, F, R> x) noexcept +{ + return (x >= fixed<B, I, F, R>{0}) ? x : -x; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline fixed<B, I, F, R> fmod(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept +{ + return + assert(y.raw_value() != 0), + fixed<B, I, F, R>::from_raw_value(x.raw_value() % y.raw_value()); +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline fixed<B, I, F, R> remainder(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept +{ + return + assert(y.raw_value() != 0), + x - nearbyint(x / y) * y; +} + +template <typename B, typename I, unsigned int F, bool R> +inline fixed<B, I, F, R> remquo(fixed<B, I, F, R> x, fixed<B, I, F, R> y, int* quo) noexcept +{ + assert(y.raw_value() != 0); + assert(quo != nullptr); + *quo = x.raw_value() / y.raw_value(); + return fixed<B, I, F, R>::from_raw_value(x.raw_value() % y.raw_value()); +} + +// +// Manipulation functions +// + +template <typename B, typename I, unsigned int F, bool R, typename C, typename J, unsigned int G, bool S> +constexpr inline fixed<B, I, F, R> copysign(fixed<B, I, F, R> x, fixed<C, J, G, S> y) noexcept +{ + return + x = abs(x), + (y >= fixed<C, J, G, S>{0}) ? x : -x; +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline fixed<B, I, F, R> nextafter(fixed<B, I, F, R> from, fixed<B, I, F, R> to) noexcept +{ + return from == to ? to : + to > from ? fixed<B, I, F, R>::from_raw_value(from.raw_value() + 1) + : fixed<B, I, F, R>::from_raw_value(from.raw_value() - 1); +} + +template <typename B, typename I, unsigned int F, bool R> +constexpr inline fixed<B, I, F, R> nexttoward(fixed<B, I, F, R> from, fixed<B, I, F, R> to) noexcept +{ + return nextafter(from, to); +} + +template <typename B, typename I, unsigned int F, bool R> +inline fixed<B, I, F, R> modf(fixed<B, I, F, R> x, fixed<B, I, F, R>* iptr) noexcept +{ + const auto raw = x.raw_value(); + constexpr auto FRAC = B{1} << F; + *iptr = fixed<B, I, F, R>::from_raw_value(raw / FRAC * FRAC); + return fixed<B, I, F, R>::from_raw_value(raw % FRAC); +} + + +// +// Power functions +// + +template <typename B, typename I, unsigned int F, bool R, typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr> +fixed<B, I, F, R> pow(fixed<B, I, F, R> base, T exp) noexcept +{ + using Fixed = fixed<B, I, F, R>; + + if (base == Fixed(0)) { + assert(exp > 0); + return Fixed(0); + } + + Fixed result {1}; + if (exp < 0) + { + for (Fixed intermediate = base; exp != 0; exp /= 2, intermediate *= intermediate) + { + if ((exp % 2) != 0) + { + result /= intermediate; + } + } + } + else + { + for (Fixed intermediate = base; exp != 0; exp /= 2, intermediate *= intermediate) + { + if ((exp % 2) != 0) + { + result *= intermediate; + } + } + } + return result; +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> pow(fixed<B, I, F, R> base, fixed<B, I, F, R> exp) noexcept +{ + using Fixed = fixed<B, I, F, R>; + + if (base == Fixed(0)) { + assert(exp > Fixed(0)); + return Fixed(0); + } + + if (exp < Fixed(0)) + { + return 1 / pow(base, -exp); + } + + constexpr auto FRAC = B(1) << F; + if (exp.raw_value() % FRAC == 0) + { + // Non-fractional exponents are easier to calculate + return pow(base, exp.raw_value() / FRAC); + } + + // For negative bases we do not support fractional exponents. + // Technically fractions with odd denominators could work, + // but that's too much work to figure out. + assert(base > Fixed(0)); + return exp2(log2(base) * exp); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> exp(fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + if (x < Fixed(0)) { + return 1 / exp(-x); + } + constexpr auto FRAC = B(1) << F; + const B x_int = x.raw_value() / FRAC; + x -= x_int; + assert(x >= Fixed(0) && x < Fixed(1)); + + constexpr auto fA = Fixed::template from_fixed_point<63>( 128239257017632854ll); // 1.3903728105644451e-2 + constexpr auto fB = Fixed::template from_fixed_point<63>( 320978614890280666ll); // 3.4800571158543038e-2 + constexpr auto fC = Fixed::template from_fixed_point<63>(1571680799599592947ll); // 1.7040197373796334e-1 + constexpr auto fD = Fixed::template from_fixed_point<63>(4603349000587966862ll); // 4.9909609871464493e-1 + constexpr auto fE = Fixed::template from_fixed_point<62>(4612052447974689712ll); // 1.0000794567422495 + constexpr auto fF = Fixed::template from_fixed_point<63>(9223361618412247875ll); // 9.9999887043019773e-1 + return pow(Fixed::e(), x_int) * (((((fA * x + fB) * x + fC) * x + fD) * x + fE) * x + fF); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> exp2(fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + if (x < Fixed(0)) { + return 1 / exp2(-x); + } + constexpr auto FRAC = B(1) << F; + const B x_int = x.raw_value() / FRAC; + x -= x_int; + assert(x >= Fixed(0) && x < Fixed(1)); + + constexpr auto fA = Fixed::template from_fixed_point<63>( 17491766697771214ll); // 1.8964611454333148e-3 + constexpr auto fB = Fixed::template from_fixed_point<63>( 82483038782406547ll); // 8.9428289841091295e-3 + constexpr auto fC = Fixed::template from_fixed_point<63>( 515275173969157690ll); // 5.5866246304520701e-2 + constexpr auto fD = Fixed::template from_fixed_point<63>(2214897896212987987ll); // 2.4013971109076949e-1 + constexpr auto fE = Fixed::template from_fixed_point<63>(6393224161192452326ll); // 6.9315475247516736e-1 + constexpr auto fF = Fixed::template from_fixed_point<63>(9223371050976163566ll); // 9.9999989311082668e-1 + return Fixed(1 << x_int) * (((((fA * x + fB) * x + fC) * x + fD) * x + fE) * x + fF); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> expm1(fixed<B, I, F, R> x) noexcept +{ + return exp(x) - 1; +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> log2(fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + assert(x > Fixed(0)); + + // Normalize input to the [1:2] domain + B value = x.raw_value(); + const long highest = detail::find_highest_bit(value); + if (highest >= F) { + value >>= (highest - F); + } else { + value <<= (F - highest); + } + x = Fixed::from_raw_value(value); + assert(x >= Fixed(1) && x < Fixed(2)); + + constexpr auto fA = Fixed::template from_fixed_point<63>( 413886001457275979ll); // 4.4873610194131727e-2 + constexpr auto fB = Fixed::template from_fixed_point<63>(-3842121857793256941ll); // -4.1656368651734915e-1 + constexpr auto fC = Fixed::template from_fixed_point<62>( 7522345947206307744ll); // 1.6311487636297217 + constexpr auto fD = Fixed::template from_fixed_point<61>(-8187571043052183818ll); // -3.5507929249026341 + constexpr auto fE = Fixed::template from_fixed_point<60>( 5870342889289496598ll); // 5.0917108110420042 + constexpr auto fF = Fixed::template from_fixed_point<61>(-6457199832668582866ll); // -2.8003640347009253 + return Fixed(highest - F) + (((((fA * x + fB) * x + fC) * x + fD) * x + fE) * x + fF); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> log(fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + return log2(x) / log2(Fixed::e()); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> log10(fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + return log2(x) / log2(Fixed(10)); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> log1p(fixed<B, I, F, R> x) noexcept +{ + return log(1 + x); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> cbrt(fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + + if (x == Fixed(0)) + { + return x; + } + if (x < Fixed(0)) + { + return -cbrt(-x); + } + assert(x >= Fixed(0)); + + // Finding the cube root of an integer, taken from Hacker's Delight, + // based on the square root algorithm. + + // We start at the greatest power of eight that's less than the argument. + int ofs = ((detail::find_highest_bit(x.raw_value()) + 2*F) / 3 * 3); + I num = I{x.raw_value()}; + I res = 0; + + const auto do_round = [&] + { + for (; ofs >= 0; ofs -= 3) + { + res += res; + const I val = (3*res*(res + 1) + 1) << ofs; + if (num >= val) + { + num -= val; + res++; + } + } + }; + + // We should shift by 2*F (since there are two multiplications), but that + // could overflow even the intermediate type, so we have to split the + // algorithm up in two rounds of F bits each. Each round will deplete + // 'num' digit by digit, so after a round we can shift it again. + num <<= F; + ofs -= F; + do_round(); + + num <<= F; + ofs += F; + do_round(); + + return Fixed::from_raw_value(static_cast<B>(res)); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> sqrt(fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + + assert(x >= Fixed(0)); + if (x == Fixed(0)) + { + return x; + } + + // Finding the square root of an integer in base-2, from: + // https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Binary_numeral_system_.28base_2.29 + + // Shift by F first because it's fixed-point. + I num = I{x.raw_value()} << F; + I res = 0; + + // "bit" starts at the greatest power of four that's less than the argument. + for (I bit = I{1} << ((detail::find_highest_bit(x.raw_value()) + F) / 2 * 2); bit != 0; bit >>= 2) + { + const I val = res + bit; + res >>= 1; + if (num >= val) + { + num -= val; + res += bit; + } + } + + // Round the last digit up if necessary + if (num > res) + { + res++; + } + + return Fixed::from_raw_value(static_cast<B>(res)); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> hypot(fixed<B, I, F, R> x, fixed<B, I, F, R> y) noexcept +{ + assert(x != 0 || y != 0); + return sqrt(x*x + y*y); +} + +// +// Trigonometry functions +// + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> sin(fixed<B, I, F, R> x) noexcept +{ + // This sine uses a fifth-order curve-fitting approximation originally + // described by Jasper Vijn on coranac.com which has a worst-case + // relative error of 0.07% (over [-pi:pi]). + using Fixed = fixed<B, I, F, R>; + + // Turn x from [0..2*PI] domain into [0..4] domain + x = fmod(x, Fixed::two_pi()); + x = x / Fixed::half_pi(); + + // Take x modulo one rotation, so [-4..+4]. + if (x < Fixed(0)) { + x += Fixed(4); + } + + int sign = +1; + if (x > Fixed(2)) { + // Reduce domain to [0..2]. + sign = -1; + x -= Fixed(2); + } + + if (x > Fixed(1)) { + // Reduce domain to [0..1]. + x = Fixed(2) - x; + } + + const Fixed x2 = x*x; + return sign * x * (Fixed::pi() - x2*(Fixed::two_pi() - 5 - x2*(Fixed::pi() - 3)))/2; +} + +template <typename B, typename I, unsigned int F, bool R> +inline fixed<B, I, F, R> cos(fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + if (x > Fixed(0)) { // Prevent an overflow due to the addition of π/2 + return sin(x - (Fixed::two_pi() - Fixed::half_pi())); + } else { + return sin(Fixed::half_pi() + x); + } +} + +template <typename B, typename I, unsigned int F, bool R> +inline fixed<B, I, F, R> tan(fixed<B, I, F, R> x) noexcept +{ + auto cx = cos(x); + + // Tangent goes to infinity at 90 and -90 degrees. + // We can't represent that with fixed-point maths. + assert(abs(cx).raw_value() > 1); + + return sin(x) / cx; +} + +namespace detail { + +// Calculates atan(x) assuming that x is in the range [0,1] +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> atan_sanitized(fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + assert(x >= Fixed(0) && x <= Fixed(1)); + + constexpr auto fA = Fixed::template from_fixed_point<63>( 716203666280654660ll); // 0.0776509570923569 + constexpr auto fB = Fixed::template from_fixed_point<63>(-2651115102768076601ll); // -0.287434475393028 + constexpr auto fC = Fixed::template from_fixed_point<63>( 9178930894564541004ll); // 0.995181681698119 (PI/4 - A - B) + + const auto xx = x * x; + return ((fA*xx + fB)*xx + fC)*x; +} + +// Calculate atan(y / x), assuming x != 0. +// +// If x is very, very small, y/x can easily overflow the fixed-point range. +// If q = y/x and q > 1, atan(q) would calculate atan(1/q) as intermediate step +// anyway. We can shortcut that here and avoid the loss of information, thus +// improving the accuracy of atan(y/x) for very small x. +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> atan_div(fixed<B, I, F, R> y, fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + assert(x != Fixed(0)); + + // Make sure y and x are positive. + // If y / x is negative (when y or x, but not both, are negative), negate the result to + // keep the correct outcome. + if (y < Fixed(0)) { + if (x < Fixed(0)) { + return atan_div(-y, -x); + } + return -atan_div(-y, x); + } + if (x < Fixed(0)) { + return -atan_div(y, -x); + } + assert(y >= Fixed(0)); + assert(x > Fixed(0)); + + if (y > x) { + return Fixed::half_pi() - detail::atan_sanitized(x / y); + } + return detail::atan_sanitized(y / x); +} + +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> atan(fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + if (x < Fixed(0)) + { + return -atan(-x); + } + + if (x > Fixed(1)) + { + return Fixed::half_pi() - detail::atan_sanitized(Fixed(1) / x); + } + + return detail::atan_sanitized(x); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> asin(fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + assert(x >= Fixed(-1) && x <= Fixed(+1)); + + const auto yy = Fixed(1) - x * x; + if (yy == Fixed(0)) + { + return copysign(Fixed::half_pi(), x); + } + return detail::atan_div(x, sqrt(yy)); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> acos(fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + assert(x >= Fixed(-1) && x <= Fixed(+1)); + + if (x == Fixed(-1)) + { + return Fixed::pi(); + } + const auto yy = Fixed(1) - x * x; + return Fixed(2)*detail::atan_div(sqrt(yy), Fixed(1) + x); +} + +template <typename B, typename I, unsigned int F, bool R> +fixed<B, I, F, R> atan2(fixed<B, I, F, R> y, fixed<B, I, F, R> x) noexcept +{ + using Fixed = fixed<B, I, F, R>; + if (x == Fixed(0)) + { + assert(y != Fixed(0)); + return (y > Fixed(0)) ? Fixed::half_pi() : -Fixed::half_pi(); + } + + auto ret = detail::atan_div(y, x); + + if (x < Fixed(0)) + { + return (y >= Fixed(0)) ? ret + Fixed::pi() : ret - Fixed::pi(); + } + return ret; +} + +} + +#endif diff --git a/kiss/CHANGELOG b/kiss/CHANGELOG new file mode 100644 index 0000000..2dd3603 --- /dev/null +++ b/kiss/CHANGELOG @@ -0,0 +1,123 @@ +1.3.0 2012-07-18 + removed non-standard malloc.h from kiss_fft.h + + moved -lm to end of link line + + checked various return values + + converted python Numeric code to NumPy + + fixed test of int32_t on 64 bit OS + + added padding in a couple of places to allow SIMD alignment of structs + +1.2.9 2010-05-27 + threadsafe ( including OpenMP ) + + first edition of kissfft.hh the C++ template fft engine + +1.2.8 + Changed memory.h to string.h -- apparently more standard + + Added openmp extensions. This can have fairly linear speedups for larger FFT sizes. + +1.2.7 + Shrank the real-fft memory footprint. Thanks to Galen Seitz. + +1.2.6 (Nov 14, 2006) The "thanks to GenArts" release. + Added multi-dimensional real-optimized FFT, see tools/kiss_fftndr + Thanks go to GenArts, Inc. for sponsoring the development. + +1.2.5 (June 27, 2006) The "release for no good reason" release. + Changed some harmless code to make some compilers' warnings go away. + Added some more digits to pi -- why not. + Added kiss_fft_next_fast_size() function to help people decide how much to pad. + Changed multidimensional test from 8 dimensions to only 3 to avoid testing + problems with fixed point (sorry Buckaroo Banzai). + +1.2.4 (Oct 27, 2005) The "oops, inverse fixed point real fft was borked" release. + Fixed scaling bug for inverse fixed point real fft -- also fixed test code that should've been failing. + Thanks to Jean-Marc Valin for bug report. + + Use sys/types.h for more portable types than short,int,long => int16_t,int32_t,int64_t + If your system does not have these, you may need to define them -- but at least it breaks in a + loud and easily fixable way -- unlike silently using the wrong size type. + + Hopefully tools/psdpng.c is fixed -- thanks to Steve Kellog for pointing out the weirdness. + +1.2.3 (June 25, 2005) The "you want to use WHAT as a sample" release. + Added ability to use 32 bit fixed point samples -- requires a 64 bit intermediate result, a la 'long long' + + Added ability to do 4 FFTs in parallel by using SSE SIMD instructions. This is accomplished by + using the __m128 (vector of 4 floats) as kiss_fft_scalar. Define USE_SIMD to use this. + + I know, I know ... this is drifting a bit from the "kiss" principle, but the speed advantages + make it worth it for some. Also recent gcc makes it SOO easy to use vectors of 4 floats like a POD type. + +1.2.2 (May 6, 2005) The Matthew release + Replaced fixed point division with multiply&shift. Thanks to Jean-Marc Valin for + discussions regarding. Considerable speedup for fixed-point. + + Corrected overflow protection in real fft routines when using fixed point. + Finder's Credit goes to Robert Oschler of robodance for pointing me at the bug. + This also led to the CHECK_OVERFLOW_OP macro. + +1.2.1 (April 4, 2004) + compiles cleanly with just about every -W warning flag under the sun + + reorganized kiss_fft_state so it could be read-only/const. This may be useful for embedded systems + that are willing to predeclare twiddle factors, factorization. + + Fixed C_MUL,S_MUL on 16-bit platforms. + + tmpbuf will only be allocated if input & output buffers are same + scratchbuf will only be allocated for ffts that are not multiples of 2,3,5 + + NOTE: The tmpbuf,scratchbuf changes may require synchronization code for multi-threaded apps. + + +1.2 (Feb 23, 2004) + interface change -- cfg object is forward declaration of struct instead of void* + This maintains type saftey and lets the compiler warn/error about stupid mistakes. + (prompted by suggestion from Erik de Castro Lopo) + + small speed improvements + + added psdpng.c -- sample utility that will create png spectrum "waterfalls" from an input file + ( not terribly useful yet) + +1.1.1 (Feb 1, 2004 ) + minor bug fix -- only affects odd rank, in-place, multi-dimensional FFTs + +1.1 : (Jan 30,2004) + split sample_code/ into test/ and tools/ + + Removed 2-D fft and added N-D fft (arbitrary) + + modified fftutil.c to allow multi-d FFTs + + Modified core fft routine to allow an input stride via kiss_fft_stride() + (eased support of multi-D ffts) + + Added fast convolution filtering (FIR filtering using overlap-scrap method, with tail scrap) + + Add kfc.[ch]: the KISS FFT Cache. It takes care of allocs for you ( suggested by Oscar Lesta ). + +1.0.1 (Dec 15, 2003) + fixed bug that occurred when nfft==1. Thanks to Steven Johnson. + +1.0 : (Dec 14, 2003) + changed kiss_fft function from using a single buffer, to two buffers. + If the same buffer pointer is supplied for both in and out, kiss will + manage the buffer copies. + + added kiss_fft2d and kiss_fftr as separate source files (declarations in kiss_fft.h ) + +0.4 :(Nov 4,2003) optimized for radix 2,3,4,5 + +0.3 :(Oct 28, 2003) woops, version 2 didn't actually factor out any radices other than 2. + Thanks to Steven Johnson for finding this one. + +0.2 :(Oct 27, 2003) added mixed radix, only radix 2,4 optimized versions + +0.1 :(May 19 2003) initial release, radix 2 only diff --git a/kiss/COPYING b/kiss/COPYING new file mode 100644 index 0000000..6b4b622 --- /dev/null +++ b/kiss/COPYING @@ -0,0 +1,11 @@ +Copyright (c) 2003-2010 Mark Borgerding . All rights reserved. + +KISS FFT is provided under: + + SPDX-License-Identifier: BSD-3-Clause + +Being under the terms of the BSD 3-clause "New" or "Revised" License, +according with: + + LICENSES/BSD-3-Clause + diff --git a/kiss/README.md b/kiss/README.md new file mode 100644 index 0000000..1138a0c --- /dev/null +++ b/kiss/README.md @@ -0,0 +1,245 @@ +# KISS FFT [![Build Status](https://travis-ci.com/mborgerding/kissfft.svg?branch=master)](https://travis-ci.com/mborgerding/kissfft) + +KISS FFT - A mixed-radix Fast Fourier Transform based up on the principle, +"Keep It Simple, Stupid." + +There are many great fft libraries already around. Kiss FFT is not trying +to be better than any of them. It only attempts to be a reasonably efficient, +moderately useful FFT that can use fixed or floating data types and can be +incorporated into someone's C program in a few minutes with trivial licensing. + +## USAGE: + +The basic usage for 1-d complex FFT is: + +```c + #include "kiss_fft.h" + kiss_fft_cfg cfg = kiss_fft_alloc( nfft ,is_inverse_fft ,0,0 ); + while ... + + ... // put kth sample in cx_in[k].r and cx_in[k].i + + kiss_fft( cfg , cx_in , cx_out ); + + ... // transformed. DC is in cx_out[0].r and cx_out[0].i + + kiss_fft_free(cfg); +``` + - **Note**: frequency-domain data is stored from dc up to 2pi. + so cx_out[0] is the dc bin of the FFT + and cx_out[nfft/2] is the Nyquist bin (if exists) + +Declarations are in "kiss_fft.h", along with a brief description of the +functions you'll need to use. + +Code definitions for 1d complex FFTs are in kiss_fft.c. + +You can do other cool stuff with the extras you'll find in tools/ +> - multi-dimensional FFTs +> - real-optimized FFTs (returns the positive half-spectrum: + (nfft/2+1) complex frequency bins) +> - fast convolution FIR filtering (not available for fixed point) +> - spectrum image creation + +The core fft and most tools/ code can be compiled to use float, double, + Q15 short or Q31 samples. The default is float. + +## BUILDING: + +There are two functionally-equivalent build systems supported by kissfft: + + - Make (traditional Makefiles for Unix / Linux systems) + - CMake (more modern and feature-rich build system developed by Kitware) + +To build kissfft, the following build environment can be used: + + - GNU build environment with GCC, Clang and GNU Make or CMake (>= 3.6) + - Microsoft Visual C++ (MSVC) with CMake (>= 3.6) + +Additional libraries required to build and test kissfft include: + + - libpng for psdpng tool, + - libfftw3 to validate kissfft results against it, + - python 2/3 with Numpy to validate kissfft results against it. + - OpenMP supported by GCC, Clang or MSVC for multi-core FFT transformations + +Environments like Cygwin and MinGW can be highly likely used to build kissfft +targeting Windows platform, but no tests were performed to the date. + +Both Make and CMake builds are easily configurable: + + - `KISSFFT_DATATYPE=<datatype>` (for Make) or `-DKISSFFT_DATATYPE=<datatype>` + (for CMake) denote the principal datatype used by kissfft. It can be one + of the following: + + - float (default) + - double + - int16_t + - int32_t + - SIMD (requires SSE instruction set support on target CPU) + + - `KISSFFT_OPENMP=1` (for Make) or `-DKISSFFT_OPENMP=ON` (for CMake) builds kissfft + with OpenMP support. Please note that a supported compiler is required and this + option is turned off by default. + + - `KISSFFT_STATIC=1` (for Make) or `-DKISSFFT_STATIC=ON` (for CMake) instructs + the builder to create static library ('.lib' for Windows / '.a' for Unix or Linux). + By default, this option is turned off and the shared library is created + ('.dll' for Windows, '.so' for Linux or Unix, '.dylib' for Mac OSX) + + - `-DKISSFFT_TEST=OFF` (for CMake) disables building tests for kissfft. On Make, + building tests is done separately by 'make testall' or 'make testsingle', so + no specific setting is required. + + - `KISSFFT_TOOLS=0` (for Make) or `-DKISSFFT_TOOLS=OFF` (for CMake) builds kissfft + without command-line tools like 'fastconv'. By default the tools are built. + + - `KISSFFT_USE_ALLOCA=1` (for Make) or `-DKISSFFT_USE_ALLOCA=ON` (for CMake) + build kissfft with 'alloca' usage instead of 'malloc' / 'free'. + + - `PREFIX=/full/path/to/installation/prefix/directory` (for Make) or + `-DCMAKE_INSTALL_PREFIX=/full/path/to/installation/prefix/directory` (for CMake) + specifies the prefix directory to install kissfft into. + +For example, to build kissfft as a static library with 'int16_t' datatype and +OpenMP support using Make, run the command from kissfft source tree: + +``` +make KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 all +``` + +The same configuration for CMake is: + +``` +mkdir build && cd build +cmake -DKISSFFT_DATATYPE=int16_t -DKISSFFT_STATIC=ON -DKISSFFT_OPENMP=ON .. +make all +``` + +To specify '/tmp/1234' as installation prefix directory, run: + + +``` +make PREFIX=/tmp/1234 KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 install +``` + +or + +``` +mkdir build && cd build +cmake -DCMAKE_INSTALL_PREFIX=/tmp/1234 -DKISSFFT_DATATYPE=int16_t -DKISSFFT_STATIC=ON -DKISSFFT_OPENMP=ON .. +make all +make install +``` + +## TESTING: + +To validate the build configured as an example above, run the following command from +kissfft source tree: + +``` +make KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 testsingle +``` + +if using Make, or: + +``` +make test +``` + +if using CMake. + +To test all possible build configurations, please run an extended testsuite from +kissfft source tree: + +``` +sh test/kissfft-testsuite.sh +``` + +Please note that the extended testsuite takes around 20-40 minutes depending on device +it runs on. This testsuite is useful for reporting bugs or testing the pull requests. + +## BACKGROUND + +I started coding this because I couldn't find a fixed point FFT that didn't +use assembly code. I started with floating point numbers so I could get the +theory straight before working on fixed point issues. In the end, I had a +little bit of code that could be recompiled easily to do ffts with short, float +or double (other types should be easy too). + +Once I got my FFT working, I was curious about the speed compared to +a well respected and highly optimized fft library. I don't want to criticize +this great library, so let's call it FFT_BRANDX. +During this process, I learned: + +> 1. FFT_BRANDX has more than 100K lines of code. The core of kiss_fft is about 500 lines (cpx 1-d). +> 2. It took me an embarrassingly long time to get FFT_BRANDX working. +> 3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB (without optimizing for size). +> 4. FFT_BRANDX is roughly twice as fast as KISS FFT in default mode. + +It is wonderful that free, highly optimized libraries like FFT_BRANDX exist. +But such libraries carry a huge burden of complexity necessary to extract every +last bit of performance. + +**Sometimes simpler is better, even if it's not better.** + +## FREQUENTLY ASKED QUESTIONS: +> Q: Can I use kissfft in a project with a ___ license?</br> +> A: Yes. See LICENSE below. + +> Q: Why don't I get the output I expect?</br> +> A: The two most common causes of this are +> 1) scaling : is there a constant multiplier between what you got and what you want? +> 2) mixed build environment -- all code must be compiled with same preprocessor +> definitions for FIXED_POINT and kiss_fft_scalar + +> Q: Will you write/debug my code for me?</br> +> A: Probably not unless you pay me. I am happy to answer pointed and topical questions, but +> I may refer you to a book, a forum, or some other resource. + + +## PERFORMANCE + (on Athlon XP 2100+, with gcc 2.96, float data type) + +Kiss performed 10000 1024-pt cpx ffts in .63 s of cpu time. +For comparison, it took md5sum twice as long to process the same amount of data. +Transforming 5 minutes of CD quality audio takes less than a second (nfft=1024). + +**DO NOT:** +- use Kiss if you need the Fastest Fourier Transform in the World +- ask me to add features that will bloat the code + +## UNDER THE HOOD + +Kiss FFT uses a time decimation, mixed-radix, out-of-place FFT. If you give it an input buffer +and output buffer that are the same, a temporary buffer will be created to hold the data. + +No static data is used. The core routines of kiss_fft are thread-safe (but not all of the tools directory).[ + +No scaling is done for the floating point version (for speed). +Scaling is done both ways for the fixed-point version (for overflow prevention). + +Optimized butterflies are used for factors 2,3,4, and 5. + +The real (i.e. not complex) optimization code only works for even length ffts. It does two half-length +FFTs in parallel (packed into real&imag), and then combines them via twiddling. The result is +nfft/2+1 complex frequency bins from DC to Nyquist. If you don't know what this means, search the web. + +The fast convolution filtering uses the overlap-scrap method, slightly +modified to put the scrap at the tail. + +## LICENSE + Revised BSD License, see COPYING for verbiage. + Basically, "free to use&change, give credit where due, no guarantees" + Note this license is compatible with GPL at one end of the spectrum and closed, commercial software at + the other end. See http://www.fsf.org/licensing/licenses + +## TODO + - Add real optimization for odd length FFTs + - Document/revisit the input/output fft scaling + - Make doc describing the overlap (tail) scrap fast convolution filtering in kiss_fastfir.c + - Test all the ./tools/ code with fixed point (kiss_fastfir.c doesn't work, maybe others) + +## AUTHOR + Mark Borgerding + Mark@Borgerding.net diff --git a/kiss/_kiss_fft_guts.h b/kiss/_kiss_fft_guts.h new file mode 100644 index 0000000..4bd8d1c --- /dev/null +++ b/kiss/_kiss_fft_guts.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +/* kiss_fft.h + defines kiss_fft_scalar as either short or a float type + and defines + typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ + +#ifndef _kiss_fft_guts_h +#define _kiss_fft_guts_h + +#include "kiss_fft.h" +#include "kiss_fft_log.h" +#include <limits.h> + +#define MAXFACTORS 32 +/* e.g. an fft of length 128 has 4 factors + as far as kissfft is concerned + 4*4*4*2 + */ + +struct kiss_fft_state{ + int nfft; + int inverse; + int factors[2*MAXFACTORS]; + kiss_fft_cpx twiddles[1]; +}; + +/* + Explanation of macros dealing with complex math: + + C_MUL(m,a,b) : m = a*b + C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise + C_SUB( res, a,b) : res = a - b + C_SUBFROM( res , a) : res -= a + C_ADDTO( res , a) : res += a + * */ +#ifdef FIXED_POINT +#include <stdint.h> +#if (FIXED_POINT==32) +# define FRACBITS 31 +# define SAMPPROD int64_t +#define SAMP_MAX INT32_MAX +#define SAMP_MIN INT32_MIN +#else +# define FRACBITS 15 +# define SAMPPROD int32_t +#define SAMP_MAX INT16_MAX +#define SAMP_MIN INT16_MIN +#endif + +#if defined(CHECK_OVERFLOW) +# define CHECK_OVERFLOW_OP(a,op,b) \ + if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \ + KISS_FFT_WARNING("overflow (%d " #op" %d) = %ld", (a),(b),(SAMPPROD)(a) op (SAMPPROD)(b)); } +#endif + + +# define smul(a,b) ( (SAMPPROD)(a)*(b) ) +# define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS ) + +# define S_MUL(a,b) sround( smul(a,b) ) + +# define C_MUL(m,a,b) \ + do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \ + (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0) + +# define DIVSCALAR(x,k) \ + (x) = sround( smul( x, SAMP_MAX/k ) ) + +# define C_FIXDIV(c,div) \ + do { DIVSCALAR( (c).r , div); \ + DIVSCALAR( (c).i , div); }while (0) + +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r = sround( smul( (c).r , s ) ) ;\ + (c).i = sround( smul( (c).i , s ) ) ; }while(0) + +#else /* not FIXED_POINT*/ + +# define S_MUL(a,b) ( (a)*(b) ) +#define C_MUL(m,a,b) \ + do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ + (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) +# define C_FIXDIV(c,div) /* NOOP */ +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r *= (s);\ + (c).i *= (s); }while(0) +#endif + +#ifndef CHECK_OVERFLOW_OP +# define CHECK_OVERFLOW_OP(a,op,b) /* noop */ +#endif + +#define C_ADD( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,+,(b).r)\ + CHECK_OVERFLOW_OP((a).i,+,(b).i)\ + (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ + }while(0) +#define C_SUB( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,-,(b).r)\ + CHECK_OVERFLOW_OP((a).i,-,(b).i)\ + (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ + }while(0) +#define C_ADDTO( res , a)\ + do { \ + CHECK_OVERFLOW_OP((res).r,+,(a).r)\ + CHECK_OVERFLOW_OP((res).i,+,(a).i)\ + (res).r += (a).r; (res).i += (a).i;\ + }while(0) + +#define C_SUBFROM( res , a)\ + do {\ + CHECK_OVERFLOW_OP((res).r,-,(a).r)\ + CHECK_OVERFLOW_OP((res).i,-,(a).i)\ + (res).r -= (a).r; (res).i -= (a).i; \ + }while(0) + + +#ifdef FIXED_POINT +# define KISS_FFT_COS(phase) floor(.5+SAMP_MAX * cos (phase)) +# define KISS_FFT_SIN(phase) floor(.5+SAMP_MAX * sin (phase)) +# define HALF_OF(x) ((x)>>1) +#elif defined(USE_SIMD) +# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) +# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) +# define HALF_OF(x) ((x)*_mm_set1_ps(.5)) +#else +# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) +# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) +# define HALF_OF(x) ((x)*((kiss_fft_scalar).5)) +#endif + +#define kf_cexp(x,phase) \ + do{ \ + (x)->r = KISS_FFT_COS(phase);\ + (x)->i = KISS_FFT_SIN(phase);\ + }while(0) + + +/* a debugging function */ +#define pcpx(c)\ + KISS_FFT_DEBUG("%g + %gi\n",(double)((c)->r),(double)((c)->i)) + + +#ifdef KISS_FFT_USE_ALLOCA +// define this to allow use of alloca instead of malloc for temporary buffers +// Temporary buffers are used in two case: +// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5 +// 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform. +#include <alloca.h> +#define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes) +#define KISS_FFT_TMP_FREE(ptr) +#else +#define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes) +#define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr) +#endif + +#endif /* _kiss_fft_guts_h */ + diff --git a/kiss/kfc.c b/kiss/kfc.c new file mode 100644 index 0000000..a405d9b --- /dev/null +++ b/kiss/kfc.c @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#include "kfc.h" + +typedef struct cached_fft *kfc_cfg; + +struct cached_fft +{ + int nfft; + int inverse; + kiss_fft_cfg cfg; + kfc_cfg next; +}; + +static kfc_cfg cache_root=NULL; +static int ncached=0; + +static kiss_fft_cfg find_cached_fft(int nfft,int inverse) +{ + size_t len; + kfc_cfg cur=cache_root; + kfc_cfg prev=NULL; + while ( cur ) { + if ( cur->nfft == nfft && inverse == cur->inverse ) + break;/*found the right node*/ + prev = cur; + cur = prev->next; + } + if (cur== NULL) { + /* no cached node found, need to create a new one*/ + kiss_fft_alloc(nfft,inverse,0,&len); +#ifdef USE_SIMD + int padding = (16-sizeof(struct cached_fft)) & 15; + // make sure the cfg aligns on a 16 byte boundary + len += padding; +#endif + cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len )); + if (cur == NULL) + return NULL; + cur->cfg = (kiss_fft_cfg)(cur+1); +#ifdef USE_SIMD + cur->cfg = (kiss_fft_cfg) ((char*)(cur+1)+padding); +#endif + kiss_fft_alloc(nfft,inverse,cur->cfg,&len); + cur->nfft=nfft; + cur->inverse=inverse; + cur->next = NULL; + if ( prev ) + prev->next = cur; + else + cache_root = cur; + ++ncached; + } + return cur->cfg; +} + +void kfc_cleanup(void) +{ + kfc_cfg cur=cache_root; + kfc_cfg next=NULL; + while (cur){ + next = cur->next; + free(cur); + cur=next; + } + ncached=0; + cache_root = NULL; +} +void kfc_fft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout) +{ + kiss_fft( find_cached_fft(nfft,0),fin,fout ); +} + +void kfc_ifft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout) +{ + kiss_fft( find_cached_fft(nfft,1),fin,fout ); +} + +#ifdef KFC_TEST +static void check(int nc) +{ + if (ncached != nc) { + fprintf(stderr,"ncached should be %d,but it is %d\n",nc,ncached); + exit(1); + } +} + +int main(void) +{ + kiss_fft_cpx buf1[1024],buf2[1024]; + memset(buf1,0,sizeof(buf1)); + check(0); + kfc_fft(512,buf1,buf2); + check(1); + kfc_fft(512,buf1,buf2); + check(1); + kfc_ifft(512,buf1,buf2); + check(2); + kfc_cleanup(); + check(0); + return 0; +} +#endif diff --git a/kiss/kfc.h b/kiss/kfc.h new file mode 100644 index 0000000..d7d8c1b --- /dev/null +++ b/kiss/kfc.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KFC_H +#define KFC_H +#include "kiss_fft.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* +KFC -- Kiss FFT Cache + +Not needing to deal with kiss_fft_alloc and a config +object may be handy for a lot of programs. + +KFC uses the underlying KISS FFT functions, but caches the config object. +The first time kfc_fft or kfc_ifft for a given FFT size, the cfg +object is created for it. All subsequent calls use the cached +configuration object. + +NOTE: +You should probably not use this if your program will be using a lot +of various sizes of FFTs. There is a linear search through the +cached objects. If you are only using one or two FFT sizes, this +will be negligible. Otherwise, you may want to use another method +of managing the cfg objects. + + There is no automated cleanup of the cached objects. This could lead +to large memory usage in a program that uses a lot of *DIFFERENT* +sized FFTs. If you want to force all cached cfg objects to be freed, +call kfc_cleanup. + + */ + +/*forward complex FFT */ +void KISS_FFT_API kfc_fft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout); +/*reverse complex FFT */ +void KISS_FFT_API kfc_ifft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout); + +/*free all cached objects*/ +void KISS_FFT_API kfc_cleanup(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/kiss/kiss_fft.c b/kiss/kiss_fft.c new file mode 100644 index 0000000..58c24a0 --- /dev/null +++ b/kiss/kiss_fft.c @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + + +#include "_kiss_fft_guts.h" +/* The guts header contains all the multiplication and addition macros that are defined for + fixed or floating point complex numbers. It also delares the kf_ internal functions. + */ + +static void kf_bfly2( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx * Fout2; + kiss_fft_cpx * tw1 = st->twiddles; + kiss_fft_cpx t; + Fout2 = Fout + m; + do{ + C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2); + + C_MUL (t, *Fout2 , *tw1); + tw1 += fstride; + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + }while (--m); +} + +static void kf_bfly4( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + const size_t m + ) +{ + kiss_fft_cpx *tw1,*tw2,*tw3; + kiss_fft_cpx scratch[6]; + size_t k=m; + const size_t m2=2*m; + const size_t m3=3*m; + + + tw3 = tw2 = tw1 = st->twiddles; + + do { + C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4); + + C_MUL(scratch[0],Fout[m] , *tw1 ); + C_MUL(scratch[1],Fout[m2] , *tw2 ); + C_MUL(scratch[2],Fout[m3] , *tw3 ); + + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + if(st->inverse) { + Fout[m].r = scratch[5].r - scratch[4].i; + Fout[m].i = scratch[5].i + scratch[4].r; + Fout[m3].r = scratch[5].r + scratch[4].i; + Fout[m3].i = scratch[5].i - scratch[4].r; + }else{ + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; + } + ++Fout; + }while(--k); +} + +static void kf_bfly3( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + size_t m + ) +{ + size_t k=m; + const size_t m2 = 2*m; + kiss_fft_cpx *tw1,*tw2; + kiss_fft_cpx scratch[5]; + kiss_fft_cpx epi3; + epi3 = st->twiddles[fstride*m]; + + tw1=tw2=st->twiddles; + + do{ + C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); + + C_MUL(scratch[1],Fout[m] , *tw1); + C_MUL(scratch[2],Fout[m2] , *tw2); + + C_ADD(scratch[3],scratch[1],scratch[2]); + C_SUB(scratch[0],scratch[1],scratch[2]); + tw1 += fstride; + tw2 += fstride*2; + + Fout[m].r = Fout->r - HALF_OF(scratch[3].r); + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); + + C_MULBYSCALAR( scratch[0] , epi3.i ); + + C_ADDTO(*Fout,scratch[3]); + + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; + + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; + + ++Fout; + }while(--k); +} + +static void kf_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int u; + kiss_fft_cpx scratch[13]; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx *tw; + kiss_fft_cpx ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=st->twiddles; + for ( u=0; u<m; ++u ) { + C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); + scratch[0] = *Fout0; + + C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); + C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); + C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); + C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); + + C_ADD( scratch[7],scratch[1],scratch[4]); + C_SUB( scratch[10],scratch[1],scratch[4]); + C_ADD( scratch[8],scratch[2],scratch[3]); + C_SUB( scratch[9],scratch[2],scratch[3]); + + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); + + scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); + scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); + scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } +} + +/* perform the butterfly for one stage of a mixed radix FFT */ +static void kf_bfly_generic( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m, + int p + ) +{ + int u,k,q1,q; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx t; + int Norig = st->nfft; + + kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p); + if (scratch == NULL){ + KISS_FFT_ERROR("Memory allocation failed."); + return; + } + + for ( u=0; u<m; ++u ) { + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + scratch[q1] = Fout[ k ]; + C_FIXDIV(scratch[q1],p); + k += m; + } + + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + int twidx=0; + Fout[ k ] = scratch[0]; + for (q=1;q<p;++q ) { + twidx += fstride * k; + if (twidx>=Norig) twidx-=Norig; + C_MUL(t,scratch[q] , twiddles[twidx] ); + C_ADDTO( Fout[ k ] ,t); + } + k += m; + } + } + KISS_FFT_TMP_FREE(scratch); +} + +static +void kf_work( + kiss_fft_cpx * Fout, + const kiss_fft_cpx * f, + const size_t fstride, + int in_stride, + int * factors, + const kiss_fft_cfg st + ) +{ + kiss_fft_cpx * Fout_beg=Fout; + const int p=*factors++; /* the radix */ + const int m=*factors++; /* stage's fft length/p */ + const kiss_fft_cpx * Fout_end = Fout + p*m; + +#ifdef _OPENMP + // use openmp extensions at the + // top-level (not recursive) + if (fstride==1 && p<=5 && m!=1) + { + int k; + + // execute the p different work units in different threads +# pragma omp parallel for + for (k=0;k<p;++k) + kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st); + // all threads have joined by this point + + switch (p) { + case 2: kf_bfly2(Fout,fstride,st,m); break; + case 3: kf_bfly3(Fout,fstride,st,m); break; + case 4: kf_bfly4(Fout,fstride,st,m); break; + case 5: kf_bfly5(Fout,fstride,st,m); break; + default: kf_bfly_generic(Fout,fstride,st,m,p); break; + } + return; + } +#endif + + if (m==1) { + do{ + *Fout = *f; + f += fstride*in_stride; + }while(++Fout != Fout_end ); + }else{ + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + kf_work( Fout , f, fstride*p, in_stride, factors,st); + f += fstride*in_stride; + }while( (Fout += m) != Fout_end ); + } + + Fout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: kf_bfly2(Fout,fstride,st,m); break; + case 3: kf_bfly3(Fout,fstride,st,m); break; + case 4: kf_bfly4(Fout,fstride,st,m); break; + case 5: kf_bfly5(Fout,fstride,st,m); break; + default: kf_bfly_generic(Fout,fstride,st,m,p); break; + } +} + +/* facbuf is populated by p1,m1,p2,m2, ... + where + p[i] * m[i] = m[i-1] + m0 = n */ +static +void kf_factor(int n,int * facbuf) +{ + int p=4; + double floor_sqrt; + floor_sqrt = floor( sqrt((double)n) ); + + /*factor out powers of 4, powers of 2, then any remaining primes */ + do { + while (n % p) { + switch (p) { + case 4: p = 2; break; + case 2: p = 3; break; + default: p += 2; break; + } + if (p > floor_sqrt) + p = n; /* no more factors, skip to end */ + } + n /= p; + *facbuf++ = p; + *facbuf++ = n; + } while (n > 1); +} + +/* + * + * User-callable function to allocate all necessary storage space for the fft. + * + * The return value is a contiguous block of memory, allocated with malloc. As such, + * It can be freed with free(), rather than a kiss_fft-specific function. + * */ +kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) +{ + KISS_FFT_ALIGN_CHECK(mem) + + kiss_fft_cfg st=NULL; + size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fft_state) + + sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/ + + if ( lenmem==NULL ) { + st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded ); + }else{ + if (mem != NULL && *lenmem >= memneeded) + st = (kiss_fft_cfg)mem; + *lenmem = memneeded; + } + if (st) { + int i; + st->nfft=nfft; + st->inverse = inverse_fft; + + for (i=0;i<nfft;++i) { + const double pi=3.141592653589793238462643383279502884197169399375105820974944; + double phase = -2*pi*i / nfft; + if (st->inverse) + phase *= -1; + kf_cexp(st->twiddles+i, phase ); + } + + kf_factor(nfft,st->factors); + } + return st; +} + + +void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) +{ + if (fin == fout) { + //NOTE: this is not really an in-place FFT algorithm. + //It just performs an out-of-place FFT into a temp buffer + if (fout == NULL){ + KISS_FFT_ERROR("fout buffer NULL."); + return; + } + + kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft); + if (tmpbuf == NULL){ + KISS_FFT_ERROR("Memory allocation error."); + return; + } + + + + kf_work(tmpbuf,fin,1,in_stride, st->factors,st); + memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft); + KISS_FFT_TMP_FREE(tmpbuf); + }else{ + kf_work( fout, fin, 1,in_stride, st->factors,st ); + } +} + +void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + kiss_fft_stride(cfg,fin,fout,1); +} + + +void kiss_fft_cleanup(void) +{ + // nothing needed any more +} + +int kiss_fft_next_fast_size(int n) +{ + while(1) { + int m=n; + while ( (m%2) == 0 ) m/=2; + while ( (m%3) == 0 ) m/=3; + while ( (m%5) == 0 ) m/=5; + if (m<=1) + break; /* n is completely factorable by twos, threes, and fives */ + n++; + } + return n; +} diff --git a/kiss/kiss_fft.h b/kiss/kiss_fft.h new file mode 100644 index 0000000..dce1034 --- /dev/null +++ b/kiss/kiss_fft.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISS_FFT_H +#define KISS_FFT_H + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> + +// Define KISS_FFT_SHARED macro to properly export symbols +#ifdef KISS_FFT_SHARED +# ifdef _WIN32 +# ifdef KISS_FFT_BUILD +# define KISS_FFT_API __declspec(dllexport) +# else +# define KISS_FFT_API __declspec(dllimport) +# endif +# else +# define KISS_FFT_API __attribute__ ((visibility ("default"))) +# endif +#else +# define KISS_FFT_API +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + ATTENTION! + If you would like a : + -- a utility that will handle the caching of fft objects + -- real-only (no imaginary time component ) FFT + -- a multi-dimensional FFT + -- a command-line utility to perform ffts + -- a command-line utility to perform fast-convolution filtering + + Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c + in the tools/ directory. +*/ + +/* User may override KISS_FFT_MALLOC and/or KISS_FFT_FREE. */ +#ifdef USE_SIMD +# include <xmmintrin.h> +# define kiss_fft_scalar __m128 +# ifndef KISS_FFT_MALLOC +# define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16) +# define KISS_FFT_ALIGN_CHECK(ptr) +# define KISS_FFT_ALIGN_SIZE_UP(size) ((size + 15UL) & ~0xFUL) +# endif +# ifndef KISS_FFT_FREE +# define KISS_FFT_FREE _mm_free +# endif +#else +# define KISS_FFT_ALIGN_CHECK(ptr) +# define KISS_FFT_ALIGN_SIZE_UP(size) (size) +# ifndef KISS_FFT_MALLOC +# define KISS_FFT_MALLOC malloc +# endif +# ifndef KISS_FFT_FREE +# define KISS_FFT_FREE free +# endif +#endif + + +#ifdef FIXED_POINT +#include <stdint.h> +# if (FIXED_POINT == 32) +# define kiss_fft_scalar int32_t +# else +# define kiss_fft_scalar int16_t +# endif +#else +# ifndef kiss_fft_scalar +/* default is float */ +# define kiss_fft_scalar float +# endif +#endif + +typedef struct { + kiss_fft_scalar r; + kiss_fft_scalar i; +}kiss_fft_cpx; + +typedef struct kiss_fft_state* kiss_fft_cfg; + +/* + * kiss_fft_alloc + * + * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. + * + * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL); + * + * The return value from fft_alloc is a cfg buffer used internally + * by the fft routine or NULL. + * + * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc. + * The returned value should be free()d when done to avoid memory leaks. + * + * The state can be placed in a user supplied buffer 'mem': + * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, + * then the function places the cfg in mem and the size used in *lenmem + * and returns mem. + * + * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), + * then the function returns NULL and places the minimum cfg + * buffer size in *lenmem. + * */ + +kiss_fft_cfg KISS_FFT_API kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem); + +/* + * kiss_fft(cfg,in_out_buf) + * + * Perform an FFT on a complex input buffer. + * for a forward FFT, + * fin should be f[0] , f[1] , ... ,f[nfft-1] + * fout will be F[0] , F[1] , ... ,F[nfft-1] + * Note that each element is complex and can be accessed like + f[k].r and f[k].i + * */ +void KISS_FFT_API kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); + +/* + A more generic version of the above function. It reads its input from every Nth sample. + * */ +void KISS_FFT_API kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride); + +/* If kiss_fft_alloc allocated a buffer, it is one contiguous + buffer and can be simply free()d when no longer needed*/ +#define kiss_fft_free KISS_FFT_FREE + +/* + Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up + your compiler output to call this before you exit. +*/ +void KISS_FFT_API kiss_fft_cleanup(void); + + +/* + * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) + */ +int KISS_FFT_API kiss_fft_next_fast_size(int n); + +/* for real ffts, we need an even size */ +#define kiss_fftr_next_fast_size_real(n) \ + (kiss_fft_next_fast_size( ((n)+1)>>1)<<1) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/kiss/kiss_fft_log.h b/kiss/kiss_fft_log.h new file mode 100644 index 0000000..b5b631a --- /dev/null +++ b/kiss/kiss_fft_log.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef kiss_fft_log_h +#define kiss_fft_log_h + +#define ERROR 1 +#define WARNING 2 +#define INFO 3 +#define DEBUG 4 + +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + +#if defined(NDEBUG) +# define KISS_FFT_LOG_MSG(severity, ...) ((void)0) +#else +# define KISS_FFT_LOG_MSG(severity, ...) \ + fprintf(stderr, "[" #severity "] " __FILE__ ":" TOSTRING(__LINE__) " "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n") +#endif + +#define KISS_FFT_ERROR(...) KISS_FFT_LOG_MSG(ERROR, __VA_ARGS__) +#define KISS_FFT_WARNING(...) KISS_FFT_LOG_MSG(WARNING, __VA_ARGS__) +#define KISS_FFT_INFO(...) KISS_FFT_LOG_MSG(INFO, __VA_ARGS__) +#define KISS_FFT_DEBUG(...) KISS_FFT_LOG_MSG(DEBUG, __VA_ARGS__) + + + +#endif /* kiss_fft_log_h */
\ No newline at end of file diff --git a/kiss/kiss_fftnd.c b/kiss/kiss_fftnd.c new file mode 100644 index 0000000..5d5b089 --- /dev/null +++ b/kiss/kiss_fftnd.c @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#include "kiss_fftnd.h" +#include "_kiss_fft_guts.h" + +struct kiss_fftnd_state{ + int dimprod; /* dimsum would be mighty tasty right now */ + int ndims; + int *dims; + kiss_fft_cfg *states; /* cfg states for each dimension */ + kiss_fft_cpx * tmpbuf; /*buffer capable of hold the entire input */ +}; + +kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem) +{ + KISS_FFT_ALIGN_CHECK(mem) + + kiss_fftnd_cfg st = NULL; + int i; + int dimprod=1; + size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftnd_state)); + char * ptr = NULL; + + for (i=0;i<ndims;++i) { + size_t sublen=0; + kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen); + memneeded += sublen; /* st->states[i] */ + dimprod *= dims[i]; + } + memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(int) * ndims);/* st->dims */ + memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(void*) * ndims);/* st->states */ + memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(kiss_fft_cpx) * dimprod); /* st->tmpbuf */ + + if (lenmem == NULL) {/* allocate for the caller*/ + ptr = (char *) malloc (memneeded); + } else { /* initialize supplied buffer if big enough */ + if (*lenmem >= memneeded) + ptr = (char *) mem; + *lenmem = memneeded; /*tell caller how big struct is (or would be) */ + } + if (!ptr) + return NULL; /*malloc failed or buffer too small */ + + st = (kiss_fftnd_cfg) ptr; + st->dimprod = dimprod; + st->ndims = ndims; + ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftnd_state)); + + st->states = (kiss_fft_cfg *)ptr; + ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(void*) * ndims); + + st->dims = (int*)ptr; + ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(int) * ndims); + + st->tmpbuf = (kiss_fft_cpx*)ptr; + ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(kiss_fft_cpx) * dimprod); + + for (i=0;i<ndims;++i) { + size_t len; + st->dims[i] = dims[i]; + kiss_fft_alloc (st->dims[i], inverse_fft, NULL, &len); + st->states[i] = kiss_fft_alloc (st->dims[i], inverse_fft, ptr,&len); + ptr += len; + } + /* +Hi there! + +If you're looking at this particular code, it probably means you've got a brain-dead bounds checker +that thinks the above code overwrites the end of the array. + +It doesn't. + +-- Mark + +P.S. +The below code might give you some warm fuzzies and help convince you. + */ + if ( ptr - (char*)st != (int)memneeded ) { + fprintf(stderr, + "################################################################################\n" + "Internal error! Memory allocation miscalculation\n" + "################################################################################\n" + ); + } + return st; +} + +/* + This works by tackling one dimension at a time. + + In effect, + Each stage starts out by reshaping the matrix into a DixSi 2d matrix. + A Di-sized fft is taken of each column, transposing the matrix as it goes. + +Here's a 3-d example: +Take a 2x3x4 matrix, laid out in memory as a contiguous buffer + [ [ [ a b c d ] [ e f g h ] [ i j k l ] ] + [ [ m n o p ] [ q r s t ] [ u v w x ] ] ] + +Stage 0 ( D=2): treat the buffer as a 2x12 matrix + [ [a b ... k l] + [m n ... w x] ] + + FFT each column with size 2. + Transpose the matrix at the same time using kiss_fft_stride. + + [ [ a+m a-m ] + [ b+n b-n] + ... + [ k+w k-w ] + [ l+x l-x ] ] + + Note fft([x y]) == [x+y x-y] + +Stage 1 ( D=3) treats the buffer (the output of stage D=2) as an 3x8 matrix, + [ [ a+m a-m b+n b-n c+o c-o d+p d-p ] + [ e+q e-q f+r f-r g+s g-s h+t h-t ] + [ i+u i-u j+v j-v k+w k-w l+x l-x ] ] + + And perform FFTs (size=3) on each of the columns as above, transposing + the matrix as it goes. The output of stage 1 is + (Legend: ap = [ a+m e+q i+u ] + am = [ a-m e-q i-u ] ) + + [ [ sum(ap) fft(ap)[0] fft(ap)[1] ] + [ sum(am) fft(am)[0] fft(am)[1] ] + [ sum(bp) fft(bp)[0] fft(bp)[1] ] + [ sum(bm) fft(bm)[0] fft(bm)[1] ] + [ sum(cp) fft(cp)[0] fft(cp)[1] ] + [ sum(cm) fft(cm)[0] fft(cm)[1] ] + [ sum(dp) fft(dp)[0] fft(dp)[1] ] + [ sum(dm) fft(dm)[0] fft(dm)[1] ] ] + +Stage 2 ( D=4) treats this buffer as a 4*6 matrix, + [ [ sum(ap) fft(ap)[0] fft(ap)[1] sum(am) fft(am)[0] fft(am)[1] ] + [ sum(bp) fft(bp)[0] fft(bp)[1] sum(bm) fft(bm)[0] fft(bm)[1] ] + [ sum(cp) fft(cp)[0] fft(cp)[1] sum(cm) fft(cm)[0] fft(cm)[1] ] + [ sum(dp) fft(dp)[0] fft(dp)[1] sum(dm) fft(dm)[0] fft(dm)[1] ] ] + + Then FFTs each column, transposing as it goes. + + The resulting matrix is the 3d FFT of the 2x3x4 input matrix. + + Note as a sanity check that the first element of the final + stage's output (DC term) is + sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] ) + , i.e. the summation of all 24 input elements. + +*/ +void kiss_fftnd(kiss_fftnd_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + int i,k; + const kiss_fft_cpx * bufin=fin; + kiss_fft_cpx * bufout; + + /*arrange it so the last bufout == fout*/ + if ( st->ndims & 1 ) { + bufout = fout; + if (fin==fout) { + memcpy( st->tmpbuf, fin, sizeof(kiss_fft_cpx) * st->dimprod ); + bufin = st->tmpbuf; + } + }else + bufout = st->tmpbuf; + + for ( k=0; k < st->ndims; ++k) { + int curdim = st->dims[k]; + int stride = st->dimprod / curdim; + + for ( i=0 ; i<stride ; ++i ) + kiss_fft_stride( st->states[k], bufin+i , bufout+i*curdim, stride ); + + /*toggle back and forth between the two buffers*/ + if (bufout == st->tmpbuf){ + bufout = fout; + bufin = st->tmpbuf; + }else{ + bufout = st->tmpbuf; + bufin = fout; + } + } +} diff --git a/kiss/kiss_fftnd.h b/kiss/kiss_fftnd.h new file mode 100644 index 0000000..956ba94 --- /dev/null +++ b/kiss/kiss_fftnd.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISS_FFTND_H +#define KISS_FFTND_H + +#include "kiss_fft.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct kiss_fftnd_state * kiss_fftnd_cfg; + +kiss_fftnd_cfg KISS_FFT_API kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem); +void KISS_FFT_API kiss_fftnd(kiss_fftnd_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/kiss/kiss_fftndr.c b/kiss/kiss_fftndr.c new file mode 100644 index 0000000..e979d03 --- /dev/null +++ b/kiss/kiss_fftndr.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#include "kiss_fftndr.h" +#include "_kiss_fft_guts.h" +#define MAX(x,y) ( ( (x)<(y) )?(y):(x) ) + +struct kiss_fftndr_state +{ + int dimReal; + int dimOther; + kiss_fftr_cfg cfg_r; + kiss_fftnd_cfg cfg_nd; + void * tmpbuf; +}; + +static int prod(const int *dims, int ndims) +{ + int x=1; + while (ndims--) + x *= *dims++; + return x; +} + +kiss_fftndr_cfg kiss_fftndr_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem) +{ + KISS_FFT_ALIGN_CHECK(mem) + + kiss_fftndr_cfg st = NULL; + size_t nr=0 , nd=0,ntmp=0; + int dimReal = dims[ndims-1]; + int dimOther = prod(dims,ndims-1); + size_t memneeded; + char * ptr = NULL; + + (void)kiss_fftr_alloc(dimReal,inverse_fft,NULL,&nr); + (void)kiss_fftnd_alloc(dims,ndims-1,inverse_fft,NULL,&nd); + ntmp = + MAX( 2*dimOther , dimReal+2) * sizeof(kiss_fft_scalar) // freq buffer for one pass + + dimOther*(dimReal+2) * sizeof(kiss_fft_scalar); // large enough to hold entire input in case of in-place + + memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof( struct kiss_fftndr_state )) + KISS_FFT_ALIGN_SIZE_UP(nr) + KISS_FFT_ALIGN_SIZE_UP(nd) + KISS_FFT_ALIGN_SIZE_UP(ntmp); + + if (lenmem==NULL) { + ptr = (char*) malloc(memneeded); + }else{ + if (*lenmem >= memneeded) + ptr = (char *)mem; + *lenmem = memneeded; + } + if (ptr==NULL) + return NULL; + + st = (kiss_fftndr_cfg) ptr; + memset( st , 0 , memneeded); + ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftndr_state)); + + st->dimReal = dimReal; + st->dimOther = dimOther; + st->cfg_r = kiss_fftr_alloc( dimReal,inverse_fft,ptr,&nr); + ptr += KISS_FFT_ALIGN_SIZE_UP(nr); + st->cfg_nd = kiss_fftnd_alloc(dims,ndims-1,inverse_fft, ptr,&nd); + ptr += KISS_FFT_ALIGN_SIZE_UP(nd); + st->tmpbuf = ptr; + + return st; +} + +void kiss_fftndr(kiss_fftndr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) +{ + int k1,k2; + int dimReal = st->dimReal; + int dimOther = st->dimOther; + int nrbins = dimReal/2+1; + + kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf; + kiss_fft_cpx * tmp2 = tmp1 + MAX(nrbins,dimOther); + + // timedata is N0 x N1 x ... x Nk real + + // take a real chunk of data, fft it and place the output at correct intervals + for (k1=0;k1<dimOther;++k1) { + kiss_fftr( st->cfg_r, timedata + k1*dimReal , tmp1 ); // tmp1 now holds nrbins complex points + for (k2=0;k2<nrbins;++k2) + tmp2[ k2*dimOther+k1 ] = tmp1[k2]; + } + + for (k2=0;k2<nrbins;++k2) { + kiss_fftnd(st->cfg_nd, tmp2+k2*dimOther, tmp1); // tmp1 now holds dimOther complex points + for (k1=0;k1<dimOther;++k1) + freqdata[ k1*(nrbins) + k2] = tmp1[k1]; + } +} + +void kiss_fftndri(kiss_fftndr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata) +{ + int k1,k2; + int dimReal = st->dimReal; + int dimOther = st->dimOther; + int nrbins = dimReal/2+1; + kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf; + kiss_fft_cpx * tmp2 = tmp1 + MAX(nrbins,dimOther); + + for (k2=0;k2<nrbins;++k2) { + for (k1=0;k1<dimOther;++k1) + tmp1[k1] = freqdata[ k1*(nrbins) + k2 ]; + kiss_fftnd(st->cfg_nd, tmp1, tmp2+k2*dimOther); + } + + for (k1=0;k1<dimOther;++k1) { + for (k2=0;k2<nrbins;++k2) + tmp1[k2] = tmp2[ k2*dimOther+k1 ]; + kiss_fftri( st->cfg_r,tmp1,timedata + k1*dimReal); + } +} diff --git a/kiss/kiss_fftndr.h b/kiss/kiss_fftndr.h new file mode 100644 index 0000000..0d56a1f --- /dev/null +++ b/kiss/kiss_fftndr.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISS_NDR_H +#define KISS_NDR_H + +#include "kiss_fft.h" +#include "kiss_fftr.h" +#include "kiss_fftnd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct kiss_fftndr_state *kiss_fftndr_cfg; + + +kiss_fftndr_cfg KISS_FFT_API kiss_fftndr_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem); +/* + dims[0] must be even + + If you don't care to allocate space, use mem = lenmem = NULL +*/ + + +void KISS_FFT_API kiss_fftndr( + kiss_fftndr_cfg cfg, + const kiss_fft_scalar *timedata, + kiss_fft_cpx *freqdata); +/* + input timedata has dims[0] X dims[1] X ... X dims[ndims-1] scalar points + output freqdata has dims[0] X dims[1] X ... X dims[ndims-1]/2+1 complex points +*/ + +void KISS_FFT_API kiss_fftndri( + kiss_fftndr_cfg cfg, + const kiss_fft_cpx *freqdata, + kiss_fft_scalar *timedata); +/* + input and output dimensions are the exact opposite of kiss_fftndr +*/ + + +#define kiss_fftndr_free free + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/kiss/kiss_fftr.c b/kiss/kiss_fftr.c new file mode 100644 index 0000000..778a9a6 --- /dev/null +++ b/kiss/kiss_fftr.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#include "kiss_fftr.h" +#include "_kiss_fft_guts.h" + +struct kiss_fftr_state{ + kiss_fft_cfg substate; + kiss_fft_cpx * tmpbuf; + kiss_fft_cpx * super_twiddles; +#ifdef USE_SIMD + void * pad; +#endif +}; + +kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem) +{ + KISS_FFT_ALIGN_CHECK(mem) + + int i; + kiss_fftr_cfg st = NULL; + size_t subsize = 0, memneeded; + + if (nfft & 1) { + KISS_FFT_ERROR("Real FFT optimization must be even."); + return NULL; + } + nfft >>= 1; + + kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize); + memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2); + + if (lenmem == NULL) { + st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded); + } else { + if (*lenmem >= memneeded) + st = (kiss_fftr_cfg) mem; + *lenmem = memneeded; + } + if (!st) + return NULL; + + st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */ + st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize); + st->super_twiddles = st->tmpbuf + nfft; + kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); + + for (i = 0; i < nfft/2; ++i) { + double phase = + -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5); + if (inverse_fft) + phase *= -1; + kf_cexp (st->super_twiddles+i,phase); + } + return st; +} + +void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) +{ + /* input buffer timedata is stored row-wise */ + int k,ncfft; + kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc; + + if ( st->substate->inverse) { + KISS_FFT_ERROR("kiss fft usage error: improper alloc"); + return;/* The caller did not call the correct function */ + } + + ncfft = st->substate->nfft; + + /*perform the parallel fft of two real signals packed in real,imag*/ + kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf ); + /* The real part of the DC element of the frequency spectrum in st->tmpbuf + * contains the sum of the even-numbered elements of the input time sequence + * The imag part is the sum of the odd-numbered elements + * + * The sum of tdc.r and tdc.i is the sum of the input time sequence. + * yielding DC of input time sequence + * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... + * yielding Nyquist bin of input time sequence + */ + + tdc.r = st->tmpbuf[0].r; + tdc.i = st->tmpbuf[0].i; + C_FIXDIV(tdc,2); + CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); + CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); + freqdata[0].r = tdc.r + tdc.i; + freqdata[ncfft].r = tdc.r - tdc.i; +#ifdef USE_SIMD + freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); +#else + freqdata[ncfft].i = freqdata[0].i = 0; +#endif + + for ( k=1;k <= ncfft/2 ; ++k ) { + fpk = st->tmpbuf[k]; + fpnk.r = st->tmpbuf[ncfft-k].r; + fpnk.i = - st->tmpbuf[ncfft-k].i; + C_FIXDIV(fpk,2); + C_FIXDIV(fpnk,2); + + C_ADD( f1k, fpk , fpnk ); + C_SUB( f2k, fpk , fpnk ); + C_MUL( tw , f2k , st->super_twiddles[k-1]); + + freqdata[k].r = HALF_OF(f1k.r + tw.r); + freqdata[k].i = HALF_OF(f1k.i + tw.i); + freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); + freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); + } +} + +void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata) +{ + /* input buffer timedata is stored row-wise */ + int k, ncfft; + + if (st->substate->inverse == 0) { + KISS_FFT_ERROR("kiss fft usage error: improper alloc"); + return;/* The caller did not call the correct function */ + } + + ncfft = st->substate->nfft; + + st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r; + st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r; + C_FIXDIV(st->tmpbuf[0],2); + + for (k = 1; k <= ncfft / 2; ++k) { + kiss_fft_cpx fk, fnkc, fek, fok, tmp; + fk = freqdata[k]; + fnkc.r = freqdata[ncfft - k].r; + fnkc.i = -freqdata[ncfft - k].i; + C_FIXDIV( fk , 2 ); + C_FIXDIV( fnkc , 2 ); + + C_ADD (fek, fk, fnkc); + C_SUB (tmp, fk, fnkc); + C_MUL (fok, tmp, st->super_twiddles[k-1]); + C_ADD (st->tmpbuf[k], fek, fok); + C_SUB (st->tmpbuf[ncfft - k], fek, fok); +#ifdef USE_SIMD + st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0); +#else + st->tmpbuf[ncfft - k].i *= -1; +#endif + } + kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); +} diff --git a/kiss/kiss_fftr.h b/kiss/kiss_fftr.h new file mode 100644 index 0000000..7fd73d2 --- /dev/null +++ b/kiss/kiss_fftr.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISS_FTR_H +#define KISS_FTR_H + +#include "kiss_fft.h" +#ifdef __cplusplus +extern "C" { +#endif + + +/* + + Real optimized version can save about 45% cpu time vs. complex fft of a real seq. + + + + */ + +typedef struct kiss_fftr_state *kiss_fftr_cfg; + + +kiss_fftr_cfg KISS_FFT_API kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem); +/* + nfft must be even + + If you don't care to allocate space, use mem = lenmem = NULL +*/ + + +void KISS_FFT_API kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata); +/* + input timedata has nfft scalar points + output freqdata has nfft/2+1 complex points +*/ + +void KISS_FFT_API kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata); +/* + input freqdata has nfft/2+1 complex points + output timedata has nfft scalar points +*/ + +#define kiss_fftr_free KISS_FFT_FREE + +#ifdef __cplusplus +} +#endif +#endif diff --git a/m4/ax_cxx_compile_stdcxx.m4 b/m4/ax_cxx_compile_stdcxx.m4 index 43087b2..8edf515 100644 --- a/m4/ax_cxx_compile_stdcxx.m4 +++ b/m4/ax_cxx_compile_stdcxx.m4 @@ -10,13 +10,13 @@ # # Check for baseline language coverage in the compiler for the specified # version of the C++ standard. If necessary, add switches to CXX and -# CXXCPP to enable support. VERSION may be '11' (for the C++11 standard) -# or '14' (for the C++14 standard). +# CXXCPP to enable support. VERSION may be '11', '14', '17', or '20' for +# the respective C++ standard version. # # The second argument, if specified, indicates whether you insist on an # extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. # -std=c++11). If neither is specified, you get whatever works, with -# preference for an extended mode. +# preference for no added switch, and then for an extended mode. # # The third argument, if specified 'mandatory' or if left unspecified, # indicates that baseline support for the specified C++ standard is @@ -35,13 +35,15 @@ # Copyright (c) 2015 Moritz Klammler <moritz@klammler.eu> # Copyright (c) 2016, 2018 Krzesimir Nowak <qdlacz@gmail.com> # Copyright (c) 2019 Enji Cooper <yaneurabeya@gmail.com> +# Copyright (c) 2020 Jason Merrill <jason@redhat.com> +# Copyright (c) 2021 Jörn Heusipp <osmanx@problemloesungsmaschine.de> # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. -#serial 11 +#serial 18 dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro dnl (serial version number 13). @@ -50,6 +52,7 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl m4_if([$1], [11], [ax_cxx_compile_alternatives="11 0x"], [$1], [14], [ax_cxx_compile_alternatives="14 1y"], [$1], [17], [ax_cxx_compile_alternatives="17 1z"], + [$1], [20], [ax_cxx_compile_alternatives="20"], [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl m4_if([$2], [], [], [$2], [ext], [], @@ -62,6 +65,16 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl AC_LANG_PUSH([C++])dnl ac_success=no + m4_if([$2], [], [dnl + AC_CACHE_CHECK(whether $CXX supports C++$1 features by default, + ax_cv_cxx_compile_cxx$1, + [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [ax_cv_cxx_compile_cxx$1=yes], + [ax_cv_cxx_compile_cxx$1=no])]) + if test x$ax_cv_cxx_compile_cxx$1 = xyes; then + ac_success=yes + fi]) + m4_if([$2], [noext], [], [dnl if test x$ac_success = xno; then for alternative in ${ax_cxx_compile_alternatives}; do @@ -91,9 +104,18 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl dnl HP's aCC needs +std=c++11 according to: dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf dnl Cray's crayCC needs "-h std=c++11" + dnl MSVC needs -std:c++NN for C++17 and later (default is C++14) for alternative in ${ax_cxx_compile_alternatives}; do - for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}"; do - cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) + for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}" MSVC; do + if test x"$switch" = xMSVC; then + dnl AS_TR_SH maps both `:` and `=` to `_` so -std:c++17 would collide + dnl with -std=c++17. We suffix the cache variable name with _MSVC to + dnl avoid this. + switch=-std:c++${alternative} + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_${switch}_MSVC]) + else + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) + fi AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, $cachevar, [ac_save_CXX="$CXX" @@ -140,7 +162,6 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11], _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 ) - dnl Test body for checking C++14 support m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], @@ -148,12 +169,24 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 ) +dnl Test body for checking C++17 support + m4_define([_AX_CXX_COMPILE_STDCXX_testbody_17], _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 ) +dnl Test body for checking C++20 support + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_20], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_20 +) + + dnl Tests for new features in C++11 m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[ @@ -165,7 +198,11 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[ #error "This is not a C++ compiler" -#elif __cplusplus < 201103L +// MSVC always sets __cplusplus to 199711L in older versions; newer versions +// only set it correctly if /Zc:__cplusplus is specified as well as a +// /std:c++NN switch: +// https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ +#elif __cplusplus < 201103L && !defined _MSC_VER #error "This is not a C++11 compiler" @@ -456,7 +493,7 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[ #error "This is not a C++ compiler" -#elif __cplusplus < 201402L +#elif __cplusplus < 201402L && !defined _MSC_VER #error "This is not a C++14 compiler" @@ -580,7 +617,7 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[ #error "This is not a C++ compiler" -#elif __cplusplus < 201703L +#elif __cplusplus < 201703L && !defined _MSC_VER #error "This is not a C++17 compiler" @@ -946,6 +983,36 @@ namespace cxx17 } // namespace cxx17 -#endif // __cplusplus < 201703L +#endif // __cplusplus < 201703L && !defined _MSC_VER + +]]) + + +dnl Tests for new features in C++20 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_20], [[ + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 202002L && !defined _MSC_VER + +#error "This is not a C++20 compiler" + +#else + +#include <version> + +namespace cxx20 +{ + +// As C++20 supports feature test macros in the standard, there is no +// immediate need to actually test for feature availability on the +// Autoconf side. + +} // namespace cxx20 + +#endif // __cplusplus < 202002L && !defined _MSC_VER ]]) diff --git a/src/Buffer.h b/src/Buffer.h index af52e93..2c2a65e 100644 --- a/src/Buffer.h +++ b/src/Buffer.h @@ -33,9 +33,17 @@ #include <vector> #include <memory> +#include <complex> +#include "fpm/fixed.hpp" + +typedef std::complex<float> complexf; + +using fixed_16 = fpm::fixed<std::int16_t, std::int32_t, 14>; +typedef std::complex<fixed_16> complexfix; +typedef std::complex<fpm::fixed_16_16> complexfix_wide; /* Buffer is a container for a byte array, which is memory-aligned - * to 32 bytes for SSE performance. + * to 32 bytes for SIMD performance. * * The allocation/freeing of the data is handled internally. */ diff --git a/src/CicEqualizer.h b/src/CicEqualizer.h index 792da02..4510d0c 100644 --- a/src/CicEqualizer.h +++ b/src/CicEqualizer.h @@ -25,18 +25,10 @@ # include <config.h> #endif - #include "ModPlugin.h" #include <vector> #include <sys/types.h> -#include <complex> -#ifdef __SSE__ -# include <xmmintrin.h> -#endif - - -typedef std::complex<float> complexf; class CicEqualizer : public ModCodec { diff --git a/src/ConfigParser.cpp b/src/ConfigParser.cpp index fb2c1a1..c92a520 100644 --- a/src/ConfigParser.cpp +++ b/src/ConfigParser.cpp @@ -63,6 +63,27 @@ static GainMode parse_gainmode(const std::string &gainMode_setting) throw std::runtime_error("Configuration error"); } +static FFTEngine parse_fft_engine(const std::string &fft_engine_setting) +{ + string fft_engine_minuscule(fft_engine_setting); + std::transform(fft_engine_minuscule.begin(), fft_engine_minuscule.end(), + fft_engine_minuscule.begin(), ::tolower); + + if (fft_engine_minuscule == "fftw") { + return FFTEngine::FFTW; + } + else if (fft_engine_minuscule == "kiss") { + return FFTEngine::KISS; + } + else if (fft_engine_minuscule == "dexter") { + return FFTEngine::DEXTER; + } + + cerr << "Modulator fft_engine setting '" << fft_engine_setting << + "' not recognised." << endl; + throw std::runtime_error("Configuration error"); +} + static void parse_configfile( const std::string& configuration_file, mod_settings_t& mod_settings) @@ -156,6 +177,9 @@ static void parse_configfile( mod_settings.showProcessTime); // modulator parameters: + const string fft_engine_setting = pt.Get("modulator.fft_engine", "fftw"); + mod_settings.fftEngine = parse_fft_engine(fft_engine_setting); + const string gainMode_setting = pt.Get("modulator.gainmode", "var"); mod_settings.gainMode = parse_gainmode(gainMode_setting); mod_settings.gainmodeVariance = pt.GetReal("modulator.normalise_variance", diff --git a/src/ConfigParser.h b/src/ConfigParser.h index ae76dee..3bacfdd 100644 --- a/src/ConfigParser.h +++ b/src/ConfigParser.h @@ -36,6 +36,12 @@ #include "TII.h" #include "output/SDRDevice.h" +enum class FFTEngine { + FFTW, // floating point in software + KISS, // fixed-point in software + DEXTER // fixed-point in FPGA +}; + struct mod_settings_t { std::string startupCheck; @@ -51,6 +57,8 @@ struct mod_settings_t { bool useLimeOutput = false; bool useBladeRFOutput = false; + FFTEngine fftEngine = FFTEngine::FFTW; + size_t outputRate = 2048000; size_t clockRate = 0; unsigned dabMode = 1; diff --git a/src/DabMod.cpp b/src/DabMod.cpp index 3b072c1..7866818 100644 --- a/src/DabMod.cpp +++ b/src/DabMod.cpp @@ -31,10 +31,8 @@ #endif #include <memory> -#include <complex> #include <string> #include <iostream> -#include <iomanip> #include <cstdlib> #include <stdexcept> #include <cstdio> @@ -51,7 +49,6 @@ #include "Utils.h" #include "Log.h" #include "DabModulator.h" -#include "InputMemory.h" #include "OutputFile.h" #include "FormatConverter.h" #include "FrameMultiplexer.h" @@ -75,16 +72,16 @@ * samples can have peaks up to about 48000. The value of 50000 * should guarantee that with a digital gain of 1.0, UHD never clips * our samples. + * + * This only applies when fixed_point == false. */ static const float normalise_factor = 50000.0f; -//Empirical normalisation factors used to normalise the samples to amplitude 1. +// Empirical normalisation factors used to normalise the samples to amplitude 1. static const float normalise_factor_file_fix = 81000.0f; static const float normalise_factor_file_var = 46000.0f; static const float normalise_factor_file_max = 46000.0f; -typedef std::complex<float> complexf; - using namespace std; volatile sig_atomic_t running = 1; @@ -255,7 +252,11 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s) shared_ptr<ModOutput> output; if (s.useFileOutput) { - if (s.fileOutputFormat == "complexf") { + if (s.fftEngine != FFTEngine::FFTW) { + // Intentionally ignore fileOutputFormat, it is always sc16 + output = make_shared<OutputFile>(s.outputName, s.fileOutputShowMetadata); + } + else if (s.fileOutputFormat == "complexf") { output = make_shared<OutputFile>(s.outputName, s.fileOutputShowMetadata); } else if (s.fileOutputFormat == "complexf_normalised") { @@ -291,6 +292,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s) else if (s.useUHDOutput) { s.normalise = 1.0f / normalise_factor; s.sdr_device_config.sampleRate = s.outputRate; + s.sdr_device_config.fixedPoint = (s.fftEngine != FFTEngine::FFTW); auto uhddevice = make_shared<Output::UHD>(s.sdr_device_config); output = make_shared<Output::SDR>(s.sdr_device_config, uhddevice); rcs.enrol((Output::SDR*)output.get()); @@ -301,6 +303,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s) /* We normalise the same way as for the UHD output */ s.normalise = 1.0f / normalise_factor; s.sdr_device_config.sampleRate = s.outputRate; + if (s.fftEngine != FFTEngine::FFTW) throw runtime_error("soapy fixed_point unsupported"); auto soapydevice = make_shared<Output::Soapy>(s.sdr_device_config); output = make_shared<Output::SDR>(s.sdr_device_config, soapydevice); rcs.enrol((Output::SDR*)output.get()); @@ -320,6 +323,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s) else if (s.useLimeOutput) { /* We normalise the same way as for the UHD output */ s.normalise = 1.0f / normalise_factor; + if (s.fftEngine != FFTEngine::FFTW) throw runtime_error("limesdr fixed_point unsupported"); s.sdr_device_config.sampleRate = s.outputRate; auto limedevice = make_shared<Output::Lime>(s.sdr_device_config); output = make_shared<Output::SDR>(s.sdr_device_config, limedevice); @@ -330,6 +334,7 @@ static shared_ptr<ModOutput> prepare_output(mod_settings_t& s) else if (s.useBladeRFOutput) { /* We normalise specifically for the BladeRF output : range [-2048; 2047] */ s.normalise = 2047.0f / normalise_factor; + if (s.fftEngine != FFTEngine::FFTW) throw runtime_error("bladerf fixed_point unsupported"); s.sdr_device_config.sampleRate = s.outputRate; auto bladerfdevice = make_shared<Output::BladeRF>(s.sdr_device_config); output = make_shared<Output::SDR>(s.sdr_device_config, bladerfdevice); @@ -420,7 +425,8 @@ int launch_modulator(int argc, char* argv[]) ModulatorData m; rcs.enrol(&m); - { + // Neither KISS FFT used for fixedpoint nor the FFT Accelerator used for DEXTER need planning. + if (mod_settings.fftEngine == FFTEngine::FFTW) { // This is mostly useful on ARM systems where FFTW planning takes some time. If we do it here // it will be done before the modulator starts up etiLog.level(debug) << "Running FFTW planning..."; @@ -442,7 +448,14 @@ int launch_modulator(int argc, char* argv[]) } std::string output_format; - if (mod_settings.useFileOutput and + if (mod_settings.fftEngine == FFTEngine::KISS) { + output_format = ""; //fixed point is native sc16, no converter needed + } + else if (mod_settings.fftEngine == FFTEngine::DEXTER) { + output_format = "s16"; // FPGA FFT Engine outputs s32 + } + // else FFTW, i.e. floating point + else if (mod_settings.useFileOutput and (mod_settings.fileOutputFormat == "s8" or mod_settings.fileOutputFormat == "u8" or mod_settings.fileOutputFormat == "s16")) { diff --git a/src/DabModulator.cpp b/src/DabModulator.cpp index 4a29132..5f7aaf6 100644 --- a/src/DabModulator.cpp +++ b/src/DabModulator.cpp @@ -3,7 +3,7 @@ Her Majesty the Queen in Right of Canada (Communications Research Center Canada) - Copyright (C) 2023 + Copyright (C) 2024 Matthias P. Braendli, matthias.braendli@mpb.li http://opendigitalradio.org @@ -54,7 +54,6 @@ #include "SignalMultiplexer.h" #include "TII.h" #include "TimeInterleaver.h" -#include "TimestampDecoder.h" using namespace std; @@ -142,14 +141,15 @@ int DabModulator::process(Buffer* dataOut) auto cifMux = make_shared<FrameMultiplexer>(m_etiSource); auto cifPart = make_shared<BlockPartitioner>(mode); - auto cifMap = make_shared<QpskSymbolMapper>(m_nbCarriers); - auto cifRef = make_shared<PhaseReference>(mode); - auto cifFreq = make_shared<FrequencyInterleaver>(mode); - auto cifDiff = make_shared<DifferentialModulator>(m_nbCarriers); + const bool fixedPoint = m_settings.fftEngine != FFTEngine::FFTW; + auto cifMap = make_shared<QpskSymbolMapper>(m_nbCarriers, fixedPoint); + auto cifRef = make_shared<PhaseReference>(mode, fixedPoint); + auto cifFreq = make_shared<FrequencyInterleaver>(mode, fixedPoint); + auto cifDiff = make_shared<DifferentialModulator>(m_nbCarriers, fixedPoint); - auto cifNull = make_shared<NullSymbol>(m_nbCarriers); - auto cifSig = make_shared<SignalMultiplexer>( - (1 + m_nbSymbols) * m_nbCarriers * sizeof(complexf)); + auto cifNull = make_shared<NullSymbol>(m_nbCarriers, + fixedPoint ? sizeof(complexfix) : sizeof(complexf)); + auto cifSig = make_shared<SignalMultiplexer>(); // TODO this needs a review bool useCicEq = false; @@ -180,46 +180,79 @@ int DabModulator::process(Buffer* dataOut) try { tii = make_shared<TII>( m_settings.dabMode, - m_settings.tiiConfig); + m_settings.tiiConfig, + fixedPoint); rcs.enrol(tii.get()); - tiiRef = make_shared<PhaseReference>(mode); + tiiRef = make_shared<PhaseReference>(mode, fixedPoint); } catch (const TIIError& e) { etiLog.level(error) << "Could not initialise TII: " << e.what(); } - auto cifOfdm = make_shared<OfdmGenerator>( - (1 + m_nbSymbols), - m_nbCarriers, - m_spacing, - m_settings.enableCfr, - m_settings.cfrClip, - m_settings.cfrErrorClip); + shared_ptr<ModPlugin> cifOfdm; + + switch (m_settings.fftEngine) { + case FFTEngine::FFTW: + { + auto ofdm = make_shared<OfdmGeneratorCF32>( + (1 + m_nbSymbols), + m_nbCarriers, + m_spacing, + m_settings.enableCfr, + m_settings.cfrClip, + m_settings.cfrErrorClip); + rcs.enrol(ofdm.get()); + cifOfdm = ofdm; + } + break; + case FFTEngine::KISS: + cifOfdm = make_shared<OfdmGeneratorFixed>( + (1 + m_nbSymbols), + m_nbCarriers, + m_spacing); + break; + case FFTEngine::DEXTER: +#if defined(HAVE_DEXTER) + cifOfdm = make_shared<OfdmGeneratorDEXTER>( + (1 + m_nbSymbols), + m_nbCarriers, + m_spacing); +#else + throw std::runtime_error("Cannot use DEXTER fft engine without --enable-dexter"); +#endif + break; + } - rcs.enrol(cifOfdm.get()); + shared_ptr<GainControl> cifGain; - auto cifGain = make_shared<GainControl>( - m_spacing, - m_settings.gainMode, - m_settings.digitalgain, - m_settings.normalise, - m_settings.gainmodeVariance); + if (not fixedPoint) { + cifGain = make_shared<GainControl>( + m_spacing, + m_settings.gainMode, + m_settings.digitalgain, + m_settings.normalise, + m_settings.gainmodeVariance); - rcs.enrol(cifGain.get()); + rcs.enrol(cifGain.get()); + } auto cifGuard = make_shared<GuardIntervalInserter>( m_nbSymbols, m_spacing, m_nullSize, m_symSize, - m_settings.ofdmWindowOverlap); + m_settings.ofdmWindowOverlap, m_settings.fftEngine); rcs.enrol(cifGuard.get()); shared_ptr<FIRFilter> cifFilter; if (not m_settings.filterTapsFilename.empty()) { + if (fixedPoint) throw std::runtime_error("fixed point doesn't support fir filter"); + cifFilter = make_shared<FIRFilter>(m_settings.filterTapsFilename); rcs.enrol(cifFilter.get()); } shared_ptr<MemlessPoly> cifPoly; if (not m_settings.polyCoefFilename.empty()) { + if (fixedPoint) throw std::runtime_error("fixed point doesn't support predistortion"); + cifPoly = make_shared<MemlessPoly>(m_settings.polyCoefFilename, m_settings.polyNumThreads); rcs.enrol(cifPoly.get()); @@ -227,15 +260,21 @@ int DabModulator::process(Buffer* dataOut) shared_ptr<Resampler> cifRes; if (m_settings.outputRate != 2048000) { + if (fixedPoint) throw std::runtime_error("fixed point doesn't support resampler"); + cifRes = make_shared<Resampler>( 2048000, m_settings.outputRate, m_spacing); } - if (not m_format.empty()) { - m_formatConverter = make_shared<FormatConverter>(m_format); + if (m_settings.fftEngine == FFTEngine::FFTW and not m_format.empty()) { + m_formatConverter = make_shared<FormatConverter>(false, m_format); + } + else if (m_settings.fftEngine == FFTEngine::DEXTER) { + m_formatConverter = make_shared<FormatConverter>(true, m_format); } + // KISS is already in s16 m_output = make_shared<OutputMemory>(dataOut); diff --git a/src/DabModulator.h b/src/DabModulator.h index 093a782..82782cd 100644 --- a/src/DabModulator.h +++ b/src/DabModulator.h @@ -40,12 +40,8 @@ #include "EtiReader.h" #include "Flowgraph.h" #include "FormatConverter.h" -#include "GainControl.h" #include "OutputMemory.h" #include "RemoteControl.h" -#include "Log.h" -#include "TII.h" - class DabModulator : public ModInput, public ModMetadata, public RemoteControllable { diff --git a/src/DifferentialModulator.cpp b/src/DifferentialModulator.cpp index 97a7998..21b4c3e 100644 --- a/src/DifferentialModulator.cpp +++ b/src/DifferentialModulator.cpp @@ -22,17 +22,14 @@ #include "DifferentialModulator.h" #include "PcDebug.h" -#include <stdio.h> +#include <cstdio> #include <stdexcept> -#include <complex> -#include <string.h> +#include <cstring> -typedef std::complex<float> complexf; - - -DifferentialModulator::DifferentialModulator(size_t carriers) : +DifferentialModulator::DifferentialModulator(size_t carriers, bool fixedPoint) : ModMux(), - d_carriers(carriers) + m_carriers(carriers), + m_fixedPoint(fixedPoint) { PDEBUG("DifferentialModulator::DifferentialModulator(%zu)\n", carriers); @@ -42,10 +39,42 @@ DifferentialModulator::DifferentialModulator(size_t carriers) : DifferentialModulator::~DifferentialModulator() { PDEBUG("DifferentialModulator::~DifferentialModulator()\n"); - } +template<typename T> +void do_process(size_t carriers, const std::vector<Buffer*>& dataIn, Buffer* dataOut) +{ + size_t phaseSize = dataIn[0]->getLength() / sizeof(T); + size_t dataSize = dataIn[1]->getLength() / sizeof(T); + dataOut->setLength((phaseSize + dataSize) * sizeof(T)); + + const T* phase = reinterpret_cast<const T*>(dataIn[0]->getData()); + const T* in = reinterpret_cast<const T*>(dataIn[1]->getData()); + T* out = reinterpret_cast<T*>(dataOut->getData()); + + if (phaseSize != carriers) { + throw std::runtime_error( + "DifferentialModulator::process input phase size not valid!"); + } + if (dataSize % carriers != 0) { + throw std::runtime_error( + "DifferentialModulator::process input data size not valid!"); + } + + memcpy(dataOut->getData(), phase, phaseSize * sizeof(T)); + for (size_t i = 0; i < dataSize; i += carriers) { + for (size_t j = 0; j < carriers; j += 4) { + out[carriers + j] = out[j] * in[j]; + out[carriers + j + 1] = out[j + 1] * in[j + 1]; + out[carriers + j + 2] = out[j + 2] * in[j + 2]; + out[carriers + j + 3] = out[j + 3] * in[j + 3]; + } + in += carriers; + out += carriers; + } +} + // dataIn[0] -> phase reference // dataIn[1] -> data symbols int DifferentialModulator::process(std::vector<Buffer*> dataIn, Buffer* dataOut) @@ -67,33 +96,11 @@ int DifferentialModulator::process(std::vector<Buffer*> dataIn, Buffer* dataOut) "DifferentialModulator::process nb of input streams not 2!"); } - size_t phaseSize = dataIn[0]->getLength() / sizeof(complexf); - size_t dataSize = dataIn[1]->getLength() / sizeof(complexf); - dataOut->setLength((phaseSize + dataSize) * sizeof(complexf)); - - const complexf* phase = reinterpret_cast<const complexf*>(dataIn[0]->getData()); - const complexf* in = reinterpret_cast<const complexf*>(dataIn[1]->getData()); - complexf* out = reinterpret_cast<complexf*>(dataOut->getData()); - - if (phaseSize != d_carriers) { - throw std::runtime_error( - "DifferentialModulator::process input phase size not valid!"); - } - if (dataSize % d_carriers != 0) { - throw std::runtime_error( - "DifferentialModulator::process input data size not valid!"); + if (m_fixedPoint) { + do_process<complexfix>(m_carriers, dataIn, dataOut); } - - memcpy(dataOut->getData(), phase, phaseSize * sizeof(complexf)); - for (size_t i = 0; i < dataSize; i += d_carriers) { - for (size_t j = 0; j < d_carriers; j += 4) { - out[d_carriers + j] = out[j] * in[j]; - out[d_carriers + j + 1] = out[j + 1] * in[j + 1]; - out[d_carriers + j + 2] = out[j + 2] * in[j + 2]; - out[d_carriers + j + 3] = out[j + 3] * in[j + 3]; - } - in += d_carriers; - out += d_carriers; + else { + do_process<complexf>(m_carriers, dataIn, dataOut); } return dataOut->getLength(); diff --git a/src/DifferentialModulator.h b/src/DifferentialModulator.h index b26ea8b..9cc5081 100644 --- a/src/DifferentialModulator.h +++ b/src/DifferentialModulator.h @@ -35,7 +35,7 @@ class DifferentialModulator : public ModMux { public: - DifferentialModulator(size_t carriers); + DifferentialModulator(size_t carriers, bool fixedPoint); virtual ~DifferentialModulator(); DifferentialModulator(const DifferentialModulator&); DifferentialModulator& operator=(const DifferentialModulator&); @@ -45,6 +45,7 @@ public: const char* name() { return "DifferentialModulator"; } protected: - size_t d_carriers; + size_t m_carriers; + size_t m_fixedPoint; }; diff --git a/src/FIRFilter.h b/src/FIRFilter.h index a4effa1..2d8fba9 100644 --- a/src/FIRFilter.h +++ b/src/FIRFilter.h @@ -33,21 +33,14 @@ #include "RemoteControl.h" #include "ModPlugin.h" -#include "PcDebug.h" #include <sys/types.h> -#include <complex> -#include <thread> #include <vector> -#include <time.h> #include <cstdio> #include <string> -#include <memory> #define FIRFILTER_PIPELINE_DELAY 1 -typedef std::complex<float> complexf; - class FIRFilter : public PipelinedModCodec, public RemoteControllable { public: diff --git a/src/Flowgraph.cpp b/src/Flowgraph.cpp index 3d4cdcc..339e326 100644 --- a/src/Flowgraph.cpp +++ b/src/Flowgraph.cpp @@ -27,12 +27,10 @@ #include "Flowgraph.h" #include "PcDebug.h" #include "Log.h" -#include <string> #include <memory> #include <algorithm> #include <sstream> #include <sys/types.h> -#include <stdexcept> #include <assert.h> #include <sys/time.h> @@ -254,15 +252,15 @@ Flowgraph::~Flowgraph() char node_time_sz[1024] = {}; for (const auto &node : nodes) { - snprintf(node_time_sz, 1023, " %30s: %10lu us (%2.2f %%)\n", + snprintf(node_time_sz, 1023, " %30s: %10lld us (%2.2f %%)\n", node->plugin()->name(), - node->processTime(), + (long long)node->processTime(), node->processTime() * 100.0 / myProcessTime); ss << node_time_sz; } - snprintf(node_time_sz, 1023, " %30s: %10lu us (100.00 %%)\n", "total", - myProcessTime); + snprintf(node_time_sz, 1023, " %30s: %10lld us (100.00 %%)\n", "total", + (long long)myProcessTime); ss << node_time_sz; etiLog.level(debug) << ss.str(); diff --git a/src/FormatConverter.cpp b/src/FormatConverter.cpp index e8e76ed..517f26e 100644 --- a/src/FormatConverter.cpp +++ b/src/FormatConverter.cpp @@ -28,17 +28,37 @@ #include "FormatConverter.h" #include "PcDebug.h" +#include "Log.h" -#include <sys/types.h> -#include <string.h> #include <stdexcept> +#include <cstring> #include <assert.h> +#include <sys/types.h> +#if defined(__ARM_NEON) +#include <arm_neon.h> +#endif -FormatConverter::FormatConverter(const std::string& format) : +FormatConverter::FormatConverter(bool input_is_complexfix_wide, const std::string& format_out) : ModCodec(), - m_format(format) + m_input_complexfix_wide(input_is_complexfix_wide), + m_format_out(format_out) { } +FormatConverter::~FormatConverter() +{ + if ( +#if defined(__ARM_NEON) + not m_input_complexfix_wide +#else + true +#endif + ) { + etiLog.level(debug) << "FormatConverter: " << + m_num_clipped_samples.load() << " clipped"; + } +} + + /* Expect the input samples to be in the correct range for the required format */ int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut) { @@ -47,71 +67,113 @@ int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut) size_t num_clipped_samples = 0; - size_t sizeIn = dataIn->getLength() / sizeof(float); - float* in = reinterpret_cast<float*>(dataIn->getData()); + if (m_input_complexfix_wide) { + size_t sizeIn = dataIn->getLength() / sizeof(int32_t); + if (m_format_out == "s16") { + dataOut->setLength(sizeIn * sizeof(int16_t)); + const int32_t *in = reinterpret_cast<int32_t*>(dataIn->getData()); + int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData()); - if (m_format == "s16") { - dataOut->setLength(sizeIn * sizeof(int16_t)); - int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData()); + constexpr int shift = 6; - for (size_t i = 0; i < sizeIn; i++) { - if (in[i] < INT16_MIN) { - out[i] = INT16_MIN; - num_clipped_samples++; +#if defined(__ARM_NEON) + if (sizeIn % 4 != 0) { + throw std::logic_error("Unexpected length not multiple of 4"); } - else if (in[i] > INT16_MAX) { - out[i] = INT16_MAX; - num_clipped_samples++; + + for (size_t i = 0; i < sizeIn; i += 4) { + int32x4_t input_vec = vld1q_s32(&in[i]); + // Apply shift right, saturate on conversion to int16_t + int16x4_t output_vec = vqshrn_n_s32(input_vec, shift); + vst1_s16(&out[i], output_vec); } - else { - out[i] = in[i]; +#else + for (size_t i = 0; i < sizeIn; i++) { + const int32_t val = in[i] >> shift; + if (val < INT16_MIN) { + out[i] = INT16_MIN; + num_clipped_samples++; + } + else if (val > INT16_MAX) { + out[i] = INT16_MAX; + num_clipped_samples++; + } + else { + out[i] = val; + } } +#endif } - } - else if (m_format == "u8") { - dataOut->setLength(sizeIn * sizeof(int8_t)); - uint8_t* out = reinterpret_cast<uint8_t*>(dataOut->getData()); - - for (size_t i = 0; i < sizeIn; i++) { - const auto samp = in[i] + 128.0f; - if (samp < 0) { - out[i] = 0; - num_clipped_samples++; - } - else if (samp > UINT8_MAX) { - out[i] = UINT8_MAX; - num_clipped_samples++; - } - else { - out[i] = samp; - } - + else { + throw std::runtime_error("FormatConverter: Invalid fix format " + m_format_out); } } - else if (m_format == "s8") { - dataOut->setLength(sizeIn * sizeof(int8_t)); - int8_t* out = reinterpret_cast<int8_t*>(dataOut->getData()); - - for (size_t i = 0; i < sizeIn; i++) { - if (in[i] < INT8_MIN) { - out[i] = INT8_MIN; - num_clipped_samples++; + else { + size_t sizeIn = dataIn->getLength() / sizeof(float); + const float* in = reinterpret_cast<float*>(dataIn->getData()); + + if (m_format_out == "s16") { + dataOut->setLength(sizeIn * sizeof(int16_t)); + int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData()); + + for (size_t i = 0; i < sizeIn; i++) { + if (in[i] < INT16_MIN) { + out[i] = INT16_MIN; + num_clipped_samples++; + } + else if (in[i] > INT16_MAX) { + out[i] = INT16_MAX; + num_clipped_samples++; + } + else { + out[i] = in[i]; + } } - else if (in[i] > INT8_MAX) { - out[i] = INT8_MAX; - num_clipped_samples++; + } + else if (m_format_out == "u8") { + dataOut->setLength(sizeIn * sizeof(int8_t)); + uint8_t* out = reinterpret_cast<uint8_t*>(dataOut->getData()); + + for (size_t i = 0; i < sizeIn; i++) { + const auto samp = in[i] + 128.0f; + if (samp < 0) { + out[i] = 0; + num_clipped_samples++; + } + else if (samp > UINT8_MAX) { + out[i] = UINT8_MAX; + num_clipped_samples++; + } + else { + out[i] = samp; + } + } - else { - out[i] = in[i]; + } + else if (m_format_out == "s8") { + dataOut->setLength(sizeIn * sizeof(int8_t)); + int8_t* out = reinterpret_cast<int8_t*>(dataOut->getData()); + + for (size_t i = 0; i < sizeIn; i++) { + if (in[i] < INT8_MIN) { + out[i] = INT8_MIN; + num_clipped_samples++; + } + else if (in[i] > INT8_MAX) { + out[i] = INT8_MAX; + num_clipped_samples++; + } + else { + out[i] = in[i]; + } } } - } - else { - throw std::runtime_error("FormatConverter: Invalid format " + m_format); + else { + throw std::runtime_error("FormatConverter: Invalid format " + m_format_out); + } } m_num_clipped_samples.store(num_clipped_samples); - return dataOut->getLength(); } diff --git a/src/FormatConverter.h b/src/FormatConverter.h index 05511c0..1ed2283 100644 --- a/src/FormatConverter.h +++ b/src/FormatConverter.h @@ -33,18 +33,19 @@ #endif #include "ModPlugin.h" -#include <complex> #include <atomic> #include <string> -#include <cstdint> class FormatConverter : public ModCodec { public: static size_t get_format_size(const std::string& format); - // Allowed formats: s8, u8 and s16 - FormatConverter(const std::string& format); + // floating-point input allows output formats: s8, u8 and s16 + // complexfix_wide input allows output formats: s16 + // complexfix input is already in s16, and needs no converter + FormatConverter(bool input_is_complexfix_wide, const std::string& format_out); + virtual ~FormatConverter(); int process(Buffer* const dataIn, Buffer* dataOut); const char* name(); @@ -52,7 +53,8 @@ class FormatConverter : public ModCodec size_t get_num_clipped_samples() const; private: - std::string m_format; + bool m_input_complexfix_wide; + std::string m_format_out; std::atomic<size_t> m_num_clipped_samples = 0; }; diff --git a/src/FrameMultiplexer.cpp b/src/FrameMultiplexer.cpp index e893120..ebd8b76 100644 --- a/src/FrameMultiplexer.cpp +++ b/src/FrameMultiplexer.cpp @@ -25,17 +25,11 @@ */ #include "FrameMultiplexer.h" -#include "PcDebug.h" -#include <stdio.h> #include <string> -#include <stdexcept> -#include <complex> -#include <memory> -#include <assert.h> -#include <string.h> - -typedef std::complex<float> complexf; +#include <cstdio> +#include <cassert> +#include <cstring> FrameMultiplexer::FrameMultiplexer( const EtiSource& etiSource) : diff --git a/src/FrequencyInterleaver.cpp b/src/FrequencyInterleaver.cpp index e76d525..6f36dcb 100644 --- a/src/FrequencyInterleaver.cpp +++ b/src/FrequencyInterleaver.cpp @@ -22,17 +22,15 @@ #include "FrequencyInterleaver.h" #include "PcDebug.h" -#include <stdio.h> #include <stdexcept> #include <string> -#include <stdlib.h> -#include <complex> +#include <cstdio> +#include <cstdlib> -typedef std::complex<float> complexf; - -FrequencyInterleaver::FrequencyInterleaver(size_t mode) : - ModCodec() +FrequencyInterleaver::FrequencyInterleaver(size_t mode, bool fixedPoint) : + ModCodec(), + m_fixedPoint(fixedPoint) { PDEBUG("FrequencyInterleaver::FrequencyInterleaver(%zu) @ %p\n", mode, this); @@ -42,54 +40,53 @@ FrequencyInterleaver::FrequencyInterleaver(size_t mode) : size_t beta; switch (mode) { case 1: - d_carriers = 1536; + m_carriers = 1536; num = 2048; beta = 511; break; case 2: - d_carriers = 384; + m_carriers = 384; num = 512; beta = 127; break; case 3: - d_carriers = 192; + m_carriers = 192; num = 256; beta = 63; break; case 0: case 4: - d_carriers = 768; + m_carriers = 768; num = 1024; beta = 255; break; default: - PDEBUG("Carriers: %zu\n", (d_carriers >> 1) << 1); - throw std::runtime_error("FrequencyInterleaver::FrequencyInterleaver " - "nb of carriers invalid!"); - break; + PDEBUG("Carriers: %zu\n", (m_carriers >> 1) << 1); + throw std::runtime_error("FrequencyInterleaver: invalid dab mode"); } - const int ret = posix_memalign((void**)(&d_indexes), 16, d_carriers * sizeof(size_t)); + const int ret = posix_memalign((void**)(&m_indices), 16, m_carriers * sizeof(size_t)); if (ret != 0) { throw std::runtime_error("memory allocation failed: " + std::to_string(ret)); } - size_t* index = d_indexes; + size_t *index = m_indices; size_t perm = 0; PDEBUG("i: %4u, R: %4u\n", 0, 0); for (size_t j = 1; j < num; ++j) { perm = (alpha * perm + beta) & (num - 1); - if (perm >= ((num - d_carriers) / 2) - && perm <= (num - (num - d_carriers) / 2) + if (perm >= ((num - m_carriers) / 2) + && perm <= (num - (num - m_carriers) / 2) && perm != (num / 2)) { PDEBUG("i: %4zu, R: %4zu, d: %4zu, n: %4zu, k: %5zi, index: %zu\n", - j, perm, perm, index - d_indexes, perm - num / 2, + j, perm, perm, index - m_indices, perm - num / 2, perm > num / 2 ? perm - (1 + (num / 2)) - : perm + (d_carriers - (num / 2))); + : perm + (m_carriers - (num / 2))); *(index++) = perm > num / 2 ? - perm - (1 + (num / 2)) : perm + (d_carriers - (num / 2)); - } else { + perm - (1 + (num / 2)) : perm + (m_carriers - (num / 2)); + } + else { PDEBUG("i: %4zu, R: %4zu\n", j, perm); } } @@ -100,9 +97,33 @@ FrequencyInterleaver::~FrequencyInterleaver() { PDEBUG("FrequencyInterleaver::~FrequencyInterleaver() @ %p\n", this); - free(d_indexes); + free(m_indices); } +template<typename T> +void do_process(Buffer* const dataIn, Buffer* dataOut, + size_t carriers, const size_t * const indices) +{ + const T* in = reinterpret_cast<const T*>(dataIn->getData()); + T* out = reinterpret_cast<T*>(dataOut->getData()); + size_t sizeIn = dataIn->getLength() / sizeof(T); + + if (sizeIn % carriers != 0) { + throw std::runtime_error( + "FrequencyInterleaver::process input size not valid!"); + } + + for (size_t i = 0; i < sizeIn;) { +// memset(out, 0, m_carriers * sizeof(T)); + for (size_t j = 0; j < carriers; i += 4, j += 4) { + out[indices[j]] = in[i]; + out[indices[j + 1]] = in[i + 1]; + out[indices[j + 2]] = in[i + 2]; + out[indices[j + 3]] = in[i + 3]; + } + out += carriers; + } +} int FrequencyInterleaver::process(Buffer* const dataIn, Buffer* dataOut) { @@ -112,24 +133,11 @@ int FrequencyInterleaver::process(Buffer* const dataIn, Buffer* dataOut) dataOut->setLength(dataIn->getLength()); - const complexf* in = reinterpret_cast<const complexf*>(dataIn->getData()); - complexf* out = reinterpret_cast<complexf*>(dataOut->getData()); - size_t sizeIn = dataIn->getLength() / sizeof(complexf); - - if (sizeIn % d_carriers != 0) { - throw std::runtime_error( - "FrequencyInterleaver::process input size not valid!"); + if (m_fixedPoint) { + do_process<complexfix>(dataIn, dataOut, m_carriers, m_indices); } - - for (size_t i = 0; i < sizeIn;) { -// memset(out, 0, d_carriers * sizeof(complexf)); - for (size_t j = 0; j < d_carriers; i += 4, j += 4) { - out[d_indexes[j]] = in[i]; - out[d_indexes[j + 1]] = in[i + 1]; - out[d_indexes[j + 2]] = in[i + 2]; - out[d_indexes[j + 3]] = in[i + 3]; - } - out += d_carriers; + else { + do_process<complexf>(dataIn, dataOut, m_carriers, m_indices); } return 1; diff --git a/src/FrequencyInterleaver.h b/src/FrequencyInterleaver.h index 43ca21a..b31b968 100644 --- a/src/FrequencyInterleaver.h +++ b/src/FrequencyInterleaver.h @@ -25,16 +25,14 @@ # include <config.h> #endif - #include "ModPlugin.h" #include <sys/types.h> - class FrequencyInterleaver : public ModCodec { public: - FrequencyInterleaver(size_t mode); + FrequencyInterleaver(size_t mode, bool fixedPoint); virtual ~FrequencyInterleaver(); FrequencyInterleaver(const FrequencyInterleaver&) = delete; FrequencyInterleaver& operator=(const FrequencyInterleaver&) = delete; @@ -43,7 +41,8 @@ public: const char* name() override { return "FrequencyInterleaver"; } protected: - size_t d_carriers; - size_t* d_indexes; + bool m_fixedPoint; + size_t m_carriers; + size_t *m_indices; }; diff --git a/src/GainControl.h b/src/GainControl.h index 04f6b58..d40a7d7 100644 --- a/src/GainControl.h +++ b/src/GainControl.h @@ -35,7 +35,6 @@ #include "RemoteControl.h" #include <sys/types.h> -#include <complex> #include <string> #include <mutex> @@ -43,9 +42,6 @@ # include <xmmintrin.h> #endif - -typedef std::complex<float> complexf; - enum class GainMode { GAIN_FIX = 0, GAIN_MAX = 1, GAIN_VAR = 2 }; class GainControl : public PipelinedModCodec, public RemoteControllable diff --git a/src/GuardIntervalInserter.cpp b/src/GuardIntervalInserter.cpp index 3c2db14..26d4fd1 100644 --- a/src/GuardIntervalInserter.cpp +++ b/src/GuardIntervalInserter.cpp @@ -29,39 +29,47 @@ #include <cstring> #include <cassert> #include <stdexcept> -#include <complex> #include <mutex> -typedef std::complex<float> complexf; +GuardIntervalInserter::Params::Params( + size_t nbSymbols, + size_t spacing, + size_t nullSize, + size_t symSize, + size_t& windowOverlap) : + nbSymbols(nbSymbols), + spacing(spacing), + nullSize(nullSize), + symSize(symSize), + windowOverlap(windowOverlap) {} GuardIntervalInserter::GuardIntervalInserter( size_t nbSymbols, size_t spacing, size_t nullSize, size_t symSize, - size_t& windowOverlap) : + size_t& windowOverlap, + FFTEngine fftEngine) : ModCodec(), RemoteControllable("guardinterval"), - d_nbSymbols(nbSymbols), - d_spacing(spacing), - d_nullSize(nullSize), - d_symSize(symSize), - d_windowOverlap(windowOverlap) + m_fftEngine(fftEngine), + m_params(nbSymbols, spacing, nullSize, symSize, windowOverlap) { - if (d_nullSize == 0) { + if (nullSize == 0) { throw std::logic_error("NULL symbol must be present"); } + RC_ADD_PARAMETER(windowlen, "Window length for OFDM windowng [0 to disable]"); /* We use a raised-cosine window for the OFDM windowing. - * Each symbol is extended on both sides by d_windowOverlap samples. + * Each symbol is extended on both sides by windowOverlap samples. * * * Sym n |####################| * Sym n+1 |####################| * - * We now extend the symbols by d_windowOverlap (one dash) + * We now extend the symbols by windowOverlap (one dash) * * Sym n extended -|####################|- * Sym n+1 extended -|####################|- @@ -75,7 +83,7 @@ GuardIntervalInserter::GuardIntervalInserter( * / \ * ... ________________/ \__ ... * - * The window length is 2*d_windowOverlap. + * The window length is 2*windowOverlap. */ update_window(windowOverlap); @@ -87,44 +95,43 @@ GuardIntervalInserter::GuardIntervalInserter( void GuardIntervalInserter::update_window(size_t new_window_overlap) { - std::lock_guard<std::mutex> lock(d_windowMutex); + std::lock_guard<std::mutex> lock(m_params.windowMutex); - d_windowOverlap = new_window_overlap; + m_params.windowOverlap = new_window_overlap; - // d_window only contains the rising window edge. - d_window.resize(2*d_windowOverlap); - for (size_t i = 0; i < 2*d_windowOverlap; i++) { - d_window[i] = (float)(0.5 * (1.0 - cos(M_PI * i / (2*d_windowOverlap - 1)))); + // m_params.window only contains the rising window edge. + m_params.window.resize(2*m_params.windowOverlap); + for (size_t i = 0; i < 2*m_params.windowOverlap; i++) { + m_params.window[i] = (float)(0.5 * (1.0 - cos(M_PI * i / (2*m_params.windowOverlap - 1)))); } } -int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut) +template<typename T> +int do_process(const GuardIntervalInserter::Params& p, Buffer* const dataIn, Buffer* dataOut) { - PDEBUG("GuardIntervalInserter::process(dataIn: %p, dataOut: %p)\n", + PDEBUG("GuardIntervalInserter do_process(dataIn: %p, dataOut: %p)\n", dataIn, dataOut); - std::lock_guard<std::mutex> lock(d_windowMutex); - - // Every symbol overlaps over a length of d_windowOverlap with + // Every symbol overlaps over a length of windowOverlap with // the previous symbol, and with the next symbol. First symbol // receives no prefix window, because we don't remember the // last symbol from the previous TF (yet). Last symbol also // receives no suffix window, for the same reason. // Overall output buffer length must stay independent of the windowing. - dataOut->setLength((d_nullSize + (d_nbSymbols * d_symSize)) * sizeof(complexf)); + dataOut->setLength((p.nullSize + (p.nbSymbols * p.symSize)) * sizeof(T)); - const complexf* in = reinterpret_cast<const complexf*>(dataIn->getData()); - complexf* out = reinterpret_cast<complexf*>(dataOut->getData()); - size_t sizeIn = dataIn->getLength() / sizeof(complexf); + const T* in = reinterpret_cast<const T*>(dataIn->getData()); + T* out = reinterpret_cast<T*>(dataOut->getData()); + size_t sizeIn = dataIn->getLength() / sizeof(T); - const size_t num_symbols = d_nbSymbols + 1; - if (sizeIn != num_symbols * d_spacing) + const size_t num_symbols = p.nbSymbols + 1; + if (sizeIn != num_symbols * p.spacing) { - PDEBUG("Nb symbols: %zu\n", d_nbSymbols); - PDEBUG("Spacing: %zu\n", d_spacing); - PDEBUG("Null size: %zu\n", d_nullSize); - PDEBUG("Sym size: %zu\n", d_symSize); - PDEBUG("\n%zu != %zu\n", sizeIn, (d_nbSymbols + 1) * d_spacing); + PDEBUG("Nb symbols: %zu\n", p.nbSymbols); + PDEBUG("Spacing: %zu\n", p.spacing); + PDEBUG("Null size: %zu\n", p.nullSize); + PDEBUG("Sym size: %zu\n", p.symSize); + PDEBUG("\n%zu != %zu\n", sizeIn, (p.nbSymbols + 1) * p.spacing); throw std::runtime_error( "GuardIntervalInserter::process input size not valid!"); } @@ -132,139 +139,162 @@ int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut) // TODO remember the end of the last TF so that we can do some // windowing too. - if (d_windowOverlap) { - { - // Handle Null symbol separately because it is longer - const size_t prefixlength = d_nullSize - d_spacing; - - // end = spacing - memcpy(out, &in[d_spacing - prefixlength], - prefixlength * sizeof(complexf)); - - memcpy(&out[prefixlength], in, (d_spacing - d_windowOverlap) * sizeof(complexf)); + std::lock_guard<std::mutex> lock(p.windowMutex); + if (p.windowOverlap) { + if constexpr (std::is_same_v<complexf, T>) { + { + // Handle Null symbol separately because it is longer + const size_t prefixlength = p.nullSize - p.spacing; + + // end = spacing + memcpy(out, &in[p.spacing - prefixlength], + prefixlength * sizeof(T)); + + memcpy(&out[prefixlength], in, (p.spacing - p.windowOverlap) * sizeof(T)); + + // The remaining part of the symbol must have half of the window applied, + // sloping down from 1 to 0.5 + for (size_t i = 0; i < p.windowOverlap; i++) { + const size_t out_ix = prefixlength + p.spacing - p.windowOverlap + i; + const size_t in_ix = p.spacing - p.windowOverlap + i; + out[out_ix] = in[in_ix] * p.window[2*p.windowOverlap - (i+1)]; + } - // The remaining part of the symbol must have half of the window applied, - // sloping down from 1 to 0.5 - for (size_t i = 0; i < d_windowOverlap; i++) { - const size_t out_ix = prefixlength + d_spacing - d_windowOverlap + i; - const size_t in_ix = d_spacing - d_windowOverlap + i; - out[out_ix] = in[in_ix] * d_window[2*d_windowOverlap - (i+1)]; - } + // Suffix is taken from the beginning of the symbol, and sees the other + // half of the window applied. + for (size_t i = 0; i < p.windowOverlap; i++) { + const size_t out_ix = prefixlength + p.spacing + i; + out[out_ix] = in[i] * p.window[p.windowOverlap - (i+1)]; + } - // Suffix is taken from the beginning of the symbol, and sees the other - // half of the window applied. - for (size_t i = 0; i < d_windowOverlap; i++) { - const size_t out_ix = prefixlength + d_spacing + i; - out[out_ix] = in[i] * d_window[d_windowOverlap - (i+1)]; + in += p.spacing; + out += p.nullSize; + // out is now pointing to the proper end of symbol. There are + // windowOverlap samples ahead that were already written. } - in += d_spacing; - out += d_nullSize; - // out is now pointing to the proper end of symbol. There are - // d_windowOverlap samples ahead that were already written. - } - - // Data symbols - for (size_t sym_ix = 0; sym_ix < d_nbSymbols; sym_ix++) { - /* _ix variables are indices into in[], _ox variables are - * indices for out[] */ - const ssize_t start_rise_ox = -d_windowOverlap; - const size_t start_rise_ix = 2 * d_spacing - d_symSize - d_windowOverlap; - /* - const size_t start_real_symbol_ox = 0; - const size_t start_real_symbol_ix = 2 * d_spacing - d_symSize; - */ - const ssize_t end_rise_ox = d_windowOverlap; - const size_t end_rise_ix = 2 * d_spacing - d_symSize + d_windowOverlap; - const ssize_t end_cyclic_prefix_ox = d_symSize - d_spacing; - /* end_cyclic_prefix_ix = end of symbol - const size_t begin_fall_ox = d_symSize - d_windowOverlap; - const size_t begin_fall_ix = d_spacing - d_windowOverlap; - const size_t end_real_symbol_ox = d_symSize; - end_real_symbol_ix = end of symbol - const size_t end_fall_ox = d_symSize + d_windowOverlap; - const size_t end_fall_ix = d_spacing + d_windowOverlap; - */ - - ssize_t ox = start_rise_ox; - size_t ix = start_rise_ix; - - for (size_t i = 0; ix < end_rise_ix; i++) { - out[ox] += in[ix] * d_window.at(i); - ix++; - ox++; - } - assert(ox == end_rise_ox); - - const size_t remaining_prefix_length = end_cyclic_prefix_ox - end_rise_ox; - memcpy( &out[ox], &in[ix], - remaining_prefix_length * sizeof(complexf)); - ox += remaining_prefix_length; - assert(ox == end_cyclic_prefix_ox); - ix = 0; - - const bool last_symbol = (sym_ix + 1 >= d_nbSymbols); - if (last_symbol) { - // No windowing at all at end - memcpy(&out[ox], &in[ix], d_spacing * sizeof(complexf)); - ox += d_spacing; - } - else { - // Copy the middle part of the symbol, d_windowOverlap samples - // short of the end. - memcpy( &out[ox], - &in[ix], - (d_spacing - d_windowOverlap) * sizeof(complexf)); - ox += d_spacing - d_windowOverlap; - ix += d_spacing - d_windowOverlap; - assert(ox == (ssize_t)(d_symSize - d_windowOverlap)); - - // Apply window from 1 to 0.5 for the end of the symbol - for (size_t i = 0; ox < (ssize_t)d_symSize; i++) { - out[ox] = in[ix] * d_window[2*d_windowOverlap - (i+1)]; - ox++; + // Data symbols + for (size_t sym_ix = 0; sym_ix < p.nbSymbols; sym_ix++) { + /* _ix variables are indices into in[], _ox variables are + * indices for out[] */ + const ssize_t start_rise_ox = -p.windowOverlap; + const size_t start_rise_ix = 2 * p.spacing - p.symSize - p.windowOverlap; + /* + const size_t start_real_symbol_ox = 0; + const size_t start_real_symbol_ix = 2 * p.spacing - p.symSize; + */ + const ssize_t end_rise_ox = p.windowOverlap; + const size_t end_rise_ix = 2 * p.spacing - p.symSize + p.windowOverlap; + const ssize_t end_cyclic_prefix_ox = p.symSize - p.spacing; + /* end_cyclic_prefix_ix = end of symbol + const size_t begin_fall_ox = p.symSize - p.windowOverlap; + const size_t begin_fall_ix = p.spacing - p.windowOverlap; + const size_t end_real_symbol_ox = p.symSize; + end_real_symbol_ix = end of symbol + const size_t end_fall_ox = p.symSize + p.windowOverlap; + const size_t end_fall_ix = p.spacing + p.windowOverlap; + */ + + ssize_t ox = start_rise_ox; + size_t ix = start_rise_ix; + + for (size_t i = 0; ix < end_rise_ix; i++) { + out[ox] += in[ix] * p.window.at(i); ix++; + ox++; } - assert(ix == d_spacing); + assert(ox == end_rise_ox); + const size_t remaining_prefix_length = end_cyclic_prefix_ox - end_rise_ox; + memcpy( &out[ox], &in[ix], + remaining_prefix_length * sizeof(T)); + ox += remaining_prefix_length; + assert(ox == end_cyclic_prefix_ox); ix = 0; - // Cyclic suffix, with window from 0.5 to 0 - for (size_t i = 0; ox < (ssize_t)(d_symSize + d_windowOverlap); i++) { - out[ox] = in[ix] * d_window[d_windowOverlap - (i+1)]; - ox++; - ix++; + + const bool last_symbol = (sym_ix + 1 >= p.nbSymbols); + if (last_symbol) { + // No windowing at all at end + memcpy(&out[ox], &in[ix], p.spacing * sizeof(T)); + ox += p.spacing; + } + else { + // Copy the middle part of the symbol, p.windowOverlap samples + // short of the end. + memcpy( &out[ox], + &in[ix], + (p.spacing - p.windowOverlap) * sizeof(T)); + ox += p.spacing - p.windowOverlap; + ix += p.spacing - p.windowOverlap; + assert(ox == (ssize_t)(p.symSize - p.windowOverlap)); + + // Apply window from 1 to 0.5 for the end of the symbol + for (size_t i = 0; ox < (ssize_t)p.symSize; i++) { + out[ox] = in[ix] * p.window[2*p.windowOverlap - (i+1)]; + ox++; + ix++; + } + assert(ix == p.spacing); + + ix = 0; + // Cyclic suffix, with window from 0.5 to 0 + for (size_t i = 0; ox < (ssize_t)(p.symSize + p.windowOverlap); i++) { + out[ox] = in[ix] * p.window[p.windowOverlap - (i+1)]; + ox++; + ix++; + } + + assert(ix == p.windowOverlap); } - assert(ix == d_windowOverlap); + out += p.symSize; + in += p.spacing; + // out is now pointing to the proper end of symbol. There are + // windowOverlap samples ahead that were already written. } - - out += d_symSize; - in += d_spacing; - // out is now pointing to the proper end of symbol. There are - // d_windowOverlap samples ahead that were already written. + } + else { + throw std::runtime_error("fixed-point doesn't support window overlap"); } } else { // Handle Null symbol separately because it is longer // end - (nullSize - spacing) = 2 * spacing - nullSize - memcpy(out, &in[2 * d_spacing - d_nullSize], - (d_nullSize - d_spacing) * sizeof(complexf)); - memcpy(&out[d_nullSize - d_spacing], in, d_spacing * sizeof(complexf)); - in += d_spacing; - out += d_nullSize; + memcpy(out, &in[2 * p.spacing - p.nullSize], + (p.nullSize - p.spacing) * sizeof(T)); + memcpy(&out[p.nullSize - p.spacing], in, p.spacing * sizeof(T)); + in += p.spacing; + out += p.nullSize; // Data symbols - for (size_t i = 0; i < d_nbSymbols; ++i) { + for (size_t i = 0; i < p.nbSymbols; ++i) { // end - (symSize - spacing) = 2 * spacing - symSize - memcpy(out, &in[2 * d_spacing - d_symSize], - (d_symSize - d_spacing) * sizeof(complexf)); - memcpy(&out[d_symSize - d_spacing], in, d_spacing * sizeof(complexf)); - in += d_spacing; - out += d_symSize; + memcpy(out, &in[2 * p.spacing - p.symSize], + (p.symSize - p.spacing) * sizeof(T)); + memcpy(&out[p.symSize - p.spacing], in, p.spacing * sizeof(T)); + in += p.spacing; + out += p.symSize; } } - return sizeIn; + const auto sizeOut = dataOut->getLength(); + return sizeOut; +} + +int GuardIntervalInserter::process(Buffer* const dataIn, Buffer* dataOut) +{ + switch (m_fftEngine) { + case FFTEngine::FFTW: + return do_process<complexf>(m_params, dataIn, dataOut); + case FFTEngine::KISS: + if (m_params.windowOverlap) { + throw std::runtime_error("fixed point and ofdm windowing not supported"); + } + return do_process<complexfix>(m_params, dataIn, dataOut); + case FFTEngine::DEXTER: + return do_process<complexfix_wide>(m_params, dataIn, dataOut); + } + throw std::logic_error("Unhandled fftEngine variant"); } void GuardIntervalInserter::set_parameter( @@ -293,7 +323,7 @@ const std::string GuardIntervalInserter::get_parameter(const std::string& parame using namespace std; stringstream ss; if (parameter == "windowlen") { - ss << d_windowOverlap; + ss << m_params.windowOverlap; } else { ss << "Parameter '" << parameter << @@ -306,6 +336,6 @@ const std::string GuardIntervalInserter::get_parameter(const std::string& parame const json::map_t GuardIntervalInserter::get_all_values() const { json::map_t map; - map["windowlen"].v = d_windowOverlap; + map["windowlen"].v = m_params.windowOverlap; return map; } diff --git a/src/GuardIntervalInserter.h b/src/GuardIntervalInserter.h index f78ac91..8d329ff 100644 --- a/src/GuardIntervalInserter.h +++ b/src/GuardIntervalInserter.h @@ -30,6 +30,7 @@ # include <config.h> #endif +#include "ConfigParser.h" #include "ModPlugin.h" #include "RemoteControl.h" #include <stdint.h> @@ -50,7 +51,8 @@ class GuardIntervalInserter : public ModCodec, public RemoteControllable size_t spacing, size_t nullSize, size_t symSize, - size_t& windowOverlap); + size_t& windowOverlap, + FFTEngine fftEngine); virtual ~GuardIntervalInserter() {} @@ -62,16 +64,30 @@ class GuardIntervalInserter : public ModCodec, public RemoteControllable virtual const std::string get_parameter(const std::string& parameter) const override; virtual const json::map_t get_all_values() const override; + struct Params { + Params( + size_t nbSymbols, + size_t spacing, + size_t nullSize, + size_t symSize, + size_t& windowOverlap); + + size_t nbSymbols; + size_t spacing; + size_t nullSize; + size_t symSize; + size_t& windowOverlap; + + mutable std::mutex windowMutex; + std::vector<float> window; + }; + protected: void update_window(size_t new_window_overlap); - size_t d_nbSymbols; - size_t d_spacing; - size_t d_nullSize; - size_t d_symSize; + FFTEngine m_fftEngine; + + Params m_params; - mutable std::mutex d_windowMutex; - size_t& d_windowOverlap; - std::vector<float> d_window; }; diff --git a/src/MemlessPoly.h b/src/MemlessPoly.h index 91e6860..72de62c 100644 --- a/src/MemlessPoly.h +++ b/src/MemlessPoly.h @@ -32,13 +32,10 @@ #include "RemoteControl.h" #include "ModPlugin.h" -#include "PcDebug.h" #include "ThreadsafeQueue.h" #include <sys/types.h> #include <array> -#include <complex> -#include <memory> #include <string> #include <thread> #include <vector> @@ -47,8 +44,6 @@ #define MEMLESSPOLY_PIPELINE_DELAY 1 -typedef std::complex<float> complexf; - enum class dpd_type_t { odd_only_poly, lookup_table diff --git a/src/ModPlugin.h b/src/ModPlugin.h index 470508f..bb3ee2c 100644 --- a/src/ModPlugin.h +++ b/src/ModPlugin.h @@ -33,9 +33,7 @@ #include "Buffer.h" #include "ThreadsafeQueue.h" #include "TimestampDecoder.h" -#include <cstddef> #include <vector> -#include <memory> #include <thread> #include <atomic> diff --git a/src/NullSymbol.cpp b/src/NullSymbol.cpp index 4684dfe..526e662 100644 --- a/src/NullSymbol.cpp +++ b/src/NullSymbol.cpp @@ -27,18 +27,16 @@ #include "NullSymbol.h" #include "PcDebug.h" -#include <stdio.h> -#include <stdlib.h> -#include <complex> -#include <string.h> +#include <cstdio> +#include <cstdlib> +#include <cstring> -typedef std::complex<float> complexf; - -NullSymbol::NullSymbol(size_t nbCarriers) : +NullSymbol::NullSymbol(size_t numCarriers, size_t typeSize) : ModInput(), - myNbCarriers(nbCarriers) + m_numCarriers(numCarriers), + m_typeSize(typeSize) { - PDEBUG("NullSymbol::NullSymbol(%zu) @ %p\n", nbCarriers, this); + PDEBUG("NullSymbol::NullSymbol(%zu) @ %p\n", numCarriers, this); } @@ -52,7 +50,7 @@ int NullSymbol::process(Buffer* dataOut) { PDEBUG("NullSymbol::process(dataOut: %p)\n", dataOut); - dataOut->setLength(myNbCarriers * 2 * sizeof(float)); + dataOut->setLength(m_numCarriers * m_typeSize); memset(dataOut->getData(), 0, dataOut->getLength()); return dataOut->getLength(); diff --git a/src/NullSymbol.h b/src/NullSymbol.h index 814e434..6ba9e63 100644 --- a/src/NullSymbol.h +++ b/src/NullSymbol.h @@ -39,14 +39,14 @@ class NullSymbol : public ModInput { public: - NullSymbol(size_t nbCarriers); + NullSymbol(size_t nunCarriers, size_t typeSize); virtual ~NullSymbol(); int process(Buffer* dataOut); const char* name() { return "NullSymbol"; } private: - size_t myNbCarriers; - + size_t m_numCarriers; + size_t m_typeSize; }; diff --git a/src/OfdmGenerator.cpp b/src/OfdmGenerator.cpp index cb799d3..38648c9 100644 --- a/src/OfdmGenerator.cpp +++ b/src/OfdmGenerator.cpp @@ -2,7 +2,7 @@ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty the Queen in Right of Canada (Communications Research Center Canada) - Copyright (C) 2023 + Copyright (C) 2024 Matthias P. Braendli, matthias.braendli@mpb.li http://opendigitalradio.org @@ -27,17 +27,19 @@ #include "OfdmGenerator.h" #include "PcDebug.h" -#define FFT_TYPE fftwf_complex - -#include <string.h> #include <stdexcept> #include <assert.h> #include <string> #include <numeric> +#include <vector> +#include <cstring> +#include <complex> static const size_t MAX_CLIP_STATS = 10; -OfdmGenerator::OfdmGenerator(size_t nbSymbols, +using FFTW_TYPE = fftwf_complex; + +OfdmGeneratorCF32::OfdmGeneratorCF32(size_t nbSymbols, size_t nbCarriers, size_t spacing, bool& enableCfr, @@ -62,8 +64,7 @@ OfdmGenerator::OfdmGenerator(size_t nbSymbols, nbSymbols, nbCarriers, spacing, inverse ? "true" : "false", this); if (nbCarriers > spacing) { - throw std::runtime_error( - "OfdmGenerator::OfdmGenerator nbCarriers > spacing!"); + throw std::runtime_error("OfdmGenerator nbCarriers > spacing!"); } /* register the parameters that can be remote controlled */ @@ -102,29 +103,29 @@ OfdmGenerator::OfdmGenerator(size_t nbSymbols, PDEBUG(" myZeroSize: %u\n", myZeroSize); const int N = mySpacing; // The size of the FFT - myFftIn = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N); - myFftOut = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N); + myFftIn = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N); + myFftOut = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N); fftwf_set_timelimit(2); myFftPlan = fftwf_plan_dft_1d(N, myFftIn, myFftOut, FFTW_BACKWARD, FFTW_MEASURE); - myCfrPostClip = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N); - myCfrPostFft = (FFT_TYPE*)fftwf_malloc(sizeof(FFT_TYPE) * N); + myCfrPostClip = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N); + myCfrPostFft = (FFTW_TYPE*)fftwf_malloc(sizeof(FFTW_TYPE) * N); myCfrFft = fftwf_plan_dft_1d(N, myCfrPostClip, myCfrPostFft, FFTW_FORWARD, FFTW_MEASURE); - if (sizeof(complexf) != sizeof(FFT_TYPE)) { + if (sizeof(complexf) != sizeof(FFTW_TYPE)) { printf("sizeof(complexf) %zu\n", sizeof(complexf)); - printf("sizeof(FFT_TYPE) %zu\n", sizeof(FFT_TYPE)); + printf("sizeof(FFT_TYPE) %zu\n", sizeof(FFTW_TYPE)); throw std::runtime_error( "OfdmGenerator::process complexf size is not FFT_TYPE size!"); } } -OfdmGenerator::~OfdmGenerator() +OfdmGeneratorCF32::~OfdmGeneratorCF32() { PDEBUG("OfdmGenerator::~OfdmGenerator() @ %p\n", this); @@ -153,15 +154,15 @@ OfdmGenerator::~OfdmGenerator() } } -int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut) +int OfdmGeneratorCF32::process(Buffer* const dataIn, Buffer* dataOut) { PDEBUG("OfdmGenerator::process(dataIn: %p, dataOut: %p)\n", dataIn, dataOut); dataOut->setLength(myNbSymbols * mySpacing * sizeof(complexf)); - FFT_TYPE* in = reinterpret_cast<FFT_TYPE*>(dataIn->getData()); - FFT_TYPE* out = reinterpret_cast<FFT_TYPE*>(dataOut->getData()); + FFTW_TYPE *in = reinterpret_cast<FFTW_TYPE*>(dataIn->getData()); + FFTW_TYPE *out = reinterpret_cast<FFTW_TYPE*>(dataOut->getData()); size_t sizeIn = dataIn->getLength() / sizeof(complexf); size_t sizeOut = dataOut->getLength() / sizeof(complexf); @@ -203,7 +204,7 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut) myPaprAfterCFR.clear(); } - for (size_t i = 0; i < myNbSymbols; ++i) { + for (size_t i = 0; i < myNbSymbols; i++) { myFftIn[0][0] = 0; myFftIn[0][1] = 0; @@ -212,22 +213,20 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut) * PosSrc=0 PosDst=1 PosSize=768 * NegSrc=768 NegDst=1280 NegSize=768 */ - memset(&myFftIn[myZeroDst], 0, myZeroSize * sizeof(FFT_TYPE)); + memset(&myFftIn[myZeroDst], 0, myZeroSize * sizeof(FFTW_TYPE)); memcpy(&myFftIn[myPosDst], &in[myPosSrc], - myPosSize * sizeof(FFT_TYPE)); + myPosSize * sizeof(FFTW_TYPE)); memcpy(&myFftIn[myNegDst], &in[myNegSrc], - myNegSize * sizeof(FFT_TYPE)); - + myNegSize * sizeof(FFTW_TYPE)); if (myCfr) { reference.resize(mySpacing); memcpy(reinterpret_cast<fftwf_complex*>(reference.data()), - myFftIn, mySpacing * sizeof(FFT_TYPE)); + myFftIn, mySpacing * sizeof(FFTW_TYPE)); } fftwf_execute(myFftPlan); // IFFT from myFftIn to myFftOut - if (myCfr) { complexf *symbol = reinterpret_cast<complexf*>(myFftOut); myPaprBeforeCFR.process_block(symbol, mySpacing); @@ -235,7 +234,7 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut) if (myMERCalcIndex == i) { before_cfr.resize(mySpacing); memcpy(reinterpret_cast<fftwf_complex*>(before_cfr.data()), - myFftOut, mySpacing * sizeof(FFT_TYPE)); + myFftOut, mySpacing * sizeof(FFTW_TYPE)); } /* cfr_one_iteration runs the myFftPlan again at the end, and @@ -277,7 +276,7 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut) num_error_clip += stat.errclip_count; } - memcpy(out, myFftOut, mySpacing * sizeof(FFT_TYPE)); + memcpy(out, myFftOut, mySpacing * sizeof(FFTW_TYPE)); in += myNbCarriers; out += mySpacing; @@ -308,14 +307,14 @@ int OfdmGenerator::process(Buffer* const dataIn, Buffer* dataOut) return sizeOut; } -OfdmGenerator::cfr_iter_stat_t OfdmGenerator::cfr_one_iteration( +OfdmGeneratorCF32::cfr_iter_stat_t OfdmGeneratorCF32::cfr_one_iteration( complexf *symbol, const complexf *reference) { // use std::norm instead of std::abs to avoid calculating the // square roots const float clip_squared = myCfrClip * myCfrClip; - OfdmGenerator::cfr_iter_stat_t ret; + OfdmGeneratorCF32::cfr_iter_stat_t ret; // Clip for (size_t i = 0; i < mySpacing; i++) { @@ -331,7 +330,7 @@ OfdmGenerator::cfr_iter_stat_t OfdmGenerator::cfr_one_iteration( } // Take FFT of our clipped signal - memcpy(myCfrPostClip, symbol, mySpacing * sizeof(FFT_TYPE)); + memcpy(myCfrPostClip, symbol, mySpacing * sizeof(FFTW_TYPE)); fftwf_execute(myCfrFft); // FFT from myCfrPostClip to myCfrPostFft // Calculate the error in frequency domain by subtracting our reference @@ -374,7 +373,7 @@ OfdmGenerator::cfr_iter_stat_t OfdmGenerator::cfr_one_iteration( } -void OfdmGenerator::set_parameter(const std::string& parameter, +void OfdmGeneratorCF32::set_parameter(const std::string& parameter, const std::string& value) { using namespace std; @@ -404,7 +403,7 @@ void OfdmGenerator::set_parameter(const std::string& parameter, } } -const std::string OfdmGenerator::get_parameter(const std::string& parameter) const +const std::string OfdmGeneratorCF32::get_parameter(const std::string& parameter) const { using namespace std; stringstream ss; @@ -458,9 +457,333 @@ const std::string OfdmGenerator::get_parameter(const std::string& parameter) con return ss.str(); } -const json::map_t OfdmGenerator::get_all_values() const +const json::map_t OfdmGeneratorCF32::get_all_values() const { json::map_t map; // TODO needs rework of the values return map; } + +OfdmGeneratorFixed::OfdmGeneratorFixed(size_t nbSymbols, + size_t nbCarriers, + size_t spacing, + bool inverse) : + ModCodec(), + myNbSymbols(nbSymbols), + myNbCarriers(nbCarriers), + mySpacing(spacing) +{ + PDEBUG("OfdmGenerator::OfdmGenerator(%zu, %zu, %zu, %s) @ %p\n", + nbSymbols, nbCarriers, spacing, inverse ? "true" : "false", this); + + etiLog.level(info) << "Using KISS FFT by Mark Borgerding for fixed-point transform"; + + if (nbCarriers > spacing) { + throw std::runtime_error("OfdmGenerator nbCarriers > spacing!"); + } + + if (inverse) { + myPosDst = (nbCarriers & 1 ? 0 : 1); + myPosSrc = 0; + myPosSize = (nbCarriers + 1) / 2; + myNegDst = spacing - (nbCarriers / 2); + myNegSrc = (nbCarriers + 1) / 2; + myNegSize = nbCarriers / 2; + } + else { + myPosDst = (nbCarriers & 1 ? 0 : 1); + myPosSrc = nbCarriers / 2; + myPosSize = (nbCarriers + 1) / 2; + myNegDst = spacing - (nbCarriers / 2); + myNegSrc = 0; + myNegSize = nbCarriers / 2; + } + myZeroDst = myPosDst + myPosSize; + myZeroSize = myNegDst - myZeroDst; + + PDEBUG(" myPosDst: %u\n", myPosDst); + PDEBUG(" myPosSrc: %u\n", myPosSrc); + PDEBUG(" myPosSize: %u\n", myPosSize); + PDEBUG(" myNegDst: %u\n", myNegDst); + PDEBUG(" myNegSrc: %u\n", myNegSrc); + PDEBUG(" myNegSize: %u\n", myNegSize); + PDEBUG(" myZeroDst: %u\n", myZeroDst); + PDEBUG(" myZeroSize: %u\n", myZeroSize); + + const int N = mySpacing; // The size of the FFT + + const size_t nbytes = N * sizeof(kiss_fft_cpx); + myFftIn = (kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes); + myFftOut = (kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes); + memset(myFftIn, 0, nbytes); + + myKissCfg = kiss_fft_alloc(N, inverse, nullptr, nullptr); +} + +OfdmGeneratorFixed::~OfdmGeneratorFixed() +{ + if (myKissCfg) KISS_FFT_FREE(myKissCfg); + if (myFftIn) KISS_FFT_FREE(myFftIn); + if (myFftOut) KISS_FFT_FREE(myFftOut); +} + +int OfdmGeneratorFixed::process(Buffer* const dataIn, Buffer* dataOut) +{ + dataOut->setLength(myNbSymbols * mySpacing * sizeof(kiss_fft_cpx)); + + kiss_fft_cpx *in = reinterpret_cast<kiss_fft_cpx*>(dataIn->getData()); + kiss_fft_cpx *out = reinterpret_cast<kiss_fft_cpx*>(dataOut->getData()); + + size_t sizeIn = dataIn->getLength() / sizeof(kiss_fft_cpx); + size_t sizeOut = dataOut->getLength() / sizeof(kiss_fft_cpx); + + if (sizeIn != myNbSymbols * myNbCarriers) { + PDEBUG("Nb symbols: %zu\n", myNbSymbols); + PDEBUG("Nb carriers: %zu\n", myNbCarriers); + PDEBUG("Spacing: %zu\n", mySpacing); + PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * myNbCarriers); + throw std::runtime_error( + "OfdmGenerator::process input size not valid!"); + } + if (sizeOut != myNbSymbols * mySpacing) { + PDEBUG("Nb symbols: %zu\n", myNbSymbols); + PDEBUG("Nb carriers: %zu\n", myNbCarriers); + PDEBUG("Spacing: %zu\n", mySpacing); + PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * mySpacing); + throw std::runtime_error( + "OfdmGenerator::process output size not valid!"); + } + + for (size_t i = 0; i < myNbSymbols; i++) { + myFftIn[0].r = 0; + myFftIn[0].i = 0; + + /* For TM I this is: + * ZeroDst=769 ZeroSize=511 + * PosSrc=0 PosDst=1 PosSize=768 + * NegSrc=768 NegDst=1280 NegSize=768 + */ + memset(&myFftIn[myZeroDst], 0, myZeroSize * sizeof(kiss_fft_cpx)); + memcpy(&myFftIn[myPosDst], &in[myPosSrc], myPosSize * sizeof(kiss_fft_cpx)); + memcpy(&myFftIn[myNegDst], &in[myNegSrc], myNegSize * sizeof(kiss_fft_cpx)); + + kiss_fft(myKissCfg, myFftIn, myFftOut); + + memcpy(out, myFftOut, mySpacing * sizeof(kiss_fft_cpx)); + + in += myNbCarriers; + out += mySpacing; + } + + return sizeOut; +} + +#ifdef HAVE_DEXTER +OfdmGeneratorDEXTER::OfdmGeneratorDEXTER(size_t nbSymbols, + size_t nbCarriers, + size_t spacing) : + ModCodec(), + myNbSymbols(nbSymbols), + myNbCarriers(nbCarriers), + mySpacing(spacing) +{ + PDEBUG("OfdmGeneratorDEXTER::OfdmGeneratorDEXTER(%zu, %zu, %zu) @ %p\n", + nbSymbols, nbCarriers, spacing, this); + + etiLog.level(info) << "Using DEXTER FFT Accelerator for fixed-point transform"; + + if (nbCarriers > spacing) { + throw std::runtime_error("OfdmGenerator nbCarriers > spacing!"); + } + + myPosDst = (nbCarriers & 1 ? 0 : 1); + myPosSrc = 0; + myPosSize = (nbCarriers + 1) / 2; + myNegDst = spacing - (nbCarriers / 2); + myNegSrc = (nbCarriers + 1) / 2; + myNegSize = nbCarriers / 2; + + myZeroDst = myPosDst + myPosSize; + myZeroSize = myNegDst - myZeroDst; + + PDEBUG(" myPosDst: %u\n", myPosDst); + PDEBUG(" myPosSrc: %u\n", myPosSrc); + PDEBUG(" myPosSize: %u\n", myPosSize); + PDEBUG(" myNegDst: %u\n", myNegDst); + PDEBUG(" myNegSrc: %u\n", myNegSrc); + PDEBUG(" myNegSize: %u\n", myNegSize); + PDEBUG(" myZeroDst: %u\n", myZeroDst); + PDEBUG(" myZeroSize: %u\n", myZeroSize); + + const size_t nbytes_in = mySpacing * sizeof(complexfix); + const size_t nbytes_out = mySpacing * sizeof(complexfix_wide); + +#define IIO_ENSURE(expr, err) { \ + if (!(expr)) { \ + etiLog.log(error, "%s (%s:%d)\n", err, __FILE__, __LINE__); \ + throw std::runtime_error("Failed to set FFT for OfdmGeneratorDEXTER"); \ + } \ +} + IIO_ENSURE((m_ctx = iio_create_default_context()), "No context"); + IIO_ENSURE(m_dev_in = iio_context_find_device(m_ctx, "fft-accelerator-in"), "no dev"); + IIO_ENSURE(m_dev_out = iio_context_find_device(m_ctx, "fft-accelerator-out"), "no dev"); + IIO_ENSURE(m_channel_in = iio_device_find_channel(m_dev_in, "voltage0", true), "no channel"); + IIO_ENSURE(m_channel_out = iio_device_find_channel(m_dev_out, "voltage0", false), "no channel"); + + iio_channel_enable(m_channel_in); + iio_channel_enable(m_channel_out); + + m_buf_in = iio_device_create_buffer(m_dev_in, nbytes_in, false); + if (!m_buf_in) { + throw std::runtime_error("OfdmGeneratorDEXTER could not create in buffer"); + } + + m_buf_out = iio_device_create_buffer(m_dev_out, nbytes_out, false); + if (!m_buf_out) { + throw std::runtime_error("OfdmGeneratorDEXTER could not create out buffer"); + } +} + +OfdmGeneratorDEXTER::~OfdmGeneratorDEXTER() +{ + if (m_buf_in) { + iio_buffer_destroy(m_buf_in); + m_buf_in = nullptr; + } + + if (m_buf_out) { + iio_buffer_destroy(m_buf_out); + m_buf_out = nullptr; + } + + if (m_channel_in) { + iio_channel_disable(m_channel_in); + m_channel_in = nullptr; + } + + if (m_channel_out) { + iio_channel_disable(m_channel_out); + m_channel_out = nullptr; + } + + if (m_ctx) { + iio_context_destroy(m_ctx); + m_ctx = nullptr; + } +} + +int OfdmGeneratorDEXTER::process(Buffer* const dataIn, Buffer* dataOut) +{ + dataOut->setLength(myNbSymbols * mySpacing * sizeof(complexfix_wide)); + + complexfix *in = reinterpret_cast<complexfix*>(dataIn->getData()); + complexfix_wide *out = reinterpret_cast<complexfix_wide*>(dataOut->getData()); + + size_t sizeIn = dataIn->getLength() / sizeof(complexfix); + size_t sizeOut = dataOut->getLength() / sizeof(complexfix_wide); + + if (sizeIn != myNbSymbols * myNbCarriers) { + PDEBUG("Nb symbols: %zu\n", myNbSymbols); + PDEBUG("Nb carriers: %zu\n", myNbCarriers); + PDEBUG("Spacing: %zu\n", mySpacing); + PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * myNbCarriers); + throw std::runtime_error( + "OfdmGenerator::process input size not valid!"); + } + if (sizeOut != myNbSymbols * mySpacing) { + PDEBUG("Nb symbols: %zu\n", myNbSymbols); + PDEBUG("Nb carriers: %zu\n", myNbCarriers); + PDEBUG("Spacing: %zu\n", mySpacing); + PDEBUG("\n%zu != %zu\n", sizeIn, myNbSymbols * mySpacing); + throw std::runtime_error("OfdmGenerator::process output size not valid!"); + } + + ptrdiff_t iio_buf_size = (uint8_t*)iio_buffer_end(m_buf_in) - (uint8_t*)iio_buffer_start(m_buf_in); + if (iio_buf_size != (ssize_t)(mySpacing * sizeof(complexfix))) { + throw std::runtime_error("OfdmGenerator::process incorrect iio buffer size!"); + } + + for (size_t i = 0; i < myNbSymbols; i++) { + complexfix *fft_in = reinterpret_cast<complexfix*>(iio_buffer_start(m_buf_in)); + + /* For TM I this is: + * ZeroDst=769 ZeroSize=511 + * PosSrc=0 PosDst=1 PosSize=768 + * NegSrc=768 NegDst=1280 NegSize=768 + */ + + fft_in[0] = static_cast<complexfix::value_type>(0); + for (size_t i = 0; i < myZeroSize; i++) { + fft_in[myZeroDst + i] = static_cast<complexfix::value_type>(0); + } + + memcpy(&fft_in[myPosDst], &in[myPosSrc], myPosSize * sizeof(complexfix)); + memcpy(&fft_in[myNegDst], &in[myNegSrc], myNegSize * sizeof(complexfix)); + + ssize_t nbytes_tx = iio_buffer_push(m_buf_in); + if (nbytes_tx < 0) { + throw std::runtime_error("OfdmGenerator::process error pushing IIO buffer!"); + } + + in += myNbCarriers; + + // Keep one buffer in flight while we're doing shuffling data around here, + // this improves performance. + // I believe that, by default, IIO allocates four buffers in total. + if (i > 0) { + ssize_t nbytes_rx = iio_buffer_refill(m_buf_out); + if (nbytes_rx < 0) { + throw std::runtime_error("OfdmGenerator::process error refilling IIO buffer!"); + } + + ptrdiff_t p_inc = iio_buffer_step(m_buf_out); + if (p_inc != 1) { + throw std::runtime_error("OfdmGenerator::process Wrong p_inc"); + } + + // The FFT Accelerator takes 16-bit I + 16-bit Q, and outputs 32-bit I and 32-bit Q. + // The formatconvert will take care of this + const uint8_t *fft_out = (const uint8_t*)iio_buffer_first(m_buf_out, m_channel_out); + const uint8_t *fft_out_end = (const uint8_t*)iio_buffer_end(m_buf_out); + constexpr size_t sizeof_out_iq = sizeof(complexfix_wide); + if ((fft_out_end - fft_out) != (ssize_t)(mySpacing * sizeof_out_iq)) { + fprintf(stderr, "FFT_OUT: %p %p %zu %zu\n", + fft_out, fft_out_end, (fft_out_end - fft_out), + mySpacing * sizeof_out_iq); + throw std::runtime_error("OfdmGenerator::process fft_out length invalid!"); + } + + memcpy(out, fft_out, mySpacing * sizeof_out_iq); + + out += mySpacing; + } + } + + ssize_t nbytes_rx = iio_buffer_refill(m_buf_out); + if (nbytes_rx < 0) { + throw std::runtime_error("OfdmGenerator::process error refilling IIO buffer!"); + } + + ptrdiff_t p_inc = iio_buffer_step(m_buf_out); + if (p_inc != 1) { + throw std::runtime_error("OfdmGenerator::process Wrong p_inc"); + } + + // The FFT Accelerator takes 16-bit I + 16-bit Q, and outputs 32-bit I and 32-bit Q. + // The formatconvert will take care of this + const uint8_t *fft_out = (const uint8_t*)iio_buffer_first(m_buf_out, m_channel_out); + const uint8_t *fft_out_end = (const uint8_t*)iio_buffer_end(m_buf_out); + constexpr size_t sizeof_out_iq = sizeof(complexfix_wide); + if ((fft_out_end - fft_out) != (ssize_t)(mySpacing * sizeof_out_iq)) { + fprintf(stderr, "FFT_OUT: %p %p %zu %zu\n", + fft_out, fft_out_end, (fft_out_end - fft_out), + mySpacing * sizeof_out_iq); + throw std::runtime_error("OfdmGenerator::process fft_out length invalid!"); + } + + memcpy(out, fft_out, mySpacing * sizeof_out_iq); + + return sizeOut; +} + +#endif // HAVE_DEXTER diff --git a/src/OfdmGenerator.h b/src/OfdmGenerator.h index dc1ad46..475b2a4 100644 --- a/src/OfdmGenerator.h +++ b/src/OfdmGenerator.h @@ -2,7 +2,7 @@ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty the Queen in Right of Canada (Communications Research Center Canada) - Copyright (C) 2023 + Copyright (C) 2024 Matthias P. Braendli, matthias.braendli@mpb.li http://opendigitalradio.org @@ -33,27 +33,30 @@ #include "ModPlugin.h" #include "RemoteControl.h" #include "PAPRStats.h" -#include "fftw3.h" +#include "kiss_fft.h" + #include <cstddef> -#include <vector> -#include <complex> #include <atomic> +#include <fftw3.h> -typedef std::complex<float> complexf; +#ifdef HAVE_DEXTER +# include <iio.h> +#endif -class OfdmGenerator : public ModCodec, public RemoteControllable +// Complex Float uses FFTW +class OfdmGeneratorCF32 : public ModCodec, public RemoteControllable { public: - OfdmGenerator(size_t nbSymbols, + OfdmGeneratorCF32(size_t nbSymbols, size_t nbCarriers, size_t spacing, bool& enableCfr, float& cfrClip, float& cfrErrorClip, bool inverse = true); - virtual ~OfdmGenerator(); - OfdmGenerator(const OfdmGenerator&) = delete; - OfdmGenerator& operator=(const OfdmGenerator&) = delete; + virtual ~OfdmGeneratorCF32(); + OfdmGeneratorCF32(const OfdmGeneratorCF32&) = delete; + OfdmGeneratorCF32& operator=(const OfdmGeneratorCF32&) = delete; int process(Buffer* const dataIn, Buffer* dataOut) override; const char* name() override { return "OfdmGenerator"; } @@ -107,4 +110,76 @@ class OfdmGenerator : public ModCodec, public RemoteControllable std::deque<double> myMERs; }; +// Fixed point implementation uses KISS FFT with -DFIXED_POINT=32 +class OfdmGeneratorFixed : public ModCodec +{ + public: + OfdmGeneratorFixed(size_t nbSymbols, + size_t nbCarriers, + size_t spacing, + bool inverse = true); + virtual ~OfdmGeneratorFixed(); + OfdmGeneratorFixed(const OfdmGeneratorFixed&) = delete; + OfdmGeneratorFixed& operator=(const OfdmGeneratorFixed&) = delete; + + int process(Buffer* const dataIn, Buffer* dataOut) override; + const char* name() override { return "OfdmGenerator"; } + + private: + kiss_fft_cfg myKissCfg = nullptr; + kiss_fft_cpx *myFftIn, *myFftOut; + + const size_t myNbSymbols; + const size_t myNbCarriers; + const size_t mySpacing; + unsigned myPosSrc; + unsigned myPosDst; + unsigned myPosSize; + unsigned myNegSrc; + unsigned myNegDst; + unsigned myNegSize; + unsigned myZeroDst; + unsigned myZeroSize; +}; + +#ifdef HAVE_DEXTER +// The PrecisionWave DEXTER device contains an FFT accelerator in FPGA +// It only does inverse FFTs +class OfdmGeneratorDEXTER : public ModCodec +{ + public: + OfdmGeneratorDEXTER(size_t nbSymbols, + size_t nbCarriers, + size_t spacing); + virtual ~OfdmGeneratorDEXTER(); + OfdmGeneratorDEXTER(const OfdmGeneratorDEXTER&) = delete; + OfdmGeneratorDEXTER& operator=(const OfdmGeneratorDEXTER&) = delete; + + int process(Buffer* const dataIn, Buffer* dataOut) override; + const char* name() override { return "OfdmGenerator"; } + + private: + struct iio_context *m_ctx = nullptr; + // "in" and "out" are from the point of view of the FFT Accelerator block + struct iio_device *m_dev_in = nullptr; + struct iio_channel *m_channel_in = nullptr; + struct iio_buffer *m_buf_in = nullptr; + + struct iio_device *m_dev_out = nullptr; + struct iio_channel *m_channel_out = nullptr; + struct iio_buffer *m_buf_out = nullptr; + + const size_t myNbSymbols; + const size_t myNbCarriers; + const size_t mySpacing; + unsigned myPosSrc; + unsigned myPosDst; + unsigned myPosSize; + unsigned myNegSrc; + unsigned myNegDst; + unsigned myNegSize; + unsigned myZeroDst; + unsigned myZeroSize; +}; +#endif // HAVE_DEXTER diff --git a/src/OutputMemory.cpp b/src/OutputMemory.cpp index d6ef917..f673555 100644 --- a/src/OutputMemory.cpp +++ b/src/OutputMemory.cpp @@ -26,20 +26,14 @@ #include "OutputMemory.h" #include "PcDebug.h" -#include "Log.h" -#include "TimestampDecoder.h" - -#include <stdexcept> -#include <string.h> -#include <math.h> - +#include <cmath> OutputMemory::OutputMemory(Buffer* dataOut) : ModOutput() { PDEBUG("OutputMemory::OutputMemory(%p) @ %p\n", dataOut, this); - setOutput(dataOut); + m_dataOut = dataOut; #if OUTPUT_MEM_HISTOGRAM myMax = 0.0f; @@ -49,7 +43,6 @@ OutputMemory::OutputMemory(Buffer* dataOut) #endif } - OutputMemory::~OutputMemory() { #if OUTPUT_MEM_HISTOGRAM @@ -66,19 +59,12 @@ OutputMemory::~OutputMemory() PDEBUG("OutputMemory::~OutputMemory() @ %p\n", this); } - -void OutputMemory::setOutput(Buffer* dataOut) -{ - myDataOut = dataOut; -} - - int OutputMemory::process(Buffer* dataIn) { PDEBUG("OutputMemory::process(dataIn: %p)\n", dataIn); - *myDataOut = *dataIn; + *m_dataOut = *dataIn; #if OUTPUT_MEM_HISTOGRAM const float* in = (const float*)dataIn->getData(); @@ -93,17 +79,17 @@ int OutputMemory::process(Buffer* dataIn) } #endif - return myDataOut->getLength(); + return m_dataOut->getLength(); } meta_vec_t OutputMemory::process_metadata(const meta_vec_t& metadataIn) { - myMetadata = metadataIn; + m_metadata = metadataIn; return {}; } meta_vec_t OutputMemory::get_latest_metadata() { - return myMetadata; + return m_metadata; } diff --git a/src/OutputMemory.h b/src/OutputMemory.h index f0a5fbb..299d31d 100644 --- a/src/OutputMemory.h +++ b/src/OutputMemory.h @@ -61,11 +61,9 @@ public: meta_vec_t get_latest_metadata(void); - void setOutput(Buffer* dataOut); - protected: - Buffer* myDataOut; - meta_vec_t myMetadata; + Buffer* m_dataOut; + meta_vec_t m_metadata; #if OUTPUT_MEM_HISTOGRAM // keep track of max value diff --git a/src/PAPRStats.cpp b/src/PAPRStats.cpp index 0c9764a..103f02f 100644 --- a/src/PAPRStats.cpp +++ b/src/PAPRStats.cpp @@ -33,7 +33,6 @@ # include <iostream> #endif - PAPRStats::PAPRStats(size_t num_blocks_to_accumulate) : m_num_blocks_to_accumulate(num_blocks_to_accumulate) { diff --git a/src/PAPRStats.h b/src/PAPRStats.h index 86ad8b0..a4ded86 100644 --- a/src/PAPRStats.h +++ b/src/PAPRStats.h @@ -31,12 +31,9 @@ #endif #include <cstddef> -#include <vector> #include <deque> #include <complex> -typedef std::complex<float> complexf; - /* Helper class to calculate Peak-to-average-power ratio. * Definition of PAPR: * @@ -53,6 +50,8 @@ typedef std::complex<float> complexf; */ class PAPRStats { + typedef std::complex<float> complexf; + public: PAPRStats(size_t num_blocks_to_accumulate); diff --git a/src/PhaseReference.cpp b/src/PhaseReference.cpp index 568e15e..71dec87 100644 --- a/src/PhaseReference.cpp +++ b/src/PhaseReference.cpp @@ -29,12 +29,10 @@ #include <stdexcept> -using complexf = std::complex<float>; - /* ETSI EN 300 401 Table 43 (Clause 14.3.2) * Contains h_{i,k} values */ -const uint8_t PhaseReference::d_h[4][32] = { +static const uint8_t d_h[4][32] = { /* h0 */ { 0, 2, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 2, 2, 1, 1, 0, 2, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 2, 2, 1, 1 }, /* h1 */ { 0, 3, 2, 3, 0, 1, 3, 0, 2, 1, 2, 3, 2, 3, 3, 0, @@ -54,41 +52,80 @@ const uint8_t PhaseReference::d_h[4][32] = { * Tables 44 to 47 describe the frequency interleaving done in * FrequencyInterleaver. */ -PhaseReference::PhaseReference(unsigned int dabmode) : +PhaseReference::PhaseReference(unsigned int dabmode, bool fixedPoint) : ModInput(), - d_dabmode(dabmode) + d_dabmode(dabmode), + d_fixedPoint(fixedPoint) { PDEBUG("PhaseReference::PhaseReference(%u) @ %p\n", dabmode, this); switch (d_dabmode) { case 1: d_carriers = 1536; - d_num = 2048; break; case 2: d_carriers = 384; - d_num = 512; break; case 3: d_carriers = 192; - d_num = 256; break; case 4: d_dabmode = 0; case 0: d_carriers = 768; - d_num = 1024; break; default: throw std::runtime_error( "PhaseReference::PhaseReference DAB mode not valid!"); } - d_dataIn.resize(d_carriers); - fillData(); + + if (d_fixedPoint) { + d_phaseRefFixed.fillData(d_dabmode, d_carriers); + } + else { + d_phaseRefCF32.fillData(d_dabmode, d_carriers); + } } -complexf convert(uint8_t data) { +static const int table[][48][2] = { + { // Mode 0/4 + // Positive part + { 0, 0 }, { 3, 1 }, { 2, 0 }, { 1, 2 }, { 0, 0 }, { 3, 1 }, + { 2, 2 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 3 }, { 1, 0 }, + // Negative part + { 0, 0 }, { 1, 1 }, { 2, 1 }, { 3, 2 }, { 0, 2 }, { 1, 2 }, + { 2, 0 }, { 3, 3 }, { 0, 3 }, { 1, 1 }, { 2, 3 }, { 3, 2 }, + }, + { // Mode 1 + // Positive part + { 0, 3 }, { 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 2 }, { 3, 2 }, + { 2, 1 }, { 1, 0 }, { 0, 2 }, { 3, 2 }, { 2, 3 }, { 1, 3 }, + { 0, 0 }, { 3, 2 }, { 2, 1 }, { 1, 3 }, { 0, 3 }, { 3, 3 }, + { 2, 3 }, { 1, 0 }, { 0, 3 }, { 3, 0 }, { 2, 1 }, { 1, 1 }, + // Negative part + { 0, 1 }, { 1, 2 }, { 2, 0 }, { 3, 1 }, { 0, 3 }, { 1, 2 }, + { 2, 2 }, { 3, 3 }, { 0, 2 }, { 1, 1 }, { 2, 2 }, { 3, 3 }, + { 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 3 }, { 0, 2 }, { 1, 2 }, + { 2, 2 }, { 3, 1 }, { 0, 1 }, { 1, 3 }, { 2, 1 }, { 3, 2 }, + }, + { // Mode 2 + // Positive part + { 2, 0 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 0 }, { 1, 3 }, + // Negative part + { 0, 2 }, { 1, 3 }, { 2, 2 }, { 3, 2 }, { 0, 1 }, { 1, 2 }, + }, + { // Mode 3 + // Positive part + { 3, 2 }, { 2, 2 }, { 1, 2 }, + // Negative part + { 0, 2 }, { 1, 3 }, { 2, 0 }, + }, +}; + + +template <> +complexf PhaseRefGen<complexf>::convert(uint8_t data) { const complexf value[] = { complexf(1, 0), complexf(0, 1), @@ -98,62 +135,37 @@ complexf convert(uint8_t data) { return value[data % 4]; } +template <> +complexfix PhaseRefGen<complexfix>::convert(uint8_t data) { + constexpr auto one = fixed_16{1}; + constexpr auto zero = fixed_16{0}; -void PhaseReference::fillData() -{ - const int table[][48][2] = { - { // Mode 0/4 - // Positive part - { 0, 0 }, { 3, 1 }, { 2, 0 }, { 1, 2 }, { 0, 0 }, { 3, 1 }, - { 2, 2 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 3 }, { 1, 0 }, - // Negative part - { 0, 0 }, { 1, 1 }, { 2, 1 }, { 3, 2 }, { 0, 2 }, { 1, 2 }, - { 2, 0 }, { 3, 3 }, { 0, 3 }, { 1, 1 }, { 2, 3 }, { 3, 2 }, - }, - { // Mode 1 - // Positive part - { 0, 3 }, { 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 2 }, { 3, 2 }, - { 2, 1 }, { 1, 0 }, { 0, 2 }, { 3, 2 }, { 2, 3 }, { 1, 3 }, - { 0, 0 }, { 3, 2 }, { 2, 1 }, { 1, 3 }, { 0, 3 }, { 3, 3 }, - { 2, 3 }, { 1, 0 }, { 0, 3 }, { 3, 0 }, { 2, 1 }, { 1, 1 }, - // Negative part - { 0, 1 }, { 1, 2 }, { 2, 0 }, { 3, 1 }, { 0, 3 }, { 1, 2 }, - { 2, 2 }, { 3, 3 }, { 0, 2 }, { 1, 1 }, { 2, 2 }, { 3, 3 }, - { 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 3 }, { 0, 2 }, { 1, 2 }, - { 2, 2 }, { 3, 1 }, { 0, 1 }, { 1, 3 }, { 2, 1 }, { 3, 2 }, - }, - { // Mode 2 - // Positive part - { 2, 0 }, { 1, 2 }, { 0, 2 }, { 3, 1 }, { 2, 0 }, { 1, 3 }, - // Negative part - { 0, 2 }, { 1, 3 }, { 2, 2 }, { 3, 2 }, { 0, 1 }, { 1, 2 }, - }, - { // Mode 3 - // Positive part - { 3, 2 }, { 2, 2 }, { 1, 2 }, - // Negative part - { 0, 2 }, { 1, 3 }, { 2, 0 }, - }, + const complexfix value[] = { + complexfix(one, zero), + complexfix(zero, one), + complexfix(-one, zero), + complexfix(zero, -one), }; + return value[data % 4]; +} - if (d_dabmode > 3) { - throw std::runtime_error( - "PhaseReference::fillData invalid DAB mode!"); - } - - if (d_dataIn.size() != d_carriers) { +template <typename T> +void PhaseRefGen<T>::fillData(unsigned int dabmode, size_t carriers) +{ + dataIn.resize(carriers); + if (dataIn.size() != carriers) { throw std::runtime_error( - "PhaseReference::fillData d_dataIn has incorrect size!"); + "PhaseReference::fillData dataIn has incorrect size!"); } for (size_t index = 0, offset = 0; - index < d_dataIn.size(); + index < dataIn.size(); ++offset) { for (size_t k = 0; k < 32; ++k) { - d_dataIn[index++] = convert( - d_h[ table[d_dabmode][offset][0] ][k] + - table[d_dabmode][offset][1] ); + dataIn[index++] = convert( + d_h[ table[dabmode][offset][0] ][k] + + table[dabmode][offset][1] ); } } } @@ -163,7 +175,12 @@ int PhaseReference::process(Buffer* dataOut) { PDEBUG("PhaseReference::process(dataOut: %p)\n", dataOut); - dataOut->setData(&d_dataIn[0], d_carriers * sizeof(complexf)); + if (d_fixedPoint) { + dataOut->setData(d_phaseRefFixed.dataIn.data(), d_carriers * sizeof(complexfix)); + } + else { + dataOut->setData(d_phaseRefCF32.dataIn.data(), d_carriers * sizeof(complexf)); + } return 1; } diff --git a/src/PhaseReference.h b/src/PhaseReference.h index 6ecdc4e..735009c 100644 --- a/src/PhaseReference.h +++ b/src/PhaseReference.h @@ -32,25 +32,33 @@ #include "ModPlugin.h" -#include <cstddef> -#include <complex> #include <vector> +#include <cstddef> + +template <typename T> +struct PhaseRefGen { + std::vector<T> dataIn; + void fillData(unsigned int dabmode, size_t carriers); + + private: + T convert(uint8_t data); +}; + class PhaseReference : public ModInput { public: - PhaseReference(unsigned int dabmode); + PhaseReference(unsigned int dabmode, bool fixedPoint); int process(Buffer* dataOut) override; const char* name() override { return "PhaseReference"; } protected: unsigned int d_dabmode; + bool d_fixedPoint; size_t d_carriers; - size_t d_num; - const static uint8_t d_h[4][32]; - std::vector<std::complex<float> > d_dataIn; - void fillData(); + PhaseRefGen<complexf> d_phaseRefCF32; + PhaseRefGen<complexfix> d_phaseRefFixed; }; diff --git a/src/QpskSymbolMapper.cpp b/src/QpskSymbolMapper.cpp index e26853a..c12ad80 100644 --- a/src/QpskSymbolMapper.cpp +++ b/src/QpskSymbolMapper.cpp @@ -23,7 +23,6 @@ #include <cstdio> #include <cstring> #include <stdexcept> -#include <complex> #include <cmath> #ifdef __SSE__ # include <xmmintrin.h> @@ -32,12 +31,10 @@ #include "QpskSymbolMapper.h" #include "PcDebug.h" - -typedef std::complex<float> complexf; - -QpskSymbolMapper::QpskSymbolMapper(size_t carriers) : +QpskSymbolMapper::QpskSymbolMapper(size_t carriers, bool fixedPoint) : ModCodec(), - d_carriers(carriers) { } + m_fixedPoint(fixedPoint), + m_carriers(carriers) { } int QpskSymbolMapper::process(Buffer* const dataIn, Buffer* dataOut) { @@ -45,112 +42,172 @@ int QpskSymbolMapper::process(Buffer* const dataIn, Buffer* dataOut) "(dataIn: %p, dataOut: %p)\n", dataIn, dataOut); - dataOut->setLength(dataIn->getLength() * 4 * 2 * sizeof(float)); // 4 output complex symbols per input byte -#ifdef __SSE__ - const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData()); - __m128* out = reinterpret_cast<__m128*>(dataOut->getData()); - - if (dataIn->getLength() % (d_carriers / 4) != 0) { - throw std::runtime_error( - "QpskSymbolMapper::process input size not valid: " + - std::to_string(dataIn->getLength()) + - "(input size) % (" + std::to_string(d_carriers) + - " (carriers) / 4) != 0"); - } + // 4 output complex symbols per input byte + + if (m_fixedPoint) { + dataOut->setLength(dataIn->getLength() * 4 * sizeof(complexfix)); + + using fixed_t = complexfix::value_type; - const static __m128 symbols[16] = { - _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), - _mm_setr_ps(-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2) - }; - size_t inOffset = 0; - size_t outOffset = 0; - uint8_t tmp = 0; - for (size_t i = 0; i < dataIn->getLength(); i += d_carriers / 4) { - for (size_t j = 0; j < d_carriers / 8; ++j) { - tmp = (in[inOffset] & 0xc0) >> 4; - tmp |= (in[inOffset + (d_carriers / 8)] & 0xc0) >> 6; - out[outOffset] = symbols[tmp]; - tmp = (in[inOffset] & 0x30) >> 2; - tmp |= (in[inOffset + (d_carriers / 8)] & 0x30) >> 4; - out[outOffset + 1] = symbols[tmp]; - tmp = (in[inOffset] & 0x0c); - tmp |= (in[inOffset + (d_carriers / 8)] & 0x0c) >> 2; - out[outOffset + 2] = symbols[tmp]; - tmp = (in[inOffset] & 0x03) << 2; - tmp |= (in[inOffset + (d_carriers / 8)] & 0x03); - out[outOffset + 3] = symbols[tmp]; - ++inOffset; - outOffset += 4; + const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData()); + fixed_t* out = reinterpret_cast<fixed_t*>(dataOut->getData()); + + if (dataIn->getLength() % (m_carriers / 4) != 0) { + throw std::runtime_error( + "QpskSymbolMapper::process input size not valid!"); + } + + constexpr fixed_t v = static_cast<fixed_t>(M_SQRT1_2); + + const static fixed_t symbols[16][4] = { + { v, v, v, v}, + { v, v, v, -v}, + { v, -v, v, v}, + { v, -v, v, -v}, + { v, v, -v, v}, + { v, v, -v, -v}, + { v, -v, -v, v}, + { v, -v, -v, -v}, + {-v, v, v, v}, + {-v, v, v, -v}, + {-v, -v, v, v}, + {-v, -v, v, -v}, + {-v, v, -v, v}, + {-v, v, -v, -v}, + {-v, -v, -v, v}, + {-v, -v, -v, -v} + }; + size_t inOffset = 0; + size_t outOffset = 0; + uint8_t tmp; + for (size_t i = 0; i < dataIn->getLength(); i += m_carriers / 4) { + for (size_t j = 0; j < m_carriers / 8; ++j) { + tmp = (in[inOffset] & 0xc0) >> 4; + tmp |= (in[inOffset + (m_carriers / 8)] & 0xc0) >> 6; + memcpy(&out[outOffset], symbols[tmp], sizeof(fixed_t) * 4); + tmp = (in[inOffset] & 0x30) >> 2; + tmp |= (in[inOffset + (m_carriers / 8)] & 0x30) >> 4; + memcpy(&out[outOffset + 4], symbols[tmp], sizeof(fixed_t) * 4); + tmp = (in[inOffset] & 0x0c); + tmp |= (in[inOffset + (m_carriers / 8)] & 0x0c) >> 2; + memcpy(&out[outOffset + 8], symbols[tmp], sizeof(fixed_t) * 4); + tmp = (in[inOffset] & 0x03) << 2; + tmp |= (in[inOffset + (m_carriers / 8)] & 0x03); + memcpy(&out[outOffset + 12], symbols[tmp], sizeof(fixed_t) * 4); + ++inOffset; + outOffset += 4*4; + } + inOffset += m_carriers / 8; } - inOffset += d_carriers / 8; } + else { + dataOut->setLength(dataIn->getLength() * 4 * sizeof(complexf)); +#ifdef __SSE__ + const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData()); + __m128* out = reinterpret_cast<__m128*>(dataOut->getData()); + + if (dataIn->getLength() % (m_carriers / 4) != 0) { + throw std::runtime_error( + "QpskSymbolMapper::process input size not valid: " + + std::to_string(dataIn->getLength()) + + "(input size) % (" + std::to_string(m_carriers) + + " (carriers) / 4) != 0"); + } + + const static __m128 symbols[16] = { + _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps( M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2), + _mm_setr_ps(-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2) + }; + size_t inOffset = 0; + size_t outOffset = 0; + uint8_t tmp = 0; + for (size_t i = 0; i < dataIn->getLength(); i += m_carriers / 4) { + for (size_t j = 0; j < m_carriers / 8; ++j) { + tmp = (in[inOffset] & 0xc0) >> 4; + tmp |= (in[inOffset + (m_carriers / 8)] & 0xc0) >> 6; + out[outOffset] = symbols[tmp]; + tmp = (in[inOffset] & 0x30) >> 2; + tmp |= (in[inOffset + (m_carriers / 8)] & 0x30) >> 4; + out[outOffset + 1] = symbols[tmp]; + tmp = (in[inOffset] & 0x0c); + tmp |= (in[inOffset + (m_carriers / 8)] & 0x0c) >> 2; + out[outOffset + 2] = symbols[tmp]; + tmp = (in[inOffset] & 0x03) << 2; + tmp |= (in[inOffset + (m_carriers / 8)] & 0x03); + out[outOffset + 3] = symbols[tmp]; + ++inOffset; + outOffset += 4; + } + inOffset += m_carriers / 8; + } #else // !__SSE__ - const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData()); - float* out = reinterpret_cast<float*>(dataOut->getData()); - if (dataIn->getLength() % (d_carriers / 4) != 0) { - throw std::runtime_error( - "QpskSymbolMapper::process input size not valid!"); - } - if (dataOut->getLength() / sizeof(float) != dataIn->getLength() * 4 * 2) { // 4 output complex symbols per input byte - throw std::runtime_error( - "QpskSymbolMapper::process output size not valid!"); - } + const uint8_t* in = reinterpret_cast<const uint8_t*>(dataIn->getData()); + float* out = reinterpret_cast<float*>(dataOut->getData()); + if (dataIn->getLength() % (m_carriers / 4) != 0) { + throw std::runtime_error( + "QpskSymbolMapper::process input size not valid!"); + } + if (dataOut->getLength() / sizeof(float) != dataIn->getLength() * 4 * 2) { // 4 output complex symbols per input byte + throw std::runtime_error( + "QpskSymbolMapper::process output size not valid!"); + } - const static float symbols[16][4] = { - { M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, - { M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, - { M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, - { M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, - { M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, - { M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}, - { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, - { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}, - {-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, - {-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, - {-M_SQRT1_2,- M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, - {-M_SQRT1_2,- M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, - {-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, - {-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}, - {-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, - {-M_SQRT1_2,- M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2} - }; - size_t inOffset = 0; - size_t outOffset = 0; - uint8_t tmp; - for (size_t i = 0; i < dataIn->getLength(); i += d_carriers / 4) { - for (size_t j = 0; j < d_carriers / 8; ++j) { - tmp = (in[inOffset] & 0xc0) >> 4; - tmp |= (in[inOffset + (d_carriers / 8)] & 0xc0) >> 6; - memcpy(&out[outOffset], symbols[tmp], sizeof(float) * 4); - tmp = (in[inOffset] & 0x30) >> 2; - tmp |= (in[inOffset + (d_carriers / 8)] & 0x30) >> 4; - memcpy(&out[outOffset + 4], symbols[tmp], sizeof(float) * 4); - tmp = (in[inOffset] & 0x0c); - tmp |= (in[inOffset + (d_carriers / 8)] & 0x0c) >> 2; - memcpy(&out[outOffset + 8], symbols[tmp], sizeof(float) * 4); - tmp = (in[inOffset] & 0x03) << 2; - tmp |= (in[inOffset + (d_carriers / 8)] & 0x03); - memcpy(&out[outOffset + 12], symbols[tmp], sizeof(float) * 4); - ++inOffset; - outOffset += 4*4; + const static float symbols[16][4] = { + { M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, + { M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, + { M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, + { M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, + { M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, + { M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}, + { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, + { M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}, + {-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, + {-M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, + {-M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2}, + {-M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2}, + {-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, + {-M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2}, + {-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2}, + {-M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2} + }; + size_t inOffset = 0; + size_t outOffset = 0; + uint8_t tmp; + for (size_t i = 0; i < dataIn->getLength(); i += m_carriers / 4) { + for (size_t j = 0; j < m_carriers / 8; ++j) { + tmp = (in[inOffset] & 0xc0) >> 4; + tmp |= (in[inOffset + (m_carriers / 8)] & 0xc0) >> 6; + memcpy(&out[outOffset], symbols[tmp], sizeof(float) * 4); + tmp = (in[inOffset] & 0x30) >> 2; + tmp |= (in[inOffset + (m_carriers / 8)] & 0x30) >> 4; + memcpy(&out[outOffset + 4], symbols[tmp], sizeof(float) * 4); + tmp = (in[inOffset] & 0x0c); + tmp |= (in[inOffset + (m_carriers / 8)] & 0x0c) >> 2; + memcpy(&out[outOffset + 8], symbols[tmp], sizeof(float) * 4); + tmp = (in[inOffset] & 0x03) << 2; + tmp |= (in[inOffset + (m_carriers / 8)] & 0x03); + memcpy(&out[outOffset + 12], symbols[tmp], sizeof(float) * 4); + ++inOffset; + outOffset += 4*4; + } + inOffset += m_carriers / 8; } - inOffset += d_carriers / 8; - } #endif // __SSE__ + } return 1; } diff --git a/src/QpskSymbolMapper.h b/src/QpskSymbolMapper.h index dbcf4dd..6cf7a2e 100644 --- a/src/QpskSymbolMapper.h +++ b/src/QpskSymbolMapper.h @@ -31,12 +31,13 @@ class QpskSymbolMapper : public ModCodec { public: - QpskSymbolMapper(size_t carriers); + QpskSymbolMapper(size_t carriers, bool fixedPoint); int process(Buffer* const dataIn, Buffer* dataOut); const char* name() { return "QpskSymbolMapper"; } protected: - size_t d_carriers; + bool m_fixedPoint; + size_t m_carriers; }; diff --git a/src/Resampler.h b/src/Resampler.h index d1a9f7a..2c810f6 100644 --- a/src/Resampler.h +++ b/src/Resampler.h @@ -37,9 +37,6 @@ #define FFT_TYPE fftwf_complex #define FFT_PLAN fftwf_plan -#include <complex> -typedef std::complex<float> complexf; - class Resampler : public ModCodec { diff --git a/src/SignalMultiplexer.cpp b/src/SignalMultiplexer.cpp index 1d95bdd..d4955d0 100644 --- a/src/SignalMultiplexer.cpp +++ b/src/SignalMultiplexer.cpp @@ -22,25 +22,20 @@ #include "SignalMultiplexer.h" #include "PcDebug.h" -#include <stdio.h> -#include <stdexcept> +#include <cstdio> #include <assert.h> -#include <string.h> -SignalMultiplexer::SignalMultiplexer(size_t framesize) : - ModMux(), - d_frameSize(framesize) +SignalMultiplexer::SignalMultiplexer() : + ModMux() { - PDEBUG("SignalMultiplexer::SignalMultiplexer(%zu) @ %p\n", framesize, this); - + PDEBUG("SignalMultiplexer::SignalMultiplexer() @ %p\n", this); } SignalMultiplexer::~SignalMultiplexer() { PDEBUG("SignalMultiplexer::~SignalMultiplexer() @ %p\n", this); - } diff --git a/src/SignalMultiplexer.h b/src/SignalMultiplexer.h index 5186a8d..1f6bc12 100644 --- a/src/SignalMultiplexer.h +++ b/src/SignalMultiplexer.h @@ -36,7 +36,7 @@ class SignalMultiplexer : public ModMux { public: - SignalMultiplexer(size_t frameSize); + SignalMultiplexer(); virtual ~SignalMultiplexer(); SignalMultiplexer(const SignalMultiplexer&); SignalMultiplexer& operator=(const SignalMultiplexer&); @@ -44,8 +44,5 @@ public: int process(std::vector<Buffer*> dataIn, Buffer* dataOut); const char* name() { return "SignalMultiplexer"; } - -protected: - size_t d_frameSize; }; diff --git a/src/TII.cpp b/src/TII.cpp index 2656cbf..bce15aa 100644 --- a/src/TII.cpp +++ b/src/TII.cpp @@ -2,7 +2,7 @@ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty the Queen in Right of Canada (Communications Research Center Canada) - Copyright (C) 2023 + Copyright (C) 2024 Matthias P. Braendli, matthias.braendli@mpb.li http://opendigitalradio.org @@ -27,11 +27,8 @@ #include "TII.h" #include "PcDebug.h" -#include <stdio.h> -#include <stdexcept> -#include <string.h> - -typedef std::complex<float> complexf; +#include <cstdio> +#include <cstring> /* TII pattern for TM I, II, IV */ const int pattern_tm1_2_4[][8] = { // {{{ @@ -106,11 +103,12 @@ const int pattern_tm1_2_4[][8] = { // {{{ {1,1,1,0,1,0,0,0}, {1,1,1,1,0,0,0,0} }; // }}} -TII::TII(unsigned int dabmode, tii_config_t& tii_config) : +TII::TII(unsigned int dabmode, tii_config_t& tii_config, bool fixedPoint) : ModCodec(), RemoteControllable("tii"), m_dabmode(dabmode), - m_conf(tii_config) + m_conf(tii_config), + m_fixedPoint(fixedPoint) { PDEBUG("TII::TII(%u) @ %p\n", dabmode, this); @@ -171,56 +169,72 @@ const char* TII::name() return m_name.c_str(); } +template<typename T> +void do_process(size_t carriers, bool old_variant, const std::vector<bool>& Acp, Buffer* dataIn, Buffer* dataOut) +{ + const T* in = reinterpret_cast<const T*>(dataIn->getData()); + T* out = reinterpret_cast<T*>(dataOut->getData()); + + /* Normalise the TII carrier power according to ETSI TR 101 496-3 + * Clause 5.4.2.2 Paragraph 7: + * + * > The ratio of carriers in a TII symbol to a normal DAB symbol + * > is 1:48 for all Modes, so that the signal power in a TII symbol is + * > 16 dB below the signal power of the other symbols. + * + * This is because we only enable 32 out of 1536 carriers, not because + * every carrier is lower power. + */ + for (size_t i = 0; i < Acp.size(); i++) { + /* See header file for an explanation of the old variant. + * + * A_{c,p}(k) and A_{c,p}(k-1) are never both simultaneously true, + * so instead of doing the sum inside z_{m,0,k}, we could do + * + * if (m_Acp[i]) out[i] = in[i]; + * if (m_Acp[i-1]) out[i] = in[i-1] + * + * (Considering only the new variant) + * + * To avoid messing with indices, we substitute j = i-1 + * + * if (m_Acp[i]) out[i] = in[i]; + * if (m_Acp[j]) out[j+1] = in[j] + * + * and fuse the two conditionals together: + */ + if (Acp[i]) { + out[i] = in[i]; + out[i+1] = (old_variant ? in[i+1] : in[i]); + } + } +} int TII::process(Buffer* dataIn, Buffer* dataOut) { + const size_t sizeof_samples = m_fixedPoint ? sizeof(complexfix) : sizeof(complexf); + PDEBUG("TII::process(dataOut: %p)\n", dataOut); if ( (dataIn == NULL) or - (dataIn->getLength() != m_carriers * sizeof(complexf))) { + (dataIn->getLength() != m_carriers * sizeof_samples)) { throw TIIError("TII::process input size not valid!"); } - dataOut->setLength(m_carriers * sizeof(complexf)); - memset(dataOut->getData(), 0, dataOut->getLength()); + dataOut->setLength(m_carriers * sizeof_samples); + memset(dataOut->getData(), 0, dataOut->getLength()); if (m_conf.enable and m_insert) { std::lock_guard<std::mutex> lock(m_enabled_carriers_mutex); - complexf* in = reinterpret_cast<complexf*>(dataIn->getData()); - complexf* out = reinterpret_cast<complexf*>(dataOut->getData()); - - /* Normalise the TII carrier power according to ETSI TR 101 496-3 - * Clause 5.4.2.2 Paragraph 7: - * - * > The ratio of carriers in a TII symbol to a normal DAB symbol - * > is 1:48 for all Modes, so that the signal power in a TII symbol is - * > 16 dB below the signal power of the other symbols. - * - * This is because we only enable 32 out of 1536 carriers, not because - * every carrier is lower power. - */ - for (size_t i = 0; i < m_Acp.size(); i++) { - /* See header file for an explanation of the old variant. - * - * A_{c,p}(k) and A_{c,p}(k-1) are never both simultaneously true, - * so instead of doing the sum inside z_{m,0,k}, we could do - * - * if (m_Acp[i]) out[i] = in[i]; - * if (m_Acp[i-1]) out[i] = in[i-1] - * - * (Considering only the new variant) - * - * To avoid messing with indices, we substitute j = i-1 - * - * if (m_Acp[i]) out[i] = in[i]; - * if (m_Acp[j]) out[j+1] = in[j] - * - * and fuse the two conditionals together: - */ - if (m_Acp[i]) { - out[i] = in[i]; - out[i+1] = (m_conf.old_variant ? in[i+1] : in[i]); - } + if (m_fixedPoint) { + do_process<complexfix>( + m_carriers, m_conf.old_variant, m_Acp, + dataIn, dataOut); + } + else { + do_process<complexf>( + m_carriers, m_conf.old_variant, m_Acp, + dataIn, dataOut); } } @@ -2,7 +2,7 @@ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Her Majesty the Queen in Right of Canada (Communications Research Center Canada) - Copyright (C) 2023 + Copyright (C) 2024 Matthias P. Braendli, matthias.braendli@mpb.li http://opendigitalradio.org @@ -36,8 +36,6 @@ #include "RemoteControl.h" #include <cstddef> -#include <thread> -#include <complex> #include <vector> #include <string> @@ -81,7 +79,7 @@ class TIIError : public std::runtime_error { class TII : public ModCodec, public RemoteControllable { public: - TII(unsigned int dabmode, tii_config_t& tii_config); + TII(unsigned int dabmode, tii_config_t& tii_config, bool fixedPoint); virtual ~TII() {} int process(Buffer* dataIn, Buffer* dataOut) override; @@ -106,6 +104,8 @@ class TII : public ModCodec, public RemoteControllable // Remote-controllable settings tii_config_t& m_conf; + bool m_fixedPoint = false; + // Internal flag when to insert TII bool m_insert = true; diff --git a/src/Utils.cpp b/src/Utils.cpp index fa2fd5d..f947acd 100644 --- a/src/Utils.cpp +++ b/src/Utils.cpp @@ -66,6 +66,9 @@ static void printHeader() #if defined(__SSE__) "SSE " << #endif +#if defined(__ARM_NEON) + "NEON " << +#endif "\n"; } diff --git a/src/output/Dexter.h b/src/output/Dexter.h index d4f425f..f8a17ba 100644 --- a/src/output/Dexter.h +++ b/src/output/Dexter.h @@ -98,16 +98,16 @@ class Dexter : public Output::SDRDevice SDRDeviceConfig& m_conf; - struct iio_context* m_ctx = nullptr; - struct iio_device* m_dexter_dsp_tx = nullptr; + struct iio_context *m_ctx = nullptr; + struct iio_device *m_dexter_dsp_tx = nullptr; - struct iio_device* m_ad9957 = nullptr; - struct iio_device* m_ad9957_tx0 = nullptr; - struct iio_channel* m_tx_channel = nullptr; + struct iio_device *m_ad9957 = nullptr; + struct iio_device *m_ad9957_tx0 = nullptr; + struct iio_channel *m_tx_channel = nullptr; struct iio_buffer *m_buffer = nullptr; /* Underflows are counted in a separate thread */ - struct iio_context* m_underflow_ctx = nullptr; + struct iio_context *m_underflow_ctx = nullptr; std::atomic<bool> m_running = ATOMIC_VAR_INIT(false); std::thread m_underflow_read_thread; void underflow_read_process(); diff --git a/src/output/SDR.cpp b/src/output/SDR.cpp index 594171f..22398c7 100644 --- a/src/output/SDR.cpp +++ b/src/output/SDR.cpp @@ -34,6 +34,7 @@ #include "RemoteControl.h" #include "Utils.h" +#include <chrono> #include <cmath> #include <iostream> #include <assert.h> diff --git a/src/output/SDR.h b/src/output/SDR.h index 960de0c..86bf295 100644 --- a/src/output/SDR.h +++ b/src/output/SDR.h @@ -34,16 +34,12 @@ DESCRIPTION: # include <config.h> #endif -#include <chrono> #include "ModPlugin.h" -#include "EtiReader.h" #include "output/SDRDevice.h" #include "output/Feedback.h" namespace Output { -using complexf = std::complex<float>; - class SDR : public ModOutput, public ModMetadata, public RemoteControllable { public: SDR(SDRDeviceConfig& config, std::shared_ptr<SDRDevice> device); diff --git a/src/output/SDRDevice.h b/src/output/SDRDevice.h index 378829c..ec9373d 100644 --- a/src/output/SDRDevice.h +++ b/src/output/SDRDevice.h @@ -38,9 +38,7 @@ DESCRIPTION: #include <string> #include <vector> #include <complex> -#include <variant> #include <optional> -#include <unordered_map> #include "TimestampDecoder.h" @@ -59,6 +57,8 @@ struct SDRDeviceConfig { std::string tx_antenna; std::string rx_antenna; + bool fixedPoint = false; + long masterClockRate = 32768000; unsigned sampleRate = 2048000; double frequency = 0.0; diff --git a/src/output/UHD.cpp b/src/output/UHD.cpp index e097692..b30f9e1 100644 --- a/src/output/UHD.cpp +++ b/src/output/UHD.cpp @@ -31,10 +31,7 @@ //#define MDEBUG(fmt, args...) fprintf(LOG, fmt , ## args) #define MDEBUG(fmt, args...) -#include "PcDebug.h" #include "Log.h" -#include "RemoteControl.h" -#include "Utils.h" #include <thread> #include <iomanip> @@ -52,14 +49,12 @@ # include <uhd/utils/thread_priority.hpp> #endif - -#include <cmath> #include <iostream> -#include <assert.h> +#include <cmath> +#include <cassert> #include <stdexcept> -#include <stdio.h> +#include <cstdio> #include <time.h> -#include <errno.h> #include <unistd.h> #include <pthread.h> @@ -235,7 +230,8 @@ UHD::UHD(SDRDeviceConfig& config) : m_usrp->set_rx_gain(m_conf.rxgain); etiLog.log(debug, "OutputUHD:Actual RX Gain: %f", m_usrp->get_rx_gain()); - const uhd::stream_args_t stream_args("fc32"); //complex floats + const uhd::stream_args_t stream_args( + m_conf.fixedPoint ? "sc16" : "fc32"); m_rx_stream = m_usrp->get_rx_stream(stream_args); m_tx_stream = m_usrp->get_tx_stream(stream_args); @@ -319,8 +315,9 @@ double UHD::get_bandwidth(void) const void UHD::transmit_frame(struct FrameData&& frame) { const double tx_timeout = 20.0; - const size_t sizeIn = frame.buf.size() / sizeof(complexf); - const complexf* in_data = reinterpret_cast<const complexf*>(&frame.buf[0]); + + const size_t sample_size = m_conf.fixedPoint ? (2 * sizeof(int16_t)) : sizeof(complexf); + const size_t sizeIn = frame.buf.size() / sample_size; uhd::tx_metadata_t md_tx; @@ -353,9 +350,9 @@ void UHD::transmit_frame(struct FrameData&& frame) samps_to_send <= usrp_max_num_samps ); m_require_timestamp_refresh = false; - //send a single packet + // send a single packet size_t num_tx_samps = m_tx_stream->send( - &in_data[num_acc_samps], + frame.buf.data() + sample_size * num_acc_samps, samps_to_send, md_tx, tx_timeout); etiLog.log(trace, "UHD,sent %zu of %zu", num_tx_samps, samps_to_send); diff --git a/src/output/UHD.h b/src/output/UHD.h index 9891c7a..c4f1a45 100644 --- a/src/output/UHD.h +++ b/src/output/UHD.h @@ -45,12 +45,9 @@ DESCRIPTION: #include <atomic> #include <thread> -#include "Log.h" #include "output/SDR.h" #include "output/USRPTime.h" #include "TimestampDecoder.h" -#include "RemoteControl.h" -#include "ThreadsafeQueue.h" #include <stdio.h> #include <sys/types.h> |