aboutsummaryrefslogtreecommitdiffstats
path: root/kiss
diff options
context:
space:
mode:
authorMatthias P. Braendli <matthias.braendli@mpb.li>2024-11-25 21:02:36 +0100
committerMatthias P. Braendli <matthias.braendli@mpb.li>2024-11-25 21:02:36 +0100
commit2e9500d4854a3db9e0f407021934407155b82776 (patch)
tree72681993fb7ebdadb9b9bc41fe9a6a8130ab1da3 /kiss
parent23b5d884dbdb4ce6a20872cce6a48ea0eed39f39 (diff)
parentd45cca6f447c9a72bc9eaeb9d861fa6fcff9e597 (diff)
downloaddabmod-2e9500d4854a3db9e0f407021934407155b82776.tar.gz
dabmod-2e9500d4854a3db9e0f407021934407155b82776.tar.bz2
dabmod-2e9500d4854a3db9e0f407021934407155b82776.zip
Merge branch 'fixedpoint' into next
Diffstat (limited to 'kiss')
-rw-r--r--kiss/CHANGELOG123
-rw-r--r--kiss/COPYING11
-rw-r--r--kiss/README.md245
-rw-r--r--kiss/_kiss_fft_guts.h167
-rw-r--r--kiss/kfc.c109
-rw-r--r--kiss/kfc.h54
-rw-r--r--kiss/kiss_fft.c420
-rw-r--r--kiss/kiss_fft.h160
-rw-r--r--kiss/kiss_fft_log.h36
-rw-r--r--kiss/kiss_fftnd.c188
-rw-r--r--kiss/kiss_fftnd.h26
-rw-r--r--kiss/kiss_fftndr.c120
-rw-r--r--kiss/kiss_fftndr.h55
-rw-r--r--kiss/kiss_fftr.c155
-rw-r--r--kiss/kiss_fftr.h54
15 files changed, 1923 insertions, 0 deletions
diff --git a/kiss/CHANGELOG b/kiss/CHANGELOG
new file mode 100644
index 0000000..2dd3603
--- /dev/null
+++ b/kiss/CHANGELOG
@@ -0,0 +1,123 @@
+1.3.0 2012-07-18
+ removed non-standard malloc.h from kiss_fft.h
+
+ moved -lm to end of link line
+
+ checked various return values
+
+ converted python Numeric code to NumPy
+
+ fixed test of int32_t on 64 bit OS
+
+ added padding in a couple of places to allow SIMD alignment of structs
+
+1.2.9 2010-05-27
+ threadsafe ( including OpenMP )
+
+ first edition of kissfft.hh the C++ template fft engine
+
+1.2.8
+ Changed memory.h to string.h -- apparently more standard
+
+ Added openmp extensions. This can have fairly linear speedups for larger FFT sizes.
+
+1.2.7
+ Shrank the real-fft memory footprint. Thanks to Galen Seitz.
+
+1.2.6 (Nov 14, 2006) The "thanks to GenArts" release.
+ Added multi-dimensional real-optimized FFT, see tools/kiss_fftndr
+ Thanks go to GenArts, Inc. for sponsoring the development.
+
+1.2.5 (June 27, 2006) The "release for no good reason" release.
+ Changed some harmless code to make some compilers' warnings go away.
+ Added some more digits to pi -- why not.
+ Added kiss_fft_next_fast_size() function to help people decide how much to pad.
+ Changed multidimensional test from 8 dimensions to only 3 to avoid testing
+ problems with fixed point (sorry Buckaroo Banzai).
+
+1.2.4 (Oct 27, 2005) The "oops, inverse fixed point real fft was borked" release.
+ Fixed scaling bug for inverse fixed point real fft -- also fixed test code that should've been failing.
+ Thanks to Jean-Marc Valin for bug report.
+
+ Use sys/types.h for more portable types than short,int,long => int16_t,int32_t,int64_t
+ If your system does not have these, you may need to define them -- but at least it breaks in a
+ loud and easily fixable way -- unlike silently using the wrong size type.
+
+ Hopefully tools/psdpng.c is fixed -- thanks to Steve Kellog for pointing out the weirdness.
+
+1.2.3 (June 25, 2005) The "you want to use WHAT as a sample" release.
+ Added ability to use 32 bit fixed point samples -- requires a 64 bit intermediate result, a la 'long long'
+
+ Added ability to do 4 FFTs in parallel by using SSE SIMD instructions. This is accomplished by
+ using the __m128 (vector of 4 floats) as kiss_fft_scalar. Define USE_SIMD to use this.
+
+ I know, I know ... this is drifting a bit from the "kiss" principle, but the speed advantages
+ make it worth it for some. Also recent gcc makes it SOO easy to use vectors of 4 floats like a POD type.
+
+1.2.2 (May 6, 2005) The Matthew release
+ Replaced fixed point division with multiply&shift. Thanks to Jean-Marc Valin for
+ discussions regarding. Considerable speedup for fixed-point.
+
+ Corrected overflow protection in real fft routines when using fixed point.
+ Finder's Credit goes to Robert Oschler of robodance for pointing me at the bug.
+ This also led to the CHECK_OVERFLOW_OP macro.
+
+1.2.1 (April 4, 2004)
+ compiles cleanly with just about every -W warning flag under the sun
+
+ reorganized kiss_fft_state so it could be read-only/const. This may be useful for embedded systems
+ that are willing to predeclare twiddle factors, factorization.
+
+ Fixed C_MUL,S_MUL on 16-bit platforms.
+
+ tmpbuf will only be allocated if input & output buffers are same
+ scratchbuf will only be allocated for ffts that are not multiples of 2,3,5
+
+ NOTE: The tmpbuf,scratchbuf changes may require synchronization code for multi-threaded apps.
+
+
+1.2 (Feb 23, 2004)
+ interface change -- cfg object is forward declaration of struct instead of void*
+ This maintains type saftey and lets the compiler warn/error about stupid mistakes.
+ (prompted by suggestion from Erik de Castro Lopo)
+
+ small speed improvements
+
+ added psdpng.c -- sample utility that will create png spectrum "waterfalls" from an input file
+ ( not terribly useful yet)
+
+1.1.1 (Feb 1, 2004 )
+ minor bug fix -- only affects odd rank, in-place, multi-dimensional FFTs
+
+1.1 : (Jan 30,2004)
+ split sample_code/ into test/ and tools/
+
+ Removed 2-D fft and added N-D fft (arbitrary)
+
+ modified fftutil.c to allow multi-d FFTs
+
+ Modified core fft routine to allow an input stride via kiss_fft_stride()
+ (eased support of multi-D ffts)
+
+ Added fast convolution filtering (FIR filtering using overlap-scrap method, with tail scrap)
+
+ Add kfc.[ch]: the KISS FFT Cache. It takes care of allocs for you ( suggested by Oscar Lesta ).
+
+1.0.1 (Dec 15, 2003)
+ fixed bug that occurred when nfft==1. Thanks to Steven Johnson.
+
+1.0 : (Dec 14, 2003)
+ changed kiss_fft function from using a single buffer, to two buffers.
+ If the same buffer pointer is supplied for both in and out, kiss will
+ manage the buffer copies.
+
+ added kiss_fft2d and kiss_fftr as separate source files (declarations in kiss_fft.h )
+
+0.4 :(Nov 4,2003) optimized for radix 2,3,4,5
+
+0.3 :(Oct 28, 2003) woops, version 2 didn't actually factor out any radices other than 2.
+ Thanks to Steven Johnson for finding this one.
+
+0.2 :(Oct 27, 2003) added mixed radix, only radix 2,4 optimized versions
+
+0.1 :(May 19 2003) initial release, radix 2 only
diff --git a/kiss/COPYING b/kiss/COPYING
new file mode 100644
index 0000000..6b4b622
--- /dev/null
+++ b/kiss/COPYING
@@ -0,0 +1,11 @@
+Copyright (c) 2003-2010 Mark Borgerding . All rights reserved.
+
+KISS FFT is provided under:
+
+ SPDX-License-Identifier: BSD-3-Clause
+
+Being under the terms of the BSD 3-clause "New" or "Revised" License,
+according with:
+
+ LICENSES/BSD-3-Clause
+
diff --git a/kiss/README.md b/kiss/README.md
new file mode 100644
index 0000000..1138a0c
--- /dev/null
+++ b/kiss/README.md
@@ -0,0 +1,245 @@
+# KISS FFT [![Build Status](https://travis-ci.com/mborgerding/kissfft.svg?branch=master)](https://travis-ci.com/mborgerding/kissfft)
+
+KISS FFT - A mixed-radix Fast Fourier Transform based up on the principle,
+"Keep It Simple, Stupid."
+
+There are many great fft libraries already around. Kiss FFT is not trying
+to be better than any of them. It only attempts to be a reasonably efficient,
+moderately useful FFT that can use fixed or floating data types and can be
+incorporated into someone's C program in a few minutes with trivial licensing.
+
+## USAGE:
+
+The basic usage for 1-d complex FFT is:
+
+```c
+ #include "kiss_fft.h"
+ kiss_fft_cfg cfg = kiss_fft_alloc( nfft ,is_inverse_fft ,0,0 );
+ while ...
+
+ ... // put kth sample in cx_in[k].r and cx_in[k].i
+
+ kiss_fft( cfg , cx_in , cx_out );
+
+ ... // transformed. DC is in cx_out[0].r and cx_out[0].i
+
+ kiss_fft_free(cfg);
+```
+ - **Note**: frequency-domain data is stored from dc up to 2pi.
+ so cx_out[0] is the dc bin of the FFT
+ and cx_out[nfft/2] is the Nyquist bin (if exists)
+
+Declarations are in "kiss_fft.h", along with a brief description of the
+functions you'll need to use.
+
+Code definitions for 1d complex FFTs are in kiss_fft.c.
+
+You can do other cool stuff with the extras you'll find in tools/
+> - multi-dimensional FFTs
+> - real-optimized FFTs (returns the positive half-spectrum:
+ (nfft/2+1) complex frequency bins)
+> - fast convolution FIR filtering (not available for fixed point)
+> - spectrum image creation
+
+The core fft and most tools/ code can be compiled to use float, double,
+ Q15 short or Q31 samples. The default is float.
+
+## BUILDING:
+
+There are two functionally-equivalent build systems supported by kissfft:
+
+ - Make (traditional Makefiles for Unix / Linux systems)
+ - CMake (more modern and feature-rich build system developed by Kitware)
+
+To build kissfft, the following build environment can be used:
+
+ - GNU build environment with GCC, Clang and GNU Make or CMake (>= 3.6)
+ - Microsoft Visual C++ (MSVC) with CMake (>= 3.6)
+
+Additional libraries required to build and test kissfft include:
+
+ - libpng for psdpng tool,
+ - libfftw3 to validate kissfft results against it,
+ - python 2/3 with Numpy to validate kissfft results against it.
+ - OpenMP supported by GCC, Clang or MSVC for multi-core FFT transformations
+
+Environments like Cygwin and MinGW can be highly likely used to build kissfft
+targeting Windows platform, but no tests were performed to the date.
+
+Both Make and CMake builds are easily configurable:
+
+ - `KISSFFT_DATATYPE=<datatype>` (for Make) or `-DKISSFFT_DATATYPE=<datatype>`
+ (for CMake) denote the principal datatype used by kissfft. It can be one
+ of the following:
+
+ - float (default)
+ - double
+ - int16_t
+ - int32_t
+ - SIMD (requires SSE instruction set support on target CPU)
+
+ - `KISSFFT_OPENMP=1` (for Make) or `-DKISSFFT_OPENMP=ON` (for CMake) builds kissfft
+ with OpenMP support. Please note that a supported compiler is required and this
+ option is turned off by default.
+
+ - `KISSFFT_STATIC=1` (for Make) or `-DKISSFFT_STATIC=ON` (for CMake) instructs
+ the builder to create static library ('.lib' for Windows / '.a' for Unix or Linux).
+ By default, this option is turned off and the shared library is created
+ ('.dll' for Windows, '.so' for Linux or Unix, '.dylib' for Mac OSX)
+
+ - `-DKISSFFT_TEST=OFF` (for CMake) disables building tests for kissfft. On Make,
+ building tests is done separately by 'make testall' or 'make testsingle', so
+ no specific setting is required.
+
+ - `KISSFFT_TOOLS=0` (for Make) or `-DKISSFFT_TOOLS=OFF` (for CMake) builds kissfft
+ without command-line tools like 'fastconv'. By default the tools are built.
+
+ - `KISSFFT_USE_ALLOCA=1` (for Make) or `-DKISSFFT_USE_ALLOCA=ON` (for CMake)
+ build kissfft with 'alloca' usage instead of 'malloc' / 'free'.
+
+ - `PREFIX=/full/path/to/installation/prefix/directory` (for Make) or
+ `-DCMAKE_INSTALL_PREFIX=/full/path/to/installation/prefix/directory` (for CMake)
+ specifies the prefix directory to install kissfft into.
+
+For example, to build kissfft as a static library with 'int16_t' datatype and
+OpenMP support using Make, run the command from kissfft source tree:
+
+```
+make KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 all
+```
+
+The same configuration for CMake is:
+
+```
+mkdir build && cd build
+cmake -DKISSFFT_DATATYPE=int16_t -DKISSFFT_STATIC=ON -DKISSFFT_OPENMP=ON ..
+make all
+```
+
+To specify '/tmp/1234' as installation prefix directory, run:
+
+
+```
+make PREFIX=/tmp/1234 KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 install
+```
+
+or
+
+```
+mkdir build && cd build
+cmake -DCMAKE_INSTALL_PREFIX=/tmp/1234 -DKISSFFT_DATATYPE=int16_t -DKISSFFT_STATIC=ON -DKISSFFT_OPENMP=ON ..
+make all
+make install
+```
+
+## TESTING:
+
+To validate the build configured as an example above, run the following command from
+kissfft source tree:
+
+```
+make KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 testsingle
+```
+
+if using Make, or:
+
+```
+make test
+```
+
+if using CMake.
+
+To test all possible build configurations, please run an extended testsuite from
+kissfft source tree:
+
+```
+sh test/kissfft-testsuite.sh
+```
+
+Please note that the extended testsuite takes around 20-40 minutes depending on device
+it runs on. This testsuite is useful for reporting bugs or testing the pull requests.
+
+## BACKGROUND
+
+I started coding this because I couldn't find a fixed point FFT that didn't
+use assembly code. I started with floating point numbers so I could get the
+theory straight before working on fixed point issues. In the end, I had a
+little bit of code that could be recompiled easily to do ffts with short, float
+or double (other types should be easy too).
+
+Once I got my FFT working, I was curious about the speed compared to
+a well respected and highly optimized fft library. I don't want to criticize
+this great library, so let's call it FFT_BRANDX.
+During this process, I learned:
+
+> 1. FFT_BRANDX has more than 100K lines of code. The core of kiss_fft is about 500 lines (cpx 1-d).
+> 2. It took me an embarrassingly long time to get FFT_BRANDX working.
+> 3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB (without optimizing for size).
+> 4. FFT_BRANDX is roughly twice as fast as KISS FFT in default mode.
+
+It is wonderful that free, highly optimized libraries like FFT_BRANDX exist.
+But such libraries carry a huge burden of complexity necessary to extract every
+last bit of performance.
+
+**Sometimes simpler is better, even if it's not better.**
+
+## FREQUENTLY ASKED QUESTIONS:
+> Q: Can I use kissfft in a project with a ___ license?</br>
+> A: Yes. See LICENSE below.
+
+> Q: Why don't I get the output I expect?</br>
+> A: The two most common causes of this are
+> 1) scaling : is there a constant multiplier between what you got and what you want?
+> 2) mixed build environment -- all code must be compiled with same preprocessor
+> definitions for FIXED_POINT and kiss_fft_scalar
+
+> Q: Will you write/debug my code for me?</br>
+> A: Probably not unless you pay me. I am happy to answer pointed and topical questions, but
+> I may refer you to a book, a forum, or some other resource.
+
+
+## PERFORMANCE
+ (on Athlon XP 2100+, with gcc 2.96, float data type)
+
+Kiss performed 10000 1024-pt cpx ffts in .63 s of cpu time.
+For comparison, it took md5sum twice as long to process the same amount of data.
+Transforming 5 minutes of CD quality audio takes less than a second (nfft=1024).
+
+**DO NOT:**
+- use Kiss if you need the Fastest Fourier Transform in the World
+- ask me to add features that will bloat the code
+
+## UNDER THE HOOD
+
+Kiss FFT uses a time decimation, mixed-radix, out-of-place FFT. If you give it an input buffer
+and output buffer that are the same, a temporary buffer will be created to hold the data.
+
+No static data is used. The core routines of kiss_fft are thread-safe (but not all of the tools directory).[
+
+No scaling is done for the floating point version (for speed).
+Scaling is done both ways for the fixed-point version (for overflow prevention).
+
+Optimized butterflies are used for factors 2,3,4, and 5.
+
+The real (i.e. not complex) optimization code only works for even length ffts. It does two half-length
+FFTs in parallel (packed into real&imag), and then combines them via twiddling. The result is
+nfft/2+1 complex frequency bins from DC to Nyquist. If you don't know what this means, search the web.
+
+The fast convolution filtering uses the overlap-scrap method, slightly
+modified to put the scrap at the tail.
+
+## LICENSE
+ Revised BSD License, see COPYING for verbiage.
+ Basically, "free to use&change, give credit where due, no guarantees"
+ Note this license is compatible with GPL at one end of the spectrum and closed, commercial software at
+ the other end. See http://www.fsf.org/licensing/licenses
+
+## TODO
+ - Add real optimization for odd length FFTs
+ - Document/revisit the input/output fft scaling
+ - Make doc describing the overlap (tail) scrap fast convolution filtering in kiss_fastfir.c
+ - Test all the ./tools/ code with fixed point (kiss_fastfir.c doesn't work, maybe others)
+
+## AUTHOR
+ Mark Borgerding
+ Mark@Borgerding.net
diff --git a/kiss/_kiss_fft_guts.h b/kiss/_kiss_fft_guts.h
new file mode 100644
index 0000000..4bd8d1c
--- /dev/null
+++ b/kiss/_kiss_fft_guts.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+/* kiss_fft.h
+ defines kiss_fft_scalar as either short or a float type
+ and defines
+ typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
+
+#ifndef _kiss_fft_guts_h
+#define _kiss_fft_guts_h
+
+#include "kiss_fft.h"
+#include "kiss_fft_log.h"
+#include <limits.h>
+
+#define MAXFACTORS 32
+/* e.g. an fft of length 128 has 4 factors
+ as far as kissfft is concerned
+ 4*4*4*2
+ */
+
+struct kiss_fft_state{
+ int nfft;
+ int inverse;
+ int factors[2*MAXFACTORS];
+ kiss_fft_cpx twiddles[1];
+};
+
+/*
+ Explanation of macros dealing with complex math:
+
+ C_MUL(m,a,b) : m = a*b
+ C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise
+ C_SUB( res, a,b) : res = a - b
+ C_SUBFROM( res , a) : res -= a
+ C_ADDTO( res , a) : res += a
+ * */
+#ifdef FIXED_POINT
+#include <stdint.h>
+#if (FIXED_POINT==32)
+# define FRACBITS 31
+# define SAMPPROD int64_t
+#define SAMP_MAX INT32_MAX
+#define SAMP_MIN INT32_MIN
+#else
+# define FRACBITS 15
+# define SAMPPROD int32_t
+#define SAMP_MAX INT16_MAX
+#define SAMP_MIN INT16_MIN
+#endif
+
+#if defined(CHECK_OVERFLOW)
+# define CHECK_OVERFLOW_OP(a,op,b) \
+ if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \
+ KISS_FFT_WARNING("overflow (%d " #op" %d) = %ld", (a),(b),(SAMPPROD)(a) op (SAMPPROD)(b)); }
+#endif
+
+
+# define smul(a,b) ( (SAMPPROD)(a)*(b) )
+# define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )
+
+# define S_MUL(a,b) sround( smul(a,b) )
+
+# define C_MUL(m,a,b) \
+ do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
+ (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
+
+# define DIVSCALAR(x,k) \
+ (x) = sround( smul( x, SAMP_MAX/k ) )
+
+# define C_FIXDIV(c,div) \
+ do { DIVSCALAR( (c).r , div); \
+ DIVSCALAR( (c).i , div); }while (0)
+
+# define C_MULBYSCALAR( c, s ) \
+ do{ (c).r = sround( smul( (c).r , s ) ) ;\
+ (c).i = sround( smul( (c).i , s ) ) ; }while(0)
+
+#else /* not FIXED_POINT*/
+
+# define S_MUL(a,b) ( (a)*(b) )
+#define C_MUL(m,a,b) \
+ do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
+ (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
+# define C_FIXDIV(c,div) /* NOOP */
+# define C_MULBYSCALAR( c, s ) \
+ do{ (c).r *= (s);\
+ (c).i *= (s); }while(0)
+#endif
+
+#ifndef CHECK_OVERFLOW_OP
+# define CHECK_OVERFLOW_OP(a,op,b) /* noop */
+#endif
+
+#define C_ADD( res, a,b)\
+ do { \
+ CHECK_OVERFLOW_OP((a).r,+,(b).r)\
+ CHECK_OVERFLOW_OP((a).i,+,(b).i)\
+ (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \
+ }while(0)
+#define C_SUB( res, a,b)\
+ do { \
+ CHECK_OVERFLOW_OP((a).r,-,(b).r)\
+ CHECK_OVERFLOW_OP((a).i,-,(b).i)\
+ (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \
+ }while(0)
+#define C_ADDTO( res , a)\
+ do { \
+ CHECK_OVERFLOW_OP((res).r,+,(a).r)\
+ CHECK_OVERFLOW_OP((res).i,+,(a).i)\
+ (res).r += (a).r; (res).i += (a).i;\
+ }while(0)
+
+#define C_SUBFROM( res , a)\
+ do {\
+ CHECK_OVERFLOW_OP((res).r,-,(a).r)\
+ CHECK_OVERFLOW_OP((res).i,-,(a).i)\
+ (res).r -= (a).r; (res).i -= (a).i; \
+ }while(0)
+
+
+#ifdef FIXED_POINT
+# define KISS_FFT_COS(phase) floor(.5+SAMP_MAX * cos (phase))
+# define KISS_FFT_SIN(phase) floor(.5+SAMP_MAX * sin (phase))
+# define HALF_OF(x) ((x)>>1)
+#elif defined(USE_SIMD)
+# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
+# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
+# define HALF_OF(x) ((x)*_mm_set1_ps(.5))
+#else
+# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
+# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
+# define HALF_OF(x) ((x)*((kiss_fft_scalar).5))
+#endif
+
+#define kf_cexp(x,phase) \
+ do{ \
+ (x)->r = KISS_FFT_COS(phase);\
+ (x)->i = KISS_FFT_SIN(phase);\
+ }while(0)
+
+
+/* a debugging function */
+#define pcpx(c)\
+ KISS_FFT_DEBUG("%g + %gi\n",(double)((c)->r),(double)((c)->i))
+
+
+#ifdef KISS_FFT_USE_ALLOCA
+// define this to allow use of alloca instead of malloc for temporary buffers
+// Temporary buffers are used in two case:
+// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
+// 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform.
+#include <alloca.h>
+#define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
+#define KISS_FFT_TMP_FREE(ptr)
+#else
+#define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
+#define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
+#endif
+
+#endif /* _kiss_fft_guts_h */
+
diff --git a/kiss/kfc.c b/kiss/kfc.c
new file mode 100644
index 0000000..a405d9b
--- /dev/null
+++ b/kiss/kfc.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#include "kfc.h"
+
+typedef struct cached_fft *kfc_cfg;
+
+struct cached_fft
+{
+ int nfft;
+ int inverse;
+ kiss_fft_cfg cfg;
+ kfc_cfg next;
+};
+
+static kfc_cfg cache_root=NULL;
+static int ncached=0;
+
+static kiss_fft_cfg find_cached_fft(int nfft,int inverse)
+{
+ size_t len;
+ kfc_cfg cur=cache_root;
+ kfc_cfg prev=NULL;
+ while ( cur ) {
+ if ( cur->nfft == nfft && inverse == cur->inverse )
+ break;/*found the right node*/
+ prev = cur;
+ cur = prev->next;
+ }
+ if (cur== NULL) {
+ /* no cached node found, need to create a new one*/
+ kiss_fft_alloc(nfft,inverse,0,&len);
+#ifdef USE_SIMD
+ int padding = (16-sizeof(struct cached_fft)) & 15;
+ // make sure the cfg aligns on a 16 byte boundary
+ len += padding;
+#endif
+ cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len ));
+ if (cur == NULL)
+ return NULL;
+ cur->cfg = (kiss_fft_cfg)(cur+1);
+#ifdef USE_SIMD
+ cur->cfg = (kiss_fft_cfg) ((char*)(cur+1)+padding);
+#endif
+ kiss_fft_alloc(nfft,inverse,cur->cfg,&len);
+ cur->nfft=nfft;
+ cur->inverse=inverse;
+ cur->next = NULL;
+ if ( prev )
+ prev->next = cur;
+ else
+ cache_root = cur;
+ ++ncached;
+ }
+ return cur->cfg;
+}
+
+void kfc_cleanup(void)
+{
+ kfc_cfg cur=cache_root;
+ kfc_cfg next=NULL;
+ while (cur){
+ next = cur->next;
+ free(cur);
+ cur=next;
+ }
+ ncached=0;
+ cache_root = NULL;
+}
+void kfc_fft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout)
+{
+ kiss_fft( find_cached_fft(nfft,0),fin,fout );
+}
+
+void kfc_ifft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout)
+{
+ kiss_fft( find_cached_fft(nfft,1),fin,fout );
+}
+
+#ifdef KFC_TEST
+static void check(int nc)
+{
+ if (ncached != nc) {
+ fprintf(stderr,"ncached should be %d,but it is %d\n",nc,ncached);
+ exit(1);
+ }
+}
+
+int main(void)
+{
+ kiss_fft_cpx buf1[1024],buf2[1024];
+ memset(buf1,0,sizeof(buf1));
+ check(0);
+ kfc_fft(512,buf1,buf2);
+ check(1);
+ kfc_fft(512,buf1,buf2);
+ check(1);
+ kfc_ifft(512,buf1,buf2);
+ check(2);
+ kfc_cleanup();
+ check(0);
+ return 0;
+}
+#endif
diff --git a/kiss/kfc.h b/kiss/kfc.h
new file mode 100644
index 0000000..d7d8c1b
--- /dev/null
+++ b/kiss/kfc.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#ifndef KFC_H
+#define KFC_H
+#include "kiss_fft.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+KFC -- Kiss FFT Cache
+
+Not needing to deal with kiss_fft_alloc and a config
+object may be handy for a lot of programs.
+
+KFC uses the underlying KISS FFT functions, but caches the config object.
+The first time kfc_fft or kfc_ifft for a given FFT size, the cfg
+object is created for it. All subsequent calls use the cached
+configuration object.
+
+NOTE:
+You should probably not use this if your program will be using a lot
+of various sizes of FFTs. There is a linear search through the
+cached objects. If you are only using one or two FFT sizes, this
+will be negligible. Otherwise, you may want to use another method
+of managing the cfg objects.
+
+ There is no automated cleanup of the cached objects. This could lead
+to large memory usage in a program that uses a lot of *DIFFERENT*
+sized FFTs. If you want to force all cached cfg objects to be freed,
+call kfc_cleanup.
+
+ */
+
+/*forward complex FFT */
+void KISS_FFT_API kfc_fft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout);
+/*reverse complex FFT */
+void KISS_FFT_API kfc_ifft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout);
+
+/*free all cached objects*/
+void KISS_FFT_API kfc_cleanup(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/kiss/kiss_fft.c b/kiss/kiss_fft.c
new file mode 100644
index 0000000..58c24a0
--- /dev/null
+++ b/kiss/kiss_fft.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+
+#include "_kiss_fft_guts.h"
+/* The guts header contains all the multiplication and addition macros that are defined for
+ fixed or floating point complex numbers. It also delares the kf_ internal functions.
+ */
+
+static void kf_bfly2(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ kiss_fft_cpx * Fout2;
+ kiss_fft_cpx * tw1 = st->twiddles;
+ kiss_fft_cpx t;
+ Fout2 = Fout + m;
+ do{
+ C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
+
+ C_MUL (t, *Fout2 , *tw1);
+ tw1 += fstride;
+ C_SUB( *Fout2 , *Fout , t );
+ C_ADDTO( *Fout , t );
+ ++Fout2;
+ ++Fout;
+ }while (--m);
+}
+
+static void kf_bfly4(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ const size_t m
+ )
+{
+ kiss_fft_cpx *tw1,*tw2,*tw3;
+ kiss_fft_cpx scratch[6];
+ size_t k=m;
+ const size_t m2=2*m;
+ const size_t m3=3*m;
+
+
+ tw3 = tw2 = tw1 = st->twiddles;
+
+ do {
+ C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
+
+ C_MUL(scratch[0],Fout[m] , *tw1 );
+ C_MUL(scratch[1],Fout[m2] , *tw2 );
+ C_MUL(scratch[2],Fout[m3] , *tw3 );
+
+ C_SUB( scratch[5] , *Fout, scratch[1] );
+ C_ADDTO(*Fout, scratch[1]);
+ C_ADD( scratch[3] , scratch[0] , scratch[2] );
+ C_SUB( scratch[4] , scratch[0] , scratch[2] );
+ C_SUB( Fout[m2], *Fout, scratch[3] );
+ tw1 += fstride;
+ tw2 += fstride*2;
+ tw3 += fstride*3;
+ C_ADDTO( *Fout , scratch[3] );
+
+ if(st->inverse) {
+ Fout[m].r = scratch[5].r - scratch[4].i;
+ Fout[m].i = scratch[5].i + scratch[4].r;
+ Fout[m3].r = scratch[5].r + scratch[4].i;
+ Fout[m3].i = scratch[5].i - scratch[4].r;
+ }else{
+ Fout[m].r = scratch[5].r + scratch[4].i;
+ Fout[m].i = scratch[5].i - scratch[4].r;
+ Fout[m3].r = scratch[5].r - scratch[4].i;
+ Fout[m3].i = scratch[5].i + scratch[4].r;
+ }
+ ++Fout;
+ }while(--k);
+}
+
+static void kf_bfly3(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ size_t m
+ )
+{
+ size_t k=m;
+ const size_t m2 = 2*m;
+ kiss_fft_cpx *tw1,*tw2;
+ kiss_fft_cpx scratch[5];
+ kiss_fft_cpx epi3;
+ epi3 = st->twiddles[fstride*m];
+
+ tw1=tw2=st->twiddles;
+
+ do{
+ C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
+
+ C_MUL(scratch[1],Fout[m] , *tw1);
+ C_MUL(scratch[2],Fout[m2] , *tw2);
+
+ C_ADD(scratch[3],scratch[1],scratch[2]);
+ C_SUB(scratch[0],scratch[1],scratch[2]);
+ tw1 += fstride;
+ tw2 += fstride*2;
+
+ Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+ Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+ C_MULBYSCALAR( scratch[0] , epi3.i );
+
+ C_ADDTO(*Fout,scratch[3]);
+
+ Fout[m2].r = Fout[m].r + scratch[0].i;
+ Fout[m2].i = Fout[m].i - scratch[0].r;
+
+ Fout[m].r -= scratch[0].i;
+ Fout[m].i += scratch[0].r;
+
+ ++Fout;
+ }while(--k);
+}
+
+static void kf_bfly5(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+ int u;
+ kiss_fft_cpx scratch[13];
+ kiss_fft_cpx * twiddles = st->twiddles;
+ kiss_fft_cpx *tw;
+ kiss_fft_cpx ya,yb;
+ ya = twiddles[fstride*m];
+ yb = twiddles[fstride*2*m];
+
+ Fout0=Fout;
+ Fout1=Fout0+m;
+ Fout2=Fout0+2*m;
+ Fout3=Fout0+3*m;
+ Fout4=Fout0+4*m;
+
+ tw=st->twiddles;
+ for ( u=0; u<m; ++u ) {
+ C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
+ scratch[0] = *Fout0;
+
+ C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+ C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+ C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+ C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+ C_ADD( scratch[7],scratch[1],scratch[4]);
+ C_SUB( scratch[10],scratch[1],scratch[4]);
+ C_ADD( scratch[8],scratch[2],scratch[3]);
+ C_SUB( scratch[9],scratch[2],scratch[3]);
+
+ Fout0->r += scratch[7].r + scratch[8].r;
+ Fout0->i += scratch[7].i + scratch[8].i;
+
+ scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+ scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+
+ scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
+ scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
+
+ C_SUB(*Fout1,scratch[5],scratch[6]);
+ C_ADD(*Fout4,scratch[5],scratch[6]);
+
+ scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+ scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+ scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
+ scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
+
+ C_ADD(*Fout2,scratch[11],scratch[12]);
+ C_SUB(*Fout3,scratch[11],scratch[12]);
+
+ ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+ }
+}
+
+/* perform the butterfly for one stage of a mixed radix FFT */
+static void kf_bfly_generic(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m,
+ int p
+ )
+{
+ int u,k,q1,q;
+ kiss_fft_cpx * twiddles = st->twiddles;
+ kiss_fft_cpx t;
+ int Norig = st->nfft;
+
+ kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
+ if (scratch == NULL){
+ KISS_FFT_ERROR("Memory allocation failed.");
+ return;
+ }
+
+ for ( u=0; u<m; ++u ) {
+ k=u;
+ for ( q1=0 ; q1<p ; ++q1 ) {
+ scratch[q1] = Fout[ k ];
+ C_FIXDIV(scratch[q1],p);
+ k += m;
+ }
+
+ k=u;
+ for ( q1=0 ; q1<p ; ++q1 ) {
+ int twidx=0;
+ Fout[ k ] = scratch[0];
+ for (q=1;q<p;++q ) {
+ twidx += fstride * k;
+ if (twidx>=Norig) twidx-=Norig;
+ C_MUL(t,scratch[q] , twiddles[twidx] );
+ C_ADDTO( Fout[ k ] ,t);
+ }
+ k += m;
+ }
+ }
+ KISS_FFT_TMP_FREE(scratch);
+}
+
+static
+void kf_work(
+ kiss_fft_cpx * Fout,
+ const kiss_fft_cpx * f,
+ const size_t fstride,
+ int in_stride,
+ int * factors,
+ const kiss_fft_cfg st
+ )
+{
+ kiss_fft_cpx * Fout_beg=Fout;
+ const int p=*factors++; /* the radix */
+ const int m=*factors++; /* stage's fft length/p */
+ const kiss_fft_cpx * Fout_end = Fout + p*m;
+
+#ifdef _OPENMP
+ // use openmp extensions at the
+ // top-level (not recursive)
+ if (fstride==1 && p<=5 && m!=1)
+ {
+ int k;
+
+ // execute the p different work units in different threads
+# pragma omp parallel for
+ for (k=0;k<p;++k)
+ kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
+ // all threads have joined by this point
+
+ switch (p) {
+ case 2: kf_bfly2(Fout,fstride,st,m); break;
+ case 3: kf_bfly3(Fout,fstride,st,m); break;
+ case 4: kf_bfly4(Fout,fstride,st,m); break;
+ case 5: kf_bfly5(Fout,fstride,st,m); break;
+ default: kf_bfly_generic(Fout,fstride,st,m,p); break;
+ }
+ return;
+ }
+#endif
+
+ if (m==1) {
+ do{
+ *Fout = *f;
+ f += fstride*in_stride;
+ }while(++Fout != Fout_end );
+ }else{
+ do{
+ // recursive call:
+ // DFT of size m*p performed by doing
+ // p instances of smaller DFTs of size m,
+ // each one takes a decimated version of the input
+ kf_work( Fout , f, fstride*p, in_stride, factors,st);
+ f += fstride*in_stride;
+ }while( (Fout += m) != Fout_end );
+ }
+
+ Fout=Fout_beg;
+
+ // recombine the p smaller DFTs
+ switch (p) {
+ case 2: kf_bfly2(Fout,fstride,st,m); break;
+ case 3: kf_bfly3(Fout,fstride,st,m); break;
+ case 4: kf_bfly4(Fout,fstride,st,m); break;
+ case 5: kf_bfly5(Fout,fstride,st,m); break;
+ default: kf_bfly_generic(Fout,fstride,st,m,p); break;
+ }
+}
+
+/* facbuf is populated by p1,m1,p2,m2, ...
+ where
+ p[i] * m[i] = m[i-1]
+ m0 = n */
+static
+void kf_factor(int n,int * facbuf)
+{
+ int p=4;
+ double floor_sqrt;
+ floor_sqrt = floor( sqrt((double)n) );
+
+ /*factor out powers of 4, powers of 2, then any remaining primes */
+ do {
+ while (n % p) {
+ switch (p) {
+ case 4: p = 2; break;
+ case 2: p = 3; break;
+ default: p += 2; break;
+ }
+ if (p > floor_sqrt)
+ p = n; /* no more factors, skip to end */
+ }
+ n /= p;
+ *facbuf++ = p;
+ *facbuf++ = n;
+ } while (n > 1);
+}
+
+/*
+ *
+ * User-callable function to allocate all necessary storage space for the fft.
+ *
+ * The return value is a contiguous block of memory, allocated with malloc. As such,
+ * It can be freed with free(), rather than a kiss_fft-specific function.
+ * */
+kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
+{
+ KISS_FFT_ALIGN_CHECK(mem)
+
+ kiss_fft_cfg st=NULL;
+ size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fft_state)
+ + sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/
+
+ if ( lenmem==NULL ) {
+ st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
+ }else{
+ if (mem != NULL && *lenmem >= memneeded)
+ st = (kiss_fft_cfg)mem;
+ *lenmem = memneeded;
+ }
+ if (st) {
+ int i;
+ st->nfft=nfft;
+ st->inverse = inverse_fft;
+
+ for (i=0;i<nfft;++i) {
+ const double pi=3.141592653589793238462643383279502884197169399375105820974944;
+ double phase = -2*pi*i / nfft;
+ if (st->inverse)
+ phase *= -1;
+ kf_cexp(st->twiddles+i, phase );
+ }
+
+ kf_factor(nfft,st->factors);
+ }
+ return st;
+}
+
+
+void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
+{
+ if (fin == fout) {
+ //NOTE: this is not really an in-place FFT algorithm.
+ //It just performs an out-of-place FFT into a temp buffer
+ if (fout == NULL){
+ KISS_FFT_ERROR("fout buffer NULL.");
+ return;
+ }
+
+ kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
+ if (tmpbuf == NULL){
+ KISS_FFT_ERROR("Memory allocation error.");
+ return;
+ }
+
+
+
+ kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
+ memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
+ KISS_FFT_TMP_FREE(tmpbuf);
+ }else{
+ kf_work( fout, fin, 1,in_stride, st->factors,st );
+ }
+}
+
+void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+ kiss_fft_stride(cfg,fin,fout,1);
+}
+
+
+void kiss_fft_cleanup(void)
+{
+ // nothing needed any more
+}
+
+int kiss_fft_next_fast_size(int n)
+{
+ while(1) {
+ int m=n;
+ while ( (m%2) == 0 ) m/=2;
+ while ( (m%3) == 0 ) m/=3;
+ while ( (m%5) == 0 ) m/=5;
+ if (m<=1)
+ break; /* n is completely factorable by twos, threes, and fives */
+ n++;
+ }
+ return n;
+}
diff --git a/kiss/kiss_fft.h b/kiss/kiss_fft.h
new file mode 100644
index 0000000..dce1034
--- /dev/null
+++ b/kiss/kiss_fft.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#ifndef KISS_FFT_H
+#define KISS_FFT_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+
+// Define KISS_FFT_SHARED macro to properly export symbols
+#ifdef KISS_FFT_SHARED
+# ifdef _WIN32
+# ifdef KISS_FFT_BUILD
+# define KISS_FFT_API __declspec(dllexport)
+# else
+# define KISS_FFT_API __declspec(dllimport)
+# endif
+# else
+# define KISS_FFT_API __attribute__ ((visibility ("default")))
+# endif
+#else
+# define KISS_FFT_API
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ ATTENTION!
+ If you would like a :
+ -- a utility that will handle the caching of fft objects
+ -- real-only (no imaginary time component ) FFT
+ -- a multi-dimensional FFT
+ -- a command-line utility to perform ffts
+ -- a command-line utility to perform fast-convolution filtering
+
+ Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c
+ in the tools/ directory.
+*/
+
+/* User may override KISS_FFT_MALLOC and/or KISS_FFT_FREE. */
+#ifdef USE_SIMD
+# include <xmmintrin.h>
+# define kiss_fft_scalar __m128
+# ifndef KISS_FFT_MALLOC
+# define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
+# define KISS_FFT_ALIGN_CHECK(ptr)
+# define KISS_FFT_ALIGN_SIZE_UP(size) ((size + 15UL) & ~0xFUL)
+# endif
+# ifndef KISS_FFT_FREE
+# define KISS_FFT_FREE _mm_free
+# endif
+#else
+# define KISS_FFT_ALIGN_CHECK(ptr)
+# define KISS_FFT_ALIGN_SIZE_UP(size) (size)
+# ifndef KISS_FFT_MALLOC
+# define KISS_FFT_MALLOC malloc
+# endif
+# ifndef KISS_FFT_FREE
+# define KISS_FFT_FREE free
+# endif
+#endif
+
+
+#ifdef FIXED_POINT
+#include <stdint.h>
+# if (FIXED_POINT == 32)
+# define kiss_fft_scalar int32_t
+# else
+# define kiss_fft_scalar int16_t
+# endif
+#else
+# ifndef kiss_fft_scalar
+/* default is float */
+# define kiss_fft_scalar float
+# endif
+#endif
+
+typedef struct {
+ kiss_fft_scalar r;
+ kiss_fft_scalar i;
+}kiss_fft_cpx;
+
+typedef struct kiss_fft_state* kiss_fft_cfg;
+
+/*
+ * kiss_fft_alloc
+ *
+ * Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
+ *
+ * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
+ *
+ * The return value from fft_alloc is a cfg buffer used internally
+ * by the fft routine or NULL.
+ *
+ * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc.
+ * The returned value should be free()d when done to avoid memory leaks.
+ *
+ * The state can be placed in a user supplied buffer 'mem':
+ * If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
+ * then the function places the cfg in mem and the size used in *lenmem
+ * and returns mem.
+ *
+ * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
+ * then the function returns NULL and places the minimum cfg
+ * buffer size in *lenmem.
+ * */
+
+kiss_fft_cfg KISS_FFT_API kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem);
+
+/*
+ * kiss_fft(cfg,in_out_buf)
+ *
+ * Perform an FFT on a complex input buffer.
+ * for a forward FFT,
+ * fin should be f[0] , f[1] , ... ,f[nfft-1]
+ * fout will be F[0] , F[1] , ... ,F[nfft-1]
+ * Note that each element is complex and can be accessed like
+ f[k].r and f[k].i
+ * */
+void KISS_FFT_API kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+
+/*
+ A more generic version of the above function. It reads its input from every Nth sample.
+ * */
+void KISS_FFT_API kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride);
+
+/* If kiss_fft_alloc allocated a buffer, it is one contiguous
+ buffer and can be simply free()d when no longer needed*/
+#define kiss_fft_free KISS_FFT_FREE
+
+/*
+ Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up
+ your compiler output to call this before you exit.
+*/
+void KISS_FFT_API kiss_fft_cleanup(void);
+
+
+/*
+ * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5)
+ */
+int KISS_FFT_API kiss_fft_next_fast_size(int n);
+
+/* for real ffts, we need an even size */
+#define kiss_fftr_next_fast_size_real(n) \
+ (kiss_fft_next_fast_size( ((n)+1)>>1)<<1)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/kiss/kiss_fft_log.h b/kiss/kiss_fft_log.h
new file mode 100644
index 0000000..b5b631a
--- /dev/null
+++ b/kiss/kiss_fft_log.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#ifndef kiss_fft_log_h
+#define kiss_fft_log_h
+
+#define ERROR 1
+#define WARNING 2
+#define INFO 3
+#define DEBUG 4
+
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+
+#if defined(NDEBUG)
+# define KISS_FFT_LOG_MSG(severity, ...) ((void)0)
+#else
+# define KISS_FFT_LOG_MSG(severity, ...) \
+ fprintf(stderr, "[" #severity "] " __FILE__ ":" TOSTRING(__LINE__) " "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, "\n")
+#endif
+
+#define KISS_FFT_ERROR(...) KISS_FFT_LOG_MSG(ERROR, __VA_ARGS__)
+#define KISS_FFT_WARNING(...) KISS_FFT_LOG_MSG(WARNING, __VA_ARGS__)
+#define KISS_FFT_INFO(...) KISS_FFT_LOG_MSG(INFO, __VA_ARGS__)
+#define KISS_FFT_DEBUG(...) KISS_FFT_LOG_MSG(DEBUG, __VA_ARGS__)
+
+
+
+#endif /* kiss_fft_log_h */ \ No newline at end of file
diff --git a/kiss/kiss_fftnd.c b/kiss/kiss_fftnd.c
new file mode 100644
index 0000000..5d5b089
--- /dev/null
+++ b/kiss/kiss_fftnd.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#include "kiss_fftnd.h"
+#include "_kiss_fft_guts.h"
+
+struct kiss_fftnd_state{
+ int dimprod; /* dimsum would be mighty tasty right now */
+ int ndims;
+ int *dims;
+ kiss_fft_cfg *states; /* cfg states for each dimension */
+ kiss_fft_cpx * tmpbuf; /*buffer capable of hold the entire input */
+};
+
+kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem)
+{
+ KISS_FFT_ALIGN_CHECK(mem)
+
+ kiss_fftnd_cfg st = NULL;
+ int i;
+ int dimprod=1;
+ size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftnd_state));
+ char * ptr = NULL;
+
+ for (i=0;i<ndims;++i) {
+ size_t sublen=0;
+ kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen);
+ memneeded += sublen; /* st->states[i] */
+ dimprod *= dims[i];
+ }
+ memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(int) * ndims);/* st->dims */
+ memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(void*) * ndims);/* st->states */
+ memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(kiss_fft_cpx) * dimprod); /* st->tmpbuf */
+
+ if (lenmem == NULL) {/* allocate for the caller*/
+ ptr = (char *) malloc (memneeded);
+ } else { /* initialize supplied buffer if big enough */
+ if (*lenmem >= memneeded)
+ ptr = (char *) mem;
+ *lenmem = memneeded; /*tell caller how big struct is (or would be) */
+ }
+ if (!ptr)
+ return NULL; /*malloc failed or buffer too small */
+
+ st = (kiss_fftnd_cfg) ptr;
+ st->dimprod = dimprod;
+ st->ndims = ndims;
+ ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftnd_state));
+
+ st->states = (kiss_fft_cfg *)ptr;
+ ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(void*) * ndims);
+
+ st->dims = (int*)ptr;
+ ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(int) * ndims);
+
+ st->tmpbuf = (kiss_fft_cpx*)ptr;
+ ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(kiss_fft_cpx) * dimprod);
+
+ for (i=0;i<ndims;++i) {
+ size_t len;
+ st->dims[i] = dims[i];
+ kiss_fft_alloc (st->dims[i], inverse_fft, NULL, &len);
+ st->states[i] = kiss_fft_alloc (st->dims[i], inverse_fft, ptr,&len);
+ ptr += len;
+ }
+ /*
+Hi there!
+
+If you're looking at this particular code, it probably means you've got a brain-dead bounds checker
+that thinks the above code overwrites the end of the array.
+
+It doesn't.
+
+-- Mark
+
+P.S.
+The below code might give you some warm fuzzies and help convince you.
+ */
+ if ( ptr - (char*)st != (int)memneeded ) {
+ fprintf(stderr,
+ "################################################################################\n"
+ "Internal error! Memory allocation miscalculation\n"
+ "################################################################################\n"
+ );
+ }
+ return st;
+}
+
+/*
+ This works by tackling one dimension at a time.
+
+ In effect,
+ Each stage starts out by reshaping the matrix into a DixSi 2d matrix.
+ A Di-sized fft is taken of each column, transposing the matrix as it goes.
+
+Here's a 3-d example:
+Take a 2x3x4 matrix, laid out in memory as a contiguous buffer
+ [ [ [ a b c d ] [ e f g h ] [ i j k l ] ]
+ [ [ m n o p ] [ q r s t ] [ u v w x ] ] ]
+
+Stage 0 ( D=2): treat the buffer as a 2x12 matrix
+ [ [a b ... k l]
+ [m n ... w x] ]
+
+ FFT each column with size 2.
+ Transpose the matrix at the same time using kiss_fft_stride.
+
+ [ [ a+m a-m ]
+ [ b+n b-n]
+ ...
+ [ k+w k-w ]
+ [ l+x l-x ] ]
+
+ Note fft([x y]) == [x+y x-y]
+
+Stage 1 ( D=3) treats the buffer (the output of stage D=2) as an 3x8 matrix,
+ [ [ a+m a-m b+n b-n c+o c-o d+p d-p ]
+ [ e+q e-q f+r f-r g+s g-s h+t h-t ]
+ [ i+u i-u j+v j-v k+w k-w l+x l-x ] ]
+
+ And perform FFTs (size=3) on each of the columns as above, transposing
+ the matrix as it goes. The output of stage 1 is
+ (Legend: ap = [ a+m e+q i+u ]
+ am = [ a-m e-q i-u ] )
+
+ [ [ sum(ap) fft(ap)[0] fft(ap)[1] ]
+ [ sum(am) fft(am)[0] fft(am)[1] ]
+ [ sum(bp) fft(bp)[0] fft(bp)[1] ]
+ [ sum(bm) fft(bm)[0] fft(bm)[1] ]
+ [ sum(cp) fft(cp)[0] fft(cp)[1] ]
+ [ sum(cm) fft(cm)[0] fft(cm)[1] ]
+ [ sum(dp) fft(dp)[0] fft(dp)[1] ]
+ [ sum(dm) fft(dm)[0] fft(dm)[1] ] ]
+
+Stage 2 ( D=4) treats this buffer as a 4*6 matrix,
+ [ [ sum(ap) fft(ap)[0] fft(ap)[1] sum(am) fft(am)[0] fft(am)[1] ]
+ [ sum(bp) fft(bp)[0] fft(bp)[1] sum(bm) fft(bm)[0] fft(bm)[1] ]
+ [ sum(cp) fft(cp)[0] fft(cp)[1] sum(cm) fft(cm)[0] fft(cm)[1] ]
+ [ sum(dp) fft(dp)[0] fft(dp)[1] sum(dm) fft(dm)[0] fft(dm)[1] ] ]
+
+ Then FFTs each column, transposing as it goes.
+
+ The resulting matrix is the 3d FFT of the 2x3x4 input matrix.
+
+ Note as a sanity check that the first element of the final
+ stage's output (DC term) is
+ sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] )
+ , i.e. the summation of all 24 input elements.
+
+*/
+void kiss_fftnd(kiss_fftnd_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+ int i,k;
+ const kiss_fft_cpx * bufin=fin;
+ kiss_fft_cpx * bufout;
+
+ /*arrange it so the last bufout == fout*/
+ if ( st->ndims & 1 ) {
+ bufout = fout;
+ if (fin==fout) {
+ memcpy( st->tmpbuf, fin, sizeof(kiss_fft_cpx) * st->dimprod );
+ bufin = st->tmpbuf;
+ }
+ }else
+ bufout = st->tmpbuf;
+
+ for ( k=0; k < st->ndims; ++k) {
+ int curdim = st->dims[k];
+ int stride = st->dimprod / curdim;
+
+ for ( i=0 ; i<stride ; ++i )
+ kiss_fft_stride( st->states[k], bufin+i , bufout+i*curdim, stride );
+
+ /*toggle back and forth between the two buffers*/
+ if (bufout == st->tmpbuf){
+ bufout = fout;
+ bufin = st->tmpbuf;
+ }else{
+ bufout = st->tmpbuf;
+ bufin = fout;
+ }
+ }
+}
diff --git a/kiss/kiss_fftnd.h b/kiss/kiss_fftnd.h
new file mode 100644
index 0000000..956ba94
--- /dev/null
+++ b/kiss/kiss_fftnd.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#ifndef KISS_FFTND_H
+#define KISS_FFTND_H
+
+#include "kiss_fft.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct kiss_fftnd_state * kiss_fftnd_cfg;
+
+kiss_fftnd_cfg KISS_FFT_API kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem);
+void KISS_FFT_API kiss_fftnd(kiss_fftnd_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/kiss/kiss_fftndr.c b/kiss/kiss_fftndr.c
new file mode 100644
index 0000000..e979d03
--- /dev/null
+++ b/kiss/kiss_fftndr.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#include "kiss_fftndr.h"
+#include "_kiss_fft_guts.h"
+#define MAX(x,y) ( ( (x)<(y) )?(y):(x) )
+
+struct kiss_fftndr_state
+{
+ int dimReal;
+ int dimOther;
+ kiss_fftr_cfg cfg_r;
+ kiss_fftnd_cfg cfg_nd;
+ void * tmpbuf;
+};
+
+static int prod(const int *dims, int ndims)
+{
+ int x=1;
+ while (ndims--)
+ x *= *dims++;
+ return x;
+}
+
+kiss_fftndr_cfg kiss_fftndr_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem)
+{
+ KISS_FFT_ALIGN_CHECK(mem)
+
+ kiss_fftndr_cfg st = NULL;
+ size_t nr=0 , nd=0,ntmp=0;
+ int dimReal = dims[ndims-1];
+ int dimOther = prod(dims,ndims-1);
+ size_t memneeded;
+ char * ptr = NULL;
+
+ (void)kiss_fftr_alloc(dimReal,inverse_fft,NULL,&nr);
+ (void)kiss_fftnd_alloc(dims,ndims-1,inverse_fft,NULL,&nd);
+ ntmp =
+ MAX( 2*dimOther , dimReal+2) * sizeof(kiss_fft_scalar) // freq buffer for one pass
+ + dimOther*(dimReal+2) * sizeof(kiss_fft_scalar); // large enough to hold entire input in case of in-place
+
+ memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof( struct kiss_fftndr_state )) + KISS_FFT_ALIGN_SIZE_UP(nr) + KISS_FFT_ALIGN_SIZE_UP(nd) + KISS_FFT_ALIGN_SIZE_UP(ntmp);
+
+ if (lenmem==NULL) {
+ ptr = (char*) malloc(memneeded);
+ }else{
+ if (*lenmem >= memneeded)
+ ptr = (char *)mem;
+ *lenmem = memneeded;
+ }
+ if (ptr==NULL)
+ return NULL;
+
+ st = (kiss_fftndr_cfg) ptr;
+ memset( st , 0 , memneeded);
+ ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftndr_state));
+
+ st->dimReal = dimReal;
+ st->dimOther = dimOther;
+ st->cfg_r = kiss_fftr_alloc( dimReal,inverse_fft,ptr,&nr);
+ ptr += KISS_FFT_ALIGN_SIZE_UP(nr);
+ st->cfg_nd = kiss_fftnd_alloc(dims,ndims-1,inverse_fft, ptr,&nd);
+ ptr += KISS_FFT_ALIGN_SIZE_UP(nd);
+ st->tmpbuf = ptr;
+
+ return st;
+}
+
+void kiss_fftndr(kiss_fftndr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
+{
+ int k1,k2;
+ int dimReal = st->dimReal;
+ int dimOther = st->dimOther;
+ int nrbins = dimReal/2+1;
+
+ kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf;
+ kiss_fft_cpx * tmp2 = tmp1 + MAX(nrbins,dimOther);
+
+ // timedata is N0 x N1 x ... x Nk real
+
+ // take a real chunk of data, fft it and place the output at correct intervals
+ for (k1=0;k1<dimOther;++k1) {
+ kiss_fftr( st->cfg_r, timedata + k1*dimReal , tmp1 ); // tmp1 now holds nrbins complex points
+ for (k2=0;k2<nrbins;++k2)
+ tmp2[ k2*dimOther+k1 ] = tmp1[k2];
+ }
+
+ for (k2=0;k2<nrbins;++k2) {
+ kiss_fftnd(st->cfg_nd, tmp2+k2*dimOther, tmp1); // tmp1 now holds dimOther complex points
+ for (k1=0;k1<dimOther;++k1)
+ freqdata[ k1*(nrbins) + k2] = tmp1[k1];
+ }
+}
+
+void kiss_fftndri(kiss_fftndr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
+{
+ int k1,k2;
+ int dimReal = st->dimReal;
+ int dimOther = st->dimOther;
+ int nrbins = dimReal/2+1;
+ kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf;
+ kiss_fft_cpx * tmp2 = tmp1 + MAX(nrbins,dimOther);
+
+ for (k2=0;k2<nrbins;++k2) {
+ for (k1=0;k1<dimOther;++k1)
+ tmp1[k1] = freqdata[ k1*(nrbins) + k2 ];
+ kiss_fftnd(st->cfg_nd, tmp1, tmp2+k2*dimOther);
+ }
+
+ for (k1=0;k1<dimOther;++k1) {
+ for (k2=0;k2<nrbins;++k2)
+ tmp1[k2] = tmp2[ k2*dimOther+k1 ];
+ kiss_fftri( st->cfg_r,tmp1,timedata + k1*dimReal);
+ }
+}
diff --git a/kiss/kiss_fftndr.h b/kiss/kiss_fftndr.h
new file mode 100644
index 0000000..0d56a1f
--- /dev/null
+++ b/kiss/kiss_fftndr.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#ifndef KISS_NDR_H
+#define KISS_NDR_H
+
+#include "kiss_fft.h"
+#include "kiss_fftr.h"
+#include "kiss_fftnd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct kiss_fftndr_state *kiss_fftndr_cfg;
+
+
+kiss_fftndr_cfg KISS_FFT_API kiss_fftndr_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem);
+/*
+ dims[0] must be even
+
+ If you don't care to allocate space, use mem = lenmem = NULL
+*/
+
+
+void KISS_FFT_API kiss_fftndr(
+ kiss_fftndr_cfg cfg,
+ const kiss_fft_scalar *timedata,
+ kiss_fft_cpx *freqdata);
+/*
+ input timedata has dims[0] X dims[1] X ... X dims[ndims-1] scalar points
+ output freqdata has dims[0] X dims[1] X ... X dims[ndims-1]/2+1 complex points
+*/
+
+void KISS_FFT_API kiss_fftndri(
+ kiss_fftndr_cfg cfg,
+ const kiss_fft_cpx *freqdata,
+ kiss_fft_scalar *timedata);
+/*
+ input and output dimensions are the exact opposite of kiss_fftndr
+*/
+
+
+#define kiss_fftndr_free free
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/kiss/kiss_fftr.c b/kiss/kiss_fftr.c
new file mode 100644
index 0000000..778a9a6
--- /dev/null
+++ b/kiss/kiss_fftr.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#include "kiss_fftr.h"
+#include "_kiss_fft_guts.h"
+
+struct kiss_fftr_state{
+ kiss_fft_cfg substate;
+ kiss_fft_cpx * tmpbuf;
+ kiss_fft_cpx * super_twiddles;
+#ifdef USE_SIMD
+ void * pad;
+#endif
+};
+
+kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
+{
+ KISS_FFT_ALIGN_CHECK(mem)
+
+ int i;
+ kiss_fftr_cfg st = NULL;
+ size_t subsize = 0, memneeded;
+
+ if (nfft & 1) {
+ KISS_FFT_ERROR("Real FFT optimization must be even.");
+ return NULL;
+ }
+ nfft >>= 1;
+
+ kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
+ memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2);
+
+ if (lenmem == NULL) {
+ st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded);
+ } else {
+ if (*lenmem >= memneeded)
+ st = (kiss_fftr_cfg) mem;
+ *lenmem = memneeded;
+ }
+ if (!st)
+ return NULL;
+
+ st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
+ st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
+ st->super_twiddles = st->tmpbuf + nfft;
+ kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
+
+ for (i = 0; i < nfft/2; ++i) {
+ double phase =
+ -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5);
+ if (inverse_fft)
+ phase *= -1;
+ kf_cexp (st->super_twiddles+i,phase);
+ }
+ return st;
+}
+
+void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
+{
+ /* input buffer timedata is stored row-wise */
+ int k,ncfft;
+ kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
+
+ if ( st->substate->inverse) {
+ KISS_FFT_ERROR("kiss fft usage error: improper alloc");
+ return;/* The caller did not call the correct function */
+ }
+
+ ncfft = st->substate->nfft;
+
+ /*perform the parallel fft of two real signals packed in real,imag*/
+ kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
+ /* The real part of the DC element of the frequency spectrum in st->tmpbuf
+ * contains the sum of the even-numbered elements of the input time sequence
+ * The imag part is the sum of the odd-numbered elements
+ *
+ * The sum of tdc.r and tdc.i is the sum of the input time sequence.
+ * yielding DC of input time sequence
+ * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
+ * yielding Nyquist bin of input time sequence
+ */
+
+ tdc.r = st->tmpbuf[0].r;
+ tdc.i = st->tmpbuf[0].i;
+ C_FIXDIV(tdc,2);
+ CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
+ CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
+ freqdata[0].r = tdc.r + tdc.i;
+ freqdata[ncfft].r = tdc.r - tdc.i;
+#ifdef USE_SIMD
+ freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
+#else
+ freqdata[ncfft].i = freqdata[0].i = 0;
+#endif
+
+ for ( k=1;k <= ncfft/2 ; ++k ) {
+ fpk = st->tmpbuf[k];
+ fpnk.r = st->tmpbuf[ncfft-k].r;
+ fpnk.i = - st->tmpbuf[ncfft-k].i;
+ C_FIXDIV(fpk,2);
+ C_FIXDIV(fpnk,2);
+
+ C_ADD( f1k, fpk , fpnk );
+ C_SUB( f2k, fpk , fpnk );
+ C_MUL( tw , f2k , st->super_twiddles[k-1]);
+
+ freqdata[k].r = HALF_OF(f1k.r + tw.r);
+ freqdata[k].i = HALF_OF(f1k.i + tw.i);
+ freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
+ freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
+ }
+}
+
+void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
+{
+ /* input buffer timedata is stored row-wise */
+ int k, ncfft;
+
+ if (st->substate->inverse == 0) {
+ KISS_FFT_ERROR("kiss fft usage error: improper alloc");
+ return;/* The caller did not call the correct function */
+ }
+
+ ncfft = st->substate->nfft;
+
+ st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
+ st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
+ C_FIXDIV(st->tmpbuf[0],2);
+
+ for (k = 1; k <= ncfft / 2; ++k) {
+ kiss_fft_cpx fk, fnkc, fek, fok, tmp;
+ fk = freqdata[k];
+ fnkc.r = freqdata[ncfft - k].r;
+ fnkc.i = -freqdata[ncfft - k].i;
+ C_FIXDIV( fk , 2 );
+ C_FIXDIV( fnkc , 2 );
+
+ C_ADD (fek, fk, fnkc);
+ C_SUB (tmp, fk, fnkc);
+ C_MUL (fok, tmp, st->super_twiddles[k-1]);
+ C_ADD (st->tmpbuf[k], fek, fok);
+ C_SUB (st->tmpbuf[ncfft - k], fek, fok);
+#ifdef USE_SIMD
+ st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
+#else
+ st->tmpbuf[ncfft - k].i *= -1;
+#endif
+ }
+ kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
+}
diff --git a/kiss/kiss_fftr.h b/kiss/kiss_fftr.h
new file mode 100644
index 0000000..7fd73d2
--- /dev/null
+++ b/kiss/kiss_fftr.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+
+#ifndef KISS_FTR_H
+#define KISS_FTR_H
+
+#include "kiss_fft.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+
+ Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
+
+
+
+ */
+
+typedef struct kiss_fftr_state *kiss_fftr_cfg;
+
+
+kiss_fftr_cfg KISS_FFT_API kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
+/*
+ nfft must be even
+
+ If you don't care to allocate space, use mem = lenmem = NULL
+*/
+
+
+void KISS_FFT_API kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata);
+/*
+ input timedata has nfft scalar points
+ output freqdata has nfft/2+1 complex points
+*/
+
+void KISS_FFT_API kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata);
+/*
+ input freqdata has nfft/2+1 complex points
+ output timedata has nfft scalar points
+*/
+
+#define kiss_fftr_free KISS_FFT_FREE
+
+#ifdef __cplusplus
+}
+#endif
+#endif