diff options
Diffstat (limited to 'kiss')
-rw-r--r-- | kiss/CHANGELOG | 123 | ||||
-rw-r--r-- | kiss/COPYING | 11 | ||||
-rw-r--r-- | kiss/README.md | 245 | ||||
-rw-r--r-- | kiss/_kiss_fft_guts.h | 167 | ||||
-rw-r--r-- | kiss/kfc.c | 109 | ||||
-rw-r--r-- | kiss/kfc.h | 54 | ||||
-rw-r--r-- | kiss/kiss_fft.c | 420 | ||||
-rw-r--r-- | kiss/kiss_fft.h | 160 | ||||
-rw-r--r-- | kiss/kiss_fft_log.h | 36 | ||||
-rw-r--r-- | kiss/kiss_fftnd.c | 188 | ||||
-rw-r--r-- | kiss/kiss_fftnd.h | 26 | ||||
-rw-r--r-- | kiss/kiss_fftndr.c | 120 | ||||
-rw-r--r-- | kiss/kiss_fftndr.h | 55 | ||||
-rw-r--r-- | kiss/kiss_fftr.c | 155 | ||||
-rw-r--r-- | kiss/kiss_fftr.h | 54 |
15 files changed, 1923 insertions, 0 deletions
diff --git a/kiss/CHANGELOG b/kiss/CHANGELOG new file mode 100644 index 0000000..2dd3603 --- /dev/null +++ b/kiss/CHANGELOG @@ -0,0 +1,123 @@ +1.3.0 2012-07-18 + removed non-standard malloc.h from kiss_fft.h + + moved -lm to end of link line + + checked various return values + + converted python Numeric code to NumPy + + fixed test of int32_t on 64 bit OS + + added padding in a couple of places to allow SIMD alignment of structs + +1.2.9 2010-05-27 + threadsafe ( including OpenMP ) + + first edition of kissfft.hh the C++ template fft engine + +1.2.8 + Changed memory.h to string.h -- apparently more standard + + Added openmp extensions. This can have fairly linear speedups for larger FFT sizes. + +1.2.7 + Shrank the real-fft memory footprint. Thanks to Galen Seitz. + +1.2.6 (Nov 14, 2006) The "thanks to GenArts" release. + Added multi-dimensional real-optimized FFT, see tools/kiss_fftndr + Thanks go to GenArts, Inc. for sponsoring the development. + +1.2.5 (June 27, 2006) The "release for no good reason" release. + Changed some harmless code to make some compilers' warnings go away. + Added some more digits to pi -- why not. + Added kiss_fft_next_fast_size() function to help people decide how much to pad. + Changed multidimensional test from 8 dimensions to only 3 to avoid testing + problems with fixed point (sorry Buckaroo Banzai). + +1.2.4 (Oct 27, 2005) The "oops, inverse fixed point real fft was borked" release. + Fixed scaling bug for inverse fixed point real fft -- also fixed test code that should've been failing. + Thanks to Jean-Marc Valin for bug report. + + Use sys/types.h for more portable types than short,int,long => int16_t,int32_t,int64_t + If your system does not have these, you may need to define them -- but at least it breaks in a + loud and easily fixable way -- unlike silently using the wrong size type. + + Hopefully tools/psdpng.c is fixed -- thanks to Steve Kellog for pointing out the weirdness. + +1.2.3 (June 25, 2005) The "you want to use WHAT as a sample" release. + Added ability to use 32 bit fixed point samples -- requires a 64 bit intermediate result, a la 'long long' + + Added ability to do 4 FFTs in parallel by using SSE SIMD instructions. This is accomplished by + using the __m128 (vector of 4 floats) as kiss_fft_scalar. Define USE_SIMD to use this. + + I know, I know ... this is drifting a bit from the "kiss" principle, but the speed advantages + make it worth it for some. Also recent gcc makes it SOO easy to use vectors of 4 floats like a POD type. + +1.2.2 (May 6, 2005) The Matthew release + Replaced fixed point division with multiply&shift. Thanks to Jean-Marc Valin for + discussions regarding. Considerable speedup for fixed-point. + + Corrected overflow protection in real fft routines when using fixed point. + Finder's Credit goes to Robert Oschler of robodance for pointing me at the bug. + This also led to the CHECK_OVERFLOW_OP macro. + +1.2.1 (April 4, 2004) + compiles cleanly with just about every -W warning flag under the sun + + reorganized kiss_fft_state so it could be read-only/const. This may be useful for embedded systems + that are willing to predeclare twiddle factors, factorization. + + Fixed C_MUL,S_MUL on 16-bit platforms. + + tmpbuf will only be allocated if input & output buffers are same + scratchbuf will only be allocated for ffts that are not multiples of 2,3,5 + + NOTE: The tmpbuf,scratchbuf changes may require synchronization code for multi-threaded apps. + + +1.2 (Feb 23, 2004) + interface change -- cfg object is forward declaration of struct instead of void* + This maintains type saftey and lets the compiler warn/error about stupid mistakes. + (prompted by suggestion from Erik de Castro Lopo) + + small speed improvements + + added psdpng.c -- sample utility that will create png spectrum "waterfalls" from an input file + ( not terribly useful yet) + +1.1.1 (Feb 1, 2004 ) + minor bug fix -- only affects odd rank, in-place, multi-dimensional FFTs + +1.1 : (Jan 30,2004) + split sample_code/ into test/ and tools/ + + Removed 2-D fft and added N-D fft (arbitrary) + + modified fftutil.c to allow multi-d FFTs + + Modified core fft routine to allow an input stride via kiss_fft_stride() + (eased support of multi-D ffts) + + Added fast convolution filtering (FIR filtering using overlap-scrap method, with tail scrap) + + Add kfc.[ch]: the KISS FFT Cache. It takes care of allocs for you ( suggested by Oscar Lesta ). + +1.0.1 (Dec 15, 2003) + fixed bug that occurred when nfft==1. Thanks to Steven Johnson. + +1.0 : (Dec 14, 2003) + changed kiss_fft function from using a single buffer, to two buffers. + If the same buffer pointer is supplied for both in and out, kiss will + manage the buffer copies. + + added kiss_fft2d and kiss_fftr as separate source files (declarations in kiss_fft.h ) + +0.4 :(Nov 4,2003) optimized for radix 2,3,4,5 + +0.3 :(Oct 28, 2003) woops, version 2 didn't actually factor out any radices other than 2. + Thanks to Steven Johnson for finding this one. + +0.2 :(Oct 27, 2003) added mixed radix, only radix 2,4 optimized versions + +0.1 :(May 19 2003) initial release, radix 2 only diff --git a/kiss/COPYING b/kiss/COPYING new file mode 100644 index 0000000..6b4b622 --- /dev/null +++ b/kiss/COPYING @@ -0,0 +1,11 @@ +Copyright (c) 2003-2010 Mark Borgerding . All rights reserved. + +KISS FFT is provided under: + + SPDX-License-Identifier: BSD-3-Clause + +Being under the terms of the BSD 3-clause "New" or "Revised" License, +according with: + + LICENSES/BSD-3-Clause + diff --git a/kiss/README.md b/kiss/README.md new file mode 100644 index 0000000..1138a0c --- /dev/null +++ b/kiss/README.md @@ -0,0 +1,245 @@ +# KISS FFT [![Build Status](https://travis-ci.com/mborgerding/kissfft.svg?branch=master)](https://travis-ci.com/mborgerding/kissfft) + +KISS FFT - A mixed-radix Fast Fourier Transform based up on the principle, +"Keep It Simple, Stupid." + +There are many great fft libraries already around. Kiss FFT is not trying +to be better than any of them. It only attempts to be a reasonably efficient, +moderately useful FFT that can use fixed or floating data types and can be +incorporated into someone's C program in a few minutes with trivial licensing. + +## USAGE: + +The basic usage for 1-d complex FFT is: + +```c + #include "kiss_fft.h" + kiss_fft_cfg cfg = kiss_fft_alloc( nfft ,is_inverse_fft ,0,0 ); + while ... + + ... // put kth sample in cx_in[k].r and cx_in[k].i + + kiss_fft( cfg , cx_in , cx_out ); + + ... // transformed. DC is in cx_out[0].r and cx_out[0].i + + kiss_fft_free(cfg); +``` + - **Note**: frequency-domain data is stored from dc up to 2pi. + so cx_out[0] is the dc bin of the FFT + and cx_out[nfft/2] is the Nyquist bin (if exists) + +Declarations are in "kiss_fft.h", along with a brief description of the +functions you'll need to use. + +Code definitions for 1d complex FFTs are in kiss_fft.c. + +You can do other cool stuff with the extras you'll find in tools/ +> - multi-dimensional FFTs +> - real-optimized FFTs (returns the positive half-spectrum: + (nfft/2+1) complex frequency bins) +> - fast convolution FIR filtering (not available for fixed point) +> - spectrum image creation + +The core fft and most tools/ code can be compiled to use float, double, + Q15 short or Q31 samples. The default is float. + +## BUILDING: + +There are two functionally-equivalent build systems supported by kissfft: + + - Make (traditional Makefiles for Unix / Linux systems) + - CMake (more modern and feature-rich build system developed by Kitware) + +To build kissfft, the following build environment can be used: + + - GNU build environment with GCC, Clang and GNU Make or CMake (>= 3.6) + - Microsoft Visual C++ (MSVC) with CMake (>= 3.6) + +Additional libraries required to build and test kissfft include: + + - libpng for psdpng tool, + - libfftw3 to validate kissfft results against it, + - python 2/3 with Numpy to validate kissfft results against it. + - OpenMP supported by GCC, Clang or MSVC for multi-core FFT transformations + +Environments like Cygwin and MinGW can be highly likely used to build kissfft +targeting Windows platform, but no tests were performed to the date. + +Both Make and CMake builds are easily configurable: + + - `KISSFFT_DATATYPE=<datatype>` (for Make) or `-DKISSFFT_DATATYPE=<datatype>` + (for CMake) denote the principal datatype used by kissfft. It can be one + of the following: + + - float (default) + - double + - int16_t + - int32_t + - SIMD (requires SSE instruction set support on target CPU) + + - `KISSFFT_OPENMP=1` (for Make) or `-DKISSFFT_OPENMP=ON` (for CMake) builds kissfft + with OpenMP support. Please note that a supported compiler is required and this + option is turned off by default. + + - `KISSFFT_STATIC=1` (for Make) or `-DKISSFFT_STATIC=ON` (for CMake) instructs + the builder to create static library ('.lib' for Windows / '.a' for Unix or Linux). + By default, this option is turned off and the shared library is created + ('.dll' for Windows, '.so' for Linux or Unix, '.dylib' for Mac OSX) + + - `-DKISSFFT_TEST=OFF` (for CMake) disables building tests for kissfft. On Make, + building tests is done separately by 'make testall' or 'make testsingle', so + no specific setting is required. + + - `KISSFFT_TOOLS=0` (for Make) or `-DKISSFFT_TOOLS=OFF` (for CMake) builds kissfft + without command-line tools like 'fastconv'. By default the tools are built. + + - `KISSFFT_USE_ALLOCA=1` (for Make) or `-DKISSFFT_USE_ALLOCA=ON` (for CMake) + build kissfft with 'alloca' usage instead of 'malloc' / 'free'. + + - `PREFIX=/full/path/to/installation/prefix/directory` (for Make) or + `-DCMAKE_INSTALL_PREFIX=/full/path/to/installation/prefix/directory` (for CMake) + specifies the prefix directory to install kissfft into. + +For example, to build kissfft as a static library with 'int16_t' datatype and +OpenMP support using Make, run the command from kissfft source tree: + +``` +make KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 all +``` + +The same configuration for CMake is: + +``` +mkdir build && cd build +cmake -DKISSFFT_DATATYPE=int16_t -DKISSFFT_STATIC=ON -DKISSFFT_OPENMP=ON .. +make all +``` + +To specify '/tmp/1234' as installation prefix directory, run: + + +``` +make PREFIX=/tmp/1234 KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 install +``` + +or + +``` +mkdir build && cd build +cmake -DCMAKE_INSTALL_PREFIX=/tmp/1234 -DKISSFFT_DATATYPE=int16_t -DKISSFFT_STATIC=ON -DKISSFFT_OPENMP=ON .. +make all +make install +``` + +## TESTING: + +To validate the build configured as an example above, run the following command from +kissfft source tree: + +``` +make KISSFFT_DATATYPE=int16_t KISSFFT_STATIC=1 KISSFFT_OPENMP=1 testsingle +``` + +if using Make, or: + +``` +make test +``` + +if using CMake. + +To test all possible build configurations, please run an extended testsuite from +kissfft source tree: + +``` +sh test/kissfft-testsuite.sh +``` + +Please note that the extended testsuite takes around 20-40 minutes depending on device +it runs on. This testsuite is useful for reporting bugs or testing the pull requests. + +## BACKGROUND + +I started coding this because I couldn't find a fixed point FFT that didn't +use assembly code. I started with floating point numbers so I could get the +theory straight before working on fixed point issues. In the end, I had a +little bit of code that could be recompiled easily to do ffts with short, float +or double (other types should be easy too). + +Once I got my FFT working, I was curious about the speed compared to +a well respected and highly optimized fft library. I don't want to criticize +this great library, so let's call it FFT_BRANDX. +During this process, I learned: + +> 1. FFT_BRANDX has more than 100K lines of code. The core of kiss_fft is about 500 lines (cpx 1-d). +> 2. It took me an embarrassingly long time to get FFT_BRANDX working. +> 3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB (without optimizing for size). +> 4. FFT_BRANDX is roughly twice as fast as KISS FFT in default mode. + +It is wonderful that free, highly optimized libraries like FFT_BRANDX exist. +But such libraries carry a huge burden of complexity necessary to extract every +last bit of performance. + +**Sometimes simpler is better, even if it's not better.** + +## FREQUENTLY ASKED QUESTIONS: +> Q: Can I use kissfft in a project with a ___ license?</br> +> A: Yes. See LICENSE below. + +> Q: Why don't I get the output I expect?</br> +> A: The two most common causes of this are +> 1) scaling : is there a constant multiplier between what you got and what you want? +> 2) mixed build environment -- all code must be compiled with same preprocessor +> definitions for FIXED_POINT and kiss_fft_scalar + +> Q: Will you write/debug my code for me?</br> +> A: Probably not unless you pay me. I am happy to answer pointed and topical questions, but +> I may refer you to a book, a forum, or some other resource. + + +## PERFORMANCE + (on Athlon XP 2100+, with gcc 2.96, float data type) + +Kiss performed 10000 1024-pt cpx ffts in .63 s of cpu time. +For comparison, it took md5sum twice as long to process the same amount of data. +Transforming 5 minutes of CD quality audio takes less than a second (nfft=1024). + +**DO NOT:** +- use Kiss if you need the Fastest Fourier Transform in the World +- ask me to add features that will bloat the code + +## UNDER THE HOOD + +Kiss FFT uses a time decimation, mixed-radix, out-of-place FFT. If you give it an input buffer +and output buffer that are the same, a temporary buffer will be created to hold the data. + +No static data is used. The core routines of kiss_fft are thread-safe (but not all of the tools directory).[ + +No scaling is done for the floating point version (for speed). +Scaling is done both ways for the fixed-point version (for overflow prevention). + +Optimized butterflies are used for factors 2,3,4, and 5. + +The real (i.e. not complex) optimization code only works for even length ffts. It does two half-length +FFTs in parallel (packed into real&imag), and then combines them via twiddling. The result is +nfft/2+1 complex frequency bins from DC to Nyquist. If you don't know what this means, search the web. + +The fast convolution filtering uses the overlap-scrap method, slightly +modified to put the scrap at the tail. + +## LICENSE + Revised BSD License, see COPYING for verbiage. + Basically, "free to use&change, give credit where due, no guarantees" + Note this license is compatible with GPL at one end of the spectrum and closed, commercial software at + the other end. See http://www.fsf.org/licensing/licenses + +## TODO + - Add real optimization for odd length FFTs + - Document/revisit the input/output fft scaling + - Make doc describing the overlap (tail) scrap fast convolution filtering in kiss_fastfir.c + - Test all the ./tools/ code with fixed point (kiss_fastfir.c doesn't work, maybe others) + +## AUTHOR + Mark Borgerding + Mark@Borgerding.net diff --git a/kiss/_kiss_fft_guts.h b/kiss/_kiss_fft_guts.h new file mode 100644 index 0000000..4bd8d1c --- /dev/null +++ b/kiss/_kiss_fft_guts.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +/* kiss_fft.h + defines kiss_fft_scalar as either short or a float type + and defines + typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ + +#ifndef _kiss_fft_guts_h +#define _kiss_fft_guts_h + +#include "kiss_fft.h" +#include "kiss_fft_log.h" +#include <limits.h> + +#define MAXFACTORS 32 +/* e.g. an fft of length 128 has 4 factors + as far as kissfft is concerned + 4*4*4*2 + */ + +struct kiss_fft_state{ + int nfft; + int inverse; + int factors[2*MAXFACTORS]; + kiss_fft_cpx twiddles[1]; +}; + +/* + Explanation of macros dealing with complex math: + + C_MUL(m,a,b) : m = a*b + C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise + C_SUB( res, a,b) : res = a - b + C_SUBFROM( res , a) : res -= a + C_ADDTO( res , a) : res += a + * */ +#ifdef FIXED_POINT +#include <stdint.h> +#if (FIXED_POINT==32) +# define FRACBITS 31 +# define SAMPPROD int64_t +#define SAMP_MAX INT32_MAX +#define SAMP_MIN INT32_MIN +#else +# define FRACBITS 15 +# define SAMPPROD int32_t +#define SAMP_MAX INT16_MAX +#define SAMP_MIN INT16_MIN +#endif + +#if defined(CHECK_OVERFLOW) +# define CHECK_OVERFLOW_OP(a,op,b) \ + if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \ + KISS_FFT_WARNING("overflow (%d " #op" %d) = %ld", (a),(b),(SAMPPROD)(a) op (SAMPPROD)(b)); } +#endif + + +# define smul(a,b) ( (SAMPPROD)(a)*(b) ) +# define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS ) + +# define S_MUL(a,b) sround( smul(a,b) ) + +# define C_MUL(m,a,b) \ + do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \ + (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0) + +# define DIVSCALAR(x,k) \ + (x) = sround( smul( x, SAMP_MAX/k ) ) + +# define C_FIXDIV(c,div) \ + do { DIVSCALAR( (c).r , div); \ + DIVSCALAR( (c).i , div); }while (0) + +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r = sround( smul( (c).r , s ) ) ;\ + (c).i = sround( smul( (c).i , s ) ) ; }while(0) + +#else /* not FIXED_POINT*/ + +# define S_MUL(a,b) ( (a)*(b) ) +#define C_MUL(m,a,b) \ + do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ + (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) +# define C_FIXDIV(c,div) /* NOOP */ +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r *= (s);\ + (c).i *= (s); }while(0) +#endif + +#ifndef CHECK_OVERFLOW_OP +# define CHECK_OVERFLOW_OP(a,op,b) /* noop */ +#endif + +#define C_ADD( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,+,(b).r)\ + CHECK_OVERFLOW_OP((a).i,+,(b).i)\ + (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ + }while(0) +#define C_SUB( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,-,(b).r)\ + CHECK_OVERFLOW_OP((a).i,-,(b).i)\ + (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ + }while(0) +#define C_ADDTO( res , a)\ + do { \ + CHECK_OVERFLOW_OP((res).r,+,(a).r)\ + CHECK_OVERFLOW_OP((res).i,+,(a).i)\ + (res).r += (a).r; (res).i += (a).i;\ + }while(0) + +#define C_SUBFROM( res , a)\ + do {\ + CHECK_OVERFLOW_OP((res).r,-,(a).r)\ + CHECK_OVERFLOW_OP((res).i,-,(a).i)\ + (res).r -= (a).r; (res).i -= (a).i; \ + }while(0) + + +#ifdef FIXED_POINT +# define KISS_FFT_COS(phase) floor(.5+SAMP_MAX * cos (phase)) +# define KISS_FFT_SIN(phase) floor(.5+SAMP_MAX * sin (phase)) +# define HALF_OF(x) ((x)>>1) +#elif defined(USE_SIMD) +# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) +# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) +# define HALF_OF(x) ((x)*_mm_set1_ps(.5)) +#else +# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) +# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) +# define HALF_OF(x) ((x)*((kiss_fft_scalar).5)) +#endif + +#define kf_cexp(x,phase) \ + do{ \ + (x)->r = KISS_FFT_COS(phase);\ + (x)->i = KISS_FFT_SIN(phase);\ + }while(0) + + +/* a debugging function */ +#define pcpx(c)\ + KISS_FFT_DEBUG("%g + %gi\n",(double)((c)->r),(double)((c)->i)) + + +#ifdef KISS_FFT_USE_ALLOCA +// define this to allow use of alloca instead of malloc for temporary buffers +// Temporary buffers are used in two case: +// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5 +// 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform. +#include <alloca.h> +#define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes) +#define KISS_FFT_TMP_FREE(ptr) +#else +#define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes) +#define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr) +#endif + +#endif /* _kiss_fft_guts_h */ + diff --git a/kiss/kfc.c b/kiss/kfc.c new file mode 100644 index 0000000..a405d9b --- /dev/null +++ b/kiss/kfc.c @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#include "kfc.h" + +typedef struct cached_fft *kfc_cfg; + +struct cached_fft +{ + int nfft; + int inverse; + kiss_fft_cfg cfg; + kfc_cfg next; +}; + +static kfc_cfg cache_root=NULL; +static int ncached=0; + +static kiss_fft_cfg find_cached_fft(int nfft,int inverse) +{ + size_t len; + kfc_cfg cur=cache_root; + kfc_cfg prev=NULL; + while ( cur ) { + if ( cur->nfft == nfft && inverse == cur->inverse ) + break;/*found the right node*/ + prev = cur; + cur = prev->next; + } + if (cur== NULL) { + /* no cached node found, need to create a new one*/ + kiss_fft_alloc(nfft,inverse,0,&len); +#ifdef USE_SIMD + int padding = (16-sizeof(struct cached_fft)) & 15; + // make sure the cfg aligns on a 16 byte boundary + len += padding; +#endif + cur = (kfc_cfg)KISS_FFT_MALLOC((sizeof(struct cached_fft) + len )); + if (cur == NULL) + return NULL; + cur->cfg = (kiss_fft_cfg)(cur+1); +#ifdef USE_SIMD + cur->cfg = (kiss_fft_cfg) ((char*)(cur+1)+padding); +#endif + kiss_fft_alloc(nfft,inverse,cur->cfg,&len); + cur->nfft=nfft; + cur->inverse=inverse; + cur->next = NULL; + if ( prev ) + prev->next = cur; + else + cache_root = cur; + ++ncached; + } + return cur->cfg; +} + +void kfc_cleanup(void) +{ + kfc_cfg cur=cache_root; + kfc_cfg next=NULL; + while (cur){ + next = cur->next; + free(cur); + cur=next; + } + ncached=0; + cache_root = NULL; +} +void kfc_fft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout) +{ + kiss_fft( find_cached_fft(nfft,0),fin,fout ); +} + +void kfc_ifft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout) +{ + kiss_fft( find_cached_fft(nfft,1),fin,fout ); +} + +#ifdef KFC_TEST +static void check(int nc) +{ + if (ncached != nc) { + fprintf(stderr,"ncached should be %d,but it is %d\n",nc,ncached); + exit(1); + } +} + +int main(void) +{ + kiss_fft_cpx buf1[1024],buf2[1024]; + memset(buf1,0,sizeof(buf1)); + check(0); + kfc_fft(512,buf1,buf2); + check(1); + kfc_fft(512,buf1,buf2); + check(1); + kfc_ifft(512,buf1,buf2); + check(2); + kfc_cleanup(); + check(0); + return 0; +} +#endif diff --git a/kiss/kfc.h b/kiss/kfc.h new file mode 100644 index 0000000..d7d8c1b --- /dev/null +++ b/kiss/kfc.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KFC_H +#define KFC_H +#include "kiss_fft.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* +KFC -- Kiss FFT Cache + +Not needing to deal with kiss_fft_alloc and a config +object may be handy for a lot of programs. + +KFC uses the underlying KISS FFT functions, but caches the config object. +The first time kfc_fft or kfc_ifft for a given FFT size, the cfg +object is created for it. All subsequent calls use the cached +configuration object. + +NOTE: +You should probably not use this if your program will be using a lot +of various sizes of FFTs. There is a linear search through the +cached objects. If you are only using one or two FFT sizes, this +will be negligible. Otherwise, you may want to use another method +of managing the cfg objects. + + There is no automated cleanup of the cached objects. This could lead +to large memory usage in a program that uses a lot of *DIFFERENT* +sized FFTs. If you want to force all cached cfg objects to be freed, +call kfc_cleanup. + + */ + +/*forward complex FFT */ +void KISS_FFT_API kfc_fft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout); +/*reverse complex FFT */ +void KISS_FFT_API kfc_ifft(int nfft, const kiss_fft_cpx * fin,kiss_fft_cpx * fout); + +/*free all cached objects*/ +void KISS_FFT_API kfc_cleanup(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/kiss/kiss_fft.c b/kiss/kiss_fft.c new file mode 100644 index 0000000..58c24a0 --- /dev/null +++ b/kiss/kiss_fft.c @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + + +#include "_kiss_fft_guts.h" +/* The guts header contains all the multiplication and addition macros that are defined for + fixed or floating point complex numbers. It also delares the kf_ internal functions. + */ + +static void kf_bfly2( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx * Fout2; + kiss_fft_cpx * tw1 = st->twiddles; + kiss_fft_cpx t; + Fout2 = Fout + m; + do{ + C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2); + + C_MUL (t, *Fout2 , *tw1); + tw1 += fstride; + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + }while (--m); +} + +static void kf_bfly4( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + const size_t m + ) +{ + kiss_fft_cpx *tw1,*tw2,*tw3; + kiss_fft_cpx scratch[6]; + size_t k=m; + const size_t m2=2*m; + const size_t m3=3*m; + + + tw3 = tw2 = tw1 = st->twiddles; + + do { + C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4); + + C_MUL(scratch[0],Fout[m] , *tw1 ); + C_MUL(scratch[1],Fout[m2] , *tw2 ); + C_MUL(scratch[2],Fout[m3] , *tw3 ); + + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + if(st->inverse) { + Fout[m].r = scratch[5].r - scratch[4].i; + Fout[m].i = scratch[5].i + scratch[4].r; + Fout[m3].r = scratch[5].r + scratch[4].i; + Fout[m3].i = scratch[5].i - scratch[4].r; + }else{ + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; + } + ++Fout; + }while(--k); +} + +static void kf_bfly3( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + size_t m + ) +{ + size_t k=m; + const size_t m2 = 2*m; + kiss_fft_cpx *tw1,*tw2; + kiss_fft_cpx scratch[5]; + kiss_fft_cpx epi3; + epi3 = st->twiddles[fstride*m]; + + tw1=tw2=st->twiddles; + + do{ + C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); + + C_MUL(scratch[1],Fout[m] , *tw1); + C_MUL(scratch[2],Fout[m2] , *tw2); + + C_ADD(scratch[3],scratch[1],scratch[2]); + C_SUB(scratch[0],scratch[1],scratch[2]); + tw1 += fstride; + tw2 += fstride*2; + + Fout[m].r = Fout->r - HALF_OF(scratch[3].r); + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); + + C_MULBYSCALAR( scratch[0] , epi3.i ); + + C_ADDTO(*Fout,scratch[3]); + + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; + + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; + + ++Fout; + }while(--k); +} + +static void kf_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int u; + kiss_fft_cpx scratch[13]; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx *tw; + kiss_fft_cpx ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=st->twiddles; + for ( u=0; u<m; ++u ) { + C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); + scratch[0] = *Fout0; + + C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); + C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); + C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); + C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); + + C_ADD( scratch[7],scratch[1],scratch[4]); + C_SUB( scratch[10],scratch[1],scratch[4]); + C_ADD( scratch[8],scratch[2],scratch[3]); + C_SUB( scratch[9],scratch[2],scratch[3]); + + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); + + scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); + scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); + scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } +} + +/* perform the butterfly for one stage of a mixed radix FFT */ +static void kf_bfly_generic( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m, + int p + ) +{ + int u,k,q1,q; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx t; + int Norig = st->nfft; + + kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p); + if (scratch == NULL){ + KISS_FFT_ERROR("Memory allocation failed."); + return; + } + + for ( u=0; u<m; ++u ) { + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + scratch[q1] = Fout[ k ]; + C_FIXDIV(scratch[q1],p); + k += m; + } + + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + int twidx=0; + Fout[ k ] = scratch[0]; + for (q=1;q<p;++q ) { + twidx += fstride * k; + if (twidx>=Norig) twidx-=Norig; + C_MUL(t,scratch[q] , twiddles[twidx] ); + C_ADDTO( Fout[ k ] ,t); + } + k += m; + } + } + KISS_FFT_TMP_FREE(scratch); +} + +static +void kf_work( + kiss_fft_cpx * Fout, + const kiss_fft_cpx * f, + const size_t fstride, + int in_stride, + int * factors, + const kiss_fft_cfg st + ) +{ + kiss_fft_cpx * Fout_beg=Fout; + const int p=*factors++; /* the radix */ + const int m=*factors++; /* stage's fft length/p */ + const kiss_fft_cpx * Fout_end = Fout + p*m; + +#ifdef _OPENMP + // use openmp extensions at the + // top-level (not recursive) + if (fstride==1 && p<=5 && m!=1) + { + int k; + + // execute the p different work units in different threads +# pragma omp parallel for + for (k=0;k<p;++k) + kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st); + // all threads have joined by this point + + switch (p) { + case 2: kf_bfly2(Fout,fstride,st,m); break; + case 3: kf_bfly3(Fout,fstride,st,m); break; + case 4: kf_bfly4(Fout,fstride,st,m); break; + case 5: kf_bfly5(Fout,fstride,st,m); break; + default: kf_bfly_generic(Fout,fstride,st,m,p); break; + } + return; + } +#endif + + if (m==1) { + do{ + *Fout = *f; + f += fstride*in_stride; + }while(++Fout != Fout_end ); + }else{ + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + kf_work( Fout , f, fstride*p, in_stride, factors,st); + f += fstride*in_stride; + }while( (Fout += m) != Fout_end ); + } + + Fout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: kf_bfly2(Fout,fstride,st,m); break; + case 3: kf_bfly3(Fout,fstride,st,m); break; + case 4: kf_bfly4(Fout,fstride,st,m); break; + case 5: kf_bfly5(Fout,fstride,st,m); break; + default: kf_bfly_generic(Fout,fstride,st,m,p); break; + } +} + +/* facbuf is populated by p1,m1,p2,m2, ... + where + p[i] * m[i] = m[i-1] + m0 = n */ +static +void kf_factor(int n,int * facbuf) +{ + int p=4; + double floor_sqrt; + floor_sqrt = floor( sqrt((double)n) ); + + /*factor out powers of 4, powers of 2, then any remaining primes */ + do { + while (n % p) { + switch (p) { + case 4: p = 2; break; + case 2: p = 3; break; + default: p += 2; break; + } + if (p > floor_sqrt) + p = n; /* no more factors, skip to end */ + } + n /= p; + *facbuf++ = p; + *facbuf++ = n; + } while (n > 1); +} + +/* + * + * User-callable function to allocate all necessary storage space for the fft. + * + * The return value is a contiguous block of memory, allocated with malloc. As such, + * It can be freed with free(), rather than a kiss_fft-specific function. + * */ +kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) +{ + KISS_FFT_ALIGN_CHECK(mem) + + kiss_fft_cfg st=NULL; + size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fft_state) + + sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/ + + if ( lenmem==NULL ) { + st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded ); + }else{ + if (mem != NULL && *lenmem >= memneeded) + st = (kiss_fft_cfg)mem; + *lenmem = memneeded; + } + if (st) { + int i; + st->nfft=nfft; + st->inverse = inverse_fft; + + for (i=0;i<nfft;++i) { + const double pi=3.141592653589793238462643383279502884197169399375105820974944; + double phase = -2*pi*i / nfft; + if (st->inverse) + phase *= -1; + kf_cexp(st->twiddles+i, phase ); + } + + kf_factor(nfft,st->factors); + } + return st; +} + + +void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) +{ + if (fin == fout) { + //NOTE: this is not really an in-place FFT algorithm. + //It just performs an out-of-place FFT into a temp buffer + if (fout == NULL){ + KISS_FFT_ERROR("fout buffer NULL."); + return; + } + + kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft); + if (tmpbuf == NULL){ + KISS_FFT_ERROR("Memory allocation error."); + return; + } + + + + kf_work(tmpbuf,fin,1,in_stride, st->factors,st); + memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft); + KISS_FFT_TMP_FREE(tmpbuf); + }else{ + kf_work( fout, fin, 1,in_stride, st->factors,st ); + } +} + +void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + kiss_fft_stride(cfg,fin,fout,1); +} + + +void kiss_fft_cleanup(void) +{ + // nothing needed any more +} + +int kiss_fft_next_fast_size(int n) +{ + while(1) { + int m=n; + while ( (m%2) == 0 ) m/=2; + while ( (m%3) == 0 ) m/=3; + while ( (m%5) == 0 ) m/=5; + if (m<=1) + break; /* n is completely factorable by twos, threes, and fives */ + n++; + } + return n; +} diff --git a/kiss/kiss_fft.h b/kiss/kiss_fft.h new file mode 100644 index 0000000..dce1034 --- /dev/null +++ b/kiss/kiss_fft.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISS_FFT_H +#define KISS_FFT_H + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> + +// Define KISS_FFT_SHARED macro to properly export symbols +#ifdef KISS_FFT_SHARED +# ifdef _WIN32 +# ifdef KISS_FFT_BUILD +# define KISS_FFT_API __declspec(dllexport) +# else +# define KISS_FFT_API __declspec(dllimport) +# endif +# else +# define KISS_FFT_API __attribute__ ((visibility ("default"))) +# endif +#else +# define KISS_FFT_API +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + ATTENTION! + If you would like a : + -- a utility that will handle the caching of fft objects + -- real-only (no imaginary time component ) FFT + -- a multi-dimensional FFT + -- a command-line utility to perform ffts + -- a command-line utility to perform fast-convolution filtering + + Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c + in the tools/ directory. +*/ + +/* User may override KISS_FFT_MALLOC and/or KISS_FFT_FREE. */ +#ifdef USE_SIMD +# include <xmmintrin.h> +# define kiss_fft_scalar __m128 +# ifndef KISS_FFT_MALLOC +# define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16) +# define KISS_FFT_ALIGN_CHECK(ptr) +# define KISS_FFT_ALIGN_SIZE_UP(size) ((size + 15UL) & ~0xFUL) +# endif +# ifndef KISS_FFT_FREE +# define KISS_FFT_FREE _mm_free +# endif +#else +# define KISS_FFT_ALIGN_CHECK(ptr) +# define KISS_FFT_ALIGN_SIZE_UP(size) (size) +# ifndef KISS_FFT_MALLOC +# define KISS_FFT_MALLOC malloc +# endif +# ifndef KISS_FFT_FREE +# define KISS_FFT_FREE free +# endif +#endif + + +#ifdef FIXED_POINT +#include <stdint.h> +# if (FIXED_POINT == 32) +# define kiss_fft_scalar int32_t +# else +# define kiss_fft_scalar int16_t +# endif +#else +# ifndef kiss_fft_scalar +/* default is float */ +# define kiss_fft_scalar float +# endif +#endif + +typedef struct { + kiss_fft_scalar r; + kiss_fft_scalar i; +}kiss_fft_cpx; + +typedef struct kiss_fft_state* kiss_fft_cfg; + +/* + * kiss_fft_alloc + * + * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. + * + * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL); + * + * The return value from fft_alloc is a cfg buffer used internally + * by the fft routine or NULL. + * + * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc. + * The returned value should be free()d when done to avoid memory leaks. + * + * The state can be placed in a user supplied buffer 'mem': + * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, + * then the function places the cfg in mem and the size used in *lenmem + * and returns mem. + * + * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), + * then the function returns NULL and places the minimum cfg + * buffer size in *lenmem. + * */ + +kiss_fft_cfg KISS_FFT_API kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem); + +/* + * kiss_fft(cfg,in_out_buf) + * + * Perform an FFT on a complex input buffer. + * for a forward FFT, + * fin should be f[0] , f[1] , ... ,f[nfft-1] + * fout will be F[0] , F[1] , ... ,F[nfft-1] + * Note that each element is complex and can be accessed like + f[k].r and f[k].i + * */ +void KISS_FFT_API kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); + +/* + A more generic version of the above function. It reads its input from every Nth sample. + * */ +void KISS_FFT_API kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride); + +/* If kiss_fft_alloc allocated a buffer, it is one contiguous + buffer and can be simply free()d when no longer needed*/ +#define kiss_fft_free KISS_FFT_FREE + +/* + Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up + your compiler output to call this before you exit. +*/ +void KISS_FFT_API kiss_fft_cleanup(void); + + +/* + * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) + */ +int KISS_FFT_API kiss_fft_next_fast_size(int n); + +/* for real ffts, we need an even size */ +#define kiss_fftr_next_fast_size_real(n) \ + (kiss_fft_next_fast_size( ((n)+1)>>1)<<1) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/kiss/kiss_fft_log.h b/kiss/kiss_fft_log.h new file mode 100644 index 0000000..b5b631a --- /dev/null +++ b/kiss/kiss_fft_log.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef kiss_fft_log_h +#define kiss_fft_log_h + +#define ERROR 1 +#define WARNING 2 +#define INFO 3 +#define DEBUG 4 + +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + +#if defined(NDEBUG) +# define KISS_FFT_LOG_MSG(severity, ...) ((void)0) +#else +# define KISS_FFT_LOG_MSG(severity, ...) \ + fprintf(stderr, "[" #severity "] " __FILE__ ":" TOSTRING(__LINE__) " "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n") +#endif + +#define KISS_FFT_ERROR(...) KISS_FFT_LOG_MSG(ERROR, __VA_ARGS__) +#define KISS_FFT_WARNING(...) KISS_FFT_LOG_MSG(WARNING, __VA_ARGS__) +#define KISS_FFT_INFO(...) KISS_FFT_LOG_MSG(INFO, __VA_ARGS__) +#define KISS_FFT_DEBUG(...) KISS_FFT_LOG_MSG(DEBUG, __VA_ARGS__) + + + +#endif /* kiss_fft_log_h */
\ No newline at end of file diff --git a/kiss/kiss_fftnd.c b/kiss/kiss_fftnd.c new file mode 100644 index 0000000..5d5b089 --- /dev/null +++ b/kiss/kiss_fftnd.c @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#include "kiss_fftnd.h" +#include "_kiss_fft_guts.h" + +struct kiss_fftnd_state{ + int dimprod; /* dimsum would be mighty tasty right now */ + int ndims; + int *dims; + kiss_fft_cfg *states; /* cfg states for each dimension */ + kiss_fft_cpx * tmpbuf; /*buffer capable of hold the entire input */ +}; + +kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem) +{ + KISS_FFT_ALIGN_CHECK(mem) + + kiss_fftnd_cfg st = NULL; + int i; + int dimprod=1; + size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftnd_state)); + char * ptr = NULL; + + for (i=0;i<ndims;++i) { + size_t sublen=0; + kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen); + memneeded += sublen; /* st->states[i] */ + dimprod *= dims[i]; + } + memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(int) * ndims);/* st->dims */ + memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(void*) * ndims);/* st->states */ + memneeded += KISS_FFT_ALIGN_SIZE_UP(sizeof(kiss_fft_cpx) * dimprod); /* st->tmpbuf */ + + if (lenmem == NULL) {/* allocate for the caller*/ + ptr = (char *) malloc (memneeded); + } else { /* initialize supplied buffer if big enough */ + if (*lenmem >= memneeded) + ptr = (char *) mem; + *lenmem = memneeded; /*tell caller how big struct is (or would be) */ + } + if (!ptr) + return NULL; /*malloc failed or buffer too small */ + + st = (kiss_fftnd_cfg) ptr; + st->dimprod = dimprod; + st->ndims = ndims; + ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftnd_state)); + + st->states = (kiss_fft_cfg *)ptr; + ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(void*) * ndims); + + st->dims = (int*)ptr; + ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(int) * ndims); + + st->tmpbuf = (kiss_fft_cpx*)ptr; + ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(kiss_fft_cpx) * dimprod); + + for (i=0;i<ndims;++i) { + size_t len; + st->dims[i] = dims[i]; + kiss_fft_alloc (st->dims[i], inverse_fft, NULL, &len); + st->states[i] = kiss_fft_alloc (st->dims[i], inverse_fft, ptr,&len); + ptr += len; + } + /* +Hi there! + +If you're looking at this particular code, it probably means you've got a brain-dead bounds checker +that thinks the above code overwrites the end of the array. + +It doesn't. + +-- Mark + +P.S. +The below code might give you some warm fuzzies and help convince you. + */ + if ( ptr - (char*)st != (int)memneeded ) { + fprintf(stderr, + "################################################################################\n" + "Internal error! Memory allocation miscalculation\n" + "################################################################################\n" + ); + } + return st; +} + +/* + This works by tackling one dimension at a time. + + In effect, + Each stage starts out by reshaping the matrix into a DixSi 2d matrix. + A Di-sized fft is taken of each column, transposing the matrix as it goes. + +Here's a 3-d example: +Take a 2x3x4 matrix, laid out in memory as a contiguous buffer + [ [ [ a b c d ] [ e f g h ] [ i j k l ] ] + [ [ m n o p ] [ q r s t ] [ u v w x ] ] ] + +Stage 0 ( D=2): treat the buffer as a 2x12 matrix + [ [a b ... k l] + [m n ... w x] ] + + FFT each column with size 2. + Transpose the matrix at the same time using kiss_fft_stride. + + [ [ a+m a-m ] + [ b+n b-n] + ... + [ k+w k-w ] + [ l+x l-x ] ] + + Note fft([x y]) == [x+y x-y] + +Stage 1 ( D=3) treats the buffer (the output of stage D=2) as an 3x8 matrix, + [ [ a+m a-m b+n b-n c+o c-o d+p d-p ] + [ e+q e-q f+r f-r g+s g-s h+t h-t ] + [ i+u i-u j+v j-v k+w k-w l+x l-x ] ] + + And perform FFTs (size=3) on each of the columns as above, transposing + the matrix as it goes. The output of stage 1 is + (Legend: ap = [ a+m e+q i+u ] + am = [ a-m e-q i-u ] ) + + [ [ sum(ap) fft(ap)[0] fft(ap)[1] ] + [ sum(am) fft(am)[0] fft(am)[1] ] + [ sum(bp) fft(bp)[0] fft(bp)[1] ] + [ sum(bm) fft(bm)[0] fft(bm)[1] ] + [ sum(cp) fft(cp)[0] fft(cp)[1] ] + [ sum(cm) fft(cm)[0] fft(cm)[1] ] + [ sum(dp) fft(dp)[0] fft(dp)[1] ] + [ sum(dm) fft(dm)[0] fft(dm)[1] ] ] + +Stage 2 ( D=4) treats this buffer as a 4*6 matrix, + [ [ sum(ap) fft(ap)[0] fft(ap)[1] sum(am) fft(am)[0] fft(am)[1] ] + [ sum(bp) fft(bp)[0] fft(bp)[1] sum(bm) fft(bm)[0] fft(bm)[1] ] + [ sum(cp) fft(cp)[0] fft(cp)[1] sum(cm) fft(cm)[0] fft(cm)[1] ] + [ sum(dp) fft(dp)[0] fft(dp)[1] sum(dm) fft(dm)[0] fft(dm)[1] ] ] + + Then FFTs each column, transposing as it goes. + + The resulting matrix is the 3d FFT of the 2x3x4 input matrix. + + Note as a sanity check that the first element of the final + stage's output (DC term) is + sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] ) + , i.e. the summation of all 24 input elements. + +*/ +void kiss_fftnd(kiss_fftnd_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + int i,k; + const kiss_fft_cpx * bufin=fin; + kiss_fft_cpx * bufout; + + /*arrange it so the last bufout == fout*/ + if ( st->ndims & 1 ) { + bufout = fout; + if (fin==fout) { + memcpy( st->tmpbuf, fin, sizeof(kiss_fft_cpx) * st->dimprod ); + bufin = st->tmpbuf; + } + }else + bufout = st->tmpbuf; + + for ( k=0; k < st->ndims; ++k) { + int curdim = st->dims[k]; + int stride = st->dimprod / curdim; + + for ( i=0 ; i<stride ; ++i ) + kiss_fft_stride( st->states[k], bufin+i , bufout+i*curdim, stride ); + + /*toggle back and forth between the two buffers*/ + if (bufout == st->tmpbuf){ + bufout = fout; + bufin = st->tmpbuf; + }else{ + bufout = st->tmpbuf; + bufin = fout; + } + } +} diff --git a/kiss/kiss_fftnd.h b/kiss/kiss_fftnd.h new file mode 100644 index 0000000..956ba94 --- /dev/null +++ b/kiss/kiss_fftnd.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISS_FFTND_H +#define KISS_FFTND_H + +#include "kiss_fft.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct kiss_fftnd_state * kiss_fftnd_cfg; + +kiss_fftnd_cfg KISS_FFT_API kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem); +void KISS_FFT_API kiss_fftnd(kiss_fftnd_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/kiss/kiss_fftndr.c b/kiss/kiss_fftndr.c new file mode 100644 index 0000000..e979d03 --- /dev/null +++ b/kiss/kiss_fftndr.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#include "kiss_fftndr.h" +#include "_kiss_fft_guts.h" +#define MAX(x,y) ( ( (x)<(y) )?(y):(x) ) + +struct kiss_fftndr_state +{ + int dimReal; + int dimOther; + kiss_fftr_cfg cfg_r; + kiss_fftnd_cfg cfg_nd; + void * tmpbuf; +}; + +static int prod(const int *dims, int ndims) +{ + int x=1; + while (ndims--) + x *= *dims++; + return x; +} + +kiss_fftndr_cfg kiss_fftndr_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem) +{ + KISS_FFT_ALIGN_CHECK(mem) + + kiss_fftndr_cfg st = NULL; + size_t nr=0 , nd=0,ntmp=0; + int dimReal = dims[ndims-1]; + int dimOther = prod(dims,ndims-1); + size_t memneeded; + char * ptr = NULL; + + (void)kiss_fftr_alloc(dimReal,inverse_fft,NULL,&nr); + (void)kiss_fftnd_alloc(dims,ndims-1,inverse_fft,NULL,&nd); + ntmp = + MAX( 2*dimOther , dimReal+2) * sizeof(kiss_fft_scalar) // freq buffer for one pass + + dimOther*(dimReal+2) * sizeof(kiss_fft_scalar); // large enough to hold entire input in case of in-place + + memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof( struct kiss_fftndr_state )) + KISS_FFT_ALIGN_SIZE_UP(nr) + KISS_FFT_ALIGN_SIZE_UP(nd) + KISS_FFT_ALIGN_SIZE_UP(ntmp); + + if (lenmem==NULL) { + ptr = (char*) malloc(memneeded); + }else{ + if (*lenmem >= memneeded) + ptr = (char *)mem; + *lenmem = memneeded; + } + if (ptr==NULL) + return NULL; + + st = (kiss_fftndr_cfg) ptr; + memset( st , 0 , memneeded); + ptr += KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fftndr_state)); + + st->dimReal = dimReal; + st->dimOther = dimOther; + st->cfg_r = kiss_fftr_alloc( dimReal,inverse_fft,ptr,&nr); + ptr += KISS_FFT_ALIGN_SIZE_UP(nr); + st->cfg_nd = kiss_fftnd_alloc(dims,ndims-1,inverse_fft, ptr,&nd); + ptr += KISS_FFT_ALIGN_SIZE_UP(nd); + st->tmpbuf = ptr; + + return st; +} + +void kiss_fftndr(kiss_fftndr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) +{ + int k1,k2; + int dimReal = st->dimReal; + int dimOther = st->dimOther; + int nrbins = dimReal/2+1; + + kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf; + kiss_fft_cpx * tmp2 = tmp1 + MAX(nrbins,dimOther); + + // timedata is N0 x N1 x ... x Nk real + + // take a real chunk of data, fft it and place the output at correct intervals + for (k1=0;k1<dimOther;++k1) { + kiss_fftr( st->cfg_r, timedata + k1*dimReal , tmp1 ); // tmp1 now holds nrbins complex points + for (k2=0;k2<nrbins;++k2) + tmp2[ k2*dimOther+k1 ] = tmp1[k2]; + } + + for (k2=0;k2<nrbins;++k2) { + kiss_fftnd(st->cfg_nd, tmp2+k2*dimOther, tmp1); // tmp1 now holds dimOther complex points + for (k1=0;k1<dimOther;++k1) + freqdata[ k1*(nrbins) + k2] = tmp1[k1]; + } +} + +void kiss_fftndri(kiss_fftndr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata) +{ + int k1,k2; + int dimReal = st->dimReal; + int dimOther = st->dimOther; + int nrbins = dimReal/2+1; + kiss_fft_cpx * tmp1 = (kiss_fft_cpx*)st->tmpbuf; + kiss_fft_cpx * tmp2 = tmp1 + MAX(nrbins,dimOther); + + for (k2=0;k2<nrbins;++k2) { + for (k1=0;k1<dimOther;++k1) + tmp1[k1] = freqdata[ k1*(nrbins) + k2 ]; + kiss_fftnd(st->cfg_nd, tmp1, tmp2+k2*dimOther); + } + + for (k1=0;k1<dimOther;++k1) { + for (k2=0;k2<nrbins;++k2) + tmp1[k2] = tmp2[ k2*dimOther+k1 ]; + kiss_fftri( st->cfg_r,tmp1,timedata + k1*dimReal); + } +} diff --git a/kiss/kiss_fftndr.h b/kiss/kiss_fftndr.h new file mode 100644 index 0000000..0d56a1f --- /dev/null +++ b/kiss/kiss_fftndr.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISS_NDR_H +#define KISS_NDR_H + +#include "kiss_fft.h" +#include "kiss_fftr.h" +#include "kiss_fftnd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct kiss_fftndr_state *kiss_fftndr_cfg; + + +kiss_fftndr_cfg KISS_FFT_API kiss_fftndr_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem); +/* + dims[0] must be even + + If you don't care to allocate space, use mem = lenmem = NULL +*/ + + +void KISS_FFT_API kiss_fftndr( + kiss_fftndr_cfg cfg, + const kiss_fft_scalar *timedata, + kiss_fft_cpx *freqdata); +/* + input timedata has dims[0] X dims[1] X ... X dims[ndims-1] scalar points + output freqdata has dims[0] X dims[1] X ... X dims[ndims-1]/2+1 complex points +*/ + +void KISS_FFT_API kiss_fftndri( + kiss_fftndr_cfg cfg, + const kiss_fft_cpx *freqdata, + kiss_fft_scalar *timedata); +/* + input and output dimensions are the exact opposite of kiss_fftndr +*/ + + +#define kiss_fftndr_free free + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/kiss/kiss_fftr.c b/kiss/kiss_fftr.c new file mode 100644 index 0000000..778a9a6 --- /dev/null +++ b/kiss/kiss_fftr.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#include "kiss_fftr.h" +#include "_kiss_fft_guts.h" + +struct kiss_fftr_state{ + kiss_fft_cfg substate; + kiss_fft_cpx * tmpbuf; + kiss_fft_cpx * super_twiddles; +#ifdef USE_SIMD + void * pad; +#endif +}; + +kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem) +{ + KISS_FFT_ALIGN_CHECK(mem) + + int i; + kiss_fftr_cfg st = NULL; + size_t subsize = 0, memneeded; + + if (nfft & 1) { + KISS_FFT_ERROR("Real FFT optimization must be even."); + return NULL; + } + nfft >>= 1; + + kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize); + memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2); + + if (lenmem == NULL) { + st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded); + } else { + if (*lenmem >= memneeded) + st = (kiss_fftr_cfg) mem; + *lenmem = memneeded; + } + if (!st) + return NULL; + + st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */ + st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize); + st->super_twiddles = st->tmpbuf + nfft; + kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); + + for (i = 0; i < nfft/2; ++i) { + double phase = + -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5); + if (inverse_fft) + phase *= -1; + kf_cexp (st->super_twiddles+i,phase); + } + return st; +} + +void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) +{ + /* input buffer timedata is stored row-wise */ + int k,ncfft; + kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc; + + if ( st->substate->inverse) { + KISS_FFT_ERROR("kiss fft usage error: improper alloc"); + return;/* The caller did not call the correct function */ + } + + ncfft = st->substate->nfft; + + /*perform the parallel fft of two real signals packed in real,imag*/ + kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf ); + /* The real part of the DC element of the frequency spectrum in st->tmpbuf + * contains the sum of the even-numbered elements of the input time sequence + * The imag part is the sum of the odd-numbered elements + * + * The sum of tdc.r and tdc.i is the sum of the input time sequence. + * yielding DC of input time sequence + * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... + * yielding Nyquist bin of input time sequence + */ + + tdc.r = st->tmpbuf[0].r; + tdc.i = st->tmpbuf[0].i; + C_FIXDIV(tdc,2); + CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); + CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); + freqdata[0].r = tdc.r + tdc.i; + freqdata[ncfft].r = tdc.r - tdc.i; +#ifdef USE_SIMD + freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); +#else + freqdata[ncfft].i = freqdata[0].i = 0; +#endif + + for ( k=1;k <= ncfft/2 ; ++k ) { + fpk = st->tmpbuf[k]; + fpnk.r = st->tmpbuf[ncfft-k].r; + fpnk.i = - st->tmpbuf[ncfft-k].i; + C_FIXDIV(fpk,2); + C_FIXDIV(fpnk,2); + + C_ADD( f1k, fpk , fpnk ); + C_SUB( f2k, fpk , fpnk ); + C_MUL( tw , f2k , st->super_twiddles[k-1]); + + freqdata[k].r = HALF_OF(f1k.r + tw.r); + freqdata[k].i = HALF_OF(f1k.i + tw.i); + freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); + freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); + } +} + +void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata) +{ + /* input buffer timedata is stored row-wise */ + int k, ncfft; + + if (st->substate->inverse == 0) { + KISS_FFT_ERROR("kiss fft usage error: improper alloc"); + return;/* The caller did not call the correct function */ + } + + ncfft = st->substate->nfft; + + st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r; + st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r; + C_FIXDIV(st->tmpbuf[0],2); + + for (k = 1; k <= ncfft / 2; ++k) { + kiss_fft_cpx fk, fnkc, fek, fok, tmp; + fk = freqdata[k]; + fnkc.r = freqdata[ncfft - k].r; + fnkc.i = -freqdata[ncfft - k].i; + C_FIXDIV( fk , 2 ); + C_FIXDIV( fnkc , 2 ); + + C_ADD (fek, fk, fnkc); + C_SUB (tmp, fk, fnkc); + C_MUL (fok, tmp, st->super_twiddles[k-1]); + C_ADD (st->tmpbuf[k], fek, fok); + C_SUB (st->tmpbuf[ncfft - k], fek, fok); +#ifdef USE_SIMD + st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0); +#else + st->tmpbuf[ncfft - k].i *= -1; +#endif + } + kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); +} diff --git a/kiss/kiss_fftr.h b/kiss/kiss_fftr.h new file mode 100644 index 0000000..7fd73d2 --- /dev/null +++ b/kiss/kiss_fftr.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISS_FTR_H +#define KISS_FTR_H + +#include "kiss_fft.h" +#ifdef __cplusplus +extern "C" { +#endif + + +/* + + Real optimized version can save about 45% cpu time vs. complex fft of a real seq. + + + + */ + +typedef struct kiss_fftr_state *kiss_fftr_cfg; + + +kiss_fftr_cfg KISS_FFT_API kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem); +/* + nfft must be even + + If you don't care to allocate space, use mem = lenmem = NULL +*/ + + +void KISS_FFT_API kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata); +/* + input timedata has nfft scalar points + output freqdata has nfft/2+1 complex points +*/ + +void KISS_FFT_API kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata); +/* + input freqdata has nfft/2+1 complex points + output timedata has nfft scalar points +*/ + +#define kiss_fftr_free KISS_FFT_FREE + +#ifdef __cplusplus +} +#endif +#endif |