diff options
author | Matthias P. Braendli <matthias.braendli@mpb.li> | 2024-10-06 19:47:19 +0200 |
---|---|---|
committer | Matthias P. Braendli <matthias.braendli@mpb.li> | 2024-10-06 19:47:19 +0200 |
commit | 8736f6160aeafe7a177cb6143fea80157e174e52 (patch) | |
tree | c73d39eda0db5341875b0fac34cdc89c0961c94a /kiss/kiss_fft.c | |
parent | b563b465e8b3df367da7799e789d29e0009cb96a (diff) | |
download | dabmod-8736f6160aeafe7a177cb6143fea80157e174e52.tar.gz dabmod-8736f6160aeafe7a177cb6143fea80157e174e52.tar.bz2 dabmod-8736f6160aeafe7a177cb6143fea80157e174e52.zip |
Implement fixed-point symbols, FFT and file output
Diffstat (limited to 'kiss/kiss_fft.c')
-rw-r--r-- | kiss/kiss_fft.c | 420 |
1 files changed, 420 insertions, 0 deletions
diff --git a/kiss/kiss_fft.c b/kiss/kiss_fft.c new file mode 100644 index 0000000..58c24a0 --- /dev/null +++ b/kiss/kiss_fft.c @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + + +#include "_kiss_fft_guts.h" +/* The guts header contains all the multiplication and addition macros that are defined for + fixed or floating point complex numbers. It also delares the kf_ internal functions. + */ + +static void kf_bfly2( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx * Fout2; + kiss_fft_cpx * tw1 = st->twiddles; + kiss_fft_cpx t; + Fout2 = Fout + m; + do{ + C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2); + + C_MUL (t, *Fout2 , *tw1); + tw1 += fstride; + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + }while (--m); +} + +static void kf_bfly4( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + const size_t m + ) +{ + kiss_fft_cpx *tw1,*tw2,*tw3; + kiss_fft_cpx scratch[6]; + size_t k=m; + const size_t m2=2*m; + const size_t m3=3*m; + + + tw3 = tw2 = tw1 = st->twiddles; + + do { + C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4); + + C_MUL(scratch[0],Fout[m] , *tw1 ); + C_MUL(scratch[1],Fout[m2] , *tw2 ); + C_MUL(scratch[2],Fout[m3] , *tw3 ); + + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + if(st->inverse) { + Fout[m].r = scratch[5].r - scratch[4].i; + Fout[m].i = scratch[5].i + scratch[4].r; + Fout[m3].r = scratch[5].r + scratch[4].i; + Fout[m3].i = scratch[5].i - scratch[4].r; + }else{ + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; + } + ++Fout; + }while(--k); +} + +static void kf_bfly3( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + size_t m + ) +{ + size_t k=m; + const size_t m2 = 2*m; + kiss_fft_cpx *tw1,*tw2; + kiss_fft_cpx scratch[5]; + kiss_fft_cpx epi3; + epi3 = st->twiddles[fstride*m]; + + tw1=tw2=st->twiddles; + + do{ + C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); + + C_MUL(scratch[1],Fout[m] , *tw1); + C_MUL(scratch[2],Fout[m2] , *tw2); + + C_ADD(scratch[3],scratch[1],scratch[2]); + C_SUB(scratch[0],scratch[1],scratch[2]); + tw1 += fstride; + tw2 += fstride*2; + + Fout[m].r = Fout->r - HALF_OF(scratch[3].r); + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); + + C_MULBYSCALAR( scratch[0] , epi3.i ); + + C_ADDTO(*Fout,scratch[3]); + + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; + + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; + + ++Fout; + }while(--k); +} + +static void kf_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int u; + kiss_fft_cpx scratch[13]; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx *tw; + kiss_fft_cpx ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=st->twiddles; + for ( u=0; u<m; ++u ) { + C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); + scratch[0] = *Fout0; + + C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); + C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); + C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); + C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); + + C_ADD( scratch[7],scratch[1],scratch[4]); + C_SUB( scratch[10],scratch[1],scratch[4]); + C_ADD( scratch[8],scratch[2],scratch[3]); + C_SUB( scratch[9],scratch[2],scratch[3]); + + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); + + scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); + scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); + scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } +} + +/* perform the butterfly for one stage of a mixed radix FFT */ +static void kf_bfly_generic( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m, + int p + ) +{ + int u,k,q1,q; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx t; + int Norig = st->nfft; + + kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p); + if (scratch == NULL){ + KISS_FFT_ERROR("Memory allocation failed."); + return; + } + + for ( u=0; u<m; ++u ) { + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + scratch[q1] = Fout[ k ]; + C_FIXDIV(scratch[q1],p); + k += m; + } + + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + int twidx=0; + Fout[ k ] = scratch[0]; + for (q=1;q<p;++q ) { + twidx += fstride * k; + if (twidx>=Norig) twidx-=Norig; + C_MUL(t,scratch[q] , twiddles[twidx] ); + C_ADDTO( Fout[ k ] ,t); + } + k += m; + } + } + KISS_FFT_TMP_FREE(scratch); +} + +static +void kf_work( + kiss_fft_cpx * Fout, + const kiss_fft_cpx * f, + const size_t fstride, + int in_stride, + int * factors, + const kiss_fft_cfg st + ) +{ + kiss_fft_cpx * Fout_beg=Fout; + const int p=*factors++; /* the radix */ + const int m=*factors++; /* stage's fft length/p */ + const kiss_fft_cpx * Fout_end = Fout + p*m; + +#ifdef _OPENMP + // use openmp extensions at the + // top-level (not recursive) + if (fstride==1 && p<=5 && m!=1) + { + int k; + + // execute the p different work units in different threads +# pragma omp parallel for + for (k=0;k<p;++k) + kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st); + // all threads have joined by this point + + switch (p) { + case 2: kf_bfly2(Fout,fstride,st,m); break; + case 3: kf_bfly3(Fout,fstride,st,m); break; + case 4: kf_bfly4(Fout,fstride,st,m); break; + case 5: kf_bfly5(Fout,fstride,st,m); break; + default: kf_bfly_generic(Fout,fstride,st,m,p); break; + } + return; + } +#endif + + if (m==1) { + do{ + *Fout = *f; + f += fstride*in_stride; + }while(++Fout != Fout_end ); + }else{ + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + kf_work( Fout , f, fstride*p, in_stride, factors,st); + f += fstride*in_stride; + }while( (Fout += m) != Fout_end ); + } + + Fout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: kf_bfly2(Fout,fstride,st,m); break; + case 3: kf_bfly3(Fout,fstride,st,m); break; + case 4: kf_bfly4(Fout,fstride,st,m); break; + case 5: kf_bfly5(Fout,fstride,st,m); break; + default: kf_bfly_generic(Fout,fstride,st,m,p); break; + } +} + +/* facbuf is populated by p1,m1,p2,m2, ... + where + p[i] * m[i] = m[i-1] + m0 = n */ +static +void kf_factor(int n,int * facbuf) +{ + int p=4; + double floor_sqrt; + floor_sqrt = floor( sqrt((double)n) ); + + /*factor out powers of 4, powers of 2, then any remaining primes */ + do { + while (n % p) { + switch (p) { + case 4: p = 2; break; + case 2: p = 3; break; + default: p += 2; break; + } + if (p > floor_sqrt) + p = n; /* no more factors, skip to end */ + } + n /= p; + *facbuf++ = p; + *facbuf++ = n; + } while (n > 1); +} + +/* + * + * User-callable function to allocate all necessary storage space for the fft. + * + * The return value is a contiguous block of memory, allocated with malloc. As such, + * It can be freed with free(), rather than a kiss_fft-specific function. + * */ +kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) +{ + KISS_FFT_ALIGN_CHECK(mem) + + kiss_fft_cfg st=NULL; + size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fft_state) + + sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/ + + if ( lenmem==NULL ) { + st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded ); + }else{ + if (mem != NULL && *lenmem >= memneeded) + st = (kiss_fft_cfg)mem; + *lenmem = memneeded; + } + if (st) { + int i; + st->nfft=nfft; + st->inverse = inverse_fft; + + for (i=0;i<nfft;++i) { + const double pi=3.141592653589793238462643383279502884197169399375105820974944; + double phase = -2*pi*i / nfft; + if (st->inverse) + phase *= -1; + kf_cexp(st->twiddles+i, phase ); + } + + kf_factor(nfft,st->factors); + } + return st; +} + + +void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) +{ + if (fin == fout) { + //NOTE: this is not really an in-place FFT algorithm. + //It just performs an out-of-place FFT into a temp buffer + if (fout == NULL){ + KISS_FFT_ERROR("fout buffer NULL."); + return; + } + + kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft); + if (tmpbuf == NULL){ + KISS_FFT_ERROR("Memory allocation error."); + return; + } + + + + kf_work(tmpbuf,fin,1,in_stride, st->factors,st); + memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft); + KISS_FFT_TMP_FREE(tmpbuf); + }else{ + kf_work( fout, fin, 1,in_stride, st->factors,st ); + } +} + +void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + kiss_fft_stride(cfg,fin,fout,1); +} + + +void kiss_fft_cleanup(void) +{ + // nothing needed any more +} + +int kiss_fft_next_fast_size(int n) +{ + while(1) { + int m=n; + while ( (m%2) == 0 ) m/=2; + while ( (m%3) == 0 ) m/=3; + while ( (m%5) == 0 ) m/=5; + if (m<=1) + break; /* n is completely factorable by twos, threes, and fives */ + n++; + } + return n; +} |