diff options
Diffstat (limited to 'libFDK/src/arm/fft_rad2_arm.cpp')
-rw-r--r-- | libFDK/src/arm/fft_rad2_arm.cpp | 259 |
1 files changed, 259 insertions, 0 deletions
diff --git a/libFDK/src/arm/fft_rad2_arm.cpp b/libFDK/src/arm/fft_rad2_arm.cpp new file mode 100644 index 0000000..f40961a --- /dev/null +++ b/libFDK/src/arm/fft_rad2_arm.cpp @@ -0,0 +1,259 @@ +/*************************** Fraunhofer IIS FDK Tools ********************** + + (C) Copyright Fraunhofer IIS (2005) + All Rights Reserved + + Please be advised that this software and/or program delivery is + Confidential Information of Fraunhofer and subject to and covered by the + + Fraunhofer IIS Software Evaluation Agreement + between Google Inc. and Fraunhofer + effective and in full force since March 1, 2012. + + You may use this software and/or program only under the terms and + conditions described in the above mentioned Fraunhofer IIS Software + Evaluation Agreement. Any other and/or further use requires a separate agreement. + + + $Id$ + Author(s): + Description: dit_fft ARM assembler replacements. + + This software and/or program is protected by copyright law and international + treaties. Any reproduction or distribution of this software and/or program, + or any portion of it, may result in severe civil and criminal penalties, and + will be prosecuted to the maximum extent possible under law. + +******************************************************************************/ + +/* NEON optimized FFT currently builds only with RVCT toolchain */ + +#ifndef FUNCTION_dit_fft + +/* If dit_fft was not yet defined by ARM-Cortex ... */ + +#if defined(SINETABLE_16BIT) + +#define FUNCTION_dit_fft + +/***************************************************************************** + + date: 28.07.2005 srl + + Contents/description: dit-tukey-FFT-algorithm + +******************************************************************************/ + +#if defined(FUNCTION_dit_fft) + + +void dit_fft(FIXP_DBL *x, const INT ldn, const FIXP_STP *trigdata, const INT trigDataSize) +{ + const INT n=1<<ldn; + INT i; + + scramble(x,n); + /* + * 1+2 stage radix 4 + */ + + for (i=0;i<n*2;i+=8) + { + FIXP_DBL a00, a10, a20, a30; + a00 = (x[i + 0] + x[i + 2])>>1; /* Re A + Re B */ + a10 = (x[i + 4] + x[i + 6])>>1; /* Re C + Re D */ + a20 = (x[i + 1] + x[i + 3])>>1; /* Im A + Im B */ + a30 = (x[i + 5] + x[i + 7])>>1; /* Im C + Im D */ + + x[i + 0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */ + x[i + 4] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */ + x[i + 1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */ + x[i + 5] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */ + + a00 = a00 - x[i + 2]; /* Re A - Re B */ + a10 = a10 - x[i + 6]; /* Re C - Re D */ + a20 = a20 - x[i + 3]; /* Im A - Im B */ + a30 = a30 - x[i + 7]; /* Im C - Im D */ + + x[i + 2] = a00 + a30; /* Re B' = Re A - Re B + Im C - Im D */ + x[i + 6] = a00 - a30; /* Re D' = Re A - Re B - Im C + Im D */ + x[i + 3] = a20 - a10; /* Im B' = Im A - Im B - Re C + Re D */ + x[i + 7] = a20 + a10; /* Im D' = Im A - Im B + Re C - Re D */ + } + + INT mh = 1 << 1; + INT ldm = ldn - 2; + INT trigstep = trigDataSize; + + do + { + const FIXP_STP *pTrigData = trigdata; + INT j; + + mh <<= 1; + trigstep >>= 1; + + FDK_ASSERT(trigstep > 0); + + /* Do first iteration with c=1.0 and s=0.0 separately to avoid loosing to much precision. + Beware: The impact on the overal FFT precision is rather large. */ + { + FIXP_DBL *xt1 = x; + int r = n; + + do { + FIXP_DBL *xt2 = xt1 + (mh<<1); + /* + FIXP_DBL *xt1 = x+ ((r)<<1); + FIXP_DBL *xt2 = xt1 + (mh<<1); + */ + FIXP_DBL vr,vi,ur,ui; + + //cplxMultDiv2(&vi, &vr, x[t2+1], x[t2], (FIXP_SGL)1.0, (FIXP_SGL)0.0); + vi = xt2[1]>>1; + vr = xt2[0]>>1; + + ur = xt1[0]>>1; + ui = xt1[1]>>1; + + xt1[0] = ur+vr; + xt1[1] = ui+vi; + + xt2[0] = ur-vr; + xt2[1] = ui-vi; + + xt1 += mh; + xt2 += mh; + + //cplxMultDiv2(&vr, &vi, x[t2+1], x[t2], (FIXP_SGL)1.0, (FIXP_SGL)0.0); + vr = xt2[1]>>1; + vi = xt2[0]>>1; + + ur = xt1[0]>>1; + ui = xt1[1]>>1; + + xt1[0] = ur+vr; + xt1[1] = ui-vi; + + xt2[0] = ur-vr; + xt2[1] = ui+vi; + + xt1 = xt2 + mh; + } while ((r=r-(mh<<1)) != 0); + } + for(j=4; j<mh; j+=4) + { + FIXP_DBL *xt1 = x + (j>>1); + FIXP_SPK cs; + int r = n; + + pTrigData += trigstep; + cs = *pTrigData; + + do + { + FIXP_DBL *xt2 = xt1 + (mh<<1); + FIXP_DBL vr,vi,ur,ui; + + cplxMultDiv2(&vi, &vr, xt2[1], xt2[0], cs); + + ur = xt1[0]>>1; + ui = xt1[1]>>1; + + xt1[0] = ur+vr; + xt1[1] = ui+vi; + + xt2[0] = ur-vr; + xt2[1] = ui-vi; + + xt1 += mh; + xt2 += mh; + + cplxMultDiv2(&vr, &vi, xt2[1], xt2[0], cs); + + ur = xt1[0]>>1; + ui = xt1[1]>>1; + + xt1[0] = ur+vr; + xt1[1] = ui-vi; + + xt2[0] = ur-vr; + xt2[1] = ui+vi; + + /* Same as above but for t1,t2 with j>mh/4 and thus cs swapped */ + xt1 = xt1 - (j); + xt2 = xt1 + (mh<<1); + + cplxMultDiv2(&vi, &vr, xt2[0], xt2[1], cs); + + ur = xt1[0]>>1; + ui = xt1[1]>>1; + + xt1[0] = ur+vr; + xt1[1] = ui-vi; + + xt2[0] = ur-vr; + xt2[1] = ui+vi; + + xt1 += mh; + xt2 += mh; + + cplxMultDiv2(&vr, &vi, xt2[0], xt2[1], cs); + + ur = xt1[0]>>1; + ui = xt1[1]>>1; + + xt1[0] = ur-vr; + xt1[1] = ui-vi; + + xt2[0] = ur+vr; + xt2[1] = ui+vi; + + xt1 = xt2 + (j); + } while ((r=r-(mh<<1)) != 0); + } + { + FIXP_DBL *xt1 = x + (mh>>1); + int r = n; + + do + { + FIXP_DBL *xt2 = xt1 + (mh<<1); + FIXP_DBL vr,vi,ur,ui; + + cplxMultDiv2(&vi, &vr, xt2[1], xt2[0], STC(0x5a82799a), STC(0x5a82799a)); + + ur = xt1[0]>>1; + ui = xt1[1]>>1; + + xt1[0] = ur+vr; + xt1[1] = ui+vi; + + xt2[0] = ur-vr; + xt2[1] = ui-vi; + + xt1 += mh; + xt2 += mh; + + cplxMultDiv2(&vr, &vi, xt2[1], xt2[0], STC(0x5a82799a), STC(0x5a82799a)); + + ur = xt1[0]>>1; + ui = xt1[1]>>1; + + xt1[0] = ur+vr; + xt1[1] = ui-vi; + + xt2[0] = ur-vr; + xt2[1] = ui+vi; + + xt1 = xt2 + mh; + } while ((r=r-(mh<<1)) != 0); + } + } while (--ldm != 0); +} + +#endif /* if defined(FUNCTION_dit_fft) */ + +#endif /* if defined(SINETABLE_16BIT) */ + +#endif /* ifndef FUNCTION_dit_fft */ |