diff options
Diffstat (limited to 'libFDK/src/fft.cpp')
-rw-r--r-- | libFDK/src/fft.cpp | 2819 |
1 files changed, 1669 insertions, 1150 deletions
diff --git a/libFDK/src/fft.cpp b/libFDK/src/fft.cpp index 653a71a..4e6fdd2 100644 --- a/libFDK/src/fft.cpp +++ b/libFDK/src/fft.cpp @@ -1,74 +1,85 @@ - -/* ----------------------------------------------------------------------------------------------------------- +/* ----------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. - All rights reserved. +© Copyright 1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten +Forschung e.V. All rights reserved. 1. INTRODUCTION -The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements -the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio. -This FDK AAC Codec software is intended to be used on a wide variety of Android devices. - -AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual -audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by -independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part -of the MPEG specifications. - -Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer) -may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners -individually for the purpose of encoding or decoding bit streams in products that are compliant with -the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license -these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec -software may already be covered under those patent licenses when it is used for those licensed purposes only. - -Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality, -are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional -applications information and documentation. +The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software +that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding +scheme for digital audio. This FDK AAC Codec software is intended to be used on +a wide variety of Android devices. + +AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient +general perceptual audio codecs. AAC-ELD is considered the best-performing +full-bandwidth communications codec by independent studies and is widely +deployed. AAC has been standardized by ISO and IEC as part of the MPEG +specifications. + +Patent licenses for necessary patent claims for the FDK AAC Codec (including +those of Fraunhofer) may be obtained through Via Licensing +(www.vialicensing.com) or through the respective patent owners individually for +the purpose of encoding or decoding bit streams in products that are compliant +with the ISO/IEC MPEG audio standards. Please note that most manufacturers of +Android devices already license these patent claims through Via Licensing or +directly from the patent owners, and therefore FDK AAC Codec software may +already be covered under those patent licenses when it is used for those +licensed purposes only. + +Commercially-licensed AAC software libraries, including floating-point versions +with enhanced sound quality, are also available from Fraunhofer. Users are +encouraged to check the Fraunhofer website for additional applications +information and documentation. 2. COPYRIGHT LICENSE -Redistribution and use in source and binary forms, with or without modification, are permitted without -payment of copyright license fees provided that you satisfy the following conditions: +Redistribution and use in source and binary forms, with or without modification, +are permitted without payment of copyright license fees provided that you +satisfy the following conditions: -You must retain the complete text of this software license in redistributions of the FDK AAC Codec or -your modifications thereto in source code form. +You must retain the complete text of this software license in redistributions of +the FDK AAC Codec or your modifications thereto in source code form. -You must retain the complete text of this software license in the documentation and/or other materials -provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form. -You must make available free of charge copies of the complete source code of the FDK AAC Codec and your +You must retain the complete text of this software license in the documentation +and/or other materials provided with redistributions of the FDK AAC Codec or +your modifications thereto in binary form. You must make available free of +charge copies of the complete source code of the FDK AAC Codec and your modifications thereto to recipients of copies in binary form. -The name of Fraunhofer may not be used to endorse or promote products derived from this library without -prior written permission. +The name of Fraunhofer may not be used to endorse or promote products derived +from this library without prior written permission. -You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec -software or your modifications thereto. +You may not charge copyright license fees for anyone to use, copy or distribute +the FDK AAC Codec software or your modifications thereto. -Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software -and the date of any change. For modified versions of the FDK AAC Codec, the term -"Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term -"Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android." +Your modified versions of the FDK AAC Codec must carry prominent notices stating +that you changed the software and the date of any change. For modified versions +of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android" +must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK +AAC Codec Library for Android." 3. NO PATENT LICENSE -NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer, -ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with -respect to this software. +NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without +limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE. +Fraunhofer provides no warranty of patent non-infringement with respect to this +software. -You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized -by appropriate patent licenses. +You may use this FDK AAC Codec software or modifications thereto only for +purposes that are authorized by appropriate patent licenses. 4. DISCLAIMER -This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors -"AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties -of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR -CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages, -including but not limited to procurement of substitute goods or services; loss of use, data, or profits, -or business interruption, however caused and on any theory of liability, whether in contract, strict -liability, or tort (including negligence), arising in any way out of the use of this software, even if -advised of the possibility of such damage. +This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright +holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, +including but not limited to the implied warranties of merchantability and +fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, +or consequential damages, including but not limited to procurement of substitute +goods or services; loss of use, data, or profits, or business interruption, +however caused and on any theory of liability, whether in contract, strict +liability, or tort (including negligence), arising in any way out of the use of +this software, even if advised of the possibility of such damage. 5. CONTACT INFORMATION @@ -79,45 +90,130 @@ Am Wolfsmantel 33 www.iis.fraunhofer.de/amm amm-info@iis.fraunhofer.de ------------------------------------------------------------------------------------------------------------ */ +----------------------------------------------------------------------------- */ -/*************************** Fraunhofer IIS FDK Tools ********************** +/******************* Library for basic calculation routines ******************** Author(s): Josef Hoepfl, DSP Solutions - Description: Fix point FFT -******************************************************************************/ + Description: Fix point FFT -#include "fft.h" +*******************************************************************************/ #include "fft_rad2.h" #include "FDK_tools_rom.h" +#define W_PiFOURTH STC(0x5a82799a) +//#define W_PiFOURTH ((FIXP_DBL)(0x5a82799a)) +#ifndef SUMDIFF_PIFOURTH +#define SUMDIFF_PIFOURTH(diff, sum, a, b) \ + { \ + FIXP_DBL wa, wb; \ + wa = fMultDiv2(a, W_PiFOURTH); \ + wb = fMultDiv2(b, W_PiFOURTH); \ + diff = wb - wa; \ + sum = wb + wa; \ + } +#define SUMDIFF_PIFOURTH16(diff, sum, a, b) \ + { \ + FIXP_SGL wa, wb; \ + wa = FX_DBL2FX_SGL(fMultDiv2(a, W_PiFOURTH)); \ + wb = FX_DBL2FX_SGL(fMultDiv2(b, W_PiFOURTH)); \ + diff = wb - wa; \ + sum = wb + wa; \ + } +#endif +#define SCALEFACTOR2048 10 +#define SCALEFACTOR1024 9 +#define SCALEFACTOR512 8 +#define SCALEFACTOR256 7 +#define SCALEFACTOR128 6 +#define SCALEFACTOR64 5 +#define SCALEFACTOR32 4 +#define SCALEFACTOR16 3 +#define SCALEFACTOR8 2 +#define SCALEFACTOR4 1 +#define SCALEFACTOR2 1 + +#define SCALEFACTOR3 1 +#define SCALEFACTOR5 1 +#define SCALEFACTOR6 (SCALEFACTOR2 + SCALEFACTOR3 + 2) +#define SCALEFACTOR7 2 +#define SCALEFACTOR9 2 +#define SCALEFACTOR10 5 +#define SCALEFACTOR12 3 +#define SCALEFACTOR15 3 +#define SCALEFACTOR18 (SCALEFACTOR2 + SCALEFACTOR9 + 2) +#define SCALEFACTOR20 (SCALEFACTOR4 + SCALEFACTOR5 + 2) +#define SCALEFACTOR21 (SCALEFACTOR3 + SCALEFACTOR7 + 2) +#define SCALEFACTOR24 (SCALEFACTOR2 + SCALEFACTOR12 + 2) +#define SCALEFACTOR30 (SCALEFACTOR2 + SCALEFACTOR15 + 2) +#define SCALEFACTOR40 (SCALEFACTOR5 + SCALEFACTOR8 + 2) +#define SCALEFACTOR48 (SCALEFACTOR4 + SCALEFACTOR12 + 2) +#define SCALEFACTOR60 (SCALEFACTOR4 + SCALEFACTOR15 + 2) +#define SCALEFACTOR80 (SCALEFACTOR5 + SCALEFACTOR16 + 2) +#define SCALEFACTOR96 (SCALEFACTOR3 + SCALEFACTOR32 + 2) +#define SCALEFACTOR120 (SCALEFACTOR8 + SCALEFACTOR15 + 2) +#define SCALEFACTOR160 (SCALEFACTOR10 + SCALEFACTOR16 + 2) +#define SCALEFACTOR168 (SCALEFACTOR21 + SCALEFACTOR8 + 2) +#define SCALEFACTOR192 (SCALEFACTOR12 + SCALEFACTOR16 + 2) +#define SCALEFACTOR240 (SCALEFACTOR16 + SCALEFACTOR15 + 2) +#define SCALEFACTOR320 (SCALEFACTOR10 + SCALEFACTOR32 + 2) +#define SCALEFACTOR336 (SCALEFACTOR21 + SCALEFACTOR16 + 2) +#define SCALEFACTOR384 (SCALEFACTOR12 + SCALEFACTOR32 + 2) +#define SCALEFACTOR480 (SCALEFACTOR32 + SCALEFACTOR15 + 2) +#include "fft.h" +#ifndef FUNCTION_fft2 -#define F3C(x) STC(x) +/* Performs the FFT of length 2. Input vector unscaled, output vector scaled + * with factor 0.5 */ +static FDK_FORCEINLINE void fft2(FIXP_DBL *RESTRICT pDat) { + FIXP_DBL r1, i1; + FIXP_DBL r2, i2; -#define C31 (F3C(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) */ + /* real part */ + r1 = pDat[2]; + r2 = pDat[0]; + + /* imaginary part */ + i1 = pDat[3]; + i2 = pDat[1]; + + /* real part */ + pDat[0] = (r2 + r1) >> 1; + pDat[2] = (r2 - r1) >> 1; + + /* imaginary part */ + pDat[1] = (i2 + i1) >> 1; + pDat[3] = (i2 - i1) >> 1; +} +#endif /* FUNCTION_fft2 */ + +#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2 */ + +#ifndef FUNCTION_fft3 +/* Performs the FFT of length 3 according to the algorithm after winograd. */ +static FDK_FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) { + FIXP_DBL r1, r2; + FIXP_DBL s1, s2; + FIXP_DBL pD; -/* Performs the FFT of length 3 according to the algorithm after winograd. - No scaling of the input vector because the scaling is already done in the rotation vector. */ -static FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) -{ - FIXP_DBL r1,r2; - FIXP_DBL s1,s2; /* real part */ - r1 = pDat[2] + pDat[4]; - r2 = fMult((pDat[2] - pDat[4]), C31); - pDat[0] = pDat[0] + r1; - r1 = pDat[0] - r1 - (r1>>1); + r1 = pDat[2] + pDat[4]; + r2 = fMultDiv2((pDat[2] - pDat[4]), C31); + pD = pDat[0] >> 1; + pDat[0] = pD + (r1 >> 1); + r1 = pD - (r1 >> 2); /* imaginary part */ - s1 = pDat[3] + pDat[5]; - s2 = fMult((pDat[3] - pDat[5]), C31); - pDat[1] = pDat[1] + s1; - s1 = pDat[1] - s1 - (s1>>1); + s1 = pDat[3] + pDat[5]; + s2 = fMultDiv2((pDat[3] - pDat[5]), C31); + pD = pDat[1] >> 1; + pDat[1] = pD + (s1 >> 1); + s1 = pD - (s1 >> 2); /* combination */ pDat[2] = r1 - s2; @@ -125,60 +221,60 @@ static FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) pDat[3] = s1 + r2; pDat[5] = s1 - r2; } - +#endif /* #ifndef FUNCTION_fft3 */ #define F5C(x) STC(x) -#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */ -#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */ -#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */ -#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699) */ -#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */ +#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */ +#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */ +#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */ +#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699) */ +#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */ /* performs the FFT of length 5 according to the algorithm after winograd */ -static FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) -{ - FIXP_DBL r1,r2,r3,r4; - FIXP_DBL s1,s2,s3,s4; +/* This version works with a prescale of 2 instead of 3 */ +static FDK_FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) { + FIXP_DBL r1, r2, r3, r4; + FIXP_DBL s1, s2, s3, s4; FIXP_DBL t; /* real part */ - r1 = pDat[2] + pDat[8]; - r4 = pDat[2] - pDat[8]; - r3 = pDat[4] + pDat[6]; - r2 = pDat[4] - pDat[6]; - t = fMult((r1-r3), C54); - r1 = r1 + r3; - pDat[0] = pDat[0] + r1; - /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of - the values as fracts */ - r1 = pDat[0] + (fMultDiv2(r1, C55) <<(2)); - r3 = r1 - t; - r1 = r1 + t; - t = fMult((r4 + r2), C51); - /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of - the values as fracts */ - r4 = t + (fMultDiv2(r4, C52) <<(2)); - r2 = t + fMult(r2, C53); + r1 = (pDat[2] + pDat[8]) >> 1; + r4 = (pDat[2] - pDat[8]) >> 1; + r3 = (pDat[4] + pDat[6]) >> 1; + r2 = (pDat[4] - pDat[6]) >> 1; + t = fMult((r1 - r3), C54); + r1 = r1 + r3; + pDat[0] = (pDat[0] >> 1) + r1; + /* Bit shift left because of the constant C55 which was scaled with the factor + 0.5 because of the representation of the values as fracts */ + r1 = pDat[0] + (fMultDiv2(r1, C55) << (2)); + r3 = r1 - t; + r1 = r1 + t; + t = fMult((r4 + r2), C51); + /* Bit shift left because of the constant C55 which was scaled with the factor + 0.5 because of the representation of the values as fracts */ + r4 = t + (fMultDiv2(r4, C52) << (2)); + r2 = t + fMult(r2, C53); /* imaginary part */ - s1 = pDat[3] + pDat[9]; - s4 = pDat[3] - pDat[9]; - s3 = pDat[5] + pDat[7]; - s2 = pDat[5] - pDat[7]; - t = fMult((s1 - s3), C54); - s1 = s1 + s3; - pDat[1] = pDat[1] + s1; - /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of - the values as fracts */ - s1 = pDat[1] + (fMultDiv2(s1, C55) <<(2)); - s3 = s1 - t; - s1 = s1 + t; - t = fMult((s4 + s2), C51); - /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of - the values as fracts */ - s4 = t + (fMultDiv2(s4, C52) <<(2)); - s2 = t + fMult(s2, C53); + s1 = (pDat[3] + pDat[9]) >> 1; + s4 = (pDat[3] - pDat[9]) >> 1; + s3 = (pDat[5] + pDat[7]) >> 1; + s2 = (pDat[5] - pDat[7]) >> 1; + t = fMult((s1 - s3), C54); + s1 = s1 + s3; + pDat[1] = (pDat[1] >> 1) + s1; + /* Bit shift left because of the constant C55 which was scaled with the factor + 0.5 because of the representation of the values as fracts */ + s1 = pDat[1] + (fMultDiv2(s1, C55) << (2)); + s3 = s1 - t; + s1 = s1 + t; + t = fMult((s4 + s2), C51); + /* Bit shift left because of the constant C55 which was scaled with the factor + 0.5 because of the representation of the values as fracts */ + s4 = t + (fMultDiv2(s4, C52) << (2)); + s2 = t + fMult(s2, C53); /* combination */ pDat[2] = r1 + s2; @@ -192,20 +288,322 @@ static FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) pDat[7] = s3 - r4; } +#define F5C(x) STC(x) +#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */ +#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */ +#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */ +#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699) */ +#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */ +/** + * \brief Function performs a complex 10-point FFT + * The FFT is performed inplace. The result of the FFT + * is scaled by SCALEFACTOR10 bits. + * + * WOPS FLC version: 1093 cycles + * WOPS with 32x16 bit multiplications: 196 cycles + * + * \param [i/o] re real input / output + * \param [i/o] im imag input / output + * \param [i ] s stride real and imag input / output + * + * \return void + */ +static void fft10(FIXP_DBL *x) // FIXP_DBL *re, FIXP_DBL *im, FIXP_SGL s) +{ + FIXP_DBL t; + FIXP_DBL x0, x1, x2, x3, x4; + FIXP_DBL r1, r2, r3, r4; + FIXP_DBL s1, s2, s3, s4; + FIXP_DBL y00, y01, y02, y03, y04, y05, y06, y07, y08, y09; + FIXP_DBL y10, y11, y12, y13, y14, y15, y16, y17, y18, y19; + const int s = 1; // stride factor -#define N3 3 -#define N5 5 -#define N6 6 -#define N15 15 + /* 2 fft5 stages */ -/* Performs the FFT of length 15. It is split into FFTs of length 3 and length 5. */ -static inline void fft15(FIXP_DBL *pInput) -{ - FIXP_DBL aDst[2*N15]; - FIXP_DBL aDst1[2*N15]; - int i,k,l; + /* real part */ + x0 = (x[s * 0] >> SCALEFACTOR10); + x1 = (x[s * 4] >> SCALEFACTOR10); + x2 = (x[s * 8] >> SCALEFACTOR10); + x3 = (x[s * 12] >> SCALEFACTOR10); + x4 = (x[s * 16] >> SCALEFACTOR10); + + r1 = (x3 + x2); + r4 = (x3 - x2); + r3 = (x1 + x4); + r2 = (x1 - x4); + t = fMult((r1 - r3), C54); + r1 = (r1 + r3); + y00 = (x0 + r1); + r1 = (y00 + ((fMult(r1, C55) << 1))); + r3 = (r1 - t); + r1 = (r1 + t); + t = fMult((r4 + r2), C51); + r4 = (t + (fMult(r4, C52) << 1)); + r2 = (t + fMult(r2, C53)); + + /* imaginary part */ + x0 = (x[s * 0 + 1] >> SCALEFACTOR10); + x1 = (x[s * 4 + 1] >> SCALEFACTOR10); + x2 = (x[s * 8 + 1] >> SCALEFACTOR10); + x3 = (x[s * 12 + 1] >> SCALEFACTOR10); + x4 = (x[s * 16 + 1] >> SCALEFACTOR10); + + s1 = (x3 + x2); + s4 = (x3 - x2); + s3 = (x1 + x4); + s2 = (x1 - x4); + t = fMult((s1 - s3), C54); + s1 = (s1 + s3); + y01 = (x0 + s1); + s1 = (y01 + (fMult(s1, C55) << 1)); + s3 = (s1 - t); + s1 = (s1 + t); + t = fMult((s4 + s2), C51); + s4 = (t + (fMult(s4, C52) << 1)); + s2 = (t + fMult(s2, C53)); + + /* combination */ + y04 = (r1 + s2); + y16 = (r1 - s2); + y08 = (r3 - s4); + y12 = (r3 + s4); + + y05 = (s1 - r2); + y17 = (s1 + r2); + y09 = (s3 + r4); + y13 = (s3 - r4); + + /* real part */ + x0 = (x[s * 10] >> SCALEFACTOR10); + x1 = (x[s * 2] >> SCALEFACTOR10); + x2 = (x[s * 6] >> SCALEFACTOR10); + x3 = (x[s * 14] >> SCALEFACTOR10); + x4 = (x[s * 18] >> SCALEFACTOR10); + + r1 = (x1 + x4); + r4 = (x1 - x4); + r3 = (x3 + x2); + r2 = (x3 - x2); + t = fMult((r1 - r3), C54); + r1 = (r1 + r3); + y02 = (x0 + r1); + r1 = (y02 + ((fMult(r1, C55) << 1))); + r3 = (r1 - t); + r1 = (r1 + t); + t = fMult(((r4 + r2)), C51); + r4 = (t + (fMult(r4, C52) << 1)); + r2 = (t + fMult(r2, C53)); + + /* imaginary part */ + x0 = (x[s * 10 + 1] >> SCALEFACTOR10); + x1 = (x[s * 2 + 1] >> SCALEFACTOR10); + x2 = (x[s * 6 + 1] >> SCALEFACTOR10); + x3 = (x[s * 14 + 1] >> SCALEFACTOR10); + x4 = (x[s * 18 + 1] >> SCALEFACTOR10); + + s1 = (x1 + x4); + s4 = (x1 - x4); + s3 = (x3 + x2); + s2 = (x3 - x2); + t = fMult((s1 - s3), C54); + s1 = (s1 + s3); + y03 = (x0 + s1); + s1 = (y03 + (fMult(s1, C55) << 1)); + s3 = (s1 - t); + s1 = (s1 + t); + t = fMult((s4 + s2), C51); + s4 = (t + (fMult(s4, C52) << 1)); + s2 = (t + fMult(s2, C53)); + + /* combination */ + y06 = (r1 + s2); + y18 = (r1 - s2); + y10 = (r3 - s4); + y14 = (r3 + s4); + + y07 = (s1 - r2); + y19 = (s1 + r2); + y11 = (s3 + r4); + y15 = (s3 - r4); + + /* 5 fft2 stages */ + x[s * 0] = (y00 + y02); + x[s * 0 + 1] = (y01 + y03); + x[s * 10] = (y00 - y02); + x[s * 10 + 1] = (y01 - y03); + + x[s * 4] = (y04 + y06); + x[s * 4 + 1] = (y05 + y07); + x[s * 14] = (y04 - y06); + x[s * 14 + 1] = (y05 - y07); + + x[s * 8] = (y08 + y10); + x[s * 8 + 1] = (y09 + y11); + x[s * 18] = (y08 - y10); + x[s * 18 + 1] = (y09 - y11); + + x[s * 12] = (y12 + y14); + x[s * 12 + 1] = (y13 + y15); + x[s * 2] = (y12 - y14); + x[s * 2 + 1] = (y13 - y15); + + x[s * 16] = (y16 + y18); + x[s * 16 + 1] = (y17 + y19); + x[s * 6] = (y16 - y18); + x[s * 6 + 1] = (y17 - y19); +} + +#ifndef FUNCTION_fft12 +#define FUNCTION_fft12 + +#undef C31 +#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2 */ + +static inline void fft12(FIXP_DBL *pInput) { + FIXP_DBL aDst[24]; + FIXP_DBL *pSrc, *pDst; + int i; + + pSrc = pInput; + pDst = aDst; + FIXP_DBL r1, r2, s1, s2, pD; + + /* First 3*2 samples are shifted right by 2 before output */ + r1 = pSrc[8] + pSrc[16]; + r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31); + pD = pSrc[0] >> 1; + pDst[0] = (pD + (r1 >> 1)) >> 1; + r1 = pD - (r1 >> 2); + + /* imaginary part */ + s1 = pSrc[9] + pSrc[17]; + s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31); + pD = pSrc[1] >> 1; + pDst[1] = (pD + (s1 >> 1)) >> 1; + s1 = pD - (s1 >> 2); + + /* combination */ + pDst[2] = (r1 - s2) >> 1; + pDst[3] = (s1 + r2) >> 1; + pDst[4] = (r1 + s2) >> 1; + pDst[5] = (s1 - r2) >> 1; + pSrc += 2; + pDst += 6; + + const FIXP_STB *pVecRe = RotVectorReal12; + const FIXP_STB *pVecIm = RotVectorImag12; + FIXP_DBL re, im; + FIXP_STB vre, vim; + for (i = 0; i < 2; i++) { + /* sample 0,1 are shifted right by 2 before output */ + /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before + * output */ + + r1 = pSrc[8] + pSrc[16]; + r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31); + pD = pSrc[0] >> 1; + pDst[0] = (pD + (r1 >> 1)) >> 1; + r1 = pD - (r1 >> 2); + + /* imaginary part */ + s1 = pSrc[9] + pSrc[17]; + s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31); + pD = pSrc[1] >> 1; + pDst[1] = (pD + (s1 >> 1)) >> 1; + s1 = pD - (s1 >> 2); + + /* combination */ + re = (r1 - s2) >> 0; + im = (s1 + r2) >> 0; + vre = *pVecRe++; + vim = *pVecIm++; + cplxMultDiv2(&pDst[3], &pDst[2], im, re, vre, vim); + + re = (r1 + s2) >> 0; + im = (s1 - r2) >> 0; + vre = *pVecRe++; + vim = *pVecIm++; + cplxMultDiv2(&pDst[5], &pDst[4], im, re, vre, vim); + + pDst += 6; + pSrc += 2; + } + /* sample 0,1 are shifted right by 2 before output */ + /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */ + /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */ + r1 = pSrc[8] + pSrc[16]; + r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31); + pD = pSrc[0] >> 1; + pDst[0] = (pD + (r1 >> 1)) >> 1; + r1 = pD - (r1 >> 2); + + /* imaginary part */ + s1 = pSrc[9] + pSrc[17]; + s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31); + pD = pSrc[1] >> 1; + pDst[1] = (pD + (s1 >> 1)) >> 1; + s1 = pD - (s1 >> 2); + + /* combination */ + pDst[2] = (s1 + r2) >> 1; + pDst[3] = (s2 - r1) >> 1; + pDst[4] = -((r1 + s2) >> 1); + pDst[5] = (r2 - s1) >> 1; + + /* Perform 3 times the fft of length 4. The input samples are at the address + of aDst and the output samples are at the address of pInput. The input vector + for the fft of length 4 is built of the interleaved samples in aDst, the + output samples are stored consecutively at the address of pInput. + */ + pSrc = aDst; + pDst = pInput; + for (i = 0; i < 3; i++) { + /* inline FFT4 merged with incoming resorting loop */ + FIXP_DBL a00, a10, a20, a30, tmp0, tmp1; + + a00 = (pSrc[0] + pSrc[12]) >> 1; /* Re A + Re B */ + a10 = (pSrc[6] + pSrc[18]) >> 1; /* Re C + Re D */ + a20 = (pSrc[1] + pSrc[13]) >> 1; /* Im A + Im B */ + a30 = (pSrc[7] + pSrc[19]) >> 1; /* Im C + Im D */ + + pDst[0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */ + pDst[1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */ + + tmp0 = a00 - pSrc[12]; /* Re A - Re B */ + tmp1 = a20 - pSrc[13]; /* Im A - Im B */ + + pDst[12] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */ + pDst[13] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */ + + a10 = a10 - pSrc[18]; /* Re C - Re D */ + a30 = a30 - pSrc[19]; /* Im C - Im D */ + + pDst[6] = tmp0 + a30; /* Re B' = Re A - Re B + Im C - Im D */ + pDst[18] = tmp0 - a30; /* Re D' = Re A - Re B - Im C + Im D */ + pDst[7] = tmp1 - a10; /* Im B' = Im A - Im B - Re C + Re D */ + pDst[19] = tmp1 + a10; /* Im D' = Im A - Im B + Re C - Re D */ + + pSrc += 2; + pDst += 2; + } +} +#endif /* FUNCTION_fft12 */ + +#ifndef FUNCTION_fft15 + +#define N3 3 +#define N5 5 +#define N6 6 +#define N15 15 + +/* Performs the FFT of length 15. It is split into FFTs of length 3 and + * length 5. */ +static inline void fft15(FIXP_DBL *pInput) { + FIXP_DBL aDst[2 * N15]; + FIXP_DBL aDst1[2 * N15]; + int i, k, l; /* Sort input vector for fft's of length 3 input3(0:2) = [input(0) input(5) input(10)]; @@ -217,47 +615,43 @@ static inline void fft15(FIXP_DBL *pInput) const FIXP_DBL *pSrc = pInput; FIXP_DBL *RESTRICT pDst = aDst; /* Merge 3 loops into one, skip call of fft3 */ - for(i=0,l=0,k=0; i<N5; i++, k+=6) - { - pDst[k+0] = pSrc[l]; - pDst[k+1] = pSrc[l+1]; - l += 2*N5; - if (l >= (2*N15)) - l -= (2*N15); - - pDst[k+2] = pSrc[l]; - pDst[k+3] = pSrc[l+1]; - l += 2*N5; - if (l >= (2*N15)) - l -= (2*N15); - pDst[k+4] = pSrc[l]; - pDst[k+5] = pSrc[l+1]; - l += (2*N5) + (2*N3); - if (l >= (2*N15)) - l -= (2*N15); + for (i = 0, l = 0, k = 0; i < N5; i++, k += 6) { + pDst[k + 0] = pSrc[l]; + pDst[k + 1] = pSrc[l + 1]; + l += 2 * N5; + if (l >= (2 * N15)) l -= (2 * N15); + + pDst[k + 2] = pSrc[l]; + pDst[k + 3] = pSrc[l + 1]; + l += 2 * N5; + if (l >= (2 * N15)) l -= (2 * N15); + pDst[k + 4] = pSrc[l]; + pDst[k + 5] = pSrc[l + 1]; + l += (2 * N5) + (2 * N3); + if (l >= (2 * N15)) l -= (2 * N15); /* fft3 merged with shift right by 2 loop */ - FIXP_DBL r1,r2,r3; - FIXP_DBL s1,s2; + FIXP_DBL r1, r2, r3; + FIXP_DBL s1, s2; /* real part */ - r1 = pDst[k+2] + pDst[k+4]; - r2 = fMult((pDst[k+2] - pDst[k+4]), C31); - s1 = pDst[k+0]; - pDst[k+0] = (s1 + r1)>>2; - r1 = s1 - (r1>>1); + r1 = pDst[k + 2] + pDst[k + 4]; + r2 = fMult((pDst[k + 2] - pDst[k + 4]), C31); + s1 = pDst[k + 0]; + pDst[k + 0] = (s1 + r1) >> 2; + r1 = s1 - (r1 >> 1); /* imaginary part */ - s1 = pDst[k+3] + pDst[k+5]; - s2 = fMult((pDst[k+3] - pDst[k+5]), C31); - r3 = pDst[k+1]; - pDst[k+1] = (r3 + s1)>>2; - s1 = r3 - (s1>>1); + s1 = pDst[k + 3] + pDst[k + 5]; + s2 = fMult((pDst[k + 3] - pDst[k + 5]), C31); + r3 = pDst[k + 1]; + pDst[k + 1] = (r3 + s1) >> 2; + s1 = r3 - (s1 >> 1); /* combination */ - pDst[k+2] = (r1 - s2)>>2; - pDst[k+4] = (r1 + s2)>>2; - pDst[k+3] = (s1 + r2)>>2; - pDst[k+5] = (s1 - r2)>>2; + pDst[k + 2] = (r1 - s2) >> 2; + pDst[k + 4] = (r1 + s2) >> 2; + pDst[k + 3] = (s1 + r2) >> 2; + pDst[k + 5] = (s1 - r2) >> 2; } } /* Sort input vector for fft's of length 5 @@ -268,19 +662,18 @@ static inline void fft15(FIXP_DBL *pInput) { const FIXP_DBL *pSrc = aDst; FIXP_DBL *RESTRICT pDst = aDst1; - for(i=0,l=0,k=0; i<N3; i++, k+=10) - { - l = 2*i; - pDst[k+0] = pSrc[l+0]; - pDst[k+1] = pSrc[l+1]; - pDst[k+2] = pSrc[l+0+(2*N3)]; - pDst[k+3] = pSrc[l+1+(2*N3)]; - pDst[k+4] = pSrc[l+0+(4*N3)]; - pDst[k+5] = pSrc[l+1+(4*N3)]; - pDst[k+6] = pSrc[l+0+(6*N3)]; - pDst[k+7] = pSrc[l+1+(6*N3)]; - pDst[k+8] = pSrc[l+0+(8*N3)]; - pDst[k+9] = pSrc[l+1+(8*N3)]; + for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) { + l = 2 * i; + pDst[k + 0] = pSrc[l + 0]; + pDst[k + 1] = pSrc[l + 1]; + pDst[k + 2] = pSrc[l + 0 + (2 * N3)]; + pDst[k + 3] = pSrc[l + 1 + (2 * N3)]; + pDst[k + 4] = pSrc[l + 0 + (4 * N3)]; + pDst[k + 5] = pSrc[l + 1 + (4 * N3)]; + pDst[k + 6] = pSrc[l + 0 + (6 * N3)]; + pDst[k + 7] = pSrc[l + 1 + (6 * N3)]; + pDst[k + 8] = pSrc[l + 0 + (8 * N3)]; + pDst[k + 9] = pSrc[l + 1 + (8 * N3)]; fft5(&pDst[k]); } } @@ -292,1112 +685,1238 @@ static inline void fft15(FIXP_DBL *pInput) { const FIXP_DBL *pSrc = aDst1; FIXP_DBL *RESTRICT pDst = pInput; - for(i=0,l=0,k=0; i<N3; i++, k+=10) - { - pDst[k+0] = pSrc[l]; - pDst[k+1] = pSrc[l+1]; - l += (2*N6); - if (l >= (2*N15)) - l -= (2*N15); - pDst[k+2] = pSrc[l]; - pDst[k+3] = pSrc[l+1]; - l += (2*N6); - if (l >= (2*N15)) - l -= (2*N15); - pDst[k+4] = pSrc[l]; - pDst[k+5] = pSrc[l+1]; - l += (2*N6); - if (l >= (2*N15)) - l -= (2*N15); - pDst[k+6] = pSrc[l]; - pDst[k+7] = pSrc[l+1]; - l += (2*N6); - if (l >= (2*N15)) - l -= (2*N15); - pDst[k+8] = pSrc[l]; - pDst[k+9] = pSrc[l+1]; - l += 2; /* no modulo check needed, it cannot occur */ + for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) { + pDst[k + 0] = pSrc[l]; + pDst[k + 1] = pSrc[l + 1]; + l += (2 * N6); + if (l >= (2 * N15)) l -= (2 * N15); + pDst[k + 2] = pSrc[l]; + pDst[k + 3] = pSrc[l + 1]; + l += (2 * N6); + if (l >= (2 * N15)) l -= (2 * N15); + pDst[k + 4] = pSrc[l]; + pDst[k + 5] = pSrc[l + 1]; + l += (2 * N6); + if (l >= (2 * N15)) l -= (2 * N15); + pDst[k + 6] = pSrc[l]; + pDst[k + 7] = pSrc[l + 1]; + l += (2 * N6); + if (l >= (2 * N15)) l -= (2 * N15); + pDst[k + 8] = pSrc[l]; + pDst[k + 9] = pSrc[l + 1]; + l += 2; /* no modulo check needed, it cannot occur */ } } } +#endif /* FUNCTION_fft15 */ -#define W_PiFOURTH STC(0x5a82799a) -#ifndef SUMDIFF_PIFOURTH -#define SUMDIFF_PIFOURTH(diff,sum,a,b) \ - { \ - FIXP_DBL wa, wb;\ - wa = fMultDiv2(a, W_PiFOURTH);\ - wb = fMultDiv2(b, W_PiFOURTH);\ - diff = wb - wa;\ - sum = wb + wa;\ - } +/* + Select shift placement. + Some processors like ARM may shift "for free" in combination with an addition + or substraction, but others don't so either combining shift with +/- or reduce + the total amount or shift operations is optimal + */ +#if !defined(__arm__) +#define SHIFT_A >> 1 +#define SHIFT_B +#else +#define SHIFT_A +#define SHIFT_B >> 1 #endif -/* This version is more overflow save, but less cycle optimal. */ -#define SUMDIFF_EIGTH(x, y, ix, iy, vr, vi, ur, ui) \ - vr = (x[ 0 + ix]>>1) + (x[16 + ix]>>1); /* Re A + Re B */ \ - vi = (x[ 8 + ix]>>1) + (x[24 + ix]>>1); /* Re C + Re D */ \ - ur = (x[ 1 + ix]>>1) + (x[17 + ix]>>1); /* Im A + Im B */ \ - ui = (x[ 9 + ix]>>1) + (x[25 + ix]>>1); /* Im C + Im D */ \ - y[ 0 + iy] = vr + vi; /* Re A' = ReA + ReB +ReC + ReD */ \ - y[ 4 + iy] = vr - vi; /* Re C' = -(ReC+ReD) + (ReA+ReB) */ \ - y[ 1 + iy] = ur + ui; /* Im A' = sum of imag values */ \ - y[ 5 + iy] = ur - ui; /* Im C' = -Im C -Im D +Im A +Im B */ \ - vr -= x[16 + ix]; /* Re A - Re B */ \ - vi = vi - x[24 + ix]; /* Re C - Re D */ \ - ur -= x[17 + ix]; /* Im A - Im B */ \ - ui = ui - x[25 + ix]; /* Im C - Im D */ \ - y[ 2 + iy] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ \ - y[ 6 + iy] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ \ - y[ 3 + iy] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ \ - y[ 7 + iy] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ - -static const FIXP_STP fft16_w16[2] = { STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d) }; +#ifndef FUNCTION_fft_16 /* we check, if fft_16 (FIXP_DBL *) is not yet defined \ + */ + +/* This defines prevents this array to be declared twice, if 16-bit fft is + * enabled too */ +#define FUNCTION_DATA_fft_16_w16 +static const FIXP_STP fft16_w16[2] = {STCP(0x7641af3d, 0x30fbc54d), + STCP(0x30fbc54d, 0x7641af3d)}; LNK_SECTION_CODE_L1 -inline void fft_16(FIXP_DBL *RESTRICT x) -{ - FIXP_DBL vr, vi, ur, ui; - FIXP_DBL y[32]; - - SUMDIFF_EIGTH(x, y, 0, 0, vr, vi, ur, ui); - SUMDIFF_EIGTH(x, y, 4, 8, vr, vi, ur, ui); - SUMDIFF_EIGTH(x, y, 2, 16, vr, vi, ur, ui); - SUMDIFF_EIGTH(x, y, 6, 24, vr, vi, ur, ui); - -// xt1 = 0 -// xt2 = 8 - vr = y[ 8]; - vi = y[ 9]; - ur = y[ 0]>>1; - ui = y[ 1]>>1; - x[ 0] = ur + (vr>>1); - x[ 1] = ui + (vi>>1); - x[ 8] = ur - (vr>>1); - x[ 9] = ui - (vi>>1); - -// xt1 = 4 -// xt2 = 12 - vr = y[13]; - vi = y[12]; - ur = y[ 4]>>1; - ui = y[ 5]>>1; - x[ 4] = ur + (vr>>1); - x[ 5] = ui - (vi>>1); - x[12] = ur - (vr>>1); - x[13] = ui + (vi>>1); - -// xt1 = 16 -// xt2 = 24 - vr = y[24]; - vi = y[25]; - ur = y[16]>>1; - ui = y[17]>>1; - x[16] = ur + (vr>>1); - x[17] = ui + (vi>>1); - x[24] = ur - (vr>>1); - x[25] = ui - (vi>>1); - -// xt1 = 20 -// xt2 = 28 - vr = y[29]; - vi = y[28]; - ur = y[20]>>1; - ui = y[21]>>1; - x[20] = ur + (vr>>1); - x[21] = ui - (vi>>1); - x[28] = ur - (vr>>1); - x[29] = ui + (vi>>1); +inline void fft_16(FIXP_DBL *RESTRICT x) { + FIXP_DBL vr, ur; + FIXP_DBL vr2, ur2; + FIXP_DBL vr3, ur3; + FIXP_DBL vr4, ur4; + FIXP_DBL vi, ui; + FIXP_DBL vi2, ui2; + FIXP_DBL vi3, ui3; + + vr = (x[0] >> 1) + (x[16] >> 1); /* Re A + Re B */ + ur = (x[1] >> 1) + (x[17] >> 1); /* Im A + Im B */ + vi = (x[8] SHIFT_A) + (x[24] SHIFT_A); /* Re C + Re D */ + ui = (x[9] SHIFT_A) + (x[25] SHIFT_A); /* Im C + Im D */ + x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ + x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */ + + vr2 = (x[4] >> 1) + (x[20] >> 1); /* Re A + Re B */ + ur2 = (x[5] >> 1) + (x[21] >> 1); /* Im A + Im B */ + + x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ + x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ + vr -= x[16]; /* Re A - Re B */ + vi = (vi SHIFT_B)-x[24]; /* Re C - Re D */ + ur -= x[17]; /* Im A - Im B */ + ui = (ui SHIFT_B)-x[25]; /* Im C - Im D */ + + vr3 = (x[2] >> 1) + (x[18] >> 1); /* Re A + Re B */ + ur3 = (x[3] >> 1) + (x[19] >> 1); /* Im A + Im B */ + + x[2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ + x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ + + vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */ + ur4 = (x[7] >> 1) + (x[23] >> 1); /* Im A + Im B */ + + x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ + x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ + + vi2 = (x[12] SHIFT_A) + (x[28] SHIFT_A); /* Re C + Re D */ + ui2 = (x[13] SHIFT_A) + (x[29] SHIFT_A); /* Im C + Im D */ + x[8] = vr2 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ + x[9] = ur2 + (ui2 SHIFT_B); /* Im A' = sum of imag values */ + x[12] = vr2 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ + x[13] = ur2 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ + vr2 -= x[20]; /* Re A - Re B */ + ur2 -= x[21]; /* Im A - Im B */ + vi2 = (vi2 SHIFT_B)-x[28]; /* Re C - Re D */ + ui2 = (ui2 SHIFT_B)-x[29]; /* Im C - Im D */ + + vi = (x[10] SHIFT_A) + (x[26] SHIFT_A); /* Re C + Re D */ + ui = (x[11] SHIFT_A) + (x[27] SHIFT_A); /* Im C + Im D */ + + x[10] = ui2 + vr2; /* Re B' = Im C - Im D + Re A - Re B */ + x[11] = ur2 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */ + + vi3 = (x[14] SHIFT_A) + (x[30] SHIFT_A); /* Re C + Re D */ + ui3 = (x[15] SHIFT_A) + (x[31] SHIFT_A); /* Im C + Im D */ + + x[14] = vr2 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */ + x[15] = vi2 + ur2; /* Im D'= Re C - Re D + Im A - Im B */ + + x[16] = vr3 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ + x[17] = ur3 + (ui SHIFT_B); /* Im A' = sum of imag values */ + x[20] = vr3 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ + x[21] = ur3 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ + vr3 -= x[18]; /* Re A - Re B */ + ur3 -= x[19]; /* Im A - Im B */ + vi = (vi SHIFT_B)-x[26]; /* Re C - Re D */ + ui = (ui SHIFT_B)-x[27]; /* Im C - Im D */ + x[18] = ui + vr3; /* Re B' = Im C - Im D + Re A - Re B */ + x[19] = ur3 - vi; /* Im B'= -Re C + Re D + Im A - Im B */ + + x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ + x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ + x[25] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */ + x[29] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ + vr4 -= x[22]; /* Re A - Re B */ + ur4 -= x[23]; /* Im A - Im B */ + + x[22] = vr3 - ui; /* Re D' = -Im C + Im D + Re A - Re B */ + x[23] = vi + ur3; /* Im D'= Re C - Re D + Im A - Im B */ + + vi3 = (vi3 SHIFT_B)-x[30]; /* Re C - Re D */ + ui3 = (ui3 SHIFT_B)-x[31]; /* Im C - Im D */ + x[26] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */ + x[30] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */ + x[27] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */ + x[31] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */ + + // xt1 = 0 + // xt2 = 8 + vr = x[8]; + vi = x[9]; + ur = x[0] >> 1; + ui = x[1] >> 1; + x[0] = ur + (vr >> 1); + x[1] = ui + (vi >> 1); + x[8] = ur - (vr >> 1); + x[9] = ui - (vi >> 1); + + // xt1 = 4 + // xt2 = 12 + vr = x[13]; + vi = x[12]; + ur = x[4] >> 1; + ui = x[5] >> 1; + x[4] = ur + (vr >> 1); + x[5] = ui - (vi >> 1); + x[12] = ur - (vr >> 1); + x[13] = ui + (vi >> 1); + + // xt1 = 16 + // xt2 = 24 + vr = x[24]; + vi = x[25]; + ur = x[16] >> 1; + ui = x[17] >> 1; + x[16] = ur + (vr >> 1); + x[17] = ui + (vi >> 1); + x[24] = ur - (vr >> 1); + x[25] = ui - (vi >> 1); + + // xt1 = 20 + // xt2 = 28 + vr = x[29]; + vi = x[28]; + ur = x[20] >> 1; + ui = x[21] >> 1; + x[20] = ur + (vr >> 1); + x[21] = ui - (vi >> 1); + x[28] = ur - (vr >> 1); + x[29] = ui + (vi >> 1); // xt1 = 2 -// xt2 = 10 - SUMDIFF_PIFOURTH(vi, vr, y[10], y[11]) - //vr = fMultDiv2((y[11] + y[10]),W_PiFOURTH); - //vi = fMultDiv2((y[11] - y[10]),W_PiFOURTH); - ur = y[ 2]; - ui = y[ 3]; - x[ 2] = (ur>>1) + vr; - x[ 3] = (ui>>1) + vi; - x[10] = (ur>>1) - vr; - x[11] = (ui>>1) - vi; - -// xt1 = 6 -// xt2 = 14 - SUMDIFF_PIFOURTH(vr, vi, y[14], y[15]) - ur = y[ 6]; - ui = y[ 7]; - x[ 6] = (ur>>1) + vr; - x[ 7] = (ui>>1) - vi; - x[14] = (ur>>1) - vr; - x[15] = (ui>>1) + vi; - -// xt1 = 18 -// xt2 = 26 - SUMDIFF_PIFOURTH(vi, vr, y[26], y[27]) - ur = y[18]; - ui = y[19]; - x[18] = (ur>>1) + vr; - x[19] = (ui>>1) + vi; - x[26] = (ur>>1) - vr; - x[27] = (ui>>1) - vi; - -// xt1 = 22 -// xt2 = 30 - SUMDIFF_PIFOURTH(vr, vi, y[30], y[31]) - ur = y[22]; - ui = y[23]; - x[22] = (ur>>1) + vr; - x[23] = (ui>>1) - vi; - x[30] = (ur>>1) - vr; - x[31] = (ui>>1) + vi; - -// xt1 = 0 -// xt2 = 16 + // xt2 = 10 + SUMDIFF_PIFOURTH(vi, vr, x[10], x[11]) + // vr = fMultDiv2((x[11] + x[10]),W_PiFOURTH); + // vi = fMultDiv2((x[11] - x[10]),W_PiFOURTH); + ur = x[2]; + ui = x[3]; + x[2] = (ur >> 1) + vr; + x[3] = (ui >> 1) + vi; + x[10] = (ur >> 1) - vr; + x[11] = (ui >> 1) - vi; + + // xt1 = 6 + // xt2 = 14 + SUMDIFF_PIFOURTH(vr, vi, x[14], x[15]) + ur = x[6]; + ui = x[7]; + x[6] = (ur >> 1) + vr; + x[7] = (ui >> 1) - vi; + x[14] = (ur >> 1) - vr; + x[15] = (ui >> 1) + vi; + + // xt1 = 18 + // xt2 = 26 + SUMDIFF_PIFOURTH(vi, vr, x[26], x[27]) + ur = x[18]; + ui = x[19]; + x[18] = (ur >> 1) + vr; + x[19] = (ui >> 1) + vi; + x[26] = (ur >> 1) - vr; + x[27] = (ui >> 1) - vi; + + // xt1 = 22 + // xt2 = 30 + SUMDIFF_PIFOURTH(vr, vi, x[30], x[31]) + ur = x[22]; + ui = x[23]; + x[22] = (ur >> 1) + vr; + x[23] = (ui >> 1) - vi; + x[30] = (ur >> 1) - vr; + x[31] = (ui >> 1) + vi; + + // xt1 = 0 + // xt2 = 16 vr = x[16]; vi = x[17]; - ur = x[ 0]>>1; - ui = x[ 1]>>1; - x[ 0] = ur + (vr>>1); - x[ 1] = ui + (vi>>1); - x[16] = ur - (vr>>1); - x[17] = ui - (vi>>1); - -// xt1 = 8 -// xt2 = 24 + ur = x[0] >> 1; + ui = x[1] >> 1; + x[0] = ur + (vr >> 1); + x[1] = ui + (vi >> 1); + x[16] = ur - (vr >> 1); + x[17] = ui - (vi >> 1); + + // xt1 = 8 + // xt2 = 24 vi = x[24]; vr = x[25]; - ur = x[ 8]>>1; - ui = x[ 9]>>1; - x[ 8] = ur + (vr>>1); - x[ 9] = ui - (vi>>1); - x[24] = ur - (vr>>1); - x[25] = ui + (vi>>1); - -// xt1 = 2 -// xt2 = 18 + ur = x[8] >> 1; + ui = x[9] >> 1; + x[8] = ur + (vr >> 1); + x[9] = ui - (vi >> 1); + x[24] = ur - (vr >> 1); + x[25] = ui + (vi >> 1); + + // xt1 = 2 + // xt2 = 18 cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]); - ur = x[ 2]; - ui = x[ 3]; - x[ 2] = (ur>>1) + vr; - x[ 3] = (ui>>1) + vi; - x[18] = (ur>>1) - vr; - x[19] = (ui>>1) - vi; - -// xt1 = 10 -// xt2 = 26 + ur = x[2]; + ui = x[3]; + x[2] = (ur >> 1) + vr; + x[3] = (ui >> 1) + vi; + x[18] = (ur >> 1) - vr; + x[19] = (ui >> 1) - vi; + + // xt1 = 10 + // xt2 = 26 cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]); ur = x[10]; ui = x[11]; - x[10] = (ur>>1) + vr; - x[11] = (ui>>1) - vi; - x[26] = (ur>>1) - vr; - x[27] = (ui>>1) + vi; + x[10] = (ur >> 1) + vr; + x[11] = (ui >> 1) - vi; + x[26] = (ur >> 1) - vr; + x[27] = (ui >> 1) + vi; -// xt1 = 4 -// xt2 = 20 + // xt1 = 4 + // xt2 = 20 SUMDIFF_PIFOURTH(vi, vr, x[20], x[21]) - ur = x[ 4]; - ui = x[ 5]; - x[ 4] = (ur>>1) + vr; - x[ 5] = (ui>>1) + vi; - x[20] = (ur>>1) - vr; - x[21] = (ui>>1) - vi; - -// xt1 = 12 -// xt2 = 28 + ur = x[4]; + ui = x[5]; + x[4] = (ur >> 1) + vr; + x[5] = (ui >> 1) + vi; + x[20] = (ur >> 1) - vr; + x[21] = (ui >> 1) - vi; + + // xt1 = 12 + // xt2 = 28 SUMDIFF_PIFOURTH(vr, vi, x[28], x[29]) ur = x[12]; ui = x[13]; - x[12] = (ur>>1) + vr; - x[13] = (ui>>1) - vi; - x[28] = (ur>>1) - vr; - x[29] = (ui>>1) + vi; + x[12] = (ur >> 1) + vr; + x[13] = (ui >> 1) - vi; + x[28] = (ur >> 1) - vr; + x[29] = (ui >> 1) + vi; -// xt1 = 6 -// xt2 = 22 + // xt1 = 6 + // xt2 = 22 cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]); - ur = x[ 6]; - ui = x[ 7]; - x[ 6] = (ur>>1) + vr; - x[ 7] = (ui>>1) + vi; - x[22] = (ur>>1) - vr; - x[23] = (ui>>1) - vi; - -// xt1 = 14 -// xt2 = 30 + ur = x[6]; + ui = x[7]; + x[6] = (ur >> 1) + vr; + x[7] = (ui >> 1) + vi; + x[22] = (ur >> 1) - vr; + x[23] = (ui >> 1) - vi; + + // xt1 = 14 + // xt2 = 30 cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]); ur = x[14]; ui = x[15]; - x[14] = (ur>>1) + vr; - x[15] = (ui>>1) - vi; - x[30] = (ur>>1) - vr; - x[31] = (ui>>1) + vi; + x[14] = (ur >> 1) + vr; + x[15] = (ui >> 1) - vi; + x[30] = (ur >> 1) - vr; + x[31] = (ui >> 1) + vi; } +#endif /* FUNCTION_fft_16 */ #ifndef FUNCTION_fft_32 -static const FIXP_STP fft32_w32[6] = -{ - STCP (0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d), STCP(0x7d8a5f40, 0x18f8b83c), - STCP (0x6a6d98a4, 0x471cece7), STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40) -}; - -LNK_SECTION_CODE_L1 -inline void fft_32(FIXP_DBL *x) -{ - +static const FIXP_STP fft32_w32[6] = { + STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d), + STCP(0x7d8a5f40, 0x18f8b83c), STCP(0x6a6d98a4, 0x471cece7), + STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)}; #define W_PiFOURTH STC(0x5a82799a) - FIXP_DBL vr,vi,ur,ui; - FIXP_DBL y[64]; - +LNK_SECTION_CODE_L1 +inline void fft_32(FIXP_DBL *const _x) { /* * 1+2 stage radix 4 */ -///////////////////////////////////////////////////////////////////////////////////////// - - // i = 0 - vr = (x[ 0] + x[32])>>1; /* Re A + Re B */ - vi = (x[16] + x[48]); /* Re C + Re D */ - ur = (x[ 1] + x[33])>>1; /* Im A + Im B */ - ui = (x[17] + x[49]); /* Im C + Im D */ - - y[ 0] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ - y[ 4] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ - y[ 1] = ur + (ui>>1); /* Im A' = sum of imag values */ - y[ 5] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ - - vr -= x[32]; /* Re A - Re B */ - vi = (vi>>1) - x[48]; /* Re C - Re D */ - ur -= x[33]; /* Im A - Im B */ - ui = (ui>>1) - x[49]; /* Im C - Im D */ - - y[ 2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ - y[ 6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ - y[ 3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ - y[ 7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ - - //i=8 - vr = (x[ 8] + x[40])>>1; /* Re A + Re B */ - vi = (x[24] + x[56]); /* Re C + Re D */ - ur = (x[ 9] + x[41])>>1; /* Im A + Im B */ - ui = (x[25] + x[57]); /* Im C + Im D */ - - y[ 8] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ - y[12] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ - y[ 9] = ur + (ui>>1); /* Im A' = sum of imag values */ - y[13] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ - - vr -= x[40]; /* Re A - Re B */ - vi = (vi>>1) - x[56]; /* Re C - Re D */ - ur -= x[41]; /* Im A - Im B */ - ui = (ui>>1) - x[57]; /* Im C - Im D */ - - y[10] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ - y[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ - y[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ - y[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ - - //i=16 - vr = (x[ 4] + x[36])>>1; /* Re A + Re B */ - vi = (x[20] + x[52]); /* Re C + Re D */ - ur = (x[ 5] + x[37])>>1; /* Im A + Im B */ - ui = (x[21] + x[53]); /* Im C + Im D */ - - y[16] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ - y[20] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ - y[17] = ur + (ui>>1); /* Im A' = sum of imag values */ - y[21] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ - - vr -= x[36]; /* Re A - Re B */ - vi = (vi>>1) - x[52]; /* Re C - Re D */ - ur -= x[37]; /* Im A - Im B */ - ui = (ui>>1) - x[53]; /* Im C - Im D */ - - y[18] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ - y[22] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ - y[19] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ - y[23] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ - - //i=24 - vr = (x[12] + x[44])>>1; /* Re A + Re B */ - vi = (x[28] + x[60]); /* Re C + Re D */ - ur = (x[13] + x[45])>>1; /* Im A + Im B */ - ui = (x[29] + x[61]); /* Im C + Im D */ - - y[24] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ - y[28] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ - y[25] = ur + (ui>>1); /* Im A' = sum of imag values */ - y[29] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ - - vr -= x[44]; /* Re A - Re B */ - vi = (vi>>1) - x[60]; /* Re C - Re D */ - ur -= x[45]; /* Im A - Im B */ - ui = (ui>>1) - x[61]; /* Im C - Im D */ - - y[26] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ - y[30] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ - y[27] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ - y[31] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ - - // i = 32 - vr = (x[ 2] + x[34])>>1; /* Re A + Re B */ - vi = (x[18] + x[50]); /* Re C + Re D */ - ur = (x[ 3] + x[35])>>1; /* Im A + Im B */ - ui = (x[19] + x[51]); /* Im C + Im D */ - - y[32] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ - y[36] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ - y[33] = ur + (ui>>1); /* Im A' = sum of imag values */ - y[37] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ - - vr -= x[34]; /* Re A - Re B */ - vi = (vi>>1) - x[50]; /* Re C - Re D */ - ur -= x[35]; /* Im A - Im B */ - ui = (ui>>1) - x[51]; /* Im C - Im D */ - - y[34] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ - y[38] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ - y[35] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ - y[39] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ - - //i=40 - vr = (x[10] + x[42])>>1; /* Re A + Re B */ - vi = (x[26] + x[58]); /* Re C + Re D */ - ur = (x[11] + x[43])>>1; /* Im A + Im B */ - ui = (x[27] + x[59]); /* Im C + Im D */ - - y[40] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ - y[44] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ - y[41] = ur + (ui>>1); /* Im A' = sum of imag values */ - y[45] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ - - vr -= x[42]; /* Re A - Re B */ - vi = (vi>>1) - x[58]; /* Re C - Re D */ - ur -= x[43]; /* Im A - Im B */ - ui = (ui>>1) - x[59]; /* Im C - Im D */ - - y[42] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ - y[46] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ - y[43] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ - y[47] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ - - //i=48 - vr = (x[ 6] + x[38])>>1; /* Re A + Re B */ - vi = (x[22] + x[54]); /* Re C + Re D */ - ur = (x[ 7] + x[39])>>1; /* Im A + Im B */ - ui = (x[23] + x[55]); /* Im C + Im D */ - - y[48] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ - y[52] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ - y[49] = ur + (ui>>1); /* Im A' = sum of imag values */ - y[53] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ - - vr -= x[38]; /* Re A - Re B */ - vi = (vi>>1) - x[54]; /* Re C - Re D */ - ur -= x[39]; /* Im A - Im B */ - ui = (ui>>1) - x[55]; /* Im C - Im D */ - - y[50] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ - y[54] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ - y[51] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ - y[55] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ - - //i=56 - vr = (x[14] + x[46])>>1; /* Re A + Re B */ - vi = (x[30] + x[62]); /* Re C + Re D */ - ur = (x[15] + x[47])>>1; /* Im A + Im B */ - ui = (x[31] + x[63]); /* Im C + Im D */ - - y[56] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ - y[60] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ - y[57] = ur + (ui>>1); /* Im A' = sum of imag values */ - y[61] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ - - vr -= x[46]; /* Re A - Re B */ - vi = (vi>>1) - x[62]; /* Re C - Re D */ - ur -= x[47]; /* Im A - Im B */ - ui = (ui>>1) - x[63]; /* Im C - Im D */ - - y[58] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ - y[62] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ - y[59] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ - y[63] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ - - - FIXP_DBL *xt = &x[0]; - FIXP_DBL *yt = &y[0]; - - int j = 4; - do + ///////////////////////////////////////////////////////////////////////////////////////// { - vr = yt[8]; - vi = yt[9]; - ur = yt[0]>>1; - ui = yt[1]>>1; - xt[ 0] = ur + (vr>>1); - xt[ 1] = ui + (vi>>1); - xt[ 8] = ur - (vr>>1); - xt[ 9] = ui - (vi>>1); - - vr = yt[13]; - vi = yt[12]; - ur = yt[4]>>1; - ui = yt[5]>>1; - xt[ 4] = ur + (vr>>1); - xt[ 5] = ui - (vi>>1); - xt[12] = ur - (vr>>1); - xt[13] = ui + (vi>>1); - - SUMDIFF_PIFOURTH(vi, vr, yt[10], yt[11]) - ur = yt[2]; - ui = yt[3]; - xt[ 2] = (ur>>1) + vr; - xt[ 3] = (ui>>1) + vi; - xt[10] = (ur>>1) - vr; - xt[11] = (ui>>1) - vi; - - SUMDIFF_PIFOURTH(vr, vi, yt[14], yt[15]) - ur = yt[6]; - ui = yt[7]; - - xt[ 6] = (ur>>1) + vr; - xt[ 7] = (ui>>1) - vi; - xt[14] = (ur>>1) - vr; - xt[15] = (ui>>1) + vi; - xt += 16; - yt += 16; - } while (--j != 0); + FIXP_DBL *const x = _x; + FIXP_DBL vi, ui; + FIXP_DBL vi2, ui2; + FIXP_DBL vi3, ui3; + FIXP_DBL vr, ur; + FIXP_DBL vr2, ur2; + FIXP_DBL vr3, ur3; + FIXP_DBL vr4, ur4; - vr = x[16]; - vi = x[17]; - ur = x[ 0]>>1; - ui = x[ 1]>>1; - x[ 0] = ur + (vr>>1); - x[ 1] = ui + (vi>>1); - x[16] = ur - (vr>>1); - x[17] = ui - (vi>>1); + // i = 0 + vr = (x[0] + x[32]) >> 1; /* Re A + Re B */ + ur = (x[1] + x[33]) >> 1; /* Im A + Im B */ + vi = (x[16] + x[48]) SHIFT_A; /* Re C + Re D */ + ui = (x[17] + x[49]) SHIFT_A; /* Im C + Im D */ - vi = x[24]; - vr = x[25]; - ur = x[ 8]>>1; - ui = x[ 9]>>1; - x[ 8] = ur + (vr>>1); - x[ 9] = ui - (vi>>1); - x[24] = ur - (vr>>1); - x[25] = ui + (vi>>1); - - vr = x[48]; - vi = x[49]; - ur = x[32]>>1; - ui = x[33]>>1; - x[32] = ur + (vr>>1); - x[33] = ui + (vi>>1); - x[48] = ur - (vr>>1); - x[49] = ui - (vi>>1); - - vi = x[56]; - vr = x[57]; - ur = x[40]>>1; - ui = x[41]>>1; - x[40] = ur + (vr>>1); - x[41] = ui - (vi>>1); - x[56] = ur - (vr>>1); - x[57] = ui + (vi>>1); - - cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]); - ur = x[ 2]; - ui = x[ 3]; - x[ 2] = (ur>>1) + vr; - x[ 3] = (ui>>1) + vi; - x[18] = (ur>>1) - vr; - x[19] = (ui>>1) - vi; - - cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]); - ur = x[10]; - ui = x[11]; - x[10] = (ur>>1) + vr; - x[11] = (ui>>1) - vi; - x[26] = (ur>>1) - vr; - x[27] = (ui>>1) + vi; - - cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]); - ur = x[34]; - ui = x[35]; - x[34] = (ur>>1) + vr; - x[35] = (ui>>1) + vi; - x[50] = (ur>>1) - vr; - x[51] = (ui>>1) - vi; - - cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]); - ur = x[42]; - ui = x[43]; - x[42] = (ur>>1) + vr; - x[43] = (ui>>1) - vi; - x[58] = (ur>>1) - vr; - x[59] = (ui>>1) + vi; + x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ + x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */ - SUMDIFF_PIFOURTH(vi, vr, x[20], x[21]) - ur = x[ 4]; - ui = x[ 5]; - x[ 4] = (ur>>1) + vr; - x[ 5] = (ui>>1) + vi; - x[20] = (ur>>1) - vr; - x[21] = (ui>>1) - vi; + vr2 = (x[4] + x[36]) >> 1; /* Re A + Re B */ + ur2 = (x[5] + x[37]) >> 1; /* Im A + Im B */ - SUMDIFF_PIFOURTH(vr, vi, x[28], x[29]) - ur = x[12]; - ui = x[13]; - x[12] = (ur>>1) + vr; - x[13] = (ui>>1) - vi; - x[28] = (ur>>1) - vr; - x[29] = (ui>>1) + vi; - - SUMDIFF_PIFOURTH(vi, vr, x[52], x[53]) - ur = x[36]; - ui = x[37]; - x[36] = (ur>>1) + vr; - x[37] = (ui>>1) + vi; - x[52] = (ur>>1) - vr; - x[53] = (ui>>1) - vi; - - SUMDIFF_PIFOURTH(vr, vi, x[60], x[61]) - ur = x[44]; - ui = x[45]; - x[44] = (ur>>1) + vr; - x[45] = (ui>>1) - vi; - x[60] = (ur>>1) - vr; - x[61] = (ui>>1) + vi; - - - cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]); - ur = x[ 6]; - ui = x[ 7]; - x[ 6] = (ur>>1) + vr; - x[ 7] = (ui>>1) + vi; - x[22] = (ur>>1) - vr; - x[23] = (ui>>1) - vi; - - cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]); - ur = x[14]; - ui = x[15]; - x[14] = (ur>>1) + vr; - x[15] = (ui>>1) - vi; - x[30] = (ur>>1) - vr; - x[31] = (ui>>1) + vi; - - cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]); - ur = x[38]; - ui = x[39]; - x[38] = (ur>>1) + vr; - x[39] = (ui>>1) + vi; - x[54] = (ur>>1) - vr; - x[55] = (ui>>1) - vi; - - cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]); - ur = x[46]; - ui = x[47]; - - x[46] = (ur>>1) + vr; - x[47] = (ui>>1) - vi; - x[62] = (ur>>1) - vr; - x[63] = (ui>>1) + vi; - - vr = x[32]; - vi = x[33]; - ur = x[ 0]>>1; - ui = x[ 1]>>1; - x[ 0] = ur + (vr>>1); - x[ 1] = ui + (vi>>1); - x[32] = ur - (vr>>1); - x[33] = ui - (vi>>1); - - vi = x[48]; - vr = x[49]; - ur = x[16]>>1; - ui = x[17]>>1; - x[16] = ur + (vr>>1); - x[17] = ui - (vi>>1); - x[48] = ur - (vr>>1); - x[49] = ui + (vi>>1); - - cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]); - ur = x[ 2]; - ui = x[ 3]; - x[ 2] = (ur>>1) + vr; - x[ 3] = (ui>>1) + vi; - x[34] = (ur>>1) - vr; - x[35] = (ui>>1) - vi; - - cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]); - ur = x[18]; - ui = x[19]; - x[18] = (ur>>1) + vr; - x[19] = (ui>>1) - vi; - x[50] = (ur>>1) - vr; - x[51] = (ui>>1) + vi; - - cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]); - ur = x[ 4]; - ui = x[ 5]; - x[ 4] = (ur>>1) + vr; - x[ 5] = (ui>>1) + vi; - x[36] = (ur>>1) - vr; - x[37] = (ui>>1) - vi; - - cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]); - ur = x[20]; - ui = x[21]; - x[20] = (ur>>1) + vr; - x[21] = (ui>>1) - vi; - x[52] = (ur>>1) - vr; - x[53] = (ui>>1) + vi; - - cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]); - ur = x[ 6]; - ui = x[ 7]; - x[ 6] = (ur>>1) + vr; - x[ 7] = (ui>>1) + vi; - x[38] = (ur>>1) - vr; - x[39] = (ui>>1) - vi; - - cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]); - ur = x[22]; - ui = x[23]; - x[22] = (ur>>1) + vr; - x[23] = (ui>>1) - vi; - x[54] = (ur>>1) - vr; - x[55] = (ui>>1) + vi; - - SUMDIFF_PIFOURTH(vi, vr, x[40], x[41]) - ur = x[ 8]; - ui = x[ 9]; - x[ 8] = (ur>>1) + vr; - x[ 9] = (ui>>1) + vi; - x[40] = (ur>>1) - vr; - x[41] = (ui>>1) - vi; - - SUMDIFF_PIFOURTH(vr, vi, x[56], x[57]) - ur = x[24]; - ui = x[25]; - x[24] = (ur>>1) + vr; - x[25] = (ui>>1) - vi; - x[56] = (ur>>1) - vr; - x[57] = (ui>>1) + vi; - - cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]); - ur = x[10]; - ui = x[11]; + x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ + x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ - x[10] = (ur>>1) + vr; - x[11] = (ui>>1) + vi; - x[42] = (ur>>1) - vr; - x[43] = (ui>>1) - vi; + vr -= x[32]; /* Re A - Re B */ + ur -= x[33]; /* Im A - Im B */ + vi = (vi SHIFT_B)-x[48]; /* Re C - Re D */ + ui = (ui SHIFT_B)-x[49]; /* Im C - Im D */ - cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]); - ur = x[26]; - ui = x[27]; - x[26] = (ur>>1) + vr; - x[27] = (ui>>1) - vi; - x[58] = (ur>>1) - vr; - x[59] = (ui>>1) + vi; + vr3 = (x[2] + x[34]) >> 1; /* Re A + Re B */ + ur3 = (x[3] + x[35]) >> 1; /* Im A + Im B */ - cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]); - ur = x[12]; - ui = x[13]; - x[12] = (ur>>1) + vr; - x[13] = (ui>>1) + vi; - x[44] = (ur>>1) - vr; - x[45] = (ui>>1) - vi; - - cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]); - ur = x[28]; - ui = x[29]; - x[28] = (ur>>1) + vr; - x[29] = (ui>>1) - vi; - x[60] = (ur>>1) - vr; - x[61] = (ui>>1) + vi; - - cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]); - ur = x[14]; - ui = x[15]; - x[14] = (ur>>1) + vr; - x[15] = (ui>>1) + vi; - x[46] = (ur>>1) - vr; - x[47] = (ui>>1) - vi; - - cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]); - ur = x[30]; - ui = x[31]; - x[30] = (ur>>1) + vr; - x[31] = (ui>>1) - vi; - x[62] = (ur>>1) - vr; - x[63] = (ui>>1) + vi; + x[2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ + x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ + + vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */ + ur4 = (x[7] + x[39]) >> 1; /* Im A + Im B */ + + x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ + x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ + + // i=16 + vi = (x[20] + x[52]) SHIFT_A; /* Re C + Re D */ + ui = (x[21] + x[53]) SHIFT_A; /* Im C + Im D */ + + x[16] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ + x[17] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */ + x[20] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ + x[21] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ + + vr2 -= x[36]; /* Re A - Re B */ + ur2 -= x[37]; /* Im A - Im B */ + vi = (vi SHIFT_B)-x[52]; /* Re C - Re D */ + ui = (ui SHIFT_B)-x[53]; /* Im C - Im D */ + + vi2 = (x[18] + x[50]) SHIFT_A; /* Re C + Re D */ + ui2 = (x[19] + x[51]) SHIFT_A; /* Im C + Im D */ + + x[18] = ui + vr2; /* Re B' = Im C - Im D + Re A - Re B */ + x[19] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */ + + vi3 = (x[22] + x[54]) SHIFT_A; /* Re C + Re D */ + ui3 = (x[23] + x[55]) SHIFT_A; /* Im C + Im D */ + + x[22] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */ + x[23] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */ + + // i = 32 + + x[32] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ + x[33] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */ + x[36] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ + x[37] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ + + vr3 -= x[34]; /* Re A - Re B */ + ur3 -= x[35]; /* Im A - Im B */ + vi2 = (vi2 SHIFT_B)-x[50]; /* Re C - Re D */ + ui2 = (ui2 SHIFT_B)-x[51]; /* Im C - Im D */ + + x[34] = ui2 + vr3; /* Re B' = Im C - Im D + Re A - Re B */ + x[35] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */ + + // i=48 + + x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ + x[52] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ + x[49] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */ + x[53] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ + + vr4 -= x[38]; /* Re A - Re B */ + ur4 -= x[39]; /* Im A - Im B */ + + x[38] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */ + x[39] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */ + + vi3 = (vi3 SHIFT_B)-x[54]; /* Re C - Re D */ + ui3 = (ui3 SHIFT_B)-x[55]; /* Im C - Im D */ + + x[50] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */ + x[54] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */ + x[51] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */ + x[55] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */ + + // i=8 + vr = (x[8] + x[40]) >> 1; /* Re A + Re B */ + ur = (x[9] + x[41]) >> 1; /* Im A + Im B */ + vi = (x[24] + x[56]) SHIFT_A; /* Re C + Re D */ + ui = (x[25] + x[57]) SHIFT_A; /* Im C + Im D */ + + x[8] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ + x[9] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */ + + vr2 = (x[12] + x[44]) >> 1; /* Re A + Re B */ + ur2 = (x[13] + x[45]) >> 1; /* Im A + Im B */ + + x[12] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ + x[13] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ + + vr -= x[40]; /* Re A - Re B */ + ur -= x[41]; /* Im A - Im B */ + vi = (vi SHIFT_B)-x[56]; /* Re C - Re D */ + ui = (ui SHIFT_B)-x[57]; /* Im C - Im D */ + + vr3 = (x[10] + x[42]) >> 1; /* Re A + Re B */ + ur3 = (x[11] + x[43]) >> 1; /* Im A + Im B */ + + x[10] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ + x[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ + + vr4 = (x[14] + x[46]) >> 1; /* Re A + Re B */ + ur4 = (x[15] + x[47]) >> 1; /* Im A + Im B */ + + x[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ + x[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ + + // i=24 + vi = (x[28] + x[60]) SHIFT_A; /* Re C + Re D */ + ui = (x[29] + x[61]) SHIFT_A; /* Im C + Im D */ + + x[24] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ + x[28] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ + x[25] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */ + x[29] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ + + vr2 -= x[44]; /* Re A - Re B */ + ur2 -= x[45]; /* Im A - Im B */ + vi = (vi SHIFT_B)-x[60]; /* Re C - Re D */ + ui = (ui SHIFT_B)-x[61]; /* Im C - Im D */ + + vi2 = (x[26] + x[58]) SHIFT_A; /* Re C + Re D */ + ui2 = (x[27] + x[59]) SHIFT_A; /* Im C + Im D */ + + x[26] = ui + vr2; /* Re B' = Im C - Im D + Re A - Re B */ + x[27] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */ + + vi3 = (x[30] + x[62]) SHIFT_A; /* Re C + Re D */ + ui3 = (x[31] + x[63]) SHIFT_A; /* Im C + Im D */ + + x[30] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */ + x[31] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */ + + // i=40 + + x[40] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ + x[44] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ + x[41] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */ + x[45] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ + + vr3 -= x[42]; /* Re A - Re B */ + ur3 -= x[43]; /* Im A - Im B */ + vi2 = (vi2 SHIFT_B)-x[58]; /* Re C - Re D */ + ui2 = (ui2 SHIFT_B)-x[59]; /* Im C - Im D */ + + x[42] = ui2 + vr3; /* Re B' = Im C - Im D + Re A - Re B */ + x[43] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */ + + // i=56 + + x[56] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ + x[60] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ + x[57] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */ + x[61] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ + + vr4 -= x[46]; /* Re A - Re B */ + ur4 -= x[47]; /* Im A - Im B */ + + x[46] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */ + x[47] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */ + + vi3 = (vi3 SHIFT_B)-x[62]; /* Re C - Re D */ + ui3 = (ui3 SHIFT_B)-x[63]; /* Im C - Im D */ + + x[58] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */ + x[62] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */ + x[59] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */ + x[63] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */ + } + + { + FIXP_DBL *xt = _x; + + int j = 4; + do { + FIXP_DBL vi, ui, vr, ur; + + vr = xt[8]; + vi = xt[9]; + ur = xt[0] >> 1; + ui = xt[1] >> 1; + xt[0] = ur + (vr >> 1); + xt[1] = ui + (vi >> 1); + xt[8] = ur - (vr >> 1); + xt[9] = ui - (vi >> 1); + + vr = xt[13]; + vi = xt[12]; + ur = xt[4] >> 1; + ui = xt[5] >> 1; + xt[4] = ur + (vr >> 1); + xt[5] = ui - (vi >> 1); + xt[12] = ur - (vr >> 1); + xt[13] = ui + (vi >> 1); + + SUMDIFF_PIFOURTH(vi, vr, xt[10], xt[11]) + ur = xt[2]; + ui = xt[3]; + xt[2] = (ur >> 1) + vr; + xt[3] = (ui >> 1) + vi; + xt[10] = (ur >> 1) - vr; + xt[11] = (ui >> 1) - vi; + + SUMDIFF_PIFOURTH(vr, vi, xt[14], xt[15]) + ur = xt[6]; + ui = xt[7]; + + xt[6] = (ur >> 1) + vr; + xt[7] = (ui >> 1) - vi; + xt[14] = (ur >> 1) - vr; + xt[15] = (ui >> 1) + vi; + xt += 16; + } while (--j != 0); + } + + { + FIXP_DBL *const x = _x; + FIXP_DBL vi, ui, vr, ur; + + vr = x[16]; + vi = x[17]; + ur = x[0] >> 1; + ui = x[1] >> 1; + x[0] = ur + (vr >> 1); + x[1] = ui + (vi >> 1); + x[16] = ur - (vr >> 1); + x[17] = ui - (vi >> 1); + + vi = x[24]; + vr = x[25]; + ur = x[8] >> 1; + ui = x[9] >> 1; + x[8] = ur + (vr >> 1); + x[9] = ui - (vi >> 1); + x[24] = ur - (vr >> 1); + x[25] = ui + (vi >> 1); + + vr = x[48]; + vi = x[49]; + ur = x[32] >> 1; + ui = x[33] >> 1; + x[32] = ur + (vr >> 1); + x[33] = ui + (vi >> 1); + x[48] = ur - (vr >> 1); + x[49] = ui - (vi >> 1); + + vi = x[56]; + vr = x[57]; + ur = x[40] >> 1; + ui = x[41] >> 1; + x[40] = ur + (vr >> 1); + x[41] = ui - (vi >> 1); + x[56] = ur - (vr >> 1); + x[57] = ui + (vi >> 1); + + cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]); + ur = x[2]; + ui = x[3]; + x[2] = (ur >> 1) + vr; + x[3] = (ui >> 1) + vi; + x[18] = (ur >> 1) - vr; + x[19] = (ui >> 1) - vi; + + cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]); + ur = x[10]; + ui = x[11]; + x[10] = (ur >> 1) + vr; + x[11] = (ui >> 1) - vi; + x[26] = (ur >> 1) - vr; + x[27] = (ui >> 1) + vi; + + cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]); + ur = x[34]; + ui = x[35]; + x[34] = (ur >> 1) + vr; + x[35] = (ui >> 1) + vi; + x[50] = (ur >> 1) - vr; + x[51] = (ui >> 1) - vi; + + cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]); + ur = x[42]; + ui = x[43]; + x[42] = (ur >> 1) + vr; + x[43] = (ui >> 1) - vi; + x[58] = (ur >> 1) - vr; + x[59] = (ui >> 1) + vi; + + SUMDIFF_PIFOURTH(vi, vr, x[20], x[21]) + ur = x[4]; + ui = x[5]; + x[4] = (ur >> 1) + vr; + x[5] = (ui >> 1) + vi; + x[20] = (ur >> 1) - vr; + x[21] = (ui >> 1) - vi; + + SUMDIFF_PIFOURTH(vr, vi, x[28], x[29]) + ur = x[12]; + ui = x[13]; + x[12] = (ur >> 1) + vr; + x[13] = (ui >> 1) - vi; + x[28] = (ur >> 1) - vr; + x[29] = (ui >> 1) + vi; + + SUMDIFF_PIFOURTH(vi, vr, x[52], x[53]) + ur = x[36]; + ui = x[37]; + x[36] = (ur >> 1) + vr; + x[37] = (ui >> 1) + vi; + x[52] = (ur >> 1) - vr; + x[53] = (ui >> 1) - vi; + + SUMDIFF_PIFOURTH(vr, vi, x[60], x[61]) + ur = x[44]; + ui = x[45]; + x[44] = (ur >> 1) + vr; + x[45] = (ui >> 1) - vi; + x[60] = (ur >> 1) - vr; + x[61] = (ui >> 1) + vi; + + cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]); + ur = x[6]; + ui = x[7]; + x[6] = (ur >> 1) + vr; + x[7] = (ui >> 1) + vi; + x[22] = (ur >> 1) - vr; + x[23] = (ui >> 1) - vi; + + cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]); + ur = x[14]; + ui = x[15]; + x[14] = (ur >> 1) + vr; + x[15] = (ui >> 1) - vi; + x[30] = (ur >> 1) - vr; + x[31] = (ui >> 1) + vi; + + cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]); + ur = x[38]; + ui = x[39]; + x[38] = (ur >> 1) + vr; + x[39] = (ui >> 1) + vi; + x[54] = (ur >> 1) - vr; + x[55] = (ui >> 1) - vi; + + cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]); + ur = x[46]; + ui = x[47]; + + x[46] = (ur >> 1) + vr; + x[47] = (ui >> 1) - vi; + x[62] = (ur >> 1) - vr; + x[63] = (ui >> 1) + vi; + + vr = x[32]; + vi = x[33]; + ur = x[0] >> 1; + ui = x[1] >> 1; + x[0] = ur + (vr >> 1); + x[1] = ui + (vi >> 1); + x[32] = ur - (vr >> 1); + x[33] = ui - (vi >> 1); + + vi = x[48]; + vr = x[49]; + ur = x[16] >> 1; + ui = x[17] >> 1; + x[16] = ur + (vr >> 1); + x[17] = ui - (vi >> 1); + x[48] = ur - (vr >> 1); + x[49] = ui + (vi >> 1); + + cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]); + ur = x[2]; + ui = x[3]; + x[2] = (ur >> 1) + vr; + x[3] = (ui >> 1) + vi; + x[34] = (ur >> 1) - vr; + x[35] = (ui >> 1) - vi; + + cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]); + ur = x[18]; + ui = x[19]; + x[18] = (ur >> 1) + vr; + x[19] = (ui >> 1) - vi; + x[50] = (ur >> 1) - vr; + x[51] = (ui >> 1) + vi; + + cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]); + ur = x[4]; + ui = x[5]; + x[4] = (ur >> 1) + vr; + x[5] = (ui >> 1) + vi; + x[36] = (ur >> 1) - vr; + x[37] = (ui >> 1) - vi; + + cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]); + ur = x[20]; + ui = x[21]; + x[20] = (ur >> 1) + vr; + x[21] = (ui >> 1) - vi; + x[52] = (ur >> 1) - vr; + x[53] = (ui >> 1) + vi; + + cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]); + ur = x[6]; + ui = x[7]; + x[6] = (ur >> 1) + vr; + x[7] = (ui >> 1) + vi; + x[38] = (ur >> 1) - vr; + x[39] = (ui >> 1) - vi; + + cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]); + ur = x[22]; + ui = x[23]; + x[22] = (ur >> 1) + vr; + x[23] = (ui >> 1) - vi; + x[54] = (ur >> 1) - vr; + x[55] = (ui >> 1) + vi; + + SUMDIFF_PIFOURTH(vi, vr, x[40], x[41]) + ur = x[8]; + ui = x[9]; + x[8] = (ur >> 1) + vr; + x[9] = (ui >> 1) + vi; + x[40] = (ur >> 1) - vr; + x[41] = (ui >> 1) - vi; + + SUMDIFF_PIFOURTH(vr, vi, x[56], x[57]) + ur = x[24]; + ui = x[25]; + x[24] = (ur >> 1) + vr; + x[25] = (ui >> 1) - vi; + x[56] = (ur >> 1) - vr; + x[57] = (ui >> 1) + vi; + + cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]); + ur = x[10]; + ui = x[11]; + + x[10] = (ur >> 1) + vr; + x[11] = (ui >> 1) + vi; + x[42] = (ur >> 1) - vr; + x[43] = (ui >> 1) - vi; + + cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]); + ur = x[26]; + ui = x[27]; + x[26] = (ur >> 1) + vr; + x[27] = (ui >> 1) - vi; + x[58] = (ur >> 1) - vr; + x[59] = (ui >> 1) + vi; + + cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]); + ur = x[12]; + ui = x[13]; + x[12] = (ur >> 1) + vr; + x[13] = (ui >> 1) + vi; + x[44] = (ur >> 1) - vr; + x[45] = (ui >> 1) - vi; + + cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]); + ur = x[28]; + ui = x[29]; + x[28] = (ur >> 1) + vr; + x[29] = (ui >> 1) - vi; + x[60] = (ur >> 1) - vr; + x[61] = (ui >> 1) + vi; + + cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]); + ur = x[14]; + ui = x[15]; + x[14] = (ur >> 1) + vr; + x[15] = (ui >> 1) + vi; + x[46] = (ur >> 1) - vr; + x[47] = (ui >> 1) - vi; + + cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]); + ur = x[30]; + ui = x[31]; + x[30] = (ur >> 1) + vr; + x[31] = (ui >> 1) - vi; + x[62] = (ur >> 1) - vr; + x[63] = (ui >> 1) + vi; + } } #endif /* #ifndef FUNCTION_fft_32 */ - /** * \brief Apply rotation vectors to a data buffer. * \param cl length of each row of input data. * \param l total length of input data. - * \param pVecRe real part of rotation ceofficient vector. - * \param pVecIm imaginary part of rotation ceofficient vector. + * \param pVecRe real part of rotation coefficient vector. + * \param pVecIm imaginary part of rotation coefficient vector. */ -static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl, const int l, const FIXP_STB *pVecRe, const FIXP_STB *pVecIm) -{ + +/* + This defines patches each inaccurate 0x7FFF i.e. 0.9999 and uses 0x8000 + (-1.0) instead. At the end, the sign of the result is inverted +*/ +#define noFFT_APPLY_ROT_VECTOR_HQ + +#ifndef FUNCTION_fft_apply_rot_vector__FIXP_DBL +static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl, + const int l, const FIXP_STB *pVecRe, + const FIXP_STB *pVecIm) { FIXP_DBL re, im; FIXP_STB vre, vim; int i, c; - for(i=0; i<cl; i++) { - re = pData[2*i]; - im = pData[2*i+1]; + for (i = 0; i < cl; i++) { + re = pData[2 * i]; + im = pData[2 * i + 1]; - pData[2*i] = re>>2; /* * 0.25 */ - pData[2*i+1] = im>>2; /* * 0.25 */ + pData[2 * i] = re >> 2; /* * 0.25 */ + pData[2 * i + 1] = im >> 2; /* * 0.25 */ } - for(; i<l; i+=cl) - { - re = pData[2*i]; - im = pData[2*i+1]; + for (; i < l; i += cl) { + re = pData[2 * i]; + im = pData[2 * i + 1]; - pData[2*i] = re>>2; /* * 0.25 */ - pData[2*i+1] = im>>2; /* * 0.25 */ + pData[2 * i] = re >> 2; /* * 0.25 */ + pData[2 * i + 1] = im >> 2; /* * 0.25 */ - for (c=i+1; c<i+cl; c++) - { - re = pData[2*c]>>1; - im = pData[2*c+1]>>1; + for (c = i + 1; c < i + cl; c++) { + re = pData[2 * c] >> 1; + im = pData[2 * c + 1] >> 1; vre = *pVecRe++; vim = *pVecIm++; - cplxMultDiv2(&pData[2*c+1], &pData[2*c], im, re, vre, vim); + cplxMultDiv2(&pData[2 * c + 1], &pData[2 * c], im, re, vre, vim); } } } - -#define FFT_TWO_STAGE_MACRO_ENABLE - - -#ifdef FFT_TWO_STAGE_MACRO_ENABLE - -#define fftN2(pInput, length, dim1, dim2, fft_func1, fft_func2, RotVectorReal, RotVectorImag) \ -{ \ - int i, j; \ - \ - C_ALLOC_SCRATCH_START(aDst, FIXP_DBL, length*2); \ - C_ALLOC_SCRATCH_START(aDst2, FIXP_DBL, dim2*2); \ - \ - FDK_ASSERT(length == dim1*dim2); \ - \ - /* Perform dim2 times the fft of length dim1. The input samples are at the address of pSrc and the \ - output samples are at the address of pDst. The input vector for the fft of length dim1 is built \ - of the interleaved samples in pSrc, the output samples are stored consecutively. \ - */ \ - { \ - const FIXP_DBL* pSrc = pInput; \ - FIXP_DBL *RESTRICT pDst = aDst; \ - \ - for(i=0; i<dim2; i++) \ - { \ - for(j=0; j<dim1; j++) \ - { \ - pDst[2*j] = pSrc[2*j*dim2]; \ - pDst[2*j+1] = pSrc[2*j*dim2+1]; \ - } \ - \ - fft_func1(pDst); \ - pSrc += 2; \ - pDst = pDst + 2*dim1; \ - } \ - } \ - \ - /* Perform the modulation of the output of the fft of length dim1 */ \ - fft_apply_rot_vector(aDst, dim1, length, RotVectorReal, RotVectorImag); \ - \ - /* Perform dim1 times the fft of length dim2. The input samples are at the address of aDst and the \ - output samples are at the address of pInput. The input vector for the fft of length dim2 is built \ - of the interleaved samples in aDst, the output samples are stored consecutively at the address \ - of pInput. \ - */ \ - { \ - const FIXP_DBL* pSrc = aDst; \ - FIXP_DBL *RESTRICT pDst = aDst2; \ - FIXP_DBL *RESTRICT pDstOut = pInput; \ - \ - for(i=0; i<dim1; i++) \ - { \ - for(j=0; j<dim2; j++) \ - { \ - pDst[2*j] = pSrc[2*j*dim1]; \ - pDst[2*j+1] = pSrc[2*j*dim1+1]; \ - } \ - \ - fft_func2(pDst); \ - \ - for(j=0; j<dim2; j++) \ - { \ - pDstOut[2*j*dim1] = pDst[2*j]; \ - pDstOut[2*j*dim1+1] = pDst[2*j+1]; \ - } \ - pSrc += 2; \ - pDstOut += 2; \ - } \ - } \ - \ - C_ALLOC_SCRATCH_END(aDst2, FIXP_DBL, dim2*2); \ - C_ALLOC_SCRATCH_END(aDst, FIXP_DBL, length*2); \ -} \ - -#else /* FFT_TWO_STAGE_MACRO_ENABLE */ +#endif /* FUNCTION_fft_apply_rot_vector__FIXP_DBL */ /* select either switch case of function pointer. */ //#define FFT_TWO_STAGE_SWITCH_CASE - -static inline void fftN2( - FIXP_DBL *pInput, - const int length, - const int dim1, - const int dim2, - void (* const fft1)(FIXP_DBL *), - void (* const fft2)(FIXP_DBL *), - const FIXP_STB *RotVectorReal, - const FIXP_STB *RotVectorImag - ) -{ - /* The real part of the input samples are at the addresses with even indices and the imaginary - part of the input samples are at the addresses with odd indices. The output samples are stored - at the address of pInput +#ifndef FUNCTION_fftN2_func +static inline void fftN2_func(FIXP_DBL *pInput, const int length, + const int dim1, const int dim2, + void (*const fft1)(FIXP_DBL *), + void (*const fft2)(FIXP_DBL *), + const FIXP_STB *RotVectorReal, + const FIXP_STB *RotVectorImag, FIXP_DBL *aDst, + FIXP_DBL *aDst2) { + /* The real part of the input samples are at the addresses with even indices + and the imaginary part of the input samples are at the addresses with odd + indices. The output samples are stored at the address of pInput */ - FIXP_DBL *pSrc, *pDst, *pDstOut; - int i, j; + FIXP_DBL *pSrc, *pDst, *pDstOut; + int i; - C_ALLOC_SCRATCH_START(aDst, FIXP_DBL, length*2); - C_ALLOC_SCRATCH_START(aDst2, FIXP_DBL, dim2*2); - - FDK_ASSERT(length == dim1*dim2); + FDK_ASSERT(length == dim1 * dim2); - /* Perform dim2 times the fft of length dim1. The input samples are at the address of pSrc and the - output samples are at the address of pDst. The input vector for the fft of length dim1 is built - of the interleaved samples in pSrc, the output samples are stored consecutively. + /* Perform dim2 times the fft of length dim1. The input samples are at the + address of pSrc and the output samples are at the address of pDst. The input + vector for the fft of length dim1 is built of the interleaved samples in pSrc, + the output samples are stored consecutively. */ pSrc = pInput; pDst = aDst; - for(i=0; i<length/dim1; i++) - { - for(j=0; j<length/dim2; j++) - { - pDst[2*j] = pSrc[2*j*dim2]; - pDst[2*j+1] = pSrc[2*j*dim2+1]; + for (i = 0; i < dim2; i++) { + for (int j = 0; j < dim1; j++) { + pDst[2 * j] = pSrc[2 * j * dim2]; + pDst[2 * j + 1] = pSrc[2 * j * dim2 + 1]; } - /* fft of size dim1 */ + /* fft of size dim1 */ #ifndef FFT_TWO_STAGE_SWITCH_CASE fft1(pDst); #else switch (dim1) { - case 3: fft3(pDst); break; - case 4: fft_4(pDst); break; - case 5: fft5(pDst); break; - case 8: fft_8(pDst); break; - case 15: fft15(pDst); break; - case 16: fft_16(pDst); break; - case 32: fft_32(pDst); break; - /*case 64: fft_64(pDst); break;*/ - case 128: fft_128(pDst); break; + case 2: + fft2(pDst); + break; + case 3: + fft3(pDst); + break; + case 4: + fft_4(pDst); + break; + /* case 5: fft5(pDst); break; */ + /* case 8: fft_8(pDst); break; */ + case 12: + fft12(pDst); + break; + /* case 15: fft15(pDst); break; */ + case 16: + fft_16(pDst); + break; + case 32: + fft_32(pDst); + break; + /*case 64: fft_64(pDst); break;*/ + /* case 128: fft_128(pDst); break; */ } #endif pSrc += 2; - pDst = pDst + 2*length/dim2; + pDst = pDst + 2 * dim1; } /* Perform the modulation of the output of the fft of length dim1 */ - pSrc=aDst; - fft_apply_rot_vector(pSrc, length/dim2, length, RotVectorReal, RotVectorImag); + pSrc = aDst; + fft_apply_rot_vector(pSrc, dim1, length, RotVectorReal, RotVectorImag); - /* Perform dim1 times the fft of length dim2. The input samples are at the address of aDst and the - output samples are at the address of pInput. The input vector for the fft of length dim2 is built - of the interleaved samples in aDst, the output samples are stored consecutively at the address - of pInput. + /* Perform dim1 times the fft of length dim2. The input samples are at the + address of aDst and the output samples are at the address of pInput. The input + vector for the fft of length dim2 is built of the interleaved samples in aDst, + the output samples are stored consecutively at the address of pInput. */ - pSrc = aDst; - pDst = aDst2; + pSrc = aDst; + pDst = aDst2; pDstOut = pInput; - for(i=0; i<length/dim2; i++) - { - for(j=0; j<length/dim1; j++) - { - pDst[2*j] = pSrc[2*j*dim1]; - pDst[2*j+1] = pSrc[2*j*dim1+1]; + for (i = 0; i < dim1; i++) { + for (int j = 0; j < dim2; j++) { + pDst[2 * j] = pSrc[2 * j * dim1]; + pDst[2 * j + 1] = pSrc[2 * j * dim1 + 1]; } #ifndef FFT_TWO_STAGE_SWITCH_CASE fft2(pDst); #else switch (dim2) { - case 3: fft3(pDst); break; - case 4: fft_4(pDst); break; - case 5: fft5(pDst); break; - case 8: fft_8(pDst); break; - case 15: fft15(pDst); break; - case 16: fft_16(pDst); break; - case 32: fft_32(pDst); break; - /*case 64: fft_64(pDst); break;*/ - case 128: fft_128(pDst); break; + case 4: + fft_4(pDst); + break; + case 9: + fft9(pDst); + break; + case 12: + fft12(pDst); + break; + case 15: + fft15(pDst); + break; + case 16: + fft_16(pDst); + break; + case 32: + fft_32(pDst); + break; } #endif - for(j=0; j<length/dim1; j++) - { - pDstOut[2*j*dim1] = pDst[2*j]; - pDstOut[2*j*dim1+1] = pDst[2*j+1]; + for (int j = 0; j < dim2; j++) { + pDstOut[2 * j * dim1] = pDst[2 * j]; + pDstOut[2 * j * dim1 + 1] = pDst[2 * j + 1]; } pSrc += 2; pDstOut += 2; } - - C_ALLOC_SCRATCH_END(aDst2, FIXP_DBL, dim2*2); - C_ALLOC_SCRATCH_END(aDst, FIXP_DBL, length*2); } +#endif /* FUNCTION_fftN2_function */ + +#define fftN2(DATA_TYPE, pInput, length, dim1, dim2, fft_func1, fft_func2, \ + RotVectorReal, RotVectorImag) \ + { \ + C_AALLOC_SCRATCH_START(aDst, DATA_TYPE, 2 * length) \ + C_AALLOC_SCRATCH_START(aDst2, DATA_TYPE, 2 * dim2) \ + fftN2_func(pInput, length, dim1, dim2, fft_func1, fft_func2, \ + RotVectorReal, RotVectorImag, aDst, aDst2); \ + C_AALLOC_SCRATCH_END(aDst2, DATA_TYPE, 2 * dim2) \ + C_AALLOC_SCRATCH_END(aDst, DATA_TYPE, 2 * length) \ + } -#endif /* FFT_TWO_STAGE_MACRO_ENABLE */ - - - - - - - - + /*! + * + * \brief complex FFT of length 12,18,24,30,48,60,96, 192, 240, 384, 480 + * \param pInput contains the input signal prescaled right by 2 + * pInput contains the output signal scaled by SCALEFACTOR<#length> + * The output signal does not have any fixed headroom + * \return void + * + */ +#ifndef FUNCTION_fft6 +static inline void fft6(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 6, 2, 3, fft2, fft3, RotVectorReal6, RotVectorImag6); +} +#endif /* #ifndef FUNCTION_fft6 */ +#ifndef FUNCTION_fft12 +static inline void fft12(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 12, 3, 4, fft3, fft_4, RotVectorReal12, + RotVectorImag12); /* 16,58 */ +} +#endif /* #ifndef FUNCTION_fft12 */ +#ifndef FUNCTION_fft20 +static inline void fft20(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 20, 4, 5, fft_4, fft5, RotVectorReal20, + RotVectorImag20); +} +#endif /* FUNCTION_fft20 */ -#define SCALEFACTOR60 5 -/** -The function performs the fft of length 60. It is splittet into fft's of length 4 and fft's of -length 15. Between the fft's a modolation is calculated. -*/ -static inline void fft60(FIXP_DBL *pInput, INT *pScalefactor) -{ - fftN2( - pInput, 60, 4, 15, - fft_4, fft15, - RotVectorReal60, RotVectorImag60 - ); - *pScalefactor += SCALEFACTOR60; +static inline void fft24(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 24, 2, 12, fft2, fft12, RotVectorReal24, + RotVectorImag24); /* 16,73 */ } +static inline void fft48(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 48, 4, 12, fft_4, fft12, RotVectorReal48, + RotVectorImag48); /* 16,32 */ +} +#ifndef FUNCTION_fft60 +static inline void fft60(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 60, 4, 15, fft_4, fft15, RotVectorReal60, + RotVectorImag60); /* 15,51 */ +} +#endif /* FUNCTION_fft60 */ -/* Fallback implementation in case of no better implementation available. */ +#ifndef FUNCTION_fft80 +static inline void fft80(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 80, 5, 16, fft5, fft_16, RotVectorReal80, + RotVectorImag80); /* */ +} +#endif -#define SCALEFACTOR240 7 +#ifndef FUNCTION_fft96 +static inline void fft96(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 96, 3, 32, fft3, fft_32, RotVectorReal96, + RotVectorImag96); /* 15,47 */ +} +#endif /* FUNCTION_fft96*/ -/** -The function performs the fft of length 240. It is splittet into fft's of length 16 and fft's of -length 15. Between the fft's a modulation is calculated. -*/ -static inline void fft240(FIXP_DBL *pInput, INT *pScalefactor) -{ - fftN2( - pInput, 240, 16, 15, - fft_16, fft15, - RotVectorReal240, RotVectorImag240 - ); - *pScalefactor += SCALEFACTOR240; +#ifndef FUNCTION_fft120 +static inline void fft120(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 120, 8, 15, fft_8, fft15, RotVectorReal120, + RotVectorImag120); } +#endif /* FUNCTION_fft120 */ +#ifndef FUNCTION_fft192 +static inline void fft192(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 192, 16, 12, fft_16, fft12, RotVectorReal192, + RotVectorImag192); /* 15,50 */ +} +#endif -#define SCALEFACTOR480 8 -#define N32 32 -#define TABLE_SIZE_16 (32/2) +#ifndef FUNCTION_fft240 +static inline void fft240(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 240, 16, 15, fft_16, fft15, RotVectorReal240, + RotVectorImag240); /* 15.44 */ +} +#endif -/** -The function performs the fft of length 480. It is splittet into fft's of length 32 and fft's of -length 15. Between the fft's a modulation is calculated. -*/ -static inline void fft480(FIXP_DBL *pInput, INT *pScalefactor) -{ - fftN2( - pInput, 480, 32, 15, - fft_32, fft15, - RotVectorReal480, RotVectorImag480 - ); - *pScalefactor += SCALEFACTOR480; +#ifndef FUNCTION_fft384 +static inline void fft384(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 384, 12, 32, fft12, fft_32, RotVectorReal384, + RotVectorImag384); /* 16.02 */ } +#endif /* FUNCTION_fft384 */ -void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) -{ - if (length == 32) - { - fft_32(pInput); - *pScalefactor += SCALEFACTOR32; - } - else - { - - switch (length) { - case 16: - fft_16(pInput); - *pScalefactor += SCALEFACTOR16; - break; - case 8: - fft_8(pInput); - *pScalefactor += SCALEFACTOR8; - break; - case 3: - fft3(pInput); - break; - case 4: - fft_4(pInput); - *pScalefactor += SCALEFACTOR4; - break; - case 5: - fft5(pInput); - break; - case 15: - fft15(pInput); - *pScalefactor += 2; - break; - case 60: - fft60(pInput, pScalefactor); - break; - case 64: - dit_fft(pInput, 6, SineTable512, 512); - *pScalefactor += SCALEFACTOR64; - break; - case 240: - fft240(pInput, pScalefactor); - break; - case 256: - dit_fft(pInput, 8, SineTable512, 512); - *pScalefactor += SCALEFACTOR256; - break; - case 480: - fft480(pInput, pScalefactor); - break; - case 512: - dit_fft(pInput, 9, SineTable512, 512); - *pScalefactor += SCALEFACTOR512; - break; - default: - FDK_ASSERT(0); /* FFT length not supported! */ - break; - } +#ifndef FUNCTION_fft480 +static inline void fft480(FIXP_DBL *pInput) { + fftN2(FIXP_DBL, pInput, 480, 32, 15, fft_32, fft15, RotVectorReal480, + RotVectorImag480); /* 15.84 */ +} +#endif /* FUNCTION_fft480 */ + +void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) { + /* Ensure, that the io-ptr is always (at least 8-byte) aligned */ + C_ALLOC_ALIGNED_CHECK(pInput); + + if (length == 32) { + fft_32(pInput); + *pScalefactor += SCALEFACTOR32; + } else { + switch (length) { + case 16: + fft_16(pInput); + *pScalefactor += SCALEFACTOR16; + break; + case 8: + fft_8(pInput); + *pScalefactor += SCALEFACTOR8; + break; + case 2: + fft2(pInput); + *pScalefactor += SCALEFACTOR2; + break; + case 3: + fft3(pInput); + *pScalefactor += SCALEFACTOR3; + break; + case 4: + fft_4(pInput); + *pScalefactor += SCALEFACTOR4; + break; + case 5: + fft5(pInput); + *pScalefactor += SCALEFACTOR5; + break; + case 6: + fft6(pInput); + *pScalefactor += SCALEFACTOR6; + break; + case 10: + fft10(pInput); + *pScalefactor += SCALEFACTOR10; + break; + case 12: + fft12(pInput); + *pScalefactor += SCALEFACTOR12; + break; + case 15: + fft15(pInput); + *pScalefactor += SCALEFACTOR15; + break; + case 20: + fft20(pInput); + *pScalefactor += SCALEFACTOR20; + break; + case 24: + fft24(pInput); + *pScalefactor += SCALEFACTOR24; + break; + case 48: + fft48(pInput); + *pScalefactor += SCALEFACTOR48; + break; + case 60: + fft60(pInput); + *pScalefactor += SCALEFACTOR60; + break; + case 64: + dit_fft(pInput, 6, SineTable512, 512); + *pScalefactor += SCALEFACTOR64; + break; + case 80: + fft80(pInput); + *pScalefactor += SCALEFACTOR80; + break; + case 96: + fft96(pInput); + *pScalefactor += SCALEFACTOR96; + break; + case 120: + fft120(pInput); + *pScalefactor += SCALEFACTOR120; + break; + case 128: + dit_fft(pInput, 7, SineTable512, 512); + *pScalefactor += SCALEFACTOR128; + break; + case 192: + fft192(pInput); + *pScalefactor += SCALEFACTOR192; + break; + case 240: + fft240(pInput); + *pScalefactor += SCALEFACTOR240; + break; + case 256: + dit_fft(pInput, 8, SineTable512, 512); + *pScalefactor += SCALEFACTOR256; + break; + case 384: + fft384(pInput); + *pScalefactor += SCALEFACTOR384; + break; + case 480: + fft480(pInput); + *pScalefactor += SCALEFACTOR480; + break; + case 512: + dit_fft(pInput, 9, SineTable512, 512); + *pScalefactor += SCALEFACTOR512; + break; + default: + FDK_ASSERT(0); /* FFT length not supported! */ + break; + } } } - -void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) -{ +void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) { switch (length) { default: FDK_ASSERT(0); /* IFFT length not supported! */ break; } } - - |