aboutsummaryrefslogtreecommitdiffstats
path: root/libFDK/src/arm/fft_rad2_arm.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libFDK/src/arm/fft_rad2_arm.cpp')
-rw-r--r--libFDK/src/arm/fft_rad2_arm.cpp259
1 files changed, 259 insertions, 0 deletions
diff --git a/libFDK/src/arm/fft_rad2_arm.cpp b/libFDK/src/arm/fft_rad2_arm.cpp
new file mode 100644
index 0000000..f40961a
--- /dev/null
+++ b/libFDK/src/arm/fft_rad2_arm.cpp
@@ -0,0 +1,259 @@
+/*************************** Fraunhofer IIS FDK Tools **********************
+
+ (C) Copyright Fraunhofer IIS (2005)
+ All Rights Reserved
+
+ Please be advised that this software and/or program delivery is
+ Confidential Information of Fraunhofer and subject to and covered by the
+
+ Fraunhofer IIS Software Evaluation Agreement
+ between Google Inc. and Fraunhofer
+ effective and in full force since March 1, 2012.
+
+ You may use this software and/or program only under the terms and
+ conditions described in the above mentioned Fraunhofer IIS Software
+ Evaluation Agreement. Any other and/or further use requires a separate agreement.
+
+
+ $Id$
+ Author(s):
+ Description: dit_fft ARM assembler replacements.
+
+ This software and/or program is protected by copyright law and international
+ treaties. Any reproduction or distribution of this software and/or program,
+ or any portion of it, may result in severe civil and criminal penalties, and
+ will be prosecuted to the maximum extent possible under law.
+
+******************************************************************************/
+
+/* NEON optimized FFT currently builds only with RVCT toolchain */
+
+#ifndef FUNCTION_dit_fft
+
+/* If dit_fft was not yet defined by ARM-Cortex ... */
+
+#if defined(SINETABLE_16BIT)
+
+#define FUNCTION_dit_fft
+
+/*****************************************************************************
+
+ date: 28.07.2005 srl
+
+ Contents/description: Cooley-Tukey decimation-in-time (DIT) FFT algorithm
+
+******************************************************************************/
+
+#if defined(FUNCTION_dit_fft)
+
+
+void dit_fft(FIXP_DBL *x, const INT ldn, const FIXP_STP *trigdata, const INT trigDataSize)
+{ /*
+ * dit_fft: in-place fixed-point FFT over the buffer x. This is the variant
+ * that is compiled when SINETABLE_16BIT is defined and no other dit_fft has
+ * been provided (see the preprocessor guards around this function).
+ *
+ * x            Buffer of 2*(1<<ldn) values, read and overwritten in place.
+ *              Per the stage comments below, even indices hold real parts
+ *              and odd indices hold imaginary parts.
+ * ldn          log2 of the number of complex points (n = 1<<ldn).
+ * trigdata     Table the per-butterfly factors are read from. The entry
+ *              layout is given by FIXP_STP, which is declared elsewhere.
+ * trigDataSize Initial table stride; it is halved once per pass and the
+ *              result is asserted to stay > 0.
+ *
+ * Scaling: the fused first two stages leave a factor of 1/2 on every
+ * output, and each later pass halves its operands with >>1 (and presumably
+ * inside cplxMultDiv2 - confirm) before adding, so the result is scaled
+ * down by one bit per pass on top of that.
+ *
+ * NOTE(review): ldm = ldn - 2 drives a do/while loop, so the pass loop body
+ * always runs at least once. ldn < 3 therefore does not appear to be
+ * supported - confirm that callers guarantee ldn >= 3.
+ */
+ const INT n=1<<ldn; /* number of complex points */
+ INT i;
+
+ scramble(x,n); /* reorder x before the butterfly passes; helper is defined elsewhere (presumably bit reversal - confirm) */
+ /*
+ * 1+2 stage radix 4
+ *
+ * The first two stages are fused: every iteration handles four complex
+ * points A..D held in x[i] .. x[i+7]. The sums are halved (>>1) when they
+ * are formed, and each difference is then derived as "halved sum minus
+ * second operand", e.g. (A+B)/2 - B = (A-B)/2. Every value written back
+ * therefore carries a factor of 1/2 relative to the expression shown in
+ * its trailing comment.
+ */
+
+ for (i=0;i<n*2;i+=8)
+ {
+ FIXP_DBL a00, a10, a20, a30;
+ a00 = (x[i + 0] + x[i + 2])>>1; /* Re A + Re B */
+ a10 = (x[i + 4] + x[i + 6])>>1; /* Re C + Re D */
+ a20 = (x[i + 1] + x[i + 3])>>1; /* Im A + Im B */
+ a30 = (x[i + 5] + x[i + 7])>>1; /* Im C + Im D */
+
+ x[i + 0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */
+ x[i + 4] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */
+ x[i + 1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */
+ x[i + 5] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */
+
+ a00 = a00 - x[i + 2]; /* Re A - Re B */
+ a10 = a10 - x[i + 6]; /* Re C - Re D */
+ a20 = a20 - x[i + 3]; /* Im A - Im B */
+ a30 = a30 - x[i + 7]; /* Im C - Im D */
+
+ x[i + 2] = a00 + a30; /* Re B' = Re A - Re B + Im C - Im D */
+ x[i + 6] = a00 - a30; /* Re D' = Re A - Re B - Im C + Im D */
+ x[i + 3] = a20 - a10; /* Im B' = Im A - Im B - Re C + Re D */
+ x[i + 7] = a20 + a10; /* Im D' = Im A - Im B + Re C - Re D */
+ }
+
+ INT mh = 1 << 1; /* doubled at the top of every pass: 4, 8, 16, ... */
+ INT ldm = ldn - 2; /* passes left to run after the fused first two stages */
+ INT trigstep = trigDataSize; /* stride into trigdata; halved at the top of every pass */
+
+ do
+ {
+ const FIXP_STP *pTrigData = trigdata; /* restart the table walk for this pass */
+ INT j;
+
+ mh <<= 1;
+ trigstep >>= 1;
+
+ FDK_ASSERT(trigstep > 0);
+
+ /* Do first iteration with c=1.0 and s=0.0 separately to avoid losing too much precision.
+ Beware: The impact on the overall FFT precision is rather large.
+
+ Each trip of the inner loop performs two butterflies: one at xt1 and one
+ mh values further on, each paired with the element 2*mh values above it.
+ xt1 ends up 4*mh values further per trip, and r only counts the trips
+ (n / (2*mh) of them). */
+ {
+ FIXP_DBL *xt1 = x;
+ int r = n;
+
+ do {
+ FIXP_DBL *xt2 = xt1 + (mh<<1);
+ /*
+ FIXP_DBL *xt1 = x+ ((r)<<1);
+ FIXP_DBL *xt2 = xt1 + (mh<<1);
+ */
+ FIXP_DBL vr,vi,ur,ui;
+
+ //cplxMultDiv2(&vi, &vr, x[t2+1], x[t2], (FIXP_SGL)1.0, (FIXP_SGL)0.0);
+ vi = xt2[1]>>1; /* plain halving stands in for the multiply shown in the line above */
+ vr = xt2[0]>>1;
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui+vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui-vi;
+
+ xt1 += mh;
+ xt2 += mh;
+
+ //cplxMultDiv2(&vr, &vi, x[t2+1], x[t2], (FIXP_SGL)1.0, (FIXP_SGL)0.0);
+ vr = xt2[1]>>1; /* re/im swapped on purpose: with the signs used below the second operand is applied as (Im, -Re) */
+ vi = xt2[0]>>1;
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui-vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui+vi;
+
+ xt1 = xt2 + mh; /* first element of the next group of 4*mh values */
+ } while ((r=r-(mh<<1)) != 0);
+ }
+ for(j=4; j<mh; j+=4) /* no iterations on the first pass, where mh == 4 */
+ {
+ FIXP_DBL *xt1 = x + (j>>1);
+ FIXP_SPK cs;
+ int r = n;
+
+ pTrigData += trigstep; /* advance to the table entry used for this j */
+ cs = *pTrigData;
+
+ do /* four butterflies per trip, all using the same cs */
+ {
+ FIXP_DBL *xt2 = xt1 + (mh<<1);
+ FIXP_DBL vr,vi,ur,ui;
+
+ cplxMultDiv2(&vi, &vr, xt2[1], xt2[0], cs); /* helper defined elsewhere; presumably a complex multiply by cs with a built-in halving - confirm */
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui+vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui-vi;
+
+ xt1 += mh;
+ xt2 += mh;
+
+ cplxMultDiv2(&vr, &vi, xt2[1], xt2[0], cs); /* same inputs as above, but the two output pointers are exchanged */
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui-vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui+vi;
+
+ /* Same as above but for t1,t2 with j>mh/4 and thus cs swapped
+ (the two data arguments are handed to cplxMultDiv2 in the opposite order) */
+ xt1 = xt1 - (j); /* now mh - j/2 values into the current group of 4*mh */
+ xt2 = xt1 + (mh<<1);
+
+ cplxMultDiv2(&vi, &vr, xt2[0], xt2[1], cs);
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui-vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui+vi;
+
+ xt1 += mh;
+ xt2 += mh;
+
+ cplxMultDiv2(&vr, &vi, xt2[0], xt2[1], cs);
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur-vr;
+ xt1[1] = ui-vi;
+
+ xt2[0] = ur+vr;
+ xt2[1] = ui+vi;
+
+ xt1 = xt2 + (j); /* j/2 values into the next group of 4*mh */
+ } while ((r=r-(mh<<1)) != 0);
+ }
+ { /* Butterflies at offset mh/2: both factors are the constant 0x5a82799a (about 2^31/sqrt(2)) rather than a table entry; presumably cos = sin at 45 degrees - confirm against STC() */
+ FIXP_DBL *xt1 = x + (mh>>1);
+ int r = n;
+
+ do
+ {
+ FIXP_DBL *xt2 = xt1 + (mh<<1);
+ FIXP_DBL vr,vi,ur,ui;
+
+ cplxMultDiv2(&vi, &vr, xt2[1], xt2[0], STC(0x5a82799a), STC(0x5a82799a));
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui+vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui-vi;
+
+ xt1 += mh;
+ xt2 += mh;
+
+ cplxMultDiv2(&vr, &vi, xt2[1], xt2[0], STC(0x5a82799a), STC(0x5a82799a));
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui-vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui+vi;
+
+ xt1 = xt2 + mh; /* mh/2 values into the next group of 4*mh */
+ } while ((r=r-(mh<<1)) != 0);
+ }
+ } while (--ldm != 0); /* body has already run once before this test; see the NOTE(review) at the top */
+}
+
+#endif /* if defined(FUNCTION_dit_fft) */
+
+#endif /* if defined(SINETABLE_16BIT) */
+
+#endif /* ifndef FUNCTION_dit_fft */