summaryrefslogtreecommitdiffstats
path: root/libFDK/src/arm
diff options
context:
space:
mode:
authorDave Burke <daveburke@google.com>2012-04-17 09:51:45 -0700
committerDave Burke <daveburke@google.com>2012-04-17 23:04:43 -0700
commit9bf37cc9712506b2483650c82d3c41152337ef7e (patch)
tree77db44e2bae06e3d144b255628be2b7a55c581d3 /libFDK/src/arm
parenta37315fe10ee143d6d0b28c19d41a476a23e63ea (diff)
downloadfdk-aac-9bf37cc9712506b2483650c82d3c41152337ef7e.tar.gz
fdk-aac-9bf37cc9712506b2483650c82d3c41152337ef7e.tar.bz2
fdk-aac-9bf37cc9712506b2483650c82d3c41152337ef7e.zip
Fraunhofer AAC codec.
License boilerplate update to follow. Change-Id: I2810460c11a58b6d148d84673cc031f3685e79b5
Diffstat (limited to 'libFDK/src/arm')
-rw-r--r--libFDK/src/arm/autocorr2nd.cpp33
-rw-r--r--libFDK/src/arm/dct_arm.cpp395
-rw-r--r--libFDK/src/arm/fft_rad2_arm.cpp259
-rw-r--r--libFDK/src/arm/qmf_arm.cpp710
-rw-r--r--libFDK/src/arm/scale_arm.cpp110
5 files changed, 1507 insertions, 0 deletions
diff --git a/libFDK/src/arm/autocorr2nd.cpp b/libFDK/src/arm/autocorr2nd.cpp
new file mode 100644
index 0000000..85926af
--- /dev/null
+++ b/libFDK/src/arm/autocorr2nd.cpp
@@ -0,0 +1,33 @@
+/****************************************************************************
+
+ (C) Copyright Fraunhofer IIS (2006)
+ All Rights Reserved
+
+ Please be advised that this software and/or program delivery is
+ Confidential Information of Fraunhofer and subject to and covered by the
+
+ Fraunhofer IIS Software Evaluation Agreement
+ between Google Inc. and Fraunhofer
+ effective and in full force since March 1, 2012.
+
+ You may use this software and/or program only under the terms and
+ conditions described in the above mentioned Fraunhofer IIS Software
+ Evaluation Agreement. Any other and/or further use requires a separate agreement.
+
+
+ This software and/or program is protected by copyright law and international
+ treaties. Any reproduction or distribution of this software and/or program,
+ or any portion of it, may result in severe civil and criminal penalties, and
+ will be prosecuted to the maximum extent possible under law.
+
+ $Id$
+
+*******************************************************************************/
+
+/*!
+ *
+ * \brief Calculate second order autocorrelation
+ *
+ */
+
+
diff --git a/libFDK/src/arm/dct_arm.cpp b/libFDK/src/arm/dct_arm.cpp
new file mode 100644
index 0000000..dd0ca09
--- /dev/null
+++ b/libFDK/src/arm/dct_arm.cpp
@@ -0,0 +1,395 @@
+/****************************************************************************
+
+ (C) copyright Fraunhofer IIS (2004)
+ All Rights Reserved
+
+ Please be advised that this software and/or program delivery is
+ Confidential Information of Fraunhofer and subject to and covered by the
+
+ Fraunhofer IIS Software Evaluation Agreement
+ between Google Inc. and Fraunhofer
+ effective and in full force since March 1, 2012.
+
+ You may use this software and/or program only under the terms and
+ conditions described in the above mentioned Fraunhofer IIS Software
+ Evaluation Agreement. Any other and/or further use requires a separate agreement.
+
+
+
+ $Id$
+
+***************************************************************************/
+
+
+
+#ifdef FUNCTION_dct_IV_func1
+
+/*
+ Note: This assembler routine is here, because the ARM926 compiler does
+ not encode the inline assembler with optimal speed.
+ With this version, we save 2 cycles per loop iteration.
+*/
+
+__asm void dct_IV_func1(
+ int i, /* loop count; every loop pass consumes four twiddle words */
+ const FIXP_SPK *twiddle, /* read sequentially, one 32-bit word per butterfly */
+ FIXP_DBL *RESTRICT pDat_0, /* read and rewritten in place, walking upwards */
+ FIXP_DBL *RESTRICT pDat_1) /* read and rewritten in place, walking downwards */
+{
+ /* Combines value pairs from pDat_0 and pDat_1 with the two 16-bit halves of each twiddle word, in place. Register map:
+ r0 i
+ r1 twiddle
+ r2 pDat_0
+ r3 pDat_1
+ r4 accu1
+ r5 accu2
+ r6 accu3
+ r7 accu4
+ r8 val_tw
+ r9 accuX
+ */
+ PUSH {r4-r9} // save the scratch registers listed in the register map
+
+ /* 44 cycles for 2 iterations = 22 cycles/iteration */
+dct_IV_loop1_start
+/* First iteration */
+ LDR r8, [r1], #4 // val_tw = *twiddle++;
+ LDR r5, [r2, #0] // accu2 = pDat_0[0]
+ LDR r4, [r3, #0] // accu1 = pDat_1[0]
+
+ SMULWT r9, r5, r8 // accuX = accu2*val_tw.l
+ SMULWB r5, r5, r8 // accu2 = accu2*val_tw.h
+ RSB r9, r9, #0 // accuX =-accu2*val_tw.l
+ SMLAWT r5, r4, r8, r5 // accu2 = accu2*val_tw.h + accu1*val_tw.l
+ SMLAWB r4, r4, r8, r9 // accu1 = accu1*val_tw.h - accu2*val_tw.l
+
+ LDR r8, [r1], #4 // val_tw = *twiddle++;
+ LDR r7, [r3, #-4] // accu4 = pDat_1[-1]
+ LDR r6, [r2, #4] // accu3 = pDat_0[1]
+
+ SMULWB r9, r7, r8 // accuX = accu4*val_tw.h
+ SMULWT r7, r7, r8 // accu4 = accu4*val_tw.l
+ RSB r9, r9, #0 // accuX =-accu4*val_tw.h
+ SMLAWB r7, r6, r8, r7 // accu4 = accu4*val_tw.l+accu3*val_tw.h
+ SMLAWT r6, r6, r8, r9 // accu3 = accu3*val_tw.l-accu4*val_tw.h
+
+ STR r5, [r2], #4 // *pDat_0++ = accu2
+ STR r4, [r2], #4 // *pDat_0++ = accu1
+ STR r6, [r3], #-4 // *pDat_1-- = accu3
+ STR r7, [r3], #-4 // *pDat_1-- = accu4
+
+/* Second iteration */
+ LDR r8, [r1], #4 // val_tw = *twiddle++;
+ LDR r5, [r2, #0] // accu2 = pDat_0[0]
+ LDR r4, [r3, #0] // accu1 = pDat_1[0]
+
+ SMULWT r9, r5, r8 // accuX = accu2*val_tw.l
+ SMULWB r5, r5, r8 // accu2 = accu2*val_tw.h
+ RSB r9, r9, #0 // accuX =-accu2*val_tw.l
+ SMLAWT r5, r4, r8, r5 // accu2 = accu2*val_tw.h + accu1*val_tw.l
+ SMLAWB r4, r4, r8, r9 // accu1 = accu1*val_tw.h - accu2*val_tw.l
+
+ LDR r8, [r1], #4 // val_tw = *twiddle++;
+ LDR r7, [r3, #-4] // accu4 = pDat_1[-1]
+ LDR r6, [r2, #4] // accu3 = pDat_0[1]
+
+ SMULWB r9, r7, r8 // accuX = accu4*val_tw.h
+ SMULWT r7, r7, r8 // accu4 = accu4*val_tw.l
+ RSB r9, r9, #0 // accuX =-accu4*val_tw.h
+ SMLAWB r7, r6, r8, r7 // accu4 = accu4*val_tw.l+accu3*val_tw.h
+ SMLAWT r6, r6, r8, r9 // accu3 = accu3*val_tw.l-accu4*val_tw.h
+
+ STR r5, [r2], #4 // *pDat_0++ = accu2
+ STR r4, [r2], #4 // *pDat_0++ = accu1
+ STR r6, [r3], #-4 // *pDat_1-- = accu3
+ STR r7, [r3], #-4 // *pDat_1-- = accu4
+
+ SUBS r0, r0, #1 // i--, updating the flags tested by BNE
+ BNE dct_IV_loop1_start // bottom-tested loop: body runs at least once, i <= 0 on entry is not handled
+
+ POP {r4-r9} // restore the registers saved on entry
+
+ BX lr // return to caller
+}
+
+#endif /* FUNCTION_dct_IV_func1 */
+
+
+#ifdef FUNCTION_dct_IV_func2
+
+FDK_INLINE
+/* __attribute__((noinline)) */
+static void dct_IV_func2(
+ int i, /* asm loop count; bottom-tested with SUBS/BNE, so i <= 0 is not handled */
+ const FIXP_SPK *twiddle, /* packed twiddle words, stepped by inc elements per load */
+ FIXP_DBL *pDat_0, /* rewritten in place, walking upwards */
+ FIXP_DBL *pDat_1, /* rewritten in place, walking downwards */
+ int inc) /* twiddle stride in elements (inc LSL #2 inside the asm loads) */
+{
+ FIXP_DBL accu1, accu2, accu3, accu4, accuX;
+ LONG val_tw;
+
+ accu1 = pDat_1[-2];
+ accu2 = pDat_1[-1]; /* fetched before the store below overwrites this element */
+
+ *--pDat_1 = -(pDat_0[1]>>1);
+ *pDat_0 = (pDat_0[0]>>1); pDat_0++; /* increment kept as its own statement: '*pDat_0++ = pDat_0[0]>>1' reads and modifies pDat_0 unsequenced */
+
+ twiddle += inc;
+
+__asm
+ {
+ LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
+ B dct_IV_loop2_2nd_part // first pass enters mid-loop, so only the second half runs
+
+ /* 42 cycles for 2 iterations = 21 cycles/iteration */
+dct_IV_loop2:
+ SMULWT accuX, accu2, val_tw // accuX = (accu2 * top16(val_tw)) >> 16
+ SMULWB accu2, accu2, val_tw // accu2 = (accu2 * bot16(val_tw)) >> 16
+ RSB accuX, accuX, #0 // accuX = -accuX
+ SMLAWB accuX, accu1, val_tw, accuX // accuX += (accu1 * bot16(val_tw)) >> 16
+ SMLAWT accu2, accu1, val_tw, accu2 // accu2 += (accu1 * top16(val_tw)) >> 16
+ STR accuX, [pDat_0], #4 // *pDat_0++ = accuX
+ STR accu2, [pDat_1, #-4] ! // *--pDat_1 = accu2
+
+ LDR accu4, [pDat_0, #4] // accu4 = pDat_0[1]
+ LDR accu3, [pDat_0] // accu3 = pDat_0[0]
+ SMULWB accuX, accu4, val_tw // accuX = (accu4 * bot16(val_tw)) >> 16
+ SMULWT accu4, accu4, val_tw // accu4 = (accu4 * top16(val_tw)) >> 16
+ RSB accuX, accuX, #0 // accuX = -accuX
+ SMLAWT accuX, accu3, val_tw, accuX // accuX += (accu3 * top16(val_tw)) >> 16
+ SMLAWB accu4, accu3, val_tw, accu4 // accu4 += (accu3 * bot16(val_tw)) >> 16
+
+ LDR accu1, [pDat_1, #-8] // accu1 = pDat_1[-2], input of the next half
+ LDR accu2, [pDat_1, #-4] // accu2 = pDat_1[-1], read before the store below overwrites it
+
+ LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
+
+ STR accuX, [pDat_1, #-4] ! // *--pDat_1 = accuX
+ STR accu4, [pDat_0], #4 // *pDat_0++ = accu4
+
+dct_IV_loop2_2nd_part: // same instruction sequence as the first half above
+ SMULWT accuX, accu2, val_tw
+ SMULWB accu2, accu2, val_tw
+ RSB accuX, accuX, #0
+ SMLAWB accuX, accu1, val_tw, accuX
+ SMLAWT accu2, accu1, val_tw, accu2
+ STR accuX, [pDat_0], #4
+ STR accu2, [pDat_1, #-4] !
+
+ LDR accu4, [pDat_0, #4]
+ LDR accu3, [pDat_0]
+ SMULWB accuX, accu4, val_tw
+ SMULWT accu4, accu4, val_tw
+ RSB accuX, accuX, #0
+ SMLAWT accuX, accu3, val_tw, accuX
+ SMLAWB accu4, accu3, val_tw, accu4
+
+ LDR accu1, [pDat_1, #-8]
+ LDR accu2, [pDat_1, #-4]
+
+ STR accuX, [pDat_1, #-4] !
+ STR accu4, [pDat_0], #4
+
+ LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
+
+ SUBS i, i, #1 // i--, updating the flags tested by BNE
+ BNE dct_IV_loop2 // every further pass runs both halves
+ }
+
+ /* Last Sin and Cos value pair are the same */
+ accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
+ accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
+
+ *--pDat_1 = accu1 + accu2; /* final pair uses the values left in accu1/accu2 by the asm loop */
+ *pDat_0++ = accu1 - accu2;
+}
+#endif /* FUNCTION_dct_IV_func2 */
+
+
+#ifdef FUNCTION_dst_IV_func1
+
+__asm void dst_IV_func1(
+ int i, /* decremented by 4 per loop pass; the loop ends when it reaches exactly 0 */
+ const FIXP_SPK *twiddle, /* read sequentially, four 32-bit words per loop pass */
+ FIXP_DBL *pDat_0, /* rewritten in place, walking upwards */
+ FIXP_DBL *pDat_1) /* rewritten in place, walking downwards (pDat_1[-1], pDat_1[-2], ...) */
+{
+ /* Combines value pairs from pDat_0 and pDat_1 with the two 16-bit halves of each twiddle word, in place. Register map:
+ r0 i
+ r1 twiddle
+ r2 pDat_0
+ r3 pDat_1
+ r4 accu1
+ r5 accu2
+ r6 accu3
+ r7 accu4
+ r8 val_tw
+ r9 accuX
+ */
+ PUSH {r4-r9} // save the scratch registers listed in the register map
+
+dst_IV_loop1
+ LDR r8, [r1], #4 // val_tw = *twiddle++
+ LDR r5, [r2] // accu2 = pDat_0[0]
+ LDR r6, [r2, #4] // accu3 = pDat_0[1]
+ RSB r5, r5, #0 // accu2 = -accu2
+ SMULWT r9, r5, r8 // accuX = (-accu2)*val_tw.l
+ LDR r4, [r3, #-4] // accu1 = pDat_1[-1]
+ RSB r9, r9, #0 // accuX = -(-accu2)*val_tw.l
+ SMLAWB r9, r4, r8, r9 // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
+ SMULWT r4, r4, r8 // accu1 = accu1*val_tw.l
+ LDR r7, [r3, #-8] // accu4 = pDat_1[-2]
+ SMLAWB r5, r5, r8, r4 // accu2 = (-accu2)*val_tw.h+accu1*val_tw.l
+ LDR r8, [r1], #4 // val_tw = *twiddle++
+ STR r5, [r2], #4 // *pDat_0++ = accu2
+ STR r9, [r2], #4 // *pDat_0++ = accu1 (accuX)
+ RSB r7, r7, #0 // accu4 = -accu4
+ SMULWB r5, r7, r8 // accu2 = (-accu4)*val_tw.h
+ SMULWB r4, r6, r8 // accu1 = accu3*val_tw.h
+ RSB r5, r5, #0 // accu2 = -(-accu4)*val_tw.h
+ SMLAWT r6, r6, r8, r5 // accu3 = accu3*val_tw.l-(-accu4)*val_tw.h
+ SMLAWT r7, r7, r8, r4 // accu4 = (-accu4)*val_tw.l+accu3*val_tw.h
+ STR r6, [r3, #-4] ! // *--pDat_1 = accu3
+ STR r7, [r3, #-4] ! // *--pDat_1 = accu4
+
+ LDR r8, [r1], #4 // val_tw = *twiddle++
+ LDR r5, [r2] // accu2 = pDat_0[0]
+ LDR r6, [r2, #4] // accu3 = pDat_0[1]
+ RSB r5, r5, #0 // accu2 = -accu2
+ SMULWT r9, r5, r8 // accuX = (-accu2)*val_tw.l
+ LDR r4, [r3, #-4] // accu1 = pDat_1[-1]
+ RSB r9, r9, #0 // accuX = -(-accu2)*val_tw.l
+ SMLAWB r9, r4, r8, r9 // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
+ SMULWT r4, r4, r8 // accu1 = accu1*val_tw.l
+ LDR r7, [r3, #-8] // accu4 = pDat_1[-2]
+ SMLAWB r5, r5, r8, r4 // accu2 = (-accu2)*val_tw.h+accu1*val_tw.l
+ LDR r8, [r1], #4 // val_tw = *twiddle++
+ STR r5, [r2], #4 // *pDat_0++ = accu2
+ STR r9, [r2], #4 // *pDat_0++ = accu1 (accuX)
+ RSB r7, r7, #0 // accu4 = -accu4
+ SMULWB r5, r7, r8 // accu2 = (-accu4)*val_tw.h
+ SMULWB r4, r6, r8 // accu1 = accu3*val_tw.h
+ RSB r5, r5, #0 // accu2 = -(-accu4)*val_tw.h
+ SMLAWT r6, r6, r8, r5 // accu3 = accu3*val_tw.l-(-accu4)*val_tw.h
+ SMLAWT r7, r7, r8, r4 // accu4 = (-accu4)*val_tw.l+accu3*val_tw.h
+ STR r6, [r3, #-4] ! // *--pDat_1 = accu3
+ STR r7, [r3, #-4] ! // *--pDat_1 = accu4
+
+ SUBS r0, r0, #4 // i-= 4
+ BNE dst_IV_loop1 // bottom-tested: body runs at least once; NOTE(review): i presumably a positive multiple of 4 - confirm against caller
+
+ POP {r4-r9} // restore the registers saved on entry
+ BX lr // return to caller
+}
+#endif /* FUNCTION_dst_IV_func1 */
+
+#ifdef FUNCTION_dst_IV_func2
+
+FDK_INLINE
+/* __attribute__((noinline)) */
+static void dst_IV_func2(
+ int i, /* asm loop count; bottom-tested with SUBS/BNE, so i <= 0 is not handled */
+ const FIXP_SPK *twiddle, /* packed twiddle words, stepped by inc elements per load */
+ FIXP_DBL *RESTRICT pDat_0, /* rewritten in place, walking upwards */
+ FIXP_DBL *RESTRICT pDat_1, /* rewritten in place, walking downwards */
+ int inc) /* twiddle stride in elements (inc LSL #2 inside the asm loads) */
+{
+ FIXP_DBL accu1,accu2,accu3,accu4;
+ LONG val_tw;
+
+ accu4 = pDat_0[0];
+ accu3 = pDat_0[1];
+ accu4 >>= 1;
+ accu3 >>= 1;
+ accu4 = -accu4;
+
+ accu1 = pDat_1[-1]; /* both pDat_1 inputs are fetched before pDat_1[0] is overwritten below */
+ accu2 = pDat_1[0];
+
+ *pDat_0++ = accu3; /* old pDat_0[0] <- old pDat_0[1] >> 1 */
+ *pDat_1-- = accu4; /* old pDat_1[0] <- -(old pDat_0[0] >> 1) */
+
+
+ __asm
+ {
+ B dst_IV_loop2_2nd_part // first pass enters mid-loop, so only the second half runs
+
+ /* 50 cycles for 2 iterations = 25 cycles/iteration */
+
+dst_IV_loop2:
+
+ LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
+
+ RSB accu2, accu2, #0 // accu2 = -accu2
+ RSB accu1, accu1, #0 // accu1 = -accu1
+ SMULWT accu3, accu2, val_tw // accu3 = (-accu2)*val_tw.l
+ SMULWT accu4, accu1, val_tw // accu4 = (-accu1)*val_tw.l
+ RSB accu3, accu3, #0 // accu3 = -accu2*val_tw.l
+ SMLAWB accu1, accu1, val_tw, accu3 // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
+ SMLAWB accu2, accu2, val_tw, accu4 // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
+ STR accu1, [pDat_1], #-4 // *pDat_1-- = accu1
+ STR accu2, [pDat_0], #4 // *pDat_0++ = accu2
+
+ LDR accu4, [pDat_0] // accu4 = pDat_0[0]
+ LDR accu3, [pDat_0, #4] // accu3 = pDat_0[1]
+
+ RSB accu4, accu4, #0 // accu4 = -accu4
+ RSB accu3, accu3, #0 // accu3 = -accu3
+
+ SMULWB accu1, accu3, val_tw // accu1 = (-accu3)*val_tw.h
+ SMULWT accu2, accu3, val_tw // accu2 = (-accu3)*val_tw.l
+ RSB accu1, accu1, #0 // accu1 = -(-accu3)*val_tw.h
+ SMLAWT accu3, accu4, val_tw, accu1 // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
+ SMLAWB accu4, accu4, val_tw, accu2 // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
+
+ LDR accu1, [pDat_1, #-4] // accu1 = pDat_1[-1]
+ LDR accu2, [pDat_1] // accu2 = pDat_1[0]
+
+ STR accu3, [pDat_0], #4 // *pDat_0++ = accu3
+ STR accu4, [pDat_1], #-4 // *pDat_1-- = accu4
+
+dst_IV_loop2_2nd_part: // same instruction sequence as the first half above
+
+ LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
+
+ RSB accu2, accu2, #0 // accu2 = -accu2
+ RSB accu1, accu1, #0 // accu1 = -accu1
+ SMULWT accu3, accu2, val_tw // accu3 = (-accu2)*val_tw.l
+ SMULWT accu4, accu1, val_tw // accu4 = (-accu1)*val_tw.l
+ RSB accu3, accu3, #0 // accu3 = -accu2*val_tw.l
+ SMLAWB accu1, accu1, val_tw, accu3 // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
+ SMLAWB accu2, accu2, val_tw, accu4 // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
+ STR accu1, [pDat_1], #-4 // *pDat_1-- = accu1
+ STR accu2, [pDat_0], #4 // *pDat_0++ = accu2
+
+ LDR accu4, [pDat_0] // accu4 = pDat_0[0]
+ LDR accu3, [pDat_0, #4] // accu3 = pDat_0[1]
+
+ RSB accu4, accu4, #0 // accu4 = -accu4
+ RSB accu3, accu3, #0 // accu3 = -accu3
+
+ SMULWB accu1, accu3, val_tw // accu1 = (-accu3)*val_tw.h
+ SMULWT accu2, accu3, val_tw // accu2 = (-accu3)*val_tw.l
+ RSB accu1, accu1, #0 // accu1 = -(-accu3)*val_tw.h
+ SMLAWT accu3, accu4, val_tw, accu1 // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
+ SMLAWB accu4, accu4, val_tw, accu2 // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
+
+ LDR accu1, [pDat_1, #-4] // accu1 = pDat_1[-1]
+ LDR accu2, [pDat_1] // accu2 = pDat_1[0]
+
+ STR accu3, [pDat_0], #4 // *pDat_0++ = accu3
+ STR accu4, [pDat_1], #-4 // *pDat_1-- = accu4
+
+ SUBS i, i, #1 // i--, updating the flags tested by BNE
+ BNE dst_IV_loop2 // every further pass runs both halves
+ }
+
+ /* Last Sin and Cos value pair are the same */
+ accu1 = fMultDiv2(-accu1, WTC(0x5a82799a));
+ accu2 = fMultDiv2(-accu2, WTC(0x5a82799a));
+
+ *pDat_0 = accu1 + accu2; /* final pair uses the values left in accu1/accu2 by the asm loop */
+ *pDat_1 = accu1 - accu2;
+}
+#endif /* FUNCTION_dst_IV_func2 */
diff --git a/libFDK/src/arm/fft_rad2_arm.cpp b/libFDK/src/arm/fft_rad2_arm.cpp
new file mode 100644
index 0000000..f40961a
--- /dev/null
+++ b/libFDK/src/arm/fft_rad2_arm.cpp
@@ -0,0 +1,259 @@
+/*************************** Fraunhofer IIS FDK Tools **********************
+
+ (C) Copyright Fraunhofer IIS (2005)
+ All Rights Reserved
+
+ Please be advised that this software and/or program delivery is
+ Confidential Information of Fraunhofer and subject to and covered by the
+
+ Fraunhofer IIS Software Evaluation Agreement
+ between Google Inc. and Fraunhofer
+ effective and in full force since March 1, 2012.
+
+ You may use this software and/or program only under the terms and
+ conditions described in the above mentioned Fraunhofer IIS Software
+ Evaluation Agreement. Any other and/or further use requires a separate agreement.
+
+
+ $Id$
+ Author(s):
+ Description: dit_fft ARM assembler replacements.
+
+ This software and/or program is protected by copyright law and international
+ treaties. Any reproduction or distribution of this software and/or program,
+ or any portion of it, may result in severe civil and criminal penalties, and
+ will be prosecuted to the maximum extent possible under law.
+
+******************************************************************************/
+
+/* NEON optimized FFT currently builds only with RVCT toolchain */
+
+#ifndef FUNCTION_dit_fft
+
+/* If dit_fft was not yet defined by ARM-Cortex ... */
+
+#if defined(SINETABLE_16BIT)
+
+#define FUNCTION_dit_fft
+
+/*****************************************************************************
+
+ date: 28.07.2005 srl
+
+ Contents/description: dit-tukey-FFT-algorithm
+
+******************************************************************************/
+
+#if defined(FUNCTION_dit_fft)
+
+
+void dit_fft(FIXP_DBL *x, const INT ldn, const FIXP_STP *trigdata, const INT trigDataSize)
+{ /* In-place FFT over n = 1<<ldn complex values stored interleaved (Re, Im) in x[]; every stage scales down via >>1 / cplxMultDiv2. */
+ const INT n=1<<ldn;
+ INT i;
+
+ scramble(x,n); /* NOTE(review): presumably the input reordering for decimation in time - confirm in scramble() */
+ /*
+ * 1+2 stage radix 4
+ */
+
+ for (i=0;i<n*2;i+=8)
+ {
+ FIXP_DBL a00, a10, a20, a30;
+ a00 = (x[i + 0] + x[i + 2])>>1; /* Re A + Re B */
+ a10 = (x[i + 4] + x[i + 6])>>1; /* Re C + Re D */
+ a20 = (x[i + 1] + x[i + 3])>>1; /* Im A + Im B */
+ a30 = (x[i + 5] + x[i + 7])>>1; /* Im C + Im D */
+
+ x[i + 0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */
+ x[i + 4] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */
+ x[i + 1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */
+ x[i + 5] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */
+
+ a00 = a00 - x[i + 2]; /* Re A - Re B */
+ a10 = a10 - x[i + 6]; /* Re C - Re D */
+ a20 = a20 - x[i + 3]; /* Im A - Im B */
+ a30 = a30 - x[i + 7]; /* Im C - Im D */
+
+ x[i + 2] = a00 + a30; /* Re B' = Re A - Re B + Im C - Im D */
+ x[i + 6] = a00 - a30; /* Re D' = Re A - Re B - Im C + Im D */
+ x[i + 3] = a20 - a10; /* Im B' = Im A - Im B - Re C + Re D */
+ x[i + 7] = a20 + a10; /* Im D' = Im A - Im B + Re C - Re D */
+ }
+
+ INT mh = 1 << 1; /* doubled at the top of every remaining stage (4, 8, ...) */
+ INT ldm = ldn - 2; /* stages left after the radix-4 pass above */
+ INT trigstep = trigDataSize; /* halved at the top of every remaining stage */
+
+ for (; ldm > 0; ldm--) /* pre-tested: with ldn == 2 no stage is left and x[] must not be touched again */
+ {
+ const FIXP_STP *pTrigData = trigdata;
+ INT j;
+
+ mh <<= 1;
+ trigstep >>= 1;
+
+ FDK_ASSERT(trigstep > 0);
+
+ /* Do the first iteration with c=1.0 and s=0.0 separately to avoid losing too much precision.
+ Beware: The impact on the overall FFT precision is rather large. */
+ {
+ FIXP_DBL *xt1 = x;
+ int r = n;
+
+ do {
+ FIXP_DBL *xt2 = xt1 + (mh<<1);
+ /*
+ FIXP_DBL *xt1 = x+ ((r)<<1);
+ FIXP_DBL *xt2 = xt1 + (mh<<1);
+ */
+ FIXP_DBL vr,vi,ur,ui;
+
+ //cplxMultDiv2(&vi, &vr, x[t2+1], x[t2], (FIXP_SGL)1.0, (FIXP_SGL)0.0);
+ vi = xt2[1]>>1;
+ vr = xt2[0]>>1;
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui+vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui-vi;
+
+ xt1 += mh;
+ xt2 += mh;
+
+ //cplxMultDiv2(&vr, &vi, x[t2+1], x[t2], (FIXP_SGL)1.0, (FIXP_SGL)0.0);
+ vr = xt2[1]>>1;
+ vi = xt2[0]>>1;
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui-vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui+vi;
+
+ xt1 = xt2 + mh;
+ } while ((r=r-(mh<<1)) != 0);
+ }
+ for(j=4; j<mh; j+=4)
+ {
+ FIXP_DBL *xt1 = x + (j>>1);
+ FIXP_SPK cs;
+ int r = n;
+
+ pTrigData += trigstep;
+ cs = *pTrigData; /* one twiddle per j, reused for all four butterflies below */
+
+ do
+ {
+ FIXP_DBL *xt2 = xt1 + (mh<<1);
+ FIXP_DBL vr,vi,ur,ui;
+
+ cplxMultDiv2(&vi, &vr, xt2[1], xt2[0], cs);
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui+vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui-vi;
+
+ xt1 += mh;
+ xt2 += mh;
+
+ cplxMultDiv2(&vr, &vi, xt2[1], xt2[0], cs);
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui-vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui+vi;
+
+ /* Same as above but for t1,t2 with j>mh/4 and thus cs swapped */
+ xt1 = xt1 - (j);
+ xt2 = xt1 + (mh<<1);
+
+ cplxMultDiv2(&vi, &vr, xt2[0], xt2[1], cs);
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui-vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui+vi;
+
+ xt1 += mh;
+ xt2 += mh;
+
+ cplxMultDiv2(&vr, &vi, xt2[0], xt2[1], cs);
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur-vr;
+ xt1[1] = ui-vi;
+
+ xt2[0] = ur+vr;
+ xt2[1] = ui+vi;
+
+ xt1 = xt2 + (j);
+ } while ((r=r-(mh<<1)) != 0);
+ }
+ { /* remaining offset mh>>1: both twiddle components are the same constant */
+ FIXP_DBL *xt1 = x + (mh>>1);
+ int r = n;
+
+ do
+ {
+ FIXP_DBL *xt2 = xt1 + (mh<<1);
+ FIXP_DBL vr,vi,ur,ui;
+
+ cplxMultDiv2(&vi, &vr, xt2[1], xt2[0], STC(0x5a82799a), STC(0x5a82799a));
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui+vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui-vi;
+
+ xt1 += mh;
+ xt2 += mh;
+
+ cplxMultDiv2(&vr, &vi, xt2[1], xt2[0], STC(0x5a82799a), STC(0x5a82799a));
+
+ ur = xt1[0]>>1;
+ ui = xt1[1]>>1;
+
+ xt1[0] = ur+vr;
+ xt1[1] = ui-vi;
+
+ xt2[0] = ur-vr;
+ xt2[1] = ui+vi;
+
+ xt1 = xt2 + mh;
+ } while ((r=r-(mh<<1)) != 0);
+ }
+ }
+}
+
+#endif /* if defined(FUNCTION_dit_fft) */
+
+#endif /* if defined(SINETABLE_16BIT) */
+
+#endif /* ifndef FUNCTION_dit_fft */
diff --git a/libFDK/src/arm/qmf_arm.cpp b/libFDK/src/arm/qmf_arm.cpp
new file mode 100644
index 0000000..df538a4
--- /dev/null
+++ b/libFDK/src/arm/qmf_arm.cpp
@@ -0,0 +1,710 @@
+/****************************************************************************
+
+ (C) Copyright Fraunhofer IIS (2004)
+ All Rights Reserved
+
+ Please be advised that this software and/or program delivery is
+ Confidential Information of Fraunhofer and subject to and covered by the
+
+ Fraunhofer IIS Software Evaluation Agreement
+ between Google Inc. and Fraunhofer
+ effective and in full force since March 1, 2012.
+
+ You may use this software and/or program only under the terms and
+ conditions described in the above mentioned Fraunhofer IIS Software
+ Evaluation Agreement. Any other and/or further use requires a separate agreement.
+
+
+ This software and/or program is protected by copyright law and international
+ treaties. Any reproduction or distribution of this software and/or program,
+ or any portion of it, may result in severe civil and criminal penalties, and
+ will be prosecuted to the maximum extent possible under law.
+
+ $Id$
+
+ History: 04-NOV-2009 A. Tritthart Optimized qmfSynPrototypeFirSlot1
+
+****************************************************************************/
+#if (QMF_NO_POLY==5)
+
+#define FUNCTION_qmfForwardModulationLP_odd
+
+#ifdef FUNCTION_qmfForwardModulationLP_odd
+static void
+qmfForwardModulationLP_odd( HANDLE_QMF_FILTER_BANK anaQmf, /*!< Handle of Qmf Analysis Bank */
+ const FIXP_QMF *timeIn, /*!< Time Signal */
+ FIXP_QMF *rSubband ) /*!< Real Output */
+{
+  /* Fold 2*L time samples into L values held in rSubband[], then
+     transform them in place with dct_IV(). */
+  const int L = anaQmf->no_channels;
+  const int M = L>>1;
+  const int shift = (L>>6) + 1;
+  int rSubband_e = 0; /* handed to dct_IV() by address; not read back here */
+  int k;
+
+  for (k = 0; k < M; k++)
+  {
+    /* upper half of rSubband[], filled front to back */
+    rSubband[M+k] = (timeIn[L-1-k] >> 1) - (timeIn[k] >> shift);
+
+    /* lower half of rSubband[], filled back to front */
+    rSubband[M-1-k] = (timeIn[L+k] >> 1) + (timeIn[2*L-1-k] >> shift);
+  }
+
+  /* in-place transform of the L folded values */
+  dct_IV(rSubband,L, &rSubband_e);
+
+}
+#endif /* FUNCTION_qmfForwardModulationLP_odd */
+
+
+/* NEON optimized QMF currently builds only with RVCT toolchain */
+
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_5TE__)
+
+#if (SAMPLE_BITS == 16)
+#define FUNCTION_qmfAnaPrototypeFirSlot
+#endif
+
+#ifdef FUNCTION_qmfAnaPrototypeFirSlot
+
+#if defined(__GNUC__) /* cppp replaced: elif */
+
+inline INT SMULBB (const SHORT a, const LONG b)
+{
+  /* ARM SMULBB: signed product of the bottom halfword of a and the bottom halfword of b */
+  INT prod;
+  __asm__ ("smulbb %0, %1, %2" : "=r" (prod) : "r" (a), "r" (b));
+
+  return prod;
+}
+inline INT SMULBT (const SHORT a, const LONG b)
+{
+  /* ARM SMULBT: signed product of the bottom halfword of a and the top halfword of b */
+  INT prod;
+  __asm__ ("smulbt %0, %1, %2" : "=r" (prod) : "r" (a), "r" (b));
+
+  return prod;
+}
+
+inline INT SMLABB(const LONG accu, const SHORT a, const LONG b)
+{
+  /* ARM SMLABB: accu + (bottom halfword of a) * (bottom halfword of b) */
+  INT sum;
+  __asm__ ("smlabb %0, %1, %2,%3" : "=r" (sum) : "r" (a), "r" (b), "r" (accu));
+
+  return sum;
+}
+inline INT SMLABT(const LONG accu, const SHORT a, const LONG b)
+{
+  /* ARM SMLABT: accu + (bottom halfword of a) * (top halfword of b) */
+  INT sum;
+  __asm__ ("smlabt %0, %1, %2,%3" : "=r" (sum) : "r" (a), "r" (b), "r" (accu));
+
+  return sum;
+}
+#endif /* compiler selection */
+
+
+void qmfAnaPrototypeFirSlot( FIXP_QMF *analysisBuffer,
+ int no_channels, /*!< Number channels of analysis filter */
+ const FIXP_PFT *p_filter,
+ int p_stride, /*!< Stride of analysis filter */
+ FIXP_QAS *RESTRICT pFilterStates
+ )
+{
+ LONG *p_flt = (LONG *) p_filter; /* coefficients are fetched as LONG words; each B/T multiply below uses one 16-bit half */
+ LONG flt;
+ FIXP_QMF *RESTRICT pData_0 = analysisBuffer + 2*no_channels - 1; /* output written from the end, downwards */
+ FIXP_QMF *RESTRICT pData_1 = analysisBuffer; /* output written from the start, upwards */
+
+ FIXP_QAS *RESTRICT sta_0 = (FIXP_QAS *)pFilterStates; /* state walker starting at the first state */
+ FIXP_QAS *RESTRICT sta_1 = (FIXP_QAS *)pFilterStates + (2*QMF_NO_POLY*no_channels) - 1; /* state walker starting at the last state */
+
+ FIXP_DBL accu0, accu1;
+ FIXP_QAS sta0, sta1;
+
+ int staStep1 = no_channels<<1; /* distance between consecutive taps of one FIR */
+ int staStep2 = (no_channels<<3) - 1; /* Rewind one less */
+
+ if (p_stride == 1) /* contiguous coefficients: five 16-bit taps per output, so outputs alternate between B-first and T-first words */
+ {
+ /* FIR filter 0 */
+ flt = *p_flt++;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu1 = SMULBB( sta1, flt);
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ flt = *p_flt++;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu1 = SMLABB( accu1, sta1, flt);
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ flt = *p_flt++;
+ sta1 = *sta_1; sta_1 += staStep2;
+ accu1 = SMLABB( accu1, sta1, flt);
+ *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
+
+ /* FIR filters 1..63 127..65 or 1..31 63..33 */
+ no_channels >>= 1; /* each loop pass below produces two output pairs */
+ for (; --no_channels; ) /* pre-decrement test: runs (no_channels - 1) passes; NOTE(review): a count below 1 here is not handled */
+ {
+ sta0 = *sta_0; sta_0 += staStep1; /* 1,3,5, ... 29/61 */
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMULBT( sta0, flt); /* starts with the top half of the word left over from the previous output */
+ accu1 = SMULBT( sta1, flt);
+
+ flt = *p_flt++;
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABB( accu0, sta0, flt);
+ accu1 = SMLABB( accu1, sta1, flt);
+
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABT( accu0, sta0, flt);
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ flt = *p_flt++;
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABB( accu0, sta0, flt);
+ accu1 = SMLABB( accu1, sta1, flt);
+
+ sta0 = *sta_0; sta_0 -= staStep2;
+ sta1 = *sta_1; sta_1 += staStep2;
+ accu0 = SMLABT( accu0, sta0, flt);
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
+ *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
+
+ /* Same sequence as above, but mix B=bottom with T=Top */
+
+ flt = *p_flt++;
+ sta0 = *sta_0; sta_0 += staStep1; /* 2,4,6, ... 30/62 */
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMULBB( sta0, flt);
+ accu1 = SMULBB( sta1, flt);
+
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABT( accu0, sta0, flt);
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ flt = *p_flt++;
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABB( accu0, sta0, flt);
+ accu1 = SMLABB( accu1, sta1, flt);
+
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABT( accu0, sta0, flt);
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ flt = *p_flt++;
+ sta0 = *sta_0; sta_0 -= staStep2;
+ sta1 = *sta_1; sta_1 += staStep2;
+ accu0 = SMLABB( accu0, sta0, flt);
+ accu1 = SMLABB( accu1, sta1, flt);
+
+ *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
+ *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
+ }
+
+ /* FIR filter 31/63 and 33/65 */
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMULBT( sta0, flt);
+ accu1 = SMULBT( sta1, flt);
+
+ flt = *p_flt++;
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABB( accu0, sta0, flt);
+ accu1 = SMLABB( accu1, sta1, flt);
+
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABT( accu0, sta0, flt);
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ flt = *p_flt++;
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABB( accu0, sta0, flt);
+ accu1 = SMLABB( accu1, sta1, flt);
+
+ sta0 = *sta_0; sta_0 -= staStep2;
+ sta1 = *sta_1; sta_1 += staStep2;
+ accu0 = SMLABT( accu0, sta0, flt);
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
+ *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
+
+ /* FIR filter 32/64 */
+ flt = *p_flt++;
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMULBB( sta0, flt);
+ accu1 = SMULBB( sta1, flt);
+
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABT( accu0, sta0, flt);
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ flt = *p_flt++;
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABB( accu0, sta0, flt);
+ accu1 = SMLABB( accu1, sta1, flt);
+
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABT( accu0, sta0, flt);
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ flt = *p_flt; /* last word: pointers are no longer advanced from here on */
+ sta0 = *sta_0;
+ sta1 = *sta_1;
+ accu0 = SMLABB( accu0, sta0, flt);
+ accu1 = SMLABB( accu1, sta1, flt);
+
+ *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
+ *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
+ }
+ else /* strided coefficients: every output starts on a fresh word (p_flt[0..2]), so the B/T pattern is the same for all outputs */
+ {
+ int pfltStep = QMF_NO_POLY * (p_stride-1); /* p_flt advance, in LONG words, between consecutive outputs */
+
+ flt = p_flt[0];
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu1 = SMULBB( sta1, flt);
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ flt = p_flt[1];
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu1 = SMLABB( accu1, sta1, flt);
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ flt = p_flt[2]; p_flt += pfltStep;
+ sta1 = *sta_1; sta_1 += staStep2;
+ accu1 = SMLABB( accu1, sta1, flt);
+ *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
+
+ /* FIR filters 1..63 127..65 or 1..31 63..33 */
+ for (; --no_channels; ) /* pre-decrement test: runs (no_channels - 1) passes; NOTE(review): no_channels < 1 is not handled */
+ {
+ flt = p_flt[0];
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMULBB( sta0, flt);
+ accu1 = SMULBB( sta1, flt);
+
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABT( accu0, sta0, flt);
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ flt = p_flt[1];
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABB( accu0, sta0, flt);
+ accu1 = SMLABB( accu1, sta1, flt);
+
+ sta0 = *sta_0; sta_0 += staStep1;
+ sta1 = *sta_1; sta_1 -= staStep1;
+ accu0 = SMLABT( accu0, sta0, flt);
+ accu1 = SMLABT( accu1, sta1, flt);
+
+ flt = p_flt[2]; p_flt += pfltStep;
+ sta0 = *sta_0; sta_0 -= staStep2;
+ sta1 = *sta_1; sta_1 += staStep2;
+ accu0 = SMLABB( accu0, sta0, flt);
+ accu1 = SMLABB( accu1, sta1, flt);
+
+ *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
+ *pData_1++ = FX_DBL2FX_QMF(accu1<<1);
+ }
+
+ /* FIR filter 32/64 */
+ flt = p_flt[0];
+ sta0 = *sta_0; sta_0 += staStep1;
+ accu0 = SMULBB( sta0, flt);
+ sta0 = *sta_0; sta_0 += staStep1;
+ accu0 = SMLABT( accu0, sta0, flt);
+
+ flt = p_flt[1];
+ sta0 = *sta_0; sta_0 += staStep1;
+ accu0 = SMLABB( accu0, sta0, flt);
+ sta0 = *sta_0; sta_0 += staStep1;
+ accu0 = SMLABT( accu0, sta0, flt);
+
+ flt = p_flt[2];
+ sta0 = *sta_0;
+ accu0 = SMLABB( accu0, sta0, flt);
+ *pData_0-- = FX_DBL2FX_QMF(accu0<<1);
+ }
+}
+#endif /* FUNCTION_qmfAnaPrototypeFirSlot */
+#endif /* defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_5TE__) */
+
+#if ( defined(__ARM_ARCH_5TE__) && (SAMPLE_BITS == 16) ) && !defined(QMF_TABLE_FULL)
+
+#define FUNCTION_qmfSynPrototypeFirSlot
+
+#if defined(FUNCTION_qmfSynPrototypeFirSlot)
+
+#if defined(__GNUC__) /* cppp replaced: elif */
+
+inline INT SMULWB (const LONG a, const LONG b)
+{
+  /* ARM SMULWB: a times the bottom halfword of b (signed),
+     keeping the upper 32 bits of the 48-bit product */
+  INT prod;
+  __asm__ ("smulwb %0, %1, %2" : "=r" (prod) : "r" (a), "r" (b));
+
+  return prod;
+}
+inline INT SMULWT (const LONG a, const LONG b)
+{
+  /* ARM SMULWT: a times the top halfword of b (signed),
+     keeping the upper 32 bits of the 48-bit product */
+  INT prod;
+  __asm__ ("smulwt %0, %1, %2" : "=r" (prod) : "r" (a), "r" (b));
+
+  return prod;
+}
+
+inline INT SMLAWB(const LONG accu, const LONG a, const LONG b)
+{
+  /* ARM SMLAWB: accu + upper 32 bits of (a * bottom halfword of b) */
+  INT sum;
+  __asm__ ("smlawb %0, %1, %2, %3 " : "=r" (sum) : "r" (a), "r" (b), "r" (accu));
+
+  return sum;
+}
+
+inline INT SMLAWT(const LONG accu, const LONG a, const LONG b)
+{
+  /* ARM SMLAWT: accu + upper 32 bits of (a * top halfword of b) */
+  INT sum;
+  __asm__ ("smlawt %0, %1, %2, %3 " : "=r" (sum) : "r" (a), "r" (b), "r" (accu));
+
+  return sum;
+}
+
+#endif /* ARM compiler selector */
+
+
+static void qmfSynPrototypeFirSlot1_filter(FIXP_QMF *RESTRICT realSlot,
+ FIXP_QMF *RESTRICT imagSlot,
+ const FIXP_DBL *RESTRICT p_flt,
+ FIXP_QSS *RESTRICT sta,
+ FIXP_DBL *pMyTimeOut,
+ int no_channels)
+{
+ /* Per loop pass: reads two samples backwards from realSlot/imagSlot, shifts 2*9 filter states and writes two outputs. */
+ /* Original note: this code was the base for an assembler sequence; usable for debugging or further optimizations. */
+ const FIXP_DBL *RESTRICT p_fltm = p_flt + 155; /* second coefficient pointer, walks downwards while p_flt walks upwards */
+
+ do /* bottom-tested: no_channels must be >= 1 */
+ {
+ FIXP_DBL result;
+ FIXP_DBL A, B, real, imag, sta0;
+
+ /* Each state update is written as 'sta[0] = f(sta[1]); sta++;' because '*sta++ = f(sta[1])' reads and modifies sta unsequenced. */
+ real = *--realSlot;
+ imag = *--imagSlot;
+ B = p_flt[4]; /* Bottom=[8] Top=[9] */
+ A = p_fltm[3]; /* Bottom=[316] Top=[317] */
+ sta0 = sta[0]; /* save state[0] */
+ sta[0] = SMLAWT( sta[1], imag, B ); sta++; /* index=9...........319 */
+ sta[0] = SMLAWB( sta[1], real, A ); sta++; /* index=316...........6 */
+ sta[0] = SMLAWB( sta[1], imag, B ); sta++; /* index=8,18, ...318 */
+ B = p_flt[3]; /* Bottom=[6] Top=[7] */
+ sta[0] = SMLAWT( sta[1], real, A ); sta++; /* index=317...........7 */
+ A = p_fltm[4]; /* Bottom=[318] Top=[319] */
+ sta[0] = SMLAWT( sta[1], imag, B ); sta++; /* index=7...........317 */
+ sta[0] = SMLAWB( sta[1], real, A ); sta++; /* index=318...........8 */
+ sta[0] = SMLAWB( sta[1], imag, B ); sta++; /* index=6...........316 */
+ B = p_flt[2]; /* Bottom=[X] Top=[5] */
+ sta[0] = SMLAWT( sta[1], real, A ); sta++; /* index=9...........319 */
+ A = p_fltm[2]; /* Bottom=[X] Top=[315] */
+ sta[0] = SMULWT( imag, B ); sta++; /* index=5,15, ... 315 */
+ result = SMLAWT( sta0, real, A ); /* index=315...........5 */
+
+ *pMyTimeOut++ = result;
+
+ real = *--realSlot;
+ imag = *--imagSlot;
+ A = p_fltm[0]; /* Bottom=[310] Top=[311] */
+ B = p_flt[7]; /* Bottom=[14] Top=[15] */
+ result = SMLAWB( sta[0], real, A ); /* index=310...........0 */
+ sta[0] = SMLAWB( sta[1], imag, B ); sta++; /* index=14..........324 */
+ *pMyTimeOut++ = result;
+ B = p_flt[6]; /* Bottom=[12] Top=[13] */
+ sta[0] = SMLAWT( sta[1], real, A ); sta++; /* index=311...........1 */
+ A = p_fltm[1]; /* Bottom=[312] Top=[313] */
+ sta[0] = SMLAWT( sta[1], imag, B ); sta++; /* index=13..........323 */
+ sta[0] = SMLAWB( sta[1], real, A ); sta++; /* index=312...........2 */
+ sta[0] = SMLAWB( sta[1], imag, B ); sta++; /* index=12..........322 */
+ sta[0] = SMLAWT( sta[1], real, A ); sta++; /* index=313...........3 */
+ A = p_fltm[2]; /* Bottom=[314] Top=[315] */
+ B = p_flt[5]; /* Bottom=[10] Top=[11] */
+ sta[0] = SMLAWT( sta[1], imag, B ); sta++; /* index=11..........321 */
+ sta[0] = SMLAWB( sta[1], real, A ); sta++; /* index=314...........4 */
+ sta[0] = SMULWB( imag, B ); sta++; /* index=10..........320 */
+
+
+ p_flt += 5;
+ p_fltm -= 5;
+ }
+ while ((--no_channels) != 0);
+
+}
+
+
+
+/*
+ * Convert one filtered accumulator value to an output sample.
+ * Negative values get add_neg added first so that the following arithmetic
+ * right shift truncates toward zero; the value is then saturated to
+ * [max_neg, max_pos] and shifted into the PCM range.
+ */
+static inline INT_PCM qmfSynSlot2ToPcm(FIXP_DBL value,
+                                       const FIXP_DBL add_neg,
+                                       const FIXP_DBL max_neg,
+                                       const FIXP_DBL max_pos,
+                                       const int scale)
+{
+  if (value < 0) value += add_neg;
+  if (value < max_neg) value = max_neg;
+  if (value > max_pos) value = max_pos;
+#if (SAMPLE_BITS == 16)
+  return (INT_PCM)(value >> scale);
+#else
+  return (INT_PCM)(value << scale);
+#endif
+}
+
+/*!
+  \brief Synthesis prototype filtering of one slot for the p_stride == 2,
+         32 channel configuration, using the ARM 32x16 multiply wrappers.
+
+  Per channel the routine produces one output value from sta[0] and updates
+  nine filter states as sta[k] = sta[k+1] + coefficient * input. The output
+  values are optionally multiplied by qmf->outGain, then rounded, saturated
+  and written to timeOut in reverse order (last channel first).
+
+  The state update uses explicit indices; the former
+  "*sta++ = F(sta[1], ...)" form both modified and read sta inside one
+  expression, which is unsequenced before C++17.
+
+  \return 0 on success, -1 if the filter bank configuration is not handled
+          here. In the -1 case nothing has been read or written, so the
+          caller can run a different implementation.
+*/
+INT qmfSynPrototypeFirSlot2(
+    HANDLE_QMF_FILTER_BANK qmf,
+    FIXP_QMF *RESTRICT realSlot,            /*!< Input: Pointer to real Slot */
+    FIXP_QMF *RESTRICT imagSlot,            /*!< Input: Pointer to imag Slot */
+    INT_PCM  *RESTRICT timeOut,             /*!< Time domain data */
+    INT       stride                        /*!< Time output buffer stride factor*/
+    )
+{
+  FIXP_QSS *RESTRICT sta = (FIXP_QSS*)qmf->FilterStates;
+  const int no_channels = qmf->no_channels;
+  int scale = ((DFRACT_BITS-SAMPLE_BITS)-1-qmf->outScalefactor);
+
+  /* We map an array of 16-bit values upon an array of 2*16-bit values to read 2 values in one shot */
+  const FIXP_DBL *RESTRICT p_flt  = (const FIXP_DBL *) qmf->p_filter;           /* low=[0],   high=[1]   */
+  const FIXP_DBL *RESTRICT p_fltm = (const FIXP_DBL *) qmf->p_filter + 155;     /* low=[310], high=[311] */
+
+  /* Intermediate results, one per channel. The fixed size and the +155
+     mirror offset above are only valid for 32 channels with p_stride 2. */
+  FIXP_DBL MyTimeOut[32];
+  FIXP_DBL *pMyTimeOut = &MyTimeOut[0];
+
+  /* Refuse unsupported configurations before touching any state, instead of
+     relying on an assertion that disappears in release builds. */
+  if (qmf->p_stride != 2 || no_channels != 32) {
+    return -1;
+  }
+
+  FDK_ASSERT(SAMPLE_BITS-1-qmf->outScalefactor >= 0); //   (DFRACT_BITS-SAMPLE_BITS)-1-qmf->outScalefactor >= 0);
+
+  realSlot += no_channels-1;    // ~~"~~
+  imagSlot += no_channels-1;    // no_channels-1 .. 0
+
+  for (int ch = 0; ch < no_channels; ch++)
+  {
+    FIXP_DBL result;
+    FIXP_DBL A, B, real, imag;
+
+    real = *realSlot--;
+    imag = *imagSlot--;
+    A = p_fltm[0];                              /* Bottom=[310]  Top=[311]  */
+    B = p_flt[7];                               /* Bottom=[14]   Top=[15]   */
+    result = SMLAWB( sta[0], real, A );         /* index=310...........0    */
+    sta[0] = SMLAWB( sta[1], imag, B );         /* index=14..........324    */
+    B = p_flt[6];                               /* Bottom=[12]   Top=[13]   */
+    sta[1] = SMLAWT( sta[2], real, A );         /* index=311...........1    */
+    A = p_fltm[1];                              /* Bottom=[312]  Top=[313]  */
+    sta[2] = SMLAWT( sta[3], imag, B );         /* index=13..........323    */
+    sta[3] = SMLAWB( sta[4], real, A );         /* index=312...........2    */
+    sta[4] = SMLAWB( sta[5], imag, B );         /* index=12..........322    */
+    sta[5] = SMLAWT( sta[6], real, A );         /* index=313...........3    */
+    A = p_fltm[2];                              /* Bottom=[314]  Top=[315]  */
+    B = p_flt[5];                               /* Bottom=[10]   Top=[11]   */
+    sta[6] = SMLAWT( sta[7], imag, B );         /* index=11..........321    */
+    sta[7] = SMLAWB( sta[8], real, A );         /* index=314...........4    */
+    sta[8] = SMULWB( imag, B );                 /* index=10..........320    */
+    sta += 9;
+
+    *pMyTimeOut++ = result;
+
+    p_fltm -= 5;
+    p_flt  += 5;
+  }
+
+  pMyTimeOut = &MyTimeOut[0];
+#if (SAMPLE_BITS == 16)
+  const FIXP_DBL max_pos = (FIXP_DBL) 0x00007FFF << scale;
+  const FIXP_DBL max_neg = (FIXP_DBL) 0xFFFF8001 << scale;
+#else
+  scale = -scale;
+  const FIXP_DBL max_pos = (FIXP_DBL) 0x7FFFFFFF >> scale;
+  const FIXP_DBL max_neg = (FIXP_DBL) 0x80000001 >> scale;
+#endif
+  const FIXP_DBL add_neg = (1 << scale) - 1;
+
+  /* Output is written back to front: start one stride past the last sample. */
+  timeOut += no_channels*stride;
+
+  FDK_ASSERT(scale >= 0);
+
+  if (qmf->outGain != 0x80000000)
+  {
+    /* Apply the output gain to every value before conversion. */
+    const FIXP_DBL gain = qmf->outGain;
+    for (int n = no_channels >> 2; n--;)
+    {
+      timeOut -= stride;
+      timeOut[0] = qmfSynSlot2ToPcm(fMult(*pMyTimeOut++, gain), add_neg, max_neg, max_pos, scale);
+      timeOut -= stride;
+      timeOut[0] = qmfSynSlot2ToPcm(fMult(*pMyTimeOut++, gain), add_neg, max_neg, max_pos, scale);
+      timeOut -= stride;
+      timeOut[0] = qmfSynSlot2ToPcm(fMult(*pMyTimeOut++, gain), add_neg, max_neg, max_pos, scale);
+      timeOut -= stride;
+      timeOut[0] = qmfSynSlot2ToPcm(fMult(*pMyTimeOut++, gain), add_neg, max_neg, max_pos, scale);
+    }
+  }
+  else
+  {
+    /* This outGain value selects the path without a gain multiplication. */
+    for (int n = no_channels >> 2; n--;)
+    {
+      timeOut -= stride;
+      timeOut[0] = qmfSynSlot2ToPcm(*pMyTimeOut++, add_neg, max_neg, max_pos, scale);
+      timeOut -= stride;
+      timeOut[0] = qmfSynSlot2ToPcm(*pMyTimeOut++, add_neg, max_neg, max_pos, scale);
+      timeOut -= stride;
+      timeOut[0] = qmfSynSlot2ToPcm(*pMyTimeOut++, add_neg, max_neg, max_pos, scale);
+      timeOut -= stride;
+      timeOut[0] = qmfSynSlot2ToPcm(*pMyTimeOut++, add_neg, max_neg, max_pos, scale);
+    }
+  }
+  return 0;
+}
+
+/* Forward declaration of the implementation that qmfSynPrototypeFirSlot()
+   calls when no optimized routine handled the slot. It takes the same
+   arguments as qmfSynPrototypeFirSlot(). The definition is not part of this
+   section; being static, it is expected in the same translation unit
+   (presumably the file that includes this one - confirm). */
+static
+void qmfSynPrototypeFirSlot_fallback( HANDLE_QMF_FILTER_BANK qmf,
+ FIXP_DBL *realSlot, /*!< Input: Pointer to real Slot */
+ FIXP_DBL *imagSlot, /*!< Input: Pointer to imag Slot */
+ INT_PCM *timeOut, /*!< Time domain data */
+ const int stride
+ );
+
+/*!
+  \brief Perform Synthesis Prototype Filtering on a single slot of input data.
+
+  The filter takes 2 * #MAX_SYNTHESIS_CHANNELS of input data and
+  generates #MAX_SYNTHESIS_CHANNELS time domain output samples.
+
+  Dispatcher: for qmf->p_stride == 2 the optimized
+  qmfSynPrototypeFirSlot2() is tried first. If the stride is different, or
+  the optimized routine returns a non-zero value,
+  qmfSynPrototypeFirSlot_fallback() is called with the same arguments.
+*/
+
+static
+void qmfSynPrototypeFirSlot( HANDLE_QMF_FILTER_BANK qmf,
+                             FIXP_DBL *realSlot,            /*!< Input: Pointer to real Slot */
+                             FIXP_DBL *imagSlot,            /*!< Input: Pointer to imag Slot */
+                             INT_PCM  *timeOut,             /*!< Time domain data */
+                             const int stride
+                            )
+{
+  /* Non-zero means "not handled by an optimized routine". */
+  INT status = -1;
+
+  if (qmf->p_stride == 2) {
+    status = qmfSynPrototypeFirSlot2(qmf, realSlot, imagSlot, timeOut, stride);
+  }
+
+  if (status == 0) {
+    return;
+  }
+
+  /* fallback if configuration not available or failed */
+  qmfSynPrototypeFirSlot_fallback(qmf, realSlot, imagSlot, timeOut, stride);
+}
+#endif /* FUNCTION_qmfSynPrototypeFirSlot */
+
+#endif /* ( defined(__ARM_ARCH_5TE__) && (SAMPLE_BITS == 16) ) && !defined(QMF_TABLE_FULL) */
+
+
+
+/* #####################################################################################*/
+
+
+
+#endif /* (QMF_NO_POLY==5) */
+
diff --git a/libFDK/src/arm/scale_arm.cpp b/libFDK/src/arm/scale_arm.cpp
new file mode 100644
index 0000000..906766f
--- /dev/null
+++ b/libFDK/src/arm/scale_arm.cpp
@@ -0,0 +1,110 @@
+/*************************** Fraunhofer IIS FDK Tools **********************
+
+ (C) Copyright Fraunhofer IIS (2005)
+ All Rights Reserved
+
+ Please be advised that this software and/or program delivery is
+ Confidential Information of Fraunhofer and subject to and covered by the
+
+ Fraunhofer IIS Software Evaluation Agreement
+ between Google Inc. and Fraunhofer
+ effective and in full force since March 1, 2012.
+
+ You may use this software and/or program only under the terms and
+ conditions described in the above mentioned Fraunhofer IIS Software
+ Evaluation Agreement. Any other and/or further use requires a separate agreement.
+
+
+ $Id$
+ Author(s): Arthur Tritthart
+ Description: Scaling operations for ARM
+
+ This software and/or program is protected by copyright law and international
+ treaties. Any reproduction or distribution of this software and/or program,
+ or any portion of it, may result in severe civil and criminal penalties, and
+ will be prosecuted to the maximum extent possible under law.
+
+******************************************************************************/
+/* prevent multiple inclusion with re-definitions */
+#ifndef __INCLUDE_SCALE_ARM__
+#define __INCLUDE_SCALE_ARM__
+
+#define FUNCTION_scaleValuesWithFactor_DBL
+
+/*!
+ * \brief Multiply every element of a vector by a factor and scale the
+ *        result by a power of two, in place.
+ *
+ * Each element becomes fMultDiv2(element, factor) shifted left by
+ * (scalefactor + 1) when that amount is non-negative, or shifted right by
+ * its magnitude otherwise.
+ *
+ * The shift amount is limited to DFRACT_BITS-1 in both directions. The
+ * right-shift direction was previously unbounded, so a scalefactor below
+ * -DFRACT_BITS shifted by at least the operand width, which is undefined.
+ *
+ * \param vector       Data to be scaled; read and overwritten.
+ * \param factor       Multiplier applied to every element.
+ * \param len          Number of elements in vector.
+ * \param scalefactor  Power-of-two exponent; positive scales up, negative
+ *                     scales down.
+ */
+SCALE_INLINE
+void scaleValuesWithFactor(
+        FIXP_DBL *vector,
+        FIXP_DBL factor,
+        INT len,
+        INT scalefactor
+        )
+{
+  /* This code combines the fMult with the scaling             */
+  /* It performs a fMultDiv2 and increments shift by 1         */
+  int shift = scalefactor + 1;
+  FIXP_DBL *mySpec = vector;
+
+  if (shift >= 0)
+  {
+    shift = fixmin_I(shift,(INT)DFRACT_BITS-1);
+
+    /* Main part: four elements per iteration. */
+    for (int i=0; i<(len>>2); i++)
+    {
+      FIXP_DBL tmp0 = mySpec[0];
+      FIXP_DBL tmp1 = mySpec[1];
+      FIXP_DBL tmp2 = mySpec[2];
+      FIXP_DBL tmp3 = mySpec[3];
+      tmp0 = fMultDiv2(tmp0, factor);
+      tmp1 = fMultDiv2(tmp1, factor);
+      tmp2 = fMultDiv2(tmp2, factor);
+      tmp3 = fMultDiv2(tmp3, factor);
+      tmp0 <<= shift;
+      tmp1 <<= shift;
+      tmp2 <<= shift;
+      tmp3 <<= shift;
+      *mySpec++ = tmp0;
+      *mySpec++ = tmp1;
+      *mySpec++ = tmp2;
+      *mySpec++ = tmp3;
+    }
+    /* Remaining len % 4 elements. */
+    for (int i=len&3; i--;)
+    {
+      FIXP_DBL tmp0 = mySpec[0];
+      tmp0 = fMultDiv2(tmp0, factor);
+      tmp0 <<= shift;
+      *mySpec++ = tmp0;
+    }
+  }
+  else
+  {
+    /* Clamp the magnitude as well, so the right shift stays below the
+       operand width. */
+    shift = fixmin_I(-shift,(INT)DFRACT_BITS-1);
+
+    /* Main part: four elements per iteration. */
+    for (int i=0; i<(len>>2); i++)
+    {
+      FIXP_DBL tmp0 = mySpec[0];
+      FIXP_DBL tmp1 = mySpec[1];
+      FIXP_DBL tmp2 = mySpec[2];
+      FIXP_DBL tmp3 = mySpec[3];
+      tmp0 = fMultDiv2(tmp0, factor);
+      tmp1 = fMultDiv2(tmp1, factor);
+      tmp2 = fMultDiv2(tmp2, factor);
+      tmp3 = fMultDiv2(tmp3, factor);
+      tmp0 >>= shift;
+      tmp1 >>= shift;
+      tmp2 >>= shift;
+      tmp3 >>= shift;
+      *mySpec++ = tmp0;
+      *mySpec++ = tmp1;
+      *mySpec++ = tmp2;
+      *mySpec++ = tmp3;
+    }
+    /* Remaining len % 4 elements. */
+    for (int i=len&3; i--;)
+    {
+      FIXP_DBL tmp0 = mySpec[0];
+      tmp0 = fMultDiv2(tmp0, factor);
+      tmp0 >>= shift;
+      *mySpec++ = tmp0;
+    }
+  }
+}
+
+#endif /* #ifndef __INCLUDE_SCALE_ARM__ */