From 9bf37cc9712506b2483650c82d3c41152337ef7e Mon Sep 17 00:00:00 2001 From: Dave Burke Date: Tue, 17 Apr 2012 09:51:45 -0700 Subject: Fraunhofer AAC codec. License boilerplate update to follow. Change-Id: I2810460c11a58b6d148d84673cc031f3685e79b5 --- libFDK/src/dct.cpp | 513 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 513 insertions(+) create mode 100644 libFDK/src/dct.cpp (limited to 'libFDK/src/dct.cpp') diff --git a/libFDK/src/dct.cpp b/libFDK/src/dct.cpp new file mode 100644 index 0000000..d05a26f --- /dev/null +++ b/libFDK/src/dct.cpp @@ -0,0 +1,513 @@ +/**************************************************************************** + + (C) copyright Fraunhofer IIS (2004) + All Rights Reserved + + Please be advised that this software and/or program delivery is + Confidential Information of Fraunhofer and subject to and covered by the + + Fraunhofer IIS Software Evaluation Agreement + between Google Inc. and Fraunhofer + effective and in full force since March 1, 2012. + + You may use this software and/or program only under the terms and + conditions described in the above mentioned Fraunhofer IIS Software + Evaluation Agreement. Any other and/or further use requires a separate agreement. + + + + $Id$ + +*******************************************************************************/ +/*! + \file dct.cpp + \brief DCT Implementations $Revision: 36871 $ + Library functions to calculate standard DCTs. This will most likely be replaced by hand-optimized + functions for the specific target processor. + + Three different implementations of the dct type II and the dct type III transforms are provided. + + By default implementations which are based on a single, standard complex FFT-kernel are used (dctII_f() and dctIII_f()). + These are specifically helpful in cases where optimized FFT libraries are already available. The FFT used in these + implementation is FFT rad2 from FDK_tools. + + Of course, one might also use DCT-libraries should they be available. The DCT and DST + type IV implementations are only available in a version based on a complex FFT kernel. +*/ + +#include "dct.h" + + +#include "FDK_tools_rom.h" +#include "fft.h" + + +#if defined(__arm__) +#include "arm/dct_arm.cpp" +#endif + + +/*! + * + * \brief Perform dct type 3 + * The dct 3 is calculated by a inverse real fft, with + * some pre twiddeling before the inverse real fft, as discribed by Takuya OOURA. + * (http://momonga.t.u-tokyo.ac.jp/~ooura/fftman/ftmn2_32.html#sec2_3_2) + * The real inverse fft is calculated by a inverse complex fft, as described + * in numerical recipes in C, Cambridge University press + * the auxiliary function is built by reversing the odd samples + * + * Instead of doing 2 times the conjugation to calculate the + * inverse cfft with the cfft, one can also swap elements before + * calling the cfft to get the same result: + * swap + * r(1),i(1) with r(n-1),i(n-1), + * r(2),i(2) with r(n-2),i(n-2), ... + * r(n/2-1),i(n/2-1) with r(n/2+1),i(n/2+1) + * + * + * Scaling 1 shift in pre twiddeling, + * log2(L)-1 in cfft + */ +#if !defined(FUNCTION_dct_III) +void dct_III(FIXP_DBL *pDat, /*!< pointer to input/output */ + FIXP_DBL *tmp, /*!< pointer to temporal working buffer */ + int L, /*!< lenght of transform */ + int *pDat_e + ) +{ + FDK_ASSERT(L == 64 || L == 32); + int i; + FIXP_DBL xr, accu1, accu2; + int inc; + int M = L>>1; + int ld_M; + + if (L == 64) ld_M = 5; + else ld_M = 4; + + /* This loop performs multiplication for index i (i*inc) */ + inc = (64/2) >> ld_M; /* 64/L */ + + FIXP_DBL *pTmp_0 = &tmp[2]; + FIXP_DBL *pTmp_1 = &tmp[(M-1)*2]; + + for(i=1; i>1; i++,pTmp_0+=2,pTmp_1-=2) { + + FIXP_DBL accu3,accu4,accu5,accu6; + + cplxMultDiv2(&accu2, &accu1, pDat[L - i], pDat[i], sin_twiddle_L64[i*inc]); + cplxMultDiv2(&accu4, &accu3, pDat[M+i], pDat[M-i], sin_twiddle_L64[(M-i)*inc]); + accu3 >>= 1; accu4 >>= 1; + + /* This method is better for ARM926, that uses operand2 shifted right by 1 always */ + cplxMultDiv2(&accu6, &accu5, (accu3 - (accu1>>1)), ((accu2>>1) + accu4), sin_twiddle_L64[(4*i)*inc]); + xr = (accu1>>1) + accu3; + pTmp_0[0] = (xr>>1) - accu5; + pTmp_1[0] = (xr>>1) + accu5; + + xr = (accu2>>1) - accu4; + pTmp_0[1] = (xr>>1) - accu6; + pTmp_1[1] = -((xr>>1) + accu6); + + } + + xr = fMultDiv2(pDat[M], sin_twiddle_L64[64/2].v.re );/* cos((PI/(2*L))*M); */ + tmp[0] = ((pDat[0]>>1) + xr)>>1; + tmp[1] = ((pDat[0]>>1) - xr)>>1; + + cplxMultDiv2(&accu2, &accu1, pDat[L - (M/2)], pDat[M/2], sin_twiddle_L64[64/4]); + tmp[M] = accu1>>1; + tmp[M+1] = accu2>>1; + + /* dit_fft expects 1 bit scaled input values */ + fft(M, tmp, pDat_e); + + /* ARM926: 12 cycles per 2-iteration, no overhead code by compiler */ + pTmp_1 = &tmp[L]; + for (i = M>>1; i--;) + { + FIXP_DBL tmp1, tmp2, tmp3, tmp4; + tmp1 = *tmp++; + tmp2 = *tmp++; + tmp3 = *--pTmp_1; + tmp4 = *--pTmp_1; + *pDat++ = tmp1; + *pDat++ = tmp3; + *pDat++ = tmp2; + *pDat++ = tmp4; + } + + *pDat_e += 2; +} +#endif + +/*! + * + * \brief Perform dct type 2 + * The dct 2 is calculated by a real inverse fft, with + * some pre twiddeling after the fft, as discribed by Takuya OOURA. + * (http://momonga.t.u-tokyo.ac.jp/~ooura/fftman/ftmn2_32.html#sec2_3_2) + * The real inverse fft is calculated by a inverse complex fft, as described + * in numerical recipes in C, Cambridge University press + * the auxilery function is build by reversing the odd samples + * + * Scaling 1 shift in pre twiddeling, + * 5 in cfft + */ +#if !defined(FUNCTION_dct_II) +void dct_II(FIXP_DBL *pDat, /*!< pointer to input/output */ + FIXP_DBL *tmp, /*!< pointer to temporal working buffer */ + int L, /*!< lenght of transform */ + int *pDat_e + ) +{ + FDK_ASSERT(L == 64 || L == 32); + FIXP_DBL accu1,accu2; + FIXP_DBL *pTmp_0, *pTmp_1; + + int i; + int inc; + int M = L>>1; + int ld_M; + + FDK_ASSERT(L == 64 || L == 32); + ld_M = 4 + (L >> 6); /* L=64: 5, L=32: 4 */ + + inc = (64/2) >> ld_M; /* L=64: 1, L=32: 2 */ + + FIXP_DBL *pdat = &pDat[0]; + FIXP_DBL accu3, accu4; + pTmp_0 = &tmp[0]; + pTmp_1 = &tmp[L-1]; + for (i = M>>1; i--; ) + { + accu1 = *pdat++; + accu2 = *pdat++; + accu3 = *pdat++; + accu4 = *pdat++; + accu1 >>= 1; + accu2 >>= 1; + accu3 >>= 1; + accu4 >>= 1; + *pTmp_0++ = accu1; + *pTmp_0++ = accu3; + *pTmp_1-- = accu2; + *pTmp_1-- = accu4; + } + + + fft(M, tmp, pDat_e); + + pTmp_0 = &tmp[2]; + pTmp_1 = &tmp[(M-1)*2]; + + for (i=1; i>1; i++,pTmp_0+=2,pTmp_1-=2) { + + FIXP_DBL a1,a2; + FIXP_DBL accu3, accu4; + + a1 = ((pTmp_0[1]>>1) + (pTmp_1[1]>>1)); + a2 = ((pTmp_1[0]>>1) - (pTmp_0[0]>>1)); + + cplxMultDiv2(&accu1, &accu2, a2, a1, sin_twiddle_L64[(4*i)*inc]); + accu1<<=1; accu2<<=1; + + a1 = ((pTmp_0[0]>>1) + (pTmp_1[0]>>1)); + a2 = ((pTmp_0[1]>>1) - (pTmp_1[1]>>1)); + + cplxMultDiv2(&accu3, &accu4, (a1 + accu2), -(accu1 + a2), sin_twiddle_L64[i*inc]); + pDat[L - i] = accu4; + pDat[i] = accu3; + + cplxMultDiv2(&accu3, &accu4, (a1 - accu2), -(accu1 - a2), sin_twiddle_L64[(M-i)*inc]); + pDat[M + i] = accu4; + pDat[M - i] = accu3; + + } + + cplxMultDiv2(&accu1, &accu2, tmp[M], tmp[M+1], sin_twiddle_L64[(M/2)*inc]); + pDat[L - (M/2)] = accu2; + pDat[M/2] = accu1; + + pDat[0] = (tmp[0]>>1)+(tmp[1]>>1); + pDat[M] = fMult(((tmp[0]>>1)-(tmp[1]>>1)), sin_twiddle_L64[64/2].v.re);/* cos((PI/(2*L))*M); */ + + *pDat_e += 2; +} +#endif + +static +void getTables(const FIXP_WTP **twiddle, const FIXP_STP **sin_twiddle, int *sin_step, int length) +{ + int ld2_length; + + /* Get ld2 of length - 2 + 1 + -2: because first table entry is window of size 4 + +1: because we already include +1 because of ceil(log2(length)) */ + ld2_length = DFRACT_BITS-1-fNormz((FIXP_DBL)length) - 1; + + /* Extract sort of "eigenvalue" (the 4 left most bits) of length. */ + switch ( (length) >> (ld2_length-1) ) { + case 0x4: /* radix 2 */ + *sin_twiddle = SineTable512; + *sin_step = 1<<(9 - ld2_length); + *twiddle = windowSlopes[0][0][ld2_length-1]; + break; + case 0x7: /* 10 ms */ + *sin_twiddle = SineTable480; + *sin_step = 1<<(8 - ld2_length); + *twiddle = windowSlopes[0][1][ld2_length]; + break; + default: + *sin_twiddle = NULL; + *sin_step = 0; + *twiddle = NULL; + break; + } + + FDK_ASSERT(*twiddle != NULL); + + FDK_ASSERT(*sin_step > 0); + +} + +#if !defined(FUNCTION_dct_IV) + +void dct_IV(FIXP_DBL *pDat, + int L, + int *pDat_e) +{ + int sin_step = 0; + int M = L >> 1; + + const FIXP_WTP *twiddle; + const FIXP_STP *sin_twiddle; + + FDK_ASSERT(L >= 4); + + getTables(&twiddle, &sin_twiddle, &sin_step, L); + +#ifdef FUNCTION_dct_IV_func1 + if (M>=4 && (M&3) == 0) { + /* ARM926: 44 cycles for 2 iterations = 22 cycles/iteration */ + dct_IV_func1(M>>2, twiddle, &pDat[0], &pDat[L-1]); + } else +#endif /* FUNCTION_dct_IV_func1 */ + { + FIXP_DBL *RESTRICT pDat_0 = &pDat[0]; + FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2]; + register int i; + + /* 29 cycles on ARM926 */ + for (i = 0; i < M-1; i+=2,pDat_0+=2,pDat_1-=2) + { + register FIXP_DBL accu1,accu2,accu3,accu4; + + accu1 = pDat_1[1]; accu2 = pDat_0[0]; + accu3 = pDat_0[1]; accu4 = pDat_1[0]; + + cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]); + cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i+1]); + + pDat_0[0] = accu2; pDat_0[1] = accu1; + pDat_1[0] = accu4; pDat_1[1] = -accu3; + } + if (M&1) + { + register FIXP_DBL accu1,accu2; + + accu1 = pDat_1[1]; accu2 = pDat_0[0]; + + cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]); + + pDat_0[0] = accu2; pDat_0[1] = accu1; + } + } + + fft(M, pDat, pDat_e); + +#ifdef FUNCTION_dct_IV_func2 + if (M>=4 && (M&3) == 0) { + /* ARM926: 42 cycles for 2 iterations = 21 cycles/iteration */ + dct_IV_func2(M>>2, sin_twiddle, &pDat[0], &pDat[L], sin_step); + } else +#endif /* FUNCTION_dct_IV_func2 */ + { + FIXP_DBL *RESTRICT pDat_0 = &pDat[0]; + FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2]; + register FIXP_DBL accu1,accu2,accu3,accu4; + int idx, i; + + /* Sin and Cos values are 0.0f and 1.0f */ + accu1 = pDat_1[0]; + accu2 = pDat_1[1]; + + pDat_1[1] = -(pDat_0[1]>>1); + pDat_0[0] = (pDat_0[0]>>1); + + + /* 28 cycles for ARM926 */ + for (idx = sin_step,i=1; i<(M+1)>>1; i++, idx+=sin_step) + { + FIXP_STP twd = sin_twiddle[idx]; + cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd); + pDat_0[1] = accu3; + pDat_1[0] = accu4; + + pDat_0+=2; + pDat_1-=2; + + cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd); + + accu1 = pDat_1[0]; + accu2 = pDat_1[1]; + + pDat_1[1] = -accu3; + pDat_0[0] = accu4; + } + + if ( (M&1) == 0 ) + { + /* Last Sin and Cos value pair are the same */ + accu1 = fMultDiv2(accu1, WTC(0x5a82799a)); + accu2 = fMultDiv2(accu2, WTC(0x5a82799a)); + + pDat_1[0] = accu1 + accu2; + pDat_0[1] = accu1 - accu2; + } + } + + /* Add twiddeling scale. */ + *pDat_e += 2; +} +#endif /* defined (FUNCTION_dct_IV) */ + +#if !defined(FUNCTION_dst_IV) +void dst_IV(FIXP_DBL *pDat, + int L, + int *pDat_e ) +{ + int sin_step = 0; + int M = L >> 1; + + const FIXP_WTP *twiddle; + const FIXP_STP *sin_twiddle; + +#ifdef DSTIV2_ENABLE + if (L == 2) { + const FIXP_STP tab = STCP(0x7641AF3D, 0x30FB9452); + FIXP_DBL tmp1, tmp2; + + cplxMultDiv2(&tmp2, &tmp1, pDat[0], pDat[1], tab); + + pDat[0] = tmp1; + pDat[1] = tmp2; + + *pDat_e += 1; + + return; + } +#else + FDK_ASSERT(L >= 4); +#endif + + getTables(&twiddle, &sin_twiddle, &sin_step, L); + +#ifdef FUNCTION_dst_IV_func1 + if ( (M>=4) && ((M&3) == 0) ) { + dst_IV_func1(M, twiddle, &pDat[0], &pDat[L]); + } else +#endif + { + FIXP_DBL *RESTRICT pDat_0 = &pDat[0]; + FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2]; + + register int i; + + /* 34 cycles on ARM926 */ + for (i = 0; i < M-1; i+=2,pDat_0+=2,pDat_1-=2) + { + register FIXP_DBL accu1,accu2,accu3,accu4; + + accu1 = pDat_1[1]; accu2 = -pDat_0[0]; + accu3 = pDat_0[1]; accu4 = -pDat_1[0]; + + cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]); + cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i+1]); + + pDat_0[0] = accu2; pDat_0[1] = accu1; + pDat_1[0] = accu4; pDat_1[1] = -accu3; + } + if (M&1) + { + register FIXP_DBL accu1,accu2; + + accu1 = pDat_1[1]; accu2 = -pDat_0[0]; + + cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]); + + pDat_0[0] = accu2; pDat_0[1] = accu1; + } + } + + fft(M, pDat, pDat_e); + +#ifdef FUNCTION_dst_IV_func2 + if ( (M>=4) && ((M&3) == 0) ) { + dst_IV_func2(M>>2, sin_twiddle + sin_step, &pDat[0], &pDat[L - 1], sin_step); + } else +#endif /* FUNCTION_dst_IV_func2 */ + { + FIXP_DBL *RESTRICT pDat_0; + FIXP_DBL *RESTRICT pDat_1; + register FIXP_DBL accu1,accu2,accu3,accu4; + int idx, i; + + pDat_0 = &pDat[0]; + pDat_1 = &pDat[L - 2]; + + /* Sin and Cos values are 0.0f and 1.0f */ + accu1 = pDat_1[0]; + accu2 = pDat_1[1]; + + pDat_1[1] = -(pDat_0[0]>>1); + pDat_0[0] = (pDat_0[1]>>1); + + for (idx = sin_step,i=1; i<(M+1)>>1; i++, idx+=sin_step) + { + FIXP_STP twd = sin_twiddle[idx]; + + cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd); + pDat_1[0] = -accu3; + pDat_0[1] = -accu4; + + pDat_0+=2; + pDat_1-=2; + + cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd); + + accu1 = pDat_1[0]; + accu2 = pDat_1[1]; + + pDat_0[0] = accu3; + pDat_1[1] = -accu4; + } + + if ( (M&1) == 0 ) + { + /* Last Sin and Cos value pair are the same */ + accu1 = fMultDiv2(accu1, WTC(0x5a82799a)); + accu2 = fMultDiv2(accu2, WTC(0x5a82799a)); + + pDat_0[1] = - accu1 - accu2; + pDat_1[0] = accu2 - accu1; + } + } + + /* Add twiddeling scale. */ + *pDat_e += 2; +} +#endif /* !defined(FUNCTION_dst_IV) */ + + -- cgit v1.2.3