From 6cfabd35363c3ef5e3b209b867169a500b3ccc3c Mon Sep 17 00:00:00 2001 From: Fraunhofer IIS FDK Date: Mon, 26 Feb 2018 20:17:00 +0100 Subject: Upgrade to FDKv2 Bug: 71430241 Test: CTS DecoderTest and DecoderTestAacDrc original-Change-Id: Iaa20f749b8a04d553b20247cfe1a8930ebbabe30 Apply clang-format also on header files. original-Change-Id: I14de1ef16bbc79ec0283e745f98356a10efeb2e4 Fixes for MPEG-D DRC original-Change-Id: If1de2d74bbbac84b3f67de3b88b83f6a23b8a15c Catch unsupported tw_mdct at an early stage original-Change-Id: Ied9dd00d754162a0e3ca1ae3e6b854315d818afe Fixing PVC transition frames original-Change-Id: Ib75725abe39252806c32d71176308f2c03547a4e Move qmf bands sanity check original-Change-Id: Iab540c3013c174d9490d2ae100a4576f51d8dbc4 Initialize scaling variable original-Change-Id: I3c4087101b70e998c71c1689b122b0d7762e0f9e Add 16 qmf band configuration to getSlotNrgHQ() original-Change-Id: I49a5d30f703a1b126ff163df9656db2540df21f1 Always apply byte alignment at the end of the AudioMuxElement original-Change-Id: I42d560287506d65d4c3de8bfe3eb9a4ebeb4efc7 Setup SBR element only if no parse error exists original-Change-Id: I1915b73704bc80ab882b9173d6bec59cbd073676 Additional array index check in HCR original-Change-Id: I18cc6e501ea683b5009f1bbee26de8ddd04d8267 Fix fade-in index selection in concealment module original-Change-Id: Ibf802ed6ed8c05e9257e1f3b6d0ac1162e9b81c1 Enable explicit backward compatible parser for AAC_LD original-Change-Id: I27e9c678dcb5d40ed760a6d1e06609563d02482d Skip spatial specific config in explicit backward compatible ASC original-Change-Id: Iff7cc365561319e886090cedf30533f562ea4d6e Update flags description in decoder API original-Change-Id: I9a5b4f8da76bb652f5580cbd3ba9760425c43830 Add QMF domain reset function original-Change-Id: I4f89a8a2c0277d18103380134e4ed86996e9d8d6 DRC upgrade v2.1.0 original-Change-Id: I5731c0540139dab220094cd978ef42099fc45b74 Fix integer overflow in sqrtFixp_lookup() original-Change-Id: 
I429a6f0d19aa2cc957e0f181066f0ca73968c914 Fix integer overflow in invSqrtNorm2() original-Change-Id: I84de5cbf9fb3adeb611db203fe492fabf4eb6155 Fix integer overflow in GenerateRandomVector() original-Change-Id: I3118a641008bd9484d479e5b0b1ee2b5d7d44d74 Fix integer overflow in adjustTimeSlot_EldGrid() original-Change-Id: I29d503c247c5c8282349b79df940416a512fb9d5 Fix integer overflow in FDKsbrEnc_codeEnvelope() original-Change-Id: I6b34b61ebb9d525b0c651ed08de2befc1f801449 Follow-up on: Fix integer overflow in adjustTimeSlot_EldGrid() original-Change-Id: I6f8f578cc7089e5eb7c7b93e580b72ca35ad689a Fix integer overflow in get_pk_v2() original-Change-Id: I63375bed40d45867f6eeaa72b20b1f33e815938c Fix integer overflow in Syn_filt_zero() original-Change-Id: Ie0c02fdfbe03988f9d3b20d10cd9fe4c002d1279 Fix integer overflow in CFac_CalcFacSignal() original-Change-Id: Id2d767c40066c591b51768e978eb8af3b803f0c5 Fix integer overflow in FDKaacEnc_FDKaacEnc_calcPeNoAH() original-Change-Id: Idcbd0f4a51ae2550ed106aa6f3d678d1f9724841 Fix integer overflow in sbrDecoder_calculateGainVec() original-Change-Id: I7081bcbe29c5cede9821b38d93de07c7add2d507 Fix integer overflow in CLpc_SynthesisLattice() original-Change-Id: I4a95ddc18de150102352d4a1845f06094764c881 Fix integer overflow in Pred_Lt4() original-Change-Id: I4dbd012b2de7d07c3e70a47b92e3bfae8dbc750a Fix integer overflow in FDKsbrEnc_InitSbrFastTransientDetector() original-Change-Id: I788cbec1a4a00f44c2f3a72ad7a4afa219807d04 Fix unsigned integer overflow in FDKaacEnc_WriteBitstream() original-Change-Id: I68fc75166e7d2cd5cd45b18dbe3d8c2a92f1822a Fix unsigned integer overflow in FDK_MetadataEnc_Init() original-Change-Id: Ie8d025f9bcdb2442c704bd196e61065c03c10af4 Fix overflow in pseudo random number generators original-Change-Id: I3e2551ee01356297ca14e3788436ede80bd5513c Fix unsigned integer overflow in sbrDecoder_Parse() original-Change-Id: I3f231b2f437e9c37db4d5b964164686710eee971 Fix unsigned integer overflow in longsub() 
original-Change-Id: I73c2bc50415cac26f1f5a29e125bbe75f9180a6e Fix unsigned integer overflow in CAacDecoder_DecodeFrame() original-Change-Id: Ifce2db4b1454b46fa5f887e9d383f1cc43b291e4 Fix overflow at CLpdChannelStream_Read() original-Change-Id: Idb9d822ce3a4272e4794b643644f5434e2d4bf3f Fix unsigned integer overflow in Hcr_State_BODY_SIGN_ESC__ESC_WORD() original-Change-Id: I1ccf77c0015684b85534c5eb97162740a870b71c Fix unsigned integer overflow in UsacConfig_Parse() original-Change-Id: Ie6d27f84b6ae7eef092ecbff4447941c77864d9f Fix unsigned integer overflow in aacDecoder_drcParse() original-Change-Id: I713f28e883eea3d70b6fa56a7b8f8c22bcf66ca0 Fix unsigned integer overflow in aacDecoder_drcReadCompression() original-Change-Id: Ia34dfeb88c4705c558bce34314f584965cafcf7a Fix unsigned integer overflow in CDataStreamElement_Read() original-Change-Id: Iae896cc1d11f0a893d21be6aa90bd3e60a2c25f0 Fix unsigned integer overflow in transportDec_AdjustEndOfAccessUnit() original-Change-Id: I64cf29a153ee784bb4a16fdc088baabebc0007dc Fix unsigned integer overflow in transportDec_GetAuBitsRemaining() original-Change-Id: I975b3420faa9c16a041874ba0db82e92035962e4 Fix unsigned integer overflow in extractExtendedData() original-Change-Id: I2a59eb09e2053cfb58dfb75fcecfad6b85a80a8f Fix signed integer overflow in CAacDecoder_ExtPayloadParse() original-Change-Id: I4ad5ca4e3b83b5d964f1c2f8c5e7b17c477c7929 Fix unsigned integer overflow in CAacDecoder_DecodeFrame() original-Change-Id: I29a39df77d45c52a0c9c5c83c1ba81f8d0f25090 Follow-up on: Fix integer overflow in CLpc_SynthesisLattice() original-Change-Id: I8fb194ffc073a3432a380845be71036a272d388f Fix signed integer overflow in _interpolateDrcGain() original-Change-Id: I879ec9ab14005069a7c47faf80e8bc6e03d22e60 Fix unsigned integer overflow in FDKreadBits() original-Change-Id: I1f47a6a8037ff70375aa8844947d5681bb4287ad Fix unsigned integer overflow in FDKbyteAlign() original-Change-Id: Id5f3a11a0c9e50fc6f76ed6c572dbd4e9f2af766 Fix unsigned integer 
overflow in FDK_get32() original-Change-Id: I9d33b8e97e3d38cbb80629cb859266ca0acdce96 Fix unsigned integer overflow in FDK_pushBack() original-Change-Id: Ic87f899bc8c6acf7a377a8ca7f3ba74c3a1e1c19 Fix unsigned integer overflow in FDK_pushForward() original-Change-Id: I3b754382f6776a34be1602e66694ede8e0b8effc Fix unsigned integer overflow in ReadPsData() original-Change-Id: I25361664ba8139e32bbbef2ca8c106a606ce9c37 Fix signed integer overflow in E_UTIL_residu() original-Change-Id: I8c3abd1f437ee869caa8fb5903ce7d3d641b6aad REVERT: Follow-up on: Integer overflow in CLpc_SynthesisLattice(). original-Change-Id: I3d340099acb0414795c8dfbe6362bc0a8f045f9b Follow-up on: Fix integer overflow in CLpc_SynthesisLattice() original-Change-Id: I4aedb8b3a187064e9f4d985175aa55bb99cc7590 Follow-up on: Fix unsigned integer overflow in aacDecoder_drcParse() original-Change-Id: I2aa2e13916213bf52a67e8b0518e7bf7e57fb37d Fix integer overflow in acelp original-Change-Id: Ie6390c136d84055f8b728aefbe4ebef6e029dc77 Fix unsigned integer overflow in aacDecoder_UpdateBitStreamCounters() original-Change-Id: I391ffd97ddb0b2c184cba76139bfb356a3b4d2e2 Adjust concealment default settings original-Change-Id: I6a95db935a327c47df348030bcceafcb29f54b21 Saturate estimatedStartPos original-Change-Id: I27be2085e0ae83ec9501409f65e003f6bcba1ab6 Negative shift exponent in _interpolateDrcGain() original-Change-Id: I18edb26b26d002aafd5e633d4914960f7a359c29 Negative shift exponent in calculateICC() original-Change-Id: I3dcd2ae98d2eb70ee0d59750863cbb2a6f4f8aba Too large shift exponent in FDK_put() original-Change-Id: Ib7d9aaa434d2d8de4a13b720ca0464b31ca9b671 Too large shift exponent in CalcInvLdData() original-Change-Id: I43e6e78d4cd12daeb1dcd5d82d1798bdc2550262 Member access within null pointer of type SBR_CHANNEL original-Change-Id: Idc5e4ea8997810376d2f36bbdf628923b135b097 Member access within null pointer of type CpePersistentData original-Change-Id: Ib6c91cb0d37882768e5baf63324e429589de0d9d Member access 
within null pointer FDKaacEnc_psyMain() original-Change-Id: I7729b7f4479970531d9dc823abff63ca52e01997 Member access within null pointer FDKaacEnc_GetPnsParam() original-Change-Id: I9aa3b9f3456ae2e0f7483dbd5b3dde95fc62da39 Member access within null pointer FDKsbrEnc_EnvEncodeFrame() original-Change-Id: I67936f90ea714e90b3e81bc0dd1472cc713eb23a Add HCR sanity check original-Change-Id: I6c1d9732ebcf6af12f50b7641400752f74be39f7 Fix memory issue for HBE edge case with 8:3 SBR original-Change-Id: I11ea58a61e69fbe8bf75034b640baee3011e63e9 Additional SBR parametrization sanity check for ELD original-Change-Id: Ie26026fbfe174c2c7b3691f6218b5ce63e322140 Add MPEG-D DRC channel layout check original-Change-Id: Iea70a74f171b227cce636a9eac4ba662777a2f72 Additional out-of-bounds checks in MPEG-D DRC original-Change-Id: Ife4a8c3452c6fde8a0a09e941154a39a769777d4 Change-Id: Ic63cb2f628720f54fe9b572b0cb528e2599c624e --- libFDK/src/dct.cpp | 638 +++++++++++++++++++++++++++++------------------------ 1 file changed, 347 insertions(+), 291 deletions(-) (limited to 'libFDK/src/dct.cpp') diff --git a/libFDK/src/dct.cpp b/libFDK/src/dct.cpp index 1e5b93e..a451331 100644 --- a/libFDK/src/dct.cpp +++ b/libFDK/src/dct.cpp @@ -1,74 +1,85 @@ - -/* ----------------------------------------------------------------------------------------------------------- +/* ----------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. - All rights reserved. +© Copyright 1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten +Forschung e.V. All rights reserved. 1. INTRODUCTION -The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements -the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio. 
-This FDK AAC Codec software is intended to be used on a wide variety of Android devices. - -AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual -audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by -independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part -of the MPEG specifications. - -Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer) -may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners -individually for the purpose of encoding or decoding bit streams in products that are compliant with -the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license -these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec -software may already be covered under those patent licenses when it is used for those licensed purposes only. - -Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality, -are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional -applications information and documentation. +The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software +that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding +scheme for digital audio. This FDK AAC Codec software is intended to be used on +a wide variety of Android devices. + +AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient +general perceptual audio codecs. AAC-ELD is considered the best-performing +full-bandwidth communications codec by independent studies and is widely +deployed. AAC has been standardized by ISO and IEC as part of the MPEG +specifications. 
+ +Patent licenses for necessary patent claims for the FDK AAC Codec (including +those of Fraunhofer) may be obtained through Via Licensing +(www.vialicensing.com) or through the respective patent owners individually for +the purpose of encoding or decoding bit streams in products that are compliant +with the ISO/IEC MPEG audio standards. Please note that most manufacturers of +Android devices already license these patent claims through Via Licensing or +directly from the patent owners, and therefore FDK AAC Codec software may +already be covered under those patent licenses when it is used for those +licensed purposes only. + +Commercially-licensed AAC software libraries, including floating-point versions +with enhanced sound quality, are also available from Fraunhofer. Users are +encouraged to check the Fraunhofer website for additional applications +information and documentation. 2. COPYRIGHT LICENSE -Redistribution and use in source and binary forms, with or without modification, are permitted without -payment of copyright license fees provided that you satisfy the following conditions: +Redistribution and use in source and binary forms, with or without modification, +are permitted without payment of copyright license fees provided that you +satisfy the following conditions: -You must retain the complete text of this software license in redistributions of the FDK AAC Codec or -your modifications thereto in source code form. +You must retain the complete text of this software license in redistributions of +the FDK AAC Codec or your modifications thereto in source code form. -You must retain the complete text of this software license in the documentation and/or other materials -provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form. 
-You must make available free of charge copies of the complete source code of the FDK AAC Codec and your +You must retain the complete text of this software license in the documentation +and/or other materials provided with redistributions of the FDK AAC Codec or +your modifications thereto in binary form. You must make available free of +charge copies of the complete source code of the FDK AAC Codec and your modifications thereto to recipients of copies in binary form. -The name of Fraunhofer may not be used to endorse or promote products derived from this library without -prior written permission. +The name of Fraunhofer may not be used to endorse or promote products derived +from this library without prior written permission. -You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec -software or your modifications thereto. +You may not charge copyright license fees for anyone to use, copy or distribute +the FDK AAC Codec software or your modifications thereto. -Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software -and the date of any change. For modified versions of the FDK AAC Codec, the term -"Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term -"Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android." +Your modified versions of the FDK AAC Codec must carry prominent notices stating +that you changed the software and the date of any change. For modified versions +of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android" +must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK +AAC Codec Library for Android." 3. NO PATENT LICENSE -NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer, -ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with -respect to this software. 
+NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without +limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE. +Fraunhofer provides no warranty of patent non-infringement with respect to this +software. -You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized -by appropriate patent licenses. +You may use this FDK AAC Codec software or modifications thereto only for +purposes that are authorized by appropriate patent licenses. 4. DISCLAIMER -This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors -"AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties -of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR -CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages, -including but not limited to procurement of substitute goods or services; loss of use, data, or profits, -or business interruption, however caused and on any theory of liability, whether in contract, strict -liability, or tort (including negligence), arising in any way out of the use of this software, even if -advised of the possibility of such damage. +This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright +holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, +including but not limited to the implied warranties of merchantability and +fitness for a particular purpose. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, +or consequential damages, including but not limited to procurement of substitute +goods or services; loss of use, data, or profits, or business interruption, +however caused and on any theory of liability, whether in contract, strict +liability, or tort (including negligence), arising in any way out of the use of +this software, even if advised of the possibility of such damage. 5. CONTACT INFORMATION @@ -79,94 +90,163 @@ Am Wolfsmantel 33 www.iis.fraunhofer.de/amm amm-info@iis.fraunhofer.de ------------------------------------------------------------------------------------------------------------ */ +----------------------------------------------------------------------------- */ + +/******************* Library for basic calculation routines ******************** + + Author(s): + + Description: + +*******************************************************************************/ /*! \file dct.cpp - \brief DCT Implementations - Library functions to calculate standard DCTs. This will most likely be replaced by hand-optimized - functions for the specific target processor. + \brief DCT Implementations + Library functions to calculate standard DCTs. This will most likely be + replaced by hand-optimized functions for the specific target processor. - Three different implementations of the dct type II and the dct type III transforms are provided. + Three different implementations of the dct type II and the dct type III + transforms are provided. - By default implementations which are based on a single, standard complex FFT-kernel are used (dctII_f() and dctIII_f()). - These are specifically helpful in cases where optimized FFT libraries are already available. The FFT used in these - implementation is FFT rad2 from FDK_tools. + By default implementations which are based on a single, standard complex + FFT-kernel are used (dctII_f() and dctIII_f()). 
These are specifically helpful + in cases where optimized FFT libraries are already available. The FFT used in + these implementation is FFT rad2 from FDK_tools. - Of course, one might also use DCT-libraries should they be available. The DCT and DST - type IV implementations are only available in a version based on a complex FFT kernel. + Of course, one might also use DCT-libraries should they be available. The DCT + and DST type IV implementations are only available in a version based on a + complex FFT kernel. */ #include "dct.h" - #include "FDK_tools_rom.h" #include "fft.h" - #if defined(__arm__) #include "arm/dct_arm.cpp" #endif +void dct_getTables(const FIXP_WTP **ptwiddle, const FIXP_STP **sin_twiddle, + int *sin_step, int length) { + const FIXP_WTP *twiddle; + int ld2_length; + + /* Get ld2 of length - 2 + 1 + -2: because first table entry is window of size 4 + +1: because we already include +1 because of ceil(log2(length)) */ + ld2_length = DFRACT_BITS - 1 - fNormz((FIXP_DBL)length) - 1; + + /* Extract sort of "eigenvalue" (the 4 left most bits) of length. 
*/ + switch ((length) >> (ld2_length - 1)) { + case 0x4: /* radix 2 */ + *sin_twiddle = SineTable1024; + *sin_step = 1 << (10 - ld2_length); + twiddle = windowSlopes[0][0][ld2_length - 1]; + break; + case 0x7: /* 10 ms */ + *sin_twiddle = SineTable480; + *sin_step = 1 << (8 - ld2_length); + twiddle = windowSlopes[0][1][ld2_length]; + break; + case 0x6: /* 3/4 of radix 2 */ + *sin_twiddle = SineTable384; + *sin_step = 1 << (8 - ld2_length); + twiddle = windowSlopes[0][2][ld2_length]; + break; + case 0x5: /* 5/16 of radix 2*/ + *sin_twiddle = SineTable80; + *sin_step = 1 << (6 - ld2_length); + twiddle = windowSlopes[0][3][ld2_length]; + break; + default: + *sin_twiddle = NULL; + *sin_step = 0; + twiddle = NULL; + break; + } + + if (ptwiddle != NULL) { + FDK_ASSERT(twiddle != NULL); + *ptwiddle = twiddle; + } + + FDK_ASSERT(*sin_step > 0); +} #if !defined(FUNCTION_dct_III) void dct_III(FIXP_DBL *pDat, /*!< pointer to input/output */ FIXP_DBL *tmp, /*!< pointer to temporal working buffer */ int L, /*!< lenght of transform */ - int *pDat_e - ) -{ - FDK_ASSERT(L == 64 || L == 32); - int i; + int *pDat_e) { + const FIXP_WTP *sin_twiddle; + int i; FIXP_DBL xr, accu1, accu2; - int inc; - int M = L>>1; - int ld_M; - - if (L == 64) ld_M = 5; - else ld_M = 4; + int inc, index; + int M = L >> 1; - /* This loop performs multiplication for index i (i*inc) */ - inc = (64/2) >> ld_M; /* 64/L */ + FDK_ASSERT(L % 4 == 0); + dct_getTables(NULL, &sin_twiddle, &inc, L); + inc >>= 1; FIXP_DBL *pTmp_0 = &tmp[2]; - FIXP_DBL *pTmp_1 = &tmp[(M-1)*2]; - - for(i=1; i<M>>1; i++,pTmp_0+=2,pTmp_1-=2) { - - FIXP_DBL accu3,accu4,accu5,accu6; + FIXP_DBL *pTmp_1 = &tmp[(M - 1) * 2]; - cplxMultDiv2(&accu2, &accu1, pDat[L - i], pDat[i], sin_twiddle_L64[i*inc]); - cplxMultDiv2(&accu4, &accu3, pDat[M+i], pDat[M-i], sin_twiddle_L64[(M-i)*inc]); - accu3 >>= 1; accu4 >>= 1; - - /* This method is better for ARM926, that uses operand2 shifted right by 1 always */ - cplxMultDiv2(&accu6, &accu5, (accu3 - 
(accu1>>1)), ((accu2>>1) + accu4), sin_twiddle_L64[(4*i)*inc]); - xr = (accu1>>1) + accu3; - pTmp_0[0] = (xr>>1) - accu5; - pTmp_1[0] = (xr>>1) + accu5; - - xr = (accu2>>1) - accu4; - pTmp_0[1] = (xr>>1) - accu6; - pTmp_1[1] = -((xr>>1) + accu6); + index = 4 * inc; + /* This loop performs multiplication for index i (i*inc) */ + for (i = 1; i<M>> 1; i++, pTmp_0 += 2, pTmp_1 -= 2) { + FIXP_DBL accu3, accu4, accu5, accu6; + + cplxMultDiv2(&accu2, &accu1, pDat[L - i], pDat[i], sin_twiddle[i * inc]); + cplxMultDiv2(&accu4, &accu3, pDat[M + i], pDat[M - i], + sin_twiddle[(M - i) * inc]); + accu3 >>= 1; + accu4 >>= 1; + + /* This method is better for ARM926, that uses operand2 shifted right by 1 + * always */ + if (2 * i < (M / 2)) { + cplxMultDiv2(&accu6, &accu5, (accu3 - (accu1 >> 1)), + ((accu2 >> 1) + accu4), sin_twiddle[index]); + } else { + cplxMultDiv2(&accu6, &accu5, ((accu2 >> 1) + accu4), + (accu3 - (accu1 >> 1)), sin_twiddle[index]); + accu6 = -accu6; + } + xr = (accu1 >> 1) + accu3; + pTmp_0[0] = (xr >> 1) - accu5; + pTmp_1[0] = (xr >> 1) + accu5; + + xr = (accu2 >> 1) - accu4; + pTmp_0[1] = (xr >> 1) - accu6; + pTmp_1[1] = -((xr >> 1) + accu6); + + /* Create index helper variables for (4*i)*inc indexed equivalent values of + * short tables. 
*/ + if (2 * i < ((M / 2) - 1)) { + index += 4 * inc; + } else if (2 * i >= ((M / 2))) { + index -= 4 * inc; + } } - xr = fMultDiv2(pDat[M], sin_twiddle_L64[64/2].v.re );/* cos((PI/(2*L))*M); */ - tmp[0] = ((pDat[0]>>1) + xr)>>1; - tmp[1] = ((pDat[0]>>1) - xr)>>1; + xr = fMultDiv2(pDat[M], sin_twiddle[M * inc].v.re); /* cos((PI/(2*L))*M); */ + tmp[0] = ((pDat[0] >> 1) + xr) >> 1; + tmp[1] = ((pDat[0] >> 1) - xr) >> 1; - cplxMultDiv2(&accu2, &accu1, pDat[L - (M/2)], pDat[M/2], sin_twiddle_L64[64/4]); - tmp[M] = accu1>>1; - tmp[M+1] = accu2>>1; + cplxMultDiv2(&accu2, &accu1, pDat[L - (M / 2)], pDat[M / 2], + sin_twiddle[M * inc / 2]); + tmp[M] = accu1 >> 1; + tmp[M + 1] = accu2 >> 1; /* dit_fft expects 1 bit scaled input values */ fft(M, tmp, pDat_e); /* ARM926: 12 cycles per 2-iteration, no overhead code by compiler */ pTmp_1 = &tmp[L]; - for (i = M>>1; i--;) - { + for (i = M >> 1; i--;) { FIXP_DBL tmp1, tmp2, tmp3, tmp4; tmp1 = *tmp++; tmp2 = *tmp++; @@ -180,131 +260,121 @@ void dct_III(FIXP_DBL *pDat, /*!< pointer to input/output */ *pDat_e += 2; } -#endif - -#if !defined(FUNCTION_dct_II) -void dct_II(FIXP_DBL *pDat, /*!< pointer to input/output */ - FIXP_DBL *tmp, /*!< pointer to temporal working buffer */ - int L, /*!< lenght of transform */ - int *pDat_e - ) -{ - FDK_ASSERT(L == 64 || L == 32); - FIXP_DBL accu1,accu2; - FIXP_DBL *pTmp_0, *pTmp_1; - int i; - int inc; - int M = L>>1; - int ld_M; - - FDK_ASSERT(L == 64 || L == 32); - ld_M = 4 + (L >> 6); /* L=64: 5, L=32: 4 */ - - inc = (64/2) >> ld_M; /* L=64: 1, L=32: 2 */ - - FIXP_DBL *pdat = &pDat[0]; - FIXP_DBL accu3, accu4; - pTmp_0 = &tmp[0]; - pTmp_1 = &tmp[L-1]; - for (i = M>>1; i--; ) - { - accu1 = *pdat++; - accu2 = *pdat++; - accu3 = *pdat++; - accu4 = *pdat++; - accu1 >>= 1; - accu2 >>= 1; - accu3 >>= 1; - accu4 >>= 1; - *pTmp_0++ = accu1; - *pTmp_0++ = accu3; - *pTmp_1-- = accu2; - *pTmp_1-- = accu4; - } - - - fft(M, tmp, pDat_e); - - pTmp_0 = &tmp[2]; - pTmp_1 = &tmp[(M-1)*2]; - - for (i=1; i<M>>1; 
i++,pTmp_0+=2,pTmp_1-=2) { - - FIXP_DBL a1,a2; - FIXP_DBL accu3, accu4; +void dst_III(FIXP_DBL *pDat, /*!< pointer to input/output */ + FIXP_DBL *tmp, /*!< pointer to temporal working buffer */ + int L, /*!< lenght of transform */ + int *pDat_e) { + int L2 = L >> 1; + int i; + FIXP_DBL t; + + /* note: DCT III is reused here, direct DST III implementation might be more + * efficient */ + + /* mirror input */ + for (i = 0; i < L2; i++) { + t = pDat[i]; + pDat[i] = pDat[L - 1 - i]; + pDat[L - 1 - i] = t; + } - a1 = ((pTmp_0[1]>>1) + (pTmp_1[1]>>1)); - a2 = ((pTmp_1[0]>>1) - (pTmp_0[0]>>1)); + /* DCT-III */ + dct_III(pDat, tmp, L, pDat_e); - cplxMultDiv2(&accu1, &accu2, a2, a1, sin_twiddle_L64[(4*i)*inc]); - accu1<<=1; accu2<<=1; + /* flip signs at odd indices */ + for (i = 1; i < L; i += 2) pDat[i] = -pDat[i]; +} - a1 = ((pTmp_0[0]>>1) + (pTmp_1[0]>>1)); - a2 = ((pTmp_0[1]>>1) - (pTmp_1[1]>>1)); +#endif - cplxMultDiv2(&accu3, &accu4, (a1 + accu2), -(accu1 + a2), sin_twiddle_L64[i*inc]); - pDat[L - i] = accu4; - pDat[i] = accu3; +#if !defined(FUNCTION_dct_II) +void dct_II( + FIXP_DBL *pDat, /*!< pointer to input/output */ + FIXP_DBL *tmp, /*!< pointer to temporal working buffer */ + int L, /*!< lenght of transform (has to be a multiple of 8 (or 4 in case + DCT_II_L_MULTIPLE_OF_4_SUPPORT is defined) */ + int *pDat_e) { + const FIXP_WTP *sin_twiddle; + FIXP_DBL accu1, accu2; + FIXP_DBL *pTmp_0, *pTmp_1; + + int i; + int inc, index = 0; + int M = L >> 1; - cplxMultDiv2(&accu3, &accu4, (a1 - accu2), -(accu1 - a2), sin_twiddle_L64[(M-i)*inc]); - pDat[M + i] = accu4; - pDat[M - i] = accu3; + FDK_ASSERT(L % 4 == 0); + dct_getTables(NULL, &sin_twiddle, &inc, L); + inc >>= 1; + { + for (i = 0; i < M; i++) { + tmp[i] = pDat[2 * i] >> 1; /* dit_fft expects 1 bit scaled input values */ + tmp[L - 1 - i] = + pDat[2 * i + 1] >> 1; /* dit_fft expects 1 bit scaled input values */ } + } - cplxMultDiv2(&accu1, &accu2, tmp[M], tmp[M+1], sin_twiddle_L64[(M/2)*inc]); - pDat[L - (M/2)] = 
accu2; - pDat[M/2] = accu1; + fft(M, tmp, pDat_e); - pDat[0] = (tmp[0]>>1)+(tmp[1]>>1); - pDat[M] = fMult(((tmp[0]>>1)-(tmp[1]>>1)), sin_twiddle_L64[64/2].v.re);/* cos((PI/(2*L))*M); */ + pTmp_0 = &tmp[2]; + pTmp_1 = &tmp[(M - 1) * 2]; - *pDat_e += 2; -} -#endif + index = inc * 4; -static -void getTables(const FIXP_WTP **twiddle, const FIXP_STP **sin_twiddle, int *sin_step, int length) -{ - int ld2_length; + for (i = 1; i<M>> 1; i++, pTmp_0 += 2, pTmp_1 -= 2) { + FIXP_DBL a1, a2; + FIXP_DBL accu3, accu4; - /* Get ld2 of length - 2 + 1 - -2: because first table entry is window of size 4 - +1: because we already include +1 because of ceil(log2(length)) */ - ld2_length = DFRACT_BITS-1-fNormz((FIXP_DBL)length) - 1; + a1 = ((pTmp_0[1] >> 1) + (pTmp_1[1] >> 1)); + a2 = ((pTmp_1[0] >> 1) - (pTmp_0[0] >> 1)); - /* Extract sort of "eigenvalue" (the 4 left most bits) of length. */ - switch ( (length) >> (ld2_length-1) ) { - case 0x4: /* radix 2 */ - *sin_twiddle = SineTable512; - *sin_step = 1<<(9 - ld2_length); - *twiddle = windowSlopes[0][0][ld2_length-1]; - break; - case 0x7: /* 10 ms */ - *sin_twiddle = SineTable480; - *sin_step = 1<<(8 - ld2_length); - *twiddle = windowSlopes[0][1][ld2_length]; - break; - default: - *sin_twiddle = NULL; - *sin_step = 0; - *twiddle = NULL; - break; + if (2 * i < (M / 2)) { + cplxMultDiv2(&accu1, &accu2, a2, a1, sin_twiddle[index]); + } else { + cplxMultDiv2(&accu1, &accu2, a1, a2, sin_twiddle[index]); + accu1 = -accu1; + } + accu1 <<= 1; + accu2 <<= 1; + + a1 = ((pTmp_0[0] >> 1) + (pTmp_1[0] >> 1)); + a2 = ((pTmp_0[1] >> 1) - (pTmp_1[1] >> 1)); + + cplxMultDiv2(&accu3, &accu4, (a1 + accu2), -(accu1 + a2), + sin_twiddle[i * inc]); + pDat[L - i] = accu4; + pDat[i] = accu3; + + cplxMultDiv2(&accu3, &accu4, (a1 - accu2), -(accu1 - a2), + sin_twiddle[(M - i) * inc]); + pDat[M + i] = accu4; + pDat[M - i] = accu3; + + /* Create index helper variables for (4*i)*inc indexed equivalent values of + * short tables. 
*/ + if (2 * i < ((M / 2) - 1)) { + index += 4 * inc; + } else if (2 * i >= ((M / 2))) { + index -= 4 * inc; + } } - FDK_ASSERT(*twiddle != NULL); + cplxMultDiv2(&accu1, &accu2, tmp[M], tmp[M + 1], sin_twiddle[(M / 2) * inc]); + pDat[L - (M / 2)] = accu2; + pDat[M / 2] = accu1; - FDK_ASSERT(*sin_step > 0); + pDat[0] = (tmp[0] >> 1) + (tmp[1] >> 1); + pDat[M] = fMult(((tmp[0] >> 1) - (tmp[1] >> 1)), + sin_twiddle[M * inc].v.re); /* cos((PI/(2*L))*M); */ + *pDat_e += 2; } +#endif #if !defined(FUNCTION_dct_IV) -void dct_IV(FIXP_DBL *pDat, - int L, - int *pDat_e) -{ +void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) { int sin_step = 0; int M = L >> 1; @@ -313,12 +383,14 @@ void dct_IV(FIXP_DBL *pDat, FDK_ASSERT(L >= 4); - getTables(&twiddle, &sin_twiddle, &sin_step, L); + FDK_ASSERT(L >= 4); + + dct_getTables(&twiddle, &sin_twiddle, &sin_step, L); #ifdef FUNCTION_dct_IV_func1 - if (M>=4 && (M&3) == 0) { - /* ARM926: 44 cycles for 2 iterations = 22 cycles/iteration */ - dct_IV_func1(M>>2, twiddle, &pDat[0], &pDat[L-1]); + if (M >= 4 && (M & 3) == 0) { + /* ARM926: 44 cycles for 2 iterations = 22 cycles/iteration */ + dct_IV_func1(M >> 2, twiddle, &pDat[0], &pDat[L - 1]); } else #endif /* FUNCTION_dct_IV_func1 */ { @@ -327,63 +399,65 @@ void dct_IV(FIXP_DBL *pDat, int i; /* 29 cycles on ARM926 */ - for (i = 0; i < M-1; i+=2,pDat_0+=2,pDat_1-=2) - { - FIXP_DBL accu1,accu2,accu3,accu4; + for (i = 0; i < M - 1; i += 2, pDat_0 += 2, pDat_1 -= 2) { + FIXP_DBL accu1, accu2, accu3, accu4; - accu1 = pDat_1[1]; accu2 = pDat_0[0]; - accu3 = pDat_0[1]; accu4 = pDat_1[0]; + accu1 = pDat_1[1]; + accu2 = pDat_0[0]; + accu3 = pDat_0[1]; + accu4 = pDat_1[0]; cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]); - cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i+1]); + cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]); - pDat_0[0] = accu2; pDat_0[1] = accu1; - pDat_1[0] = accu4; pDat_1[1] = -accu3; + pDat_0[0] = accu2; + pDat_0[1] = accu1; + pDat_1[0] = accu4; + pDat_1[1] = 
-accu3; } - if (M&1) - { - FIXP_DBL accu1,accu2; + if (M & 1) { + FIXP_DBL accu1, accu2; - accu1 = pDat_1[1]; accu2 = pDat_0[0]; + accu1 = pDat_1[1]; + accu2 = pDat_0[0]; cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]); - pDat_0[0] = accu2; pDat_0[1] = accu1; + pDat_0[0] = accu2; + pDat_0[1] = accu1; } } fft(M, pDat, pDat_e); #ifdef FUNCTION_dct_IV_func2 - if (M>=4 && (M&3) == 0) { - /* ARM926: 42 cycles for 2 iterations = 21 cycles/iteration */ - dct_IV_func2(M>>2, sin_twiddle, &pDat[0], &pDat[L], sin_step); + if (M >= 4 && (M & 3) == 0) { + /* ARM926: 42 cycles for 2 iterations = 21 cycles/iteration */ + dct_IV_func2(M >> 2, sin_twiddle, &pDat[0], &pDat[L], sin_step); } else #endif /* FUNCTION_dct_IV_func2 */ { FIXP_DBL *RESTRICT pDat_0 = &pDat[0]; FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2]; - FIXP_DBL accu1,accu2,accu3,accu4; + FIXP_DBL accu1, accu2, accu3, accu4; int idx, i; /* Sin and Cos values are 0.0f and 1.0f */ accu1 = pDat_1[0]; accu2 = pDat_1[1]; - pDat_1[1] = -(pDat_0[1]>>1); - pDat_0[0] = (pDat_0[0]>>1); - + pDat_1[1] = -(pDat_0[1] >> 1); + pDat_0[0] = (pDat_0[0] >> 1); /* 28 cycles for ARM926 */ - for (idx = sin_step,i=1; i<(M+1)>>1; i++, idx+=sin_step) - { + for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) { FIXP_STP twd = sin_twiddle[idx]; cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd); - pDat_0[1] = accu3; - pDat_1[0] = accu4; + pDat_0[1] = accu3; + pDat_1[0] = accu4; - pDat_0+=2; - pDat_1-=2; + pDat_0 += 2; + pDat_1 -= 2; cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd); @@ -391,11 +465,10 @@ void dct_IV(FIXP_DBL *pDat, accu2 = pDat_1[1]; pDat_1[1] = -accu3; - pDat_0[0] = accu4; + pDat_0[0] = accu4; } - if ( (M&1) == 0 ) - { + if ((M & 1) == 0) { /* Last Sin and Cos value pair are the same */ accu1 = fMultDiv2(accu1, WTC(0x5a82799a)); accu2 = fMultDiv2(accu2, WTC(0x5a82799a)); @@ -411,84 +484,71 @@ void dct_IV(FIXP_DBL *pDat, #endif /* defined (FUNCTION_dct_IV) */ #if !defined(FUNCTION_dst_IV) -void dst_IV(FIXP_DBL 
*pDat, - int L, - int *pDat_e ) -{ +void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) { int sin_step = 0; int M = L >> 1; const FIXP_WTP *twiddle; const FIXP_STP *sin_twiddle; -#ifdef DSTIV2_ENABLE - if (L == 2) { - const FIXP_STP tab = STCP(0x7641AF3D, 0x30FB9452); - FIXP_DBL tmp1, tmp2; - - cplxMultDiv2(&tmp2, &tmp1, pDat[0], pDat[1], tab); - - pDat[0] = tmp1; - pDat[1] = tmp2; - - *pDat_e += 1; + FDK_ASSERT(L >= 4); - return; - } -#else FDK_ASSERT(L >= 4); -#endif - getTables(&twiddle, &sin_twiddle, &sin_step, L); + dct_getTables(&twiddle, &sin_twiddle, &sin_step, L); #ifdef FUNCTION_dst_IV_func1 - if ( (M>=4) && ((M&3) == 0) ) { + if ((M >= 4) && ((M & 3) == 0)) { dst_IV_func1(M, twiddle, &pDat[0], &pDat[L]); } else #endif { FIXP_DBL *RESTRICT pDat_0 = &pDat[0]; FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2]; - int i; /* 34 cycles on ARM926 */ - for (i = 0; i < M-1; i+=2,pDat_0+=2,pDat_1-=2) - { - FIXP_DBL accu1,accu2,accu3,accu4; + for (i = 0; i < M - 1; i += 2, pDat_0 += 2, pDat_1 -= 2) { + FIXP_DBL accu1, accu2, accu3, accu4; - accu1 = pDat_1[1]; accu2 = -pDat_0[0]; - accu3 = pDat_0[1]; accu4 = -pDat_1[0]; + accu1 = pDat_1[1]; + accu2 = -pDat_0[0]; + accu3 = pDat_0[1]; + accu4 = -pDat_1[0]; cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]); - cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i+1]); + cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]); - pDat_0[0] = accu2; pDat_0[1] = accu1; - pDat_1[0] = accu4; pDat_1[1] = -accu3; + pDat_0[0] = accu2; + pDat_0[1] = accu1; + pDat_1[0] = accu4; + pDat_1[1] = -accu3; } - if (M&1) - { - FIXP_DBL accu1,accu2; + if (M & 1) { + FIXP_DBL accu1, accu2; - accu1 = pDat_1[1]; accu2 = -pDat_0[0]; + accu1 = pDat_1[1]; + accu2 = -pDat_0[0]; cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]); - pDat_0[0] = accu2; pDat_0[1] = accu1; + pDat_0[0] = accu2; + pDat_0[1] = accu1; } } fft(M, pDat, pDat_e); #ifdef FUNCTION_dst_IV_func2 - if ( (M>=4) && ((M&3) == 0) ) { - dst_IV_func2(M>>2, sin_twiddle + sin_step, &pDat[0], 
&pDat[L - 1], sin_step); + if ((M >= 4) && ((M & 3) == 0)) { + dst_IV_func2(M >> 2, sin_twiddle + sin_step, &pDat[0], &pDat[L - 1], + sin_step); } else #endif /* FUNCTION_dst_IV_func2 */ { FIXP_DBL *RESTRICT pDat_0; FIXP_DBL *RESTRICT pDat_1; - FIXP_DBL accu1,accu2,accu3,accu4; + FIXP_DBL accu1, accu2, accu3, accu4; int idx, i; pDat_0 = &pDat[0]; @@ -498,37 +558,35 @@ void dst_IV(FIXP_DBL *pDat, accu1 = pDat_1[0]; accu2 = pDat_1[1]; - pDat_1[1] = -(pDat_0[0]>>1); - pDat_0[0] = (pDat_0[1]>>1); + pDat_1[1] = -(pDat_0[0] >> 1); + pDat_0[0] = (pDat_0[1] >> 1); - for (idx = sin_step,i=1; i<(M+1)>>1; i++, idx+=sin_step) - { + for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) { FIXP_STP twd = sin_twiddle[idx]; cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd); - pDat_1[0] = -accu3; - pDat_0[1] = -accu4; + pDat_1[0] = -accu3; + pDat_0[1] = -accu4; - pDat_0+=2; - pDat_1-=2; + pDat_0 += 2; + pDat_1 -= 2; cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd); accu1 = pDat_1[0]; accu2 = pDat_1[1]; - pDat_0[0] = accu3; + pDat_0[0] = accu3; pDat_1[1] = -accu4; } - if ( (M&1) == 0 ) - { + if ((M & 1) == 0) { /* Last Sin and Cos value pair are the same */ accu1 = fMultDiv2(accu1, WTC(0x5a82799a)); accu2 = fMultDiv2(accu2, WTC(0x5a82799a)); - pDat_0[1] = - accu1 - accu2; - pDat_1[0] = accu2 - accu1; + pDat_0[1] = -accu1 - accu2; + pDat_1[0] = accu2 - accu1; } } @@ -536,5 +594,3 @@ void dst_IV(FIXP_DBL *pDat, *pDat_e += 2; } #endif /* !defined(FUNCTION_dst_IV) */ - - -- cgit v1.2.3