9 files changed, 88 insertions, 654 deletions
diff --git a/libAACdec/src/aacdecoder_lib.cpp b/libAACdec/src/aacdecoder_lib.cpp
index d98cf5a..b528ef1 100644
--- a/libAACdec/src/aacdecoder_lib.cpp
+++ b/libAACdec/src/aacdecoder_lib.cpp
@@ -368,6 +368,23 @@ static INT aacDecoder_CtrlCFGChangeCallback(
   return errTp;
 }
 
+/* Transport decoder SBR callback: 'handle' is the SBR decoder instance that
+   was registered as user data; the header is parsed by sbrDecoder_Header()
+   and its result is passed back unchanged. */
+static INT aacDecoder_SbrCallback(
+    void *handle, HANDLE_FDK_BITSTREAM hBs, const INT sampleRateIn,
+    const INT sampleRateOut, const INT samplesPerFrame,
+    const AUDIO_OBJECT_TYPE coreCodec, const MP4_ELEMENT_ID elementID,
+    const INT elementIndex, const UCHAR harmonicSBR,
+    const UCHAR stereoConfigIndex, const UCHAR configMode, UCHAR *configChanged,
+    const INT downscaleFactor) {
+  HANDLE_SBRDECODER hSbrDecoder = (HANDLE_SBRDECODER)handle;
+  return sbrDecoder_Header(hSbrDecoder, hBs, sampleRateIn, sampleRateOut,
+                           samplesPerFrame, coreCodec, elementID, elementIndex,
+                           harmonicSBR, stereoConfigIndex, configMode,
+                           configChanged, downscaleFactor);
+}
+
 static INT aacDecoder_SscCallback(void *handle, HANDLE_FDK_BITSTREAM hBs,
                                   const AUDIO_OBJECT_TYPE coreCodec,
                                   const INT samplingRate,
@@ -959,7 +976,7 @@ LINKSPEC_CPP HANDLE_AACDECODER aacDecoder_Open(TRANSPORT_TYPE transportFmt,
     goto bail;
   }
   aacDec->qmfModeUser = NOT_DEFINED;
-  transportDec_RegisterSbrCallback(aacDec->hInput, (cbSbr_t)sbrDecoder_Header,
+  transportDec_RegisterSbrCallback(aacDec->hInput, aacDecoder_SbrCallback,
                                    (void *)aacDec->hSbrDecoder);
 
   if (mpegSurroundDecoder_Open(
@@ -1865,7 +1882,7 @@ aacDecoder_DecodeFrame(HANDLE_AACDECODER self, INT_PCM *pTimeData_extern,
 
     } /* USAC DASH IPF flushing possible end */
     if (accessUnit < numPrerollAU) {
-      FDKpushBack(hBsAu, auStartAnchor - FDKgetValidBits(hBsAu));
+      FDKpushBack(hBsAu, auStartAnchor - (INT)FDKgetValidBits(hBsAu));
     } else {
       if ((self->buildUpStatus == AACDEC_RSV60_BUILD_UP_ON) ||
           (self->buildUpStatus == AACDEC_RSV60_BUILD_UP_ON_IN_BAND) ||
diff --git a/libAACdec/src/block.cpp b/libAACdec/src/block.cpp
index 7d2a4b9..b3d09a6 100644
--- a/libAACdec/src/block.cpp
+++ b/libAACdec/src/block.cpp
@@ -127,9 +127,11 @@ amm-info@iis.fraunhofer.de
   The function reads the escape sequence from the bitstream,
   if the absolute value of the quantized coefficient has the
   value 16.
-  A limitation is implemented to maximal 31 bits to prevent endless loops.
-  If it strikes, MAX_QUANTIZED_VALUE + 1 is returned, independent of the sign of
-  parameter q.
+  The escape sequence is limited to a maximum of 21 bits according to
+  ISO/IEC 14496-3:2009(E) 4.6.3.3.
+  This limits the escape prefix to a maximum of eight 1's.
+  If more than eight 1's are read, MAX_QUANTIZED_VALUE + 1 is
+  returned, independent of the sign of parameter q.
 
   \return  quantized coefficient
 */
@@ -139,11 +141,11 @@ LONG CBlock_GetEscape(HANDLE_FDK_BITSTREAM bs, /*!< pointer to bitstream */
   if (fAbs(q) != 16) return (q);
 
   LONG i, off;
-  for (i = 4; i < 32; i++) {
+  for (i = 4; i < 13; i++) {
     if (FDKreadBit(bs) == 0) break;
   }
 
-  if (i == 32) return (MAX_QUANTIZED_VALUE + 1);
+  if (i == 13) return (MAX_QUANTIZED_VALUE + 1);
 
   off = FDKreadBits(bs, i);
   i = off + (1 << i);
diff --git a/libAACdec/src/usacdec_lpd.cpp b/libAACdec/src/usacdec_lpd.cpp
index 22069a6..f53e39f 100644
--- a/libAACdec/src/usacdec_lpd.cpp
+++ b/libAACdec/src/usacdec_lpd.cpp
@@ -1221,8 +1221,7 @@ AAC_DECODER_ERROR CLpdChannelStream_Read(
       (INT)(samplingRate * PIT_MIN_12k8 + (FSCALE_DENOM / 2)) / FSCALE_DENOM -
       (INT)PIT_MIN_12k8;
 
-  if (pSamplingRateInfo->samplingRate >
-      FAC_FSCALE_MAX /* maximum allowed core sampling frequency */) {
+  if ((samplingRate < 6000) || (samplingRate > 24000)) {
     error = AAC_DEC_PARSE_ERROR;
     goto bail;
   }
diff --git a/libDRCdec/src/drcDec_selectionProcess.cpp b/libDRCdec/src/drcDec_selectionProcess.cpp
index 54b731d..9228197 100644
--- a/libDRCdec/src/drcDec_selectionProcess.cpp
+++ b/libDRCdec/src/drcDec_selectionProcess.cpp
@@ -1006,15 +1006,23 @@ static DRCDEC_SELECTION_PROCESS_RETURN _preSelectionRequirement7(
   return DRCDEC_SELECTION_PROCESS_NO_ERROR;
 }
 
-static void _setSelectionDataInfo(DRCDEC_SELECTION_DATA* pData,
-                                  FIXP_DBL loudness,
-                                  FIXP_DBL loudnessNormalizationGainDb,
-                                  FIXP_DBL loudnessNormalizationGainDbMax,
-                                  FIXP_DBL loudnessDeviationMax,
-                                  FIXP_DBL signalPeakLevel,
-                                  FIXP_DBL outputPeakLevelMax,
-                                  int applyAdjustment) {
-  FIXP_DBL adjustment = 0;
+static void _setSelectionDataInfo(
+    DRCDEC_SELECTION_DATA* pData, FIXP_DBL loudness, /* e = 7 */
+    FIXP_DBL loudnessNormalizationGainDb,            /* e = 7 */
+    FIXP_DBL loudnessNormalizationGainDbMax,         /* e = 7 */
+    FIXP_DBL loudnessDeviationMax,                   /* e = 7 */
+    FIXP_DBL signalPeakLevel,                        /* e = 7 */
+    FIXP_DBL outputPeakLevelMax,                     /* e = 7 */
+    int applyAdjustment) {
+  FIXP_DBL adjustment = 0; /* e = 8 */
+
+  /* use e = 8 for all function parameters to prevent overflow */
+  loudness >>= 1;
+  loudnessNormalizationGainDb >>= 1;
+  loudnessNormalizationGainDbMax >>= 1;
+  loudnessDeviationMax >>= 1;
+  signalPeakLevel >>= 1;
+  outputPeakLevelMax >>= 1;
 
   if (applyAdjustment) {
     adjustment =
@@ -1028,6 +1036,14 @@ static void _setSelectionDataInfo(DRCDEC_SELECTION_DATA* pData,
   pData->outputLoudness = loudness + pData->loudnessNormalizationGainDbAdjusted;
   pData->outputPeakLevel =
       signalPeakLevel + pData->loudnessNormalizationGainDbAdjusted;
+
+  /* shift back to e = 7 using saturation */
+  pData->loudnessNormalizationGainDbAdjusted = SATURATE_LEFT_SHIFT(
+      pData->loudnessNormalizationGainDbAdjusted, 1, DFRACT_BITS);
+  pData->outputLoudness =
+      SATURATE_LEFT_SHIFT(pData->outputLoudness, 1, DFRACT_BITS);
+  pData->outputPeakLevel =
+      SATURATE_LEFT_SHIFT(pData->outputPeakLevel, 1, DFRACT_BITS);
 }
 
 static int _targetLoudnessInRange(
diff --git a/libFDK/src/arm/dct_arm.cpp b/libFDK/src/arm/dct_arm.cpp
deleted file mode 100644
index dd66109..0000000
--- a/libFDK/src/arm/dct_arm.cpp
+++ /dev/null
@@ -1,572 +0,0 @@
-/* -----------------------------------------------------------------------------
-Software License for The Fraunhofer FDK AAC Codec Library for Android
-
-© Copyright  1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
-Forschung e.V. All rights reserved.
-
- 1.    INTRODUCTION
-The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
-that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
-scheme for digital audio. This FDK AAC Codec software is intended to be used on
-a wide variety of Android devices.
-
-AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
-general perceptual audio codecs. AAC-ELD is considered the best-performing
-full-bandwidth communications codec by independent studies and is widely
-deployed. AAC has been standardized by ISO and IEC as part of the MPEG
-specifications.
-
-Patent licenses for necessary patent claims for the FDK AAC Codec (including
-those of Fraunhofer) may be obtained through Via Licensing
-(www.vialicensing.com) or through the respective patent owners individually for
-the purpose of encoding or decoding bit streams in products that are compliant
-with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
-Android devices already license these patent claims through Via Licensing or
-directly from the patent owners, and therefore FDK AAC Codec software may
-already be covered under those patent licenses when it is used for those
-licensed purposes only.
-
-Commercially-licensed AAC software libraries, including floating-point versions
-with enhanced sound quality, are also available from Fraunhofer. Users are
-encouraged to check the Fraunhofer website for additional applications
-information and documentation.
-
-2.    COPYRIGHT LICENSE
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted without payment of copyright license fees provided that you
-satisfy the following conditions:
-
-You must retain the complete text of this software license in redistributions of
-the FDK AAC Codec or your modifications thereto in source code form.
-
-You must retain the complete text of this software license in the documentation
-and/or other materials provided with redistributions of the FDK AAC Codec or
-your modifications thereto in binary form. You must make available free of
-charge copies of the complete source code of the FDK AAC Codec and your
-modifications thereto to recipients of copies in binary form.
-
-The name of Fraunhofer may not be used to endorse or promote products derived
-from this library without prior written permission.
-
-You may not charge copyright license fees for anyone to use, copy or distribute
-the FDK AAC Codec software or your modifications thereto.
-
-Your modified versions of the FDK AAC Codec must carry prominent notices stating
-that you changed the software and the date of any change. For modified versions
-of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
-must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
-AAC Codec Library for Android."
-
-3.    NO PATENT LICENSE
-
-NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
-limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
-Fraunhofer provides no warranty of patent non-infringement with respect to this
-software.
-
-You may use this FDK AAC Codec software or modifications thereto only for
-purposes that are authorized by appropriate patent licenses.
-
-4.    DISCLAIMER
-
-This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
-holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
-including but not limited to the implied warranties of merchantability and
-fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
-CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
-or consequential damages, including but not limited to procurement of substitute
-goods or services; loss of use, data, or profits, or business interruption,
-however caused and on any theory of liability, whether in contract, strict
-liability, or tort (including negligence), arising in any way out of the use of
-this software, even if advised of the possibility of such damage.
-
-5.    CONTACT INFORMATION
-
-Fraunhofer Institute for Integrated Circuits IIS
-Attention: Audio and Multimedia Departments - FDK AAC LL
-Am Wolfsmantel 33
-91058 Erlangen, Germany
-
-www.iis.fraunhofer.de/amm
-amm-info@iis.fraunhofer.de
------------------------------------------------------------------------------ */
-
-/******************* Library for basic calculation routines ********************
-
-   Author(s):
-
-   Description:
-
-*******************************************************************************/
-
-#ifdef FUNCTION_dct_IV_func1
-
-/*
-   Note: This assembler routine is here, because the ARM926 compiler does
-         not encode the inline assembler with optimal speed.
-         With this version, we save 2 cycles per loop iteration.
-*/
-
-__asm void dct_IV_func1(int i, const FIXP_SPK *twiddle,
-                        FIXP_DBL *RESTRICT pDat_0, FIXP_DBL *RESTRICT pDat_1) {
-  /* Register map:
-     r0   i
-     r1   twiddle
-     r2   pDat_0
-     r3   pDat_1
-     r4   accu1
-     r5   accu2
-     r6   accu3
-     r7   accu4
-     r8   val_tw
-     r9   accuX
-  */
-  PUSH{r4 - r9}
-
-  /* 44 cycles for 2 iterations = 22 cycles/iteration */
-  dct_IV_loop1_start
-      /*  First iteration */
-      LDR r8,
-      [r1],
-# 4 // val_tw = *twiddle++;
-      LDR r5,
-      [ r2, #0 ]  // accu2 = pDat_0[0]
-      LDR r4,
-      [ r3, #0 ]  // accu1 = pDat_1[0]
-
-      SMULWT r9,
-      r5,
-      r8  // accuX = accu2*val_tw.l
-          SMULWB r5,
-      r5,
-      r8  // accu2 = accu2*val_tw.h
-          RSB r9,
-      r9,
-# 0 // accuX =-accu2*val_tw.l
-      SMLAWT r5, r4, r8,
-      r5  // accu2 = accu2*val_tw.h + accu1*val_tw.l
-          SMLAWB r4,
-      r4, r8,
-      r9  // accu1 = accu1*val_tw.h - accu2*val_tw.l
-
-          LDR r8,
-      [r1],
-# 4 // val_tw = *twiddle++;
-      LDR r7,
-      [ r3, # - 4 ]  // accu4 = pDat_1[-1]
-      LDR r6,
-      [ r2, #4 ]  // accu3 = pDat_0[1]
-
-      SMULWB r9,
-      r7,
-      r8  // accuX = accu4*val_tw.h
-          SMULWT r7,
-      r7,
-      r8  // accu4 = accu4*val_tw.l
-          RSB r9,
-      r9,
-# 0 // accuX =-accu4*val_tw.h
-      SMLAWB r7, r6, r8,
-      r7  // accu4 = accu4*val_tw.l+accu3*val_tw.h
-          SMLAWT r6,
-      r6, r8,
-      r9  // accu3 = accu3*val_tw.l-accu4*val_tw.h
-
-          STR r5,
-      [r2],
-# 4 // *pDat_0++ = accu2
-      STR r4, [r2],
-# 4 // *pDat_0++ = accu1
-      STR r6, [r3],
-#- 4 // *pDat_1-- = accu3
-      STR r7, [r3],
-#- 4 // *pDat_1-- = accu4
-
-      /*  Second iteration */
-      LDR r8, [r1],
-# 4 // val_tw = *twiddle++;
-      LDR r5,
-      [ r2, #0 ]  // accu2 = pDat_0[0]
-      LDR r4,
-      [ r3, #0 ]  // accu1 = pDat_1[0]
-
-      SMULWT r9,
-      r5,
-      r8  // accuX = accu2*val_tw.l
-          SMULWB r5,
-      r5,
-      r8  // accu2 = accu2*val_tw.h
-          RSB r9,
-      r9,
-# 0 // accuX =-accu2*val_tw.l
-      SMLAWT r5, r4, r8,
-      r5  // accu2 = accu2*val_tw.h + accu1*val_tw.l
-          SMLAWB r4,
-      r4, r8,
-      r9  // accu1 = accu1*val_tw.h - accu2*val_tw.l
-
-          LDR r8,
-      [r1],
-# 4 // val_tw = *twiddle++;
-      LDR r7,
-      [ r3, # - 4 ]  // accu4 = pDat_1[-1]
-      LDR r6,
-      [ r2, #4 ]  // accu3 = pDat_0[1]
-
-      SMULWB r9,
-      r7,
-      r8  // accuX = accu4*val_tw.h
-          SMULWT r7,
-      r7,
-      r8  // accu4 = accu4*val_tw.l
-          RSB r9,
-      r9,
-# 0 // accuX =-accu4*val_tw.h
-      SMLAWB r7, r6, r8,
-      r7  // accu4 = accu4*val_tw.l+accu3*val_tw.h
-          SMLAWT r6,
-      r6, r8,
-      r9  // accu3 = accu3*val_tw.l-accu4*val_tw.h
-
-          STR r5,
-      [r2],
-# 4 // *pDat_0++ = accu2
-      STR r4, [r2],
-# 4 // *pDat_0++ = accu1
-      STR r6, [r3],
-#- 4 // *pDat_1-- = accu3
-      STR r7, [r3],
-#- 4 // *pDat_1-- = accu4
-
-      SUBS r0, r0,
-# 1 BNE dct_IV_loop1_start
-
-      POP { r4 - r9 }
-
-  BX lr
-}
-
-#endif /* FUNCTION_dct_IV_func1 */
-
-#ifdef FUNCTION_dct_IV_func2
-
-/* __attribute__((noinline)) */
-static inline void dct_IV_func2(int i, const FIXP_SPK *twiddle,
-                                FIXP_DBL *pDat_0, FIXP_DBL *pDat_1, int inc) {
-  FIXP_DBL accu1, accu2, accu3, accu4, accuX;
-  LONG val_tw;
-
-  accu1 = pDat_1[-2];
-  accu2 = pDat_1[-1];
-
-  *--pDat_1 = -(pDat_0[1] >> 1);
-  *pDat_0++ = (pDat_0[0] >> 1);
-
-  twiddle += inc;
-
-  __asm {
-    LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
-    B       dct_IV_loop2_2nd_part
-
-        /* 42 cycles for 2 iterations = 21 cycles/iteration */
-dct_IV_loop2:
-    SMULWT  accuX, accu2, val_tw
-    SMULWB  accu2, accu2, val_tw
-    RSB     accuX, accuX, #0
-    SMLAWB  accuX, accu1, val_tw, accuX
-    SMLAWT  accu2, accu1, val_tw, accu2
-    STR     accuX, [pDat_0], #4
-    STR     accu2, [pDat_1, #-4] !
-
-    LDR     accu4, [pDat_0, #4]
-    LDR     accu3, [pDat_0]
-    SMULWB  accuX, accu4, val_tw
-    SMULWT  accu4, accu4, val_tw
-    RSB     accuX, accuX, #0
-    SMLAWT  accuX, accu3, val_tw, accuX
-    SMLAWB  accu4, accu3, val_tw, accu4
-
-    LDR     accu1, [pDat_1, #-8]
-    LDR     accu2, [pDat_1, #-4]
-
-    LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
-
-    STR     accuX, [pDat_1, #-4] !
-    STR     accu4, [pDat_0], #4
-
-dct_IV_loop2_2nd_part:
-    SMULWT  accuX, accu2, val_tw
-    SMULWB  accu2, accu2, val_tw
-    RSB     accuX, accuX, #0
-    SMLAWB  accuX, accu1, val_tw, accuX
-    SMLAWT  accu2, accu1, val_tw, accu2
-    STR     accuX, [pDat_0], #4
-    STR     accu2, [pDat_1, #-4] !
-
-    LDR     accu4, [pDat_0, #4]
-    LDR     accu3, [pDat_0]
-    SMULWB  accuX, accu4, val_tw
-    SMULWT  accu4, accu4, val_tw
-    RSB     accuX, accuX, #0
-    SMLAWT  accuX, accu3, val_tw, accuX
-    SMLAWB  accu4, accu3, val_tw, accu4
-
-    LDR     accu1, [pDat_1, #-8]
-    LDR     accu2, [pDat_1, #-4]
-
-    STR     accuX, [pDat_1, #-4] !
-    STR     accu4, [pDat_0], #4
-
-    LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
-
-    SUBS    i, i, #1
-    BNE     dct_IV_loop2
-  }
-
-  /* Last Sin and Cos value pair are the same */
-  accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
-  accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
-
-  *--pDat_1 = accu1 + accu2;
-  *pDat_0++ = accu1 - accu2;
-}
-#endif /* FUNCTION_dct_IV_func2 */
-
-#ifdef FUNCTION_dst_IV_func1
-
-__asm void dst_IV_func1(int i, const FIXP_SPK *twiddle, FIXP_DBL *pDat_0,
-                        FIXP_DBL *pDat_1) {
-  /* Register map:
-     r0   i
-     r1   twiddle
-     r2   pDat_0
-     r3   pDat_1
-     r4   accu1
-     r5   accu2
-     r6   accu3
-     r7   accu4
-     r8   val_tw
-     r9   accuX
-  */
-  PUSH{r4 - r9}
-
-  dst_IV_loop1 LDR r8,
-      [r1],
-# 4 // val_tw = *twiddle++
-      LDR r5,
-      [r2]  // accu2 = pDat_0[0]
-      LDR r6,
-      [ r2, #4 ]  // accu3 = pDat_0[1]
-      RSB r5,
-      r5,
-# 0 // accu2 = -accu2
-      SMULWT r9, r5,
-      r8  // accuX = (-accu2)*val_tw.l
-          LDR r4,
-      [ r3, # - 4 ]  // accu1 = pDat_1[-1]
-      RSB r9,
-      r9,
-# 0 // accuX = -(-accu2)*val_tw.l
-      SMLAWB r9, r4, r8,
-      r9  // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
-          SMULWT r4,
-      r4,
-      r8  // accu1 = accu1*val_tw.l
-          LDR r7,
-      [ r3, # - 8 ]  // accu4 = pDat_1[-2]
-      SMLAWB r5,
-      r5, r8,
-      r4  // accu2 = (-accu2)*val_tw.t+accu1*val_tw.l
-          LDR r8,
-      [r1],
-# 4 // val_tw = *twiddle++
-      STR r5, [r2],
-# 4 // *pDat_0++ = accu2
-      STR r9, [r2],
-# 4 // *pDat_0++ = accu1 (accuX)
-      RSB r7, r7,
-# 0 // accu4 = -accu4
-      SMULWB r5, r7,
-      r8  // accu2 = (-accu4)*val_tw.h
-          SMULWB r4,
-      r6,
-      r8  // accu1 = (-accu4)*val_tw.l
-          RSB r5,
-      r5,
-# 0 // accu2 = -(-accu4)*val_tw.h
-      SMLAWT r6, r6, r8,
-      r5  // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
-          SMLAWT r7,
-      r7, r8,
-      r4  // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
-          STR r6,
-      [ r3, # - 4 ] !  // *--pDat_1 = accu3
-      STR r7,
-      [ r3, # - 4 ] !  // *--pDat_1 = accu4
-
-      LDR r8,
-      [r1],
-# 4 // val_tw = *twiddle++
-      LDR r5,
-      [r2]  // accu2 = pDat_0[0]
-      LDR r6,
-      [ r2, #4 ]  // accu3 = pDat_0[1]
-      RSB r5,
-      r5,
-# 0 // accu2 = -accu2
-      SMULWT r9, r5,
-      r8  // accuX = (-accu2)*val_tw.l
-          LDR r4,
-      [ r3, # - 4 ]  // accu1 = pDat_1[-1]
-      RSB r9,
-      r9,
-# 0 // accuX = -(-accu2)*val_tw.l
-      SMLAWB r9, r4, r8,
-      r9  // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
-          SMULWT r4,
-      r4,
-      r8  // accu1 = accu1*val_tw.l
-          LDR r7,
-      [ r3, # - 8 ]  // accu4 = pDat_1[-2]
-      SMLAWB r5,
-      r5, r8,
-      r4  // accu2 = (-accu2)*val_tw.t+accu1*val_tw.l
-          LDR r8,
-      [r1],
-# 4 // val_tw = *twiddle++
-      STR r5, [r2],
-# 4 // *pDat_0++ = accu2
-      STR r9, [r2],
-# 4 // *pDat_0++ = accu1 (accuX)
-      RSB r7, r7,
-# 0 // accu4 = -accu4
-      SMULWB r5, r7,
-      r8  // accu2 = (-accu4)*val_tw.h
-          SMULWB r4,
-      r6,
-      r8  // accu1 = (-accu4)*val_tw.l
-          RSB r5,
-      r5,
-# 0 // accu2 = -(-accu4)*val_tw.h
-      SMLAWT r6, r6, r8,
-      r5  // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
-          SMLAWT r7,
-      r7, r8,
-      r4  // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
-          STR r6,
-      [ r3, # - 4 ] !  // *--pDat_1 = accu3
-      STR r7,
-      [ r3, # - 4 ] !  // *--pDat_1 = accu4
-
-      SUBS r0,
-      r0,
-# 4 // i-= 4
-      BNE dst_IV_loop1
-
-          POP{r4 - r9} BX lr
-}
-#endif /* FUNCTION_dst_IV_func1 */
-
-#ifdef FUNCTION_dst_IV_func2
-
-/* __attribute__((noinline)) */
-static inline void dst_IV_func2(int i, const FIXP_SPK *twiddle,
-                                FIXP_DBL *RESTRICT pDat_0,
-                                FIXP_DBL *RESTRICT pDat_1, int inc) {
-  FIXP_DBL accu1, accu2, accu3, accu4;
-  LONG val_tw;
-
-  accu4 = pDat_0[0];
-  accu3 = pDat_0[1];
-  accu4 >>= 1;
-  accu3 >>= 1;
-  accu4 = -accu4;
-
-  accu1 = pDat_1[-1];
-  accu2 = pDat_1[0];
-
-  *pDat_0++ = accu3;
-  *pDat_1-- = accu4;
-
-  __asm {
-    B       dst_IV_loop2_2nd_part
-
-        /* 50 cycles for 2 iterations = 25 cycles/iteration */
-
-dst_IV_loop2:
-
-    LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
-
-    RSB     accu2, accu2, #0  // accu2 = -accu2
-    RSB     accu1, accu1, #0  // accu1 = -accu1
-    SMULWT  accu3, accu2, val_tw  // accu3 = (-accu2)*val_tw.l
-    SMULWT  accu4, accu1, val_tw  // accu4 = (-accu1)*val_tw.l
-    RSB     accu3, accu3, #0  // accu3 = -accu2*val_tw.l
-    SMLAWB  accu1, accu1, val_tw, accu3  // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
-    SMLAWB  accu2, accu2, val_tw, accu4  // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
-    STR     accu1, [pDat_1], #-4  // *pDat_1-- = accu1
-    STR     accu2, [pDat_0], #4  // *pDat_0++ = accu2
-
-    LDR     accu4, [pDat_0]  // accu4 = pDat_0[0]
-    LDR     accu3, [pDat_0, #4]  // accu3 = pDat_0[1]
-
-    RSB     accu4, accu4, #0  // accu4 = -accu4
-    RSB     accu3, accu3, #0  // accu3 = -accu3
-
-    SMULWB  accu1, accu3, val_tw  // accu1 = (-accu3)*val_tw.h
-    SMULWT  accu2, accu3, val_tw  // accu2 = (-accu3)*val_tw.l
-    RSB     accu1, accu1, #0  // accu1 = -(-accu3)*val_tw.h
-    SMLAWT  accu3, accu4, val_tw, accu1  // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
-    SMLAWB  accu4, accu4, val_tw, accu2  // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
-
-    LDR     accu1, [pDat_1, #-4]  // accu1 = pDat_1[-1]
-    LDR     accu2, [pDat_1]  // accu2 = pDat_1[0]
-
-    STR     accu3, [pDat_0], #4  // *pDat_0++ = accu3
-    STR     accu4, [pDat_1], #-4  // *pDat_1-- = accu4
-
-dst_IV_loop2_2nd_part:
-
-    LDR     val_tw, [twiddle], inc, LSL #2  // val_tw = *twiddle; twiddle += inc
-
-    RSB     accu2, accu2, #0  // accu2 = -accu2
-    RSB     accu1, accu1, #0  // accu1 = -accu1
-    SMULWT  accu3, accu2, val_tw  // accu3 = (-accu2)*val_tw.l
-    SMULWT  accu4, accu1, val_tw  // accu4 = (-accu1)*val_tw.l
-    RSB     accu3, accu3, #0  // accu3 = -accu2*val_tw.l
-    SMLAWB  accu1, accu1, val_tw, accu3  // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
-    SMLAWB  accu2, accu2, val_tw, accu4  // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
-    STR     accu1, [pDat_1], #-4  // *pDat_1-- = accu1
-    STR     accu2, [pDat_0], #4  // *pDat_0++ = accu2
-
-    LDR     accu4, [pDat_0]  // accu4 = pDat_0[0]
-    LDR     accu3, [pDat_0, #4]  // accu3 = pDat_0[1]
-
-    RSB     accu4, accu4, #0  // accu4 = -accu4
-    RSB     accu3, accu3, #0  // accu3 = -accu3
-
-    SMULWB  accu1, accu3, val_tw  // accu1 = (-accu3)*val_tw.h
-    SMULWT  accu2, accu3, val_tw  // accu2 = (-accu3)*val_tw.l
-    RSB     accu1, accu1, #0  // accu1 = -(-accu3)*val_tw.h
-    SMLAWT  accu3, accu4, val_tw, accu1  // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
-    SMLAWB  accu4, accu4, val_tw, accu2  // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
-
-    LDR     accu1, [pDat_1, #-4]  // accu1 = pDat_1[-1]
-    LDR     accu2, [pDat_1]  // accu2 = pDat_1[0]
-
-    STR     accu3, [pDat_0], #4  // *pDat_0++ = accu3
-    STR     accu4, [pDat_1], #-4  // *pDat_1-- = accu4
-
-    SUBS    i, i, #1
-    BNE     dst_IV_loop2
-  }
-
-  /* Last Sin and Cos value pair are the same */
-  accu1 = fMultDiv2(-accu1, WTC(0x5a82799a));
-  accu2 = fMultDiv2(-accu2, WTC(0x5a82799a));
-
-  *pDat_0 = accu1 + accu2;
-  *pDat_1 = accu1 - accu2;
-}
-#endif /* FUNCTION_dst_IV_func2 */
diff --git a/libFDK/src/dct.cpp b/libFDK/src/dct.cpp
index a451331..776493e 100644
--- a/libFDK/src/dct.cpp
+++ b/libFDK/src/dct.cpp
@@ -124,10 +124,6 @@ amm-info@iis.fraunhofer.de
 #include "FDK_tools_rom.h"
 #include "fft.h"
 
-#if defined(__arm__)
-#include "arm/dct_arm.cpp"
-#endif
-
 void dct_getTables(const FIXP_WTP **ptwiddle, const FIXP_STP **sin_twiddle,
                    int *sin_step, int length) {
   const FIXP_WTP *twiddle;
@@ -387,12 +383,6 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
 
   dct_getTables(&twiddle, &sin_twiddle, &sin_step, L);
 
-#ifdef FUNCTION_dct_IV_func1
-  if (M >= 4 && (M & 3) == 0) {
-    /* ARM926: 44 cycles for 2 iterations = 22 cycles/iteration */
-    dct_IV_func1(M >> 2, twiddle, &pDat[0], &pDat[L - 1]);
-  } else
-#endif /* FUNCTION_dct_IV_func1 */
   {
     FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
     FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
@@ -410,10 +400,10 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
       cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
       cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]);
 
-      pDat_0[0] = accu2;
-      pDat_0[1] = accu1;
-      pDat_1[0] = accu4;
-      pDat_1[1] = -accu3;
+      pDat_0[0] = accu2 >> 1;
+      pDat_0[1] = accu1 >> 1;
+      pDat_1[0] = accu4 >> 1;
+      pDat_1[1] = -(accu3 >> 1);
     }
     if (M & 1) {
       FIXP_DBL accu1, accu2;
@@ -423,19 +413,13 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
 
       cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
 
-      pDat_0[0] = accu2;
-      pDat_0[1] = accu1;
+      pDat_0[0] = accu2 >> 1;
+      pDat_0[1] = accu1 >> 1;
     }
   }
 
   fft(M, pDat, pDat_e);
 
-#ifdef FUNCTION_dct_IV_func2
-  if (M >= 4 && (M & 3) == 0) {
-    /* ARM926: 42 cycles for 2 iterations = 21 cycles/iteration */
-    dct_IV_func2(M >> 2, sin_twiddle, &pDat[0], &pDat[L], sin_step);
-  } else
-#endif /* FUNCTION_dct_IV_func2 */
   {
     FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
     FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
@@ -446,20 +430,19 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
     accu1 = pDat_1[0];
     accu2 = pDat_1[1];
 
-    pDat_1[1] = -(pDat_0[1] >> 1);
-    pDat_0[0] = (pDat_0[0] >> 1);
+    pDat_1[1] = -pDat_0[1];
 
     /* 28 cycles for ARM926 */
     for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) {
       FIXP_STP twd = sin_twiddle[idx];
-      cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd);
+      cplxMult(&accu3, &accu4, accu1, accu2, twd);
       pDat_0[1] = accu3;
       pDat_1[0] = accu4;
 
       pDat_0 += 2;
       pDat_1 -= 2;
 
-      cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
+      cplxMult(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
 
       accu1 = pDat_1[0];
       accu2 = pDat_1[1];
@@ -470,8 +453,8 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
 
     if ((M & 1) == 0) {
       /* Last Sin and Cos value pair are the same */
-      accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
-      accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
+      accu1 = fMult(accu1, WTC(0x5a82799a));
+      accu2 = fMult(accu2, WTC(0x5a82799a));
 
       pDat_1[0] = accu1 + accu2;
       pDat_0[1] = accu1 - accu2;
@@ -497,11 +480,6 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
 
   dct_getTables(&twiddle, &sin_twiddle, &sin_step, L);
 
-#ifdef FUNCTION_dst_IV_func1
-  if ((M >= 4) && ((M & 3) == 0)) {
-    dst_IV_func1(M, twiddle, &pDat[0], &pDat[L]);
-  } else
-#endif
   {
     FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
     FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
@@ -519,10 +497,10 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
       cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
       cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]);
 
-      pDat_0[0] = accu2;
-      pDat_0[1] = accu1;
-      pDat_1[0] = accu4;
-      pDat_1[1] = -accu3;
+      pDat_0[0] = accu2 >> 1;
+      pDat_0[1] = accu1 >> 1;
+      pDat_1[0] = accu4 >> 1;
+      pDat_1[1] = -(accu3 >> 1);
     }
     if (M & 1) {
       FIXP_DBL accu1, accu2;
@@ -532,19 +510,13 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
 
       cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
 
-      pDat_0[0] = accu2;
-      pDat_0[1] = accu1;
+      pDat_0[0] = accu2 >> 1;
+      pDat_0[1] = accu1 >> 1;
     }
   }
 
   fft(M, pDat, pDat_e);
 
-#ifdef FUNCTION_dst_IV_func2
-  if ((M >= 4) && ((M & 3) == 0)) {
-    dst_IV_func2(M >> 2, sin_twiddle + sin_step, &pDat[0], &pDat[L - 1],
-                 sin_step);
-  } else
-#endif /* FUNCTION_dst_IV_func2 */
   {
     FIXP_DBL *RESTRICT pDat_0;
     FIXP_DBL *RESTRICT pDat_1;
@@ -558,20 +530,20 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
     accu1 = pDat_1[0];
     accu2 = pDat_1[1];
 
-    pDat_1[1] = -(pDat_0[0] >> 1);
-    pDat_0[0] = (pDat_0[1] >> 1);
+    pDat_1[1] = -pDat_0[0];
+    pDat_0[0] = pDat_0[1];
 
     for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) {
       FIXP_STP twd = sin_twiddle[idx];
 
-      cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd);
+      cplxMult(&accu3, &accu4, accu1, accu2, twd);
       pDat_1[0] = -accu3;
       pDat_0[1] = -accu4;
 
       pDat_0 += 2;
       pDat_1 -= 2;
 
-      cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
+      cplxMult(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
 
       accu1 = pDat_1[0];
       accu2 = pDat_1[1];
@@ -582,8 +554,8 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
 
     if ((M & 1) == 0) {
       /* Last Sin and Cos value pair are the same */
-      accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
-      accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
+      accu1 = fMult(accu1, WTC(0x5a82799a));
+      accu2 = fMult(accu2, WTC(0x5a82799a));
 
       pDat_0[1] = -accu1 - accu2;
       pDat_1[0] = accu2 - accu1;
diff --git a/libMpegTPDec/src/tpdec_lib.cpp b/libMpegTPDec/src/tpdec_lib.cpp
index 1d8b7b3..2a40187 100644
--- a/libMpegTPDec/src/tpdec_lib.cpp
+++ b/libMpegTPDec/src/tpdec_lib.cpp
@@ -736,9 +736,9 @@ static TRANSPORTDEC_ERROR transportDec_AdjustEndOfAccessUnit(
             hTp->parser.latm.m_audioMuxLengthBytes > 0) {
           int loasOffset;
 
-          loasOffset = (hTp->parser.latm.m_audioMuxLengthBytes * 8 +
-                        FDKgetValidBits(hBs)) -
-                       hTp->globalFramePos;
+          loasOffset = ((INT)hTp->parser.latm.m_audioMuxLengthBytes * 8 +
+                        (INT)FDKgetValidBits(hBs)) -
+                       (INT)hTp->globalFramePos;
           if (loasOffset != 0) {
             FDKpushBiDirectional(hBs, loasOffset);
             /* For ELD and other payloads there is an unknown amount of padding,
@@ -871,7 +871,7 @@ static TRANSPORTDEC_ERROR transportDec_readHeader(
   int fConfigFound = (pfConfigFound != NULL) ? *pfConfigFound : 0;
   int startPos;
 
-  startPos = FDKgetValidBits(hBs);
+  startPos = (INT)FDKgetValidBits(hBs);
 
   switch (hTp->transportFmt) {
     case TT_MP4_ADTS:
@@ -941,7 +941,7 @@ static TRANSPORTDEC_ERROR transportDec_readHeader(
           fTraverseMoreFrames = 0;
         }
         syncLayerFrameBits = (hTp->parser.adts.bs.frame_length << 3) -
-                             ((INT)startPos - (INT)FDKgetValidBits(hBs)) -
+                             (startPos - (INT)FDKgetValidBits(hBs)) -
                              syncLength;
         if (syncLayerFrameBits <= 0) {
           err = TRANSPORTDEC_SYNC_ERROR;
@@ -952,7 +952,7 @@ static TRANSPORTDEC_ERROR transportDec_readHeader(
       break;
     case TT_MP4_LOAS:
       if (hTp->numberOfRawDataBlocks <= 0) {
-        syncLayerFrameBits = FDKreadBits(hBs, 13);
+        syncLayerFrameBits = (INT)FDKreadBits(hBs, 13);
         hTp->parser.latm.m_audioMuxLengthBytes = syncLayerFrameBits;
         syncLayerFrameBits <<= 3;
       }
@@ -974,7 +974,7 @@ static TRANSPORTDEC_ERROR transportDec_readHeader(
           hTp->numberOfRawDataBlocks =
               CLatmDemux_GetNrOfSubFrames(&hTp->parser.latm);
           if (hTp->transportFmt == TT_MP4_LOAS) {
-            syncLayerFrameBits -= startPos - FDKgetValidBits(hBs) - (13);
+            syncLayerFrameBits -= startPos - (INT)FDKgetValidBits(hBs) - (13);
           }
         }
       } else {
diff --git a/libSACdec/src/sac_bitdec.cpp b/libSACdec/src/sac_bitdec.cpp
index 37e0cf2..45fb17a 100644
--- a/libSACdec/src/sac_bitdec.cpp
+++ b/libSACdec/src/sac_bitdec.cpp
@@ -291,13 +291,13 @@ SACDEC_ERROR SpatialDecParseSpecificConfigHeader(
   if (sacHeaderLen == 127) {
     sacHeaderLen += FDKreadBits(bitstream, 16);
   }
-  numFillBits = FDKgetValidBits(bitstream);
+  numFillBits = (INT)FDKgetValidBits(bitstream);
 
   err = SpatialDecParseSpecificConfig(bitstream, pSpatialSpecificConfig,
                                       sacHeaderLen, coreCodec);
 
   numFillBits -=
-      FDKgetValidBits(bitstream); /* the number of read bits (tmpBits) */
+      (INT)FDKgetValidBits(bitstream); /* the number of read bits (tmpBits) */
   numFillBits = (8 * sacHeaderLen) - numFillBits;
   if (numFillBits < 0) {
     /* Parsing went wrong */
diff --git a/libSACdec/src/sac_dec_lib.cpp b/libSACdec/src/sac_dec_lib.cpp
index ebf9bee..5ae89d1 100644
--- a/libSACdec/src/sac_dec_lib.cpp
+++ b/libSACdec/src/sac_dec_lib.cpp
@@ -1232,7 +1232,7 @@ int mpegSurroundDecoder_Parse(CMpegSurroundDecoder *pMpegSurroundDecoder,
 
   FDK_ASSERT(pMpegSurroundDecoder->pSpatialDec);
 
-  mpsBsBits = FDKgetValidBits(hBs);
+  mpsBsBits = (INT)FDKgetValidBits(hBs);
 
   sscParse = &pMpegSurroundDecoder
                   ->spatialSpecificConfig[pMpegSurroundDecoder->bsFrameParse];
@@ -1308,14 +1308,14 @@ int mpegSurroundDecoder_Parse(CMpegSurroundDecoder *pMpegSurroundDecoder,
                   pMpegSurroundDecoder->spatialSpecificConfigBackup;
 
               /* Parse spatial specific config */
-              bitsRead = FDKgetValidBits(hMpsBsData);
+              bitsRead = (INT)FDKgetValidBits(hMpsBsData);
 
               err = SpatialDecParseSpecificConfigHeader(
                   hMpsBsData,
                   &pMpegSurroundDecoder->spatialSpecificConfigBackup, coreCodec,
                   pMpegSurroundDecoder->upmixType);
 
-              bitsRead = (bitsRead - FDKgetValidBits(hMpsBsData));
+              bitsRead = (bitsRead - (INT)FDKgetValidBits(hMpsBsData));
               parseResult = ((err == MPS_OK) ? bitsRead : -bitsRead);
 
               if (parseResult < 0) {
@@ -1429,7 +1429,7 @@ int mpegSurroundDecoder_Parse(CMpegSurroundDecoder *pMpegSurroundDecoder,
 
 bail:
 
-  *pMpsDataBits -= (mpsBsBits - FDKgetValidBits(hBs));
+  *pMpsDataBits -= (mpsBsBits - (INT)FDKgetValidBits(hBs));
 
   return err;
 }