about | summary | refs | log | tree | commit | diff | stats
path: root/libFDK
diff options
context:
space:
mode:
Diffstat (limited to 'libFDK')
-rw-r--r-- libFDK/include/FDK_bitbuffer.h 6
-rw-r--r-- libFDK/include/FDK_bitstream.h 46
-rw-r--r-- libFDK/src/FDK_bitbuffer.cpp 29
-rw-r--r-- libFDK/src/arm/dct_arm.cpp 572
-rw-r--r-- libFDK/src/dct.cpp 74
5 files changed, 23 insertions, 704 deletions
diff --git a/libFDK/include/FDK_bitbuffer.h b/libFDK/include/FDK_bitbuffer.h
index ed0b2f6..19a24b3 100644
--- a/libFDK/include/FDK_bitbuffer.h
+++ b/libFDK/include/FDK_bitbuffer.h
@@ -113,7 +113,6 @@ typedef struct {
UINT ValidBits;
UINT ReadOffset;
UINT WriteOffset;
- UINT BitCnt;
UINT BitNdx;
UCHAR *Buffer;
@@ -159,15 +158,10 @@ void FDK_pushBack(HANDLE_FDK_BITBUF hBitBuffer, const UINT numberOfBits,
void FDK_pushForward(HANDLE_FDK_BITBUF hBitBuffer, const UINT numberOfBits,
UCHAR config);
-void FDK_byteAlign(HANDLE_FDK_BITBUF hBitBuffer, UCHAR config);
-
UINT FDK_getValidBits(HANDLE_FDK_BITBUF hBitBuffer);
INT FDK_getFreeBits(HANDLE_FDK_BITBUF hBitBuffer);
-void FDK_setBitCnt(HANDLE_FDK_BITBUF hBitBuffer, const UINT value);
-INT FDK_getBitCnt(HANDLE_FDK_BITBUF hBitBuffer);
-
void FDK_Feed(HANDLE_FDK_BITBUF hBitBuffer, const UCHAR inputBuffer[],
const UINT bufferSize, UINT *bytesValid);
diff --git a/libFDK/include/FDK_bitstream.h b/libFDK/include/FDK_bitstream.h
index 49eeeaf..f799026 100644
--- a/libFDK/include/FDK_bitstream.h
+++ b/libFDK/include/FDK_bitstream.h
@@ -481,21 +481,6 @@ FDK_INLINE void FDKsyncCacheBwd(HANDLE_FDK_BITSTREAM hBitStream) {
}
/**
- * \brief Byte Alignment Function.
- * This function performs the byte_alignment() syntactic function on the
- * input stream, i.e. some bits will be discarded/padded so that the next bits
- * to be read/written will be aligned on a byte boundary with respect to
- * the bit position 0.
- *
- * \param hBitStream HANDLE_FDK_BITSTREAM handle
- * \return void
- */
-FDK_INLINE void FDKbyteAlign(HANDLE_FDK_BITSTREAM hBitStream) {
- FDKsyncCache(hBitStream);
- FDK_byteAlign(&hBitStream->hBitBuf, (UCHAR)hBitStream->ConfigCache);
-}
-
-/**
* \brief Byte Alignment Function with anchor
* This function performs the byte_alignment() syntactic function on the
* input stream, i.e. some bits will be discarded so that the next bits to be
@@ -604,37 +589,6 @@ FDK_INLINE INT FDKgetFreeBits(HANDLE_FDK_BITSTREAM hBitStream) {
}
/**
- * \brief reset bitcounter in bitBuffer to zero.
- * \param hBitStream HANDLE_FDK_BITSTREAM handle
- * \return void
- */
-FDK_INLINE void FDKresetBitCnt(HANDLE_FDK_BITSTREAM hBitStream) {
- FDKsyncCache(hBitStream);
- FDK_setBitCnt(&hBitStream->hBitBuf, 0);
-}
-
-/**
- * \brief set bitcoutner in bitBuffer to given value.
- * \param hBitStream HANDLE_FDK_BITSTREAM handle
- * \param value new value to be assigned to the bit counter
- * \return void
- */
-FDK_INLINE void FDKsetBitCnt(HANDLE_FDK_BITSTREAM hBitStream, UINT value) {
- FDKsyncCache(hBitStream);
- FDK_setBitCnt(&hBitStream->hBitBuf, value);
-}
-
-/**
- * \brief get bitcounter state from bitBuffer.
- * \param hBitStream HANDLE_FDK_BITSTREAM handle
- * \return current bit counter value
- */
-FDK_INLINE INT FDKgetBitCnt(HANDLE_FDK_BITSTREAM hBitStream) {
- FDKsyncCache(hBitStream);
- return FDK_getBitCnt(&hBitStream->hBitBuf);
-}
-
-/**
* \brief Fill the BitBuffer with a number of input bytes from external source.
* The bytesValid variable returns the number of ramaining valid bytes in
* extern inputBuffer.
diff --git a/libFDK/src/FDK_bitbuffer.cpp b/libFDK/src/FDK_bitbuffer.cpp
index a990c58..98905ea 100644
--- a/libFDK/src/FDK_bitbuffer.cpp
+++ b/libFDK/src/FDK_bitbuffer.cpp
@@ -128,7 +128,6 @@ void FDK_InitBitBuffer(HANDLE_FDK_BITBUF hBitBuf, UCHAR *pBuffer, UINT bufSize,
hBitBuf->ValidBits = validBits;
hBitBuf->ReadOffset = 0;
hBitBuf->WriteOffset = 0;
- hBitBuf->BitCnt = 0;
hBitBuf->BitNdx = 0;
hBitBuf->Buffer = pBuffer;
@@ -151,7 +150,6 @@ void FDK_ResetBitBuffer(HANDLE_FDK_BITBUF hBitBuf) {
hBitBuf->ValidBits = 0;
hBitBuf->ReadOffset = 0;
hBitBuf->WriteOffset = 0;
- hBitBuf->BitCnt = 0;
hBitBuf->BitNdx = 0;
}
@@ -161,7 +159,6 @@ INT FDK_get(HANDLE_FDK_BITBUF hBitBuf, const UINT numberOfBits) {
UINT bitOffset = hBitBuf->BitNdx & 0x07;
hBitBuf->BitNdx = (hBitBuf->BitNdx + numberOfBits) & (hBitBuf->bufBits - 1);
- hBitBuf->BitCnt += numberOfBits;
hBitBuf->ValidBits -= numberOfBits;
UINT byteMask = hBitBuf->bufSize - 1;
@@ -184,7 +181,6 @@ INT FDK_get(HANDLE_FDK_BITBUF hBitBuf, const UINT numberOfBits) {
INT FDK_get32(HANDLE_FDK_BITBUF hBitBuf) {
UINT BitNdx = hBitBuf->BitNdx + 32;
hBitBuf->BitNdx = BitNdx & (hBitBuf->bufBits - 1);
- hBitBuf->BitCnt += 32;
hBitBuf->ValidBits = (UINT)((INT)hBitBuf->ValidBits - (INT)32);
UINT byteOffset = (BitNdx - 1) >> 3;
@@ -223,7 +219,6 @@ INT FDK_getBwd(HANDLE_FDK_BITBUF hBitBuf, const UINT numberOfBits) {
int i;
hBitBuf->BitNdx = (hBitBuf->BitNdx - numberOfBits) & (hBitBuf->bufBits - 1);
- hBitBuf->BitCnt -= numberOfBits;
hBitBuf->ValidBits += numberOfBits;
UINT tx = hBitBuf->Buffer[(byteOffset - 3) & byteMask] << 24 |
@@ -256,7 +251,6 @@ void FDK_put(HANDLE_FDK_BITBUF hBitBuf, UINT value, const UINT numberOfBits) {
UINT bitOffset = hBitBuf->BitNdx & 0x7;
hBitBuf->BitNdx = (hBitBuf->BitNdx + numberOfBits) & (hBitBuf->bufBits - 1);
- hBitBuf->BitCnt += numberOfBits;
hBitBuf->ValidBits += numberOfBits;
UINT byteMask = hBitBuf->bufSize - 1;
@@ -307,7 +301,6 @@ void FDK_putBwd(HANDLE_FDK_BITBUF hBitBuf, UINT value,
int i;
hBitBuf->BitNdx = (hBitBuf->BitNdx - numberOfBits) & (hBitBuf->bufBits - 1);
- hBitBuf->BitCnt -= numberOfBits;
hBitBuf->ValidBits -= numberOfBits;
/* in place turn around */
@@ -344,7 +337,6 @@ void FDK_putBwd(HANDLE_FDK_BITBUF hBitBuf, UINT value,
#ifndef FUNCTION_FDK_pushBack
void FDK_pushBack(HANDLE_FDK_BITBUF hBitBuf, const UINT numberOfBits,
UCHAR config) {
- hBitBuf->BitCnt = (UINT)((INT)hBitBuf->BitCnt - (INT)numberOfBits);
hBitBuf->ValidBits =
(config == 0) ? (UINT)((INT)hBitBuf->ValidBits + (INT)numberOfBits)
: ((UINT)((INT)hBitBuf->ValidBits - (INT)numberOfBits));
@@ -355,7 +347,6 @@ void FDK_pushBack(HANDLE_FDK_BITBUF hBitBuf, const UINT numberOfBits,
void FDK_pushForward(HANDLE_FDK_BITBUF hBitBuf, const UINT numberOfBits,
UCHAR config) {
- hBitBuf->BitCnt = (UINT)((INT)hBitBuf->BitCnt + (INT)numberOfBits);
hBitBuf->ValidBits =
(config == 0) ? ((UINT)((INT)hBitBuf->ValidBits - (INT)numberOfBits))
: (UINT)((INT)hBitBuf->ValidBits + (INT)numberOfBits);
@@ -363,19 +354,6 @@ void FDK_pushForward(HANDLE_FDK_BITBUF hBitBuf, const UINT numberOfBits,
(UINT)((INT)hBitBuf->BitNdx + (INT)numberOfBits) & (hBitBuf->bufBits - 1);
}
-void FDK_byteAlign(HANDLE_FDK_BITBUF hBitBuf, UCHAR config) {
- INT alignment = hBitBuf->BitCnt & 0x07;
-
- if (alignment) {
- if (config == 0)
- FDK_pushForward(hBitBuf, 8 - alignment, config); /* BS_READER */
- else
- FDK_put(hBitBuf, 0, 8 - alignment); /* BS_WRITER */
- }
-
- hBitBuf->BitCnt = 0;
-}
-
#ifndef FUNCTION_FDK_getValidBits
UINT FDK_getValidBits(HANDLE_FDK_BITBUF hBitBuf) { return hBitBuf->ValidBits; }
#endif /* #ifndef FUNCTION_FDK_getValidBits */
@@ -384,12 +362,6 @@ INT FDK_getFreeBits(HANDLE_FDK_BITBUF hBitBuf) {
return (hBitBuf->bufBits - hBitBuf->ValidBits);
}
-void FDK_setBitCnt(HANDLE_FDK_BITBUF hBitBuf, const UINT value) {
- hBitBuf->BitCnt = value;
-}
-
-INT FDK_getBitCnt(HANDLE_FDK_BITBUF hBitBuf) { return hBitBuf->BitCnt; }
-
void FDK_Feed(HANDLE_FDK_BITBUF hBitBuf, const UCHAR *RESTRICT inputBuffer,
const UINT bufferSize, UINT *bytesValid) {
inputBuffer = &inputBuffer[bufferSize - *bytesValid];
@@ -438,7 +410,6 @@ void CopyAlignedBlock(HANDLE_FDK_BITBUF h_BitBufSrc, UCHAR *RESTRICT dstBuffer,
h_BitBufSrc->BitNdx =
(h_BitBufSrc->BitNdx + bToRead) & (h_BitBufSrc->bufBits - 1);
- h_BitBufSrc->BitCnt += bToRead;
h_BitBufSrc->ValidBits -= bToRead;
}
diff --git a/libFDK/src/arm/dct_arm.cpp b/libFDK/src/arm/dct_arm.cpp
deleted file mode 100644
index dd66109..0000000
--- a/libFDK/src/arm/dct_arm.cpp
+++ /dev/null
@@ -1,572 +0,0 @@
-/* -----------------------------------------------------------------------------
-Software License for The Fraunhofer FDK AAC Codec Library for Android
-
-© Copyright 1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
-Forschung e.V. All rights reserved.
-
- 1. INTRODUCTION
-The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
-that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
-scheme for digital audio. This FDK AAC Codec software is intended to be used on
-a wide variety of Android devices.
-
-AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
-general perceptual audio codecs. AAC-ELD is considered the best-performing
-full-bandwidth communications codec by independent studies and is widely
-deployed. AAC has been standardized by ISO and IEC as part of the MPEG
-specifications.
-
-Patent licenses for necessary patent claims for the FDK AAC Codec (including
-those of Fraunhofer) may be obtained through Via Licensing
-(www.vialicensing.com) or through the respective patent owners individually for
-the purpose of encoding or decoding bit streams in products that are compliant
-with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
-Android devices already license these patent claims through Via Licensing or
-directly from the patent owners, and therefore FDK AAC Codec software may
-already be covered under those patent licenses when it is used for those
-licensed purposes only.
-
-Commercially-licensed AAC software libraries, including floating-point versions
-with enhanced sound quality, are also available from Fraunhofer. Users are
-encouraged to check the Fraunhofer website for additional applications
-information and documentation.
-
-2. COPYRIGHT LICENSE
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted without payment of copyright license fees provided that you
-satisfy the following conditions:
-
-You must retain the complete text of this software license in redistributions of
-the FDK AAC Codec or your modifications thereto in source code form.
-
-You must retain the complete text of this software license in the documentation
-and/or other materials provided with redistributions of the FDK AAC Codec or
-your modifications thereto in binary form. You must make available free of
-charge copies of the complete source code of the FDK AAC Codec and your
-modifications thereto to recipients of copies in binary form.
-
-The name of Fraunhofer may not be used to endorse or promote products derived
-from this library without prior written permission.
-
-You may not charge copyright license fees for anyone to use, copy or distribute
-the FDK AAC Codec software or your modifications thereto.
-
-Your modified versions of the FDK AAC Codec must carry prominent notices stating
-that you changed the software and the date of any change. For modified versions
-of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
-must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
-AAC Codec Library for Android."
-
-3. NO PATENT LICENSE
-
-NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
-limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
-Fraunhofer provides no warranty of patent non-infringement with respect to this
-software.
-
-You may use this FDK AAC Codec software or modifications thereto only for
-purposes that are authorized by appropriate patent licenses.
-
-4. DISCLAIMER
-
-This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
-holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
-including but not limited to the implied warranties of merchantability and
-fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
-CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
-or consequential damages, including but not limited to procurement of substitute
-goods or services; loss of use, data, or profits, or business interruption,
-however caused and on any theory of liability, whether in contract, strict
-liability, or tort (including negligence), arising in any way out of the use of
-this software, even if advised of the possibility of such damage.
-
-5. CONTACT INFORMATION
-
-Fraunhofer Institute for Integrated Circuits IIS
-Attention: Audio and Multimedia Departments - FDK AAC LL
-Am Wolfsmantel 33
-91058 Erlangen, Germany
-
-www.iis.fraunhofer.de/amm
-amm-info@iis.fraunhofer.de
------------------------------------------------------------------------------ */
-
-/******************* Library for basic calculation routines ********************
-
- Author(s):
-
- Description:
-
-*******************************************************************************/
-
-#ifdef FUNCTION_dct_IV_func1
-
-/*
- Note: This assembler routine is here, because the ARM926 compiler does
- not encode the inline assembler with optimal speed.
- With this version, we save 2 cycles per loop iteration.
-*/
-
-__asm void dct_IV_func1(int i, const FIXP_SPK *twiddle,
- FIXP_DBL *RESTRICT pDat_0, FIXP_DBL *RESTRICT pDat_1) {
- /* Register map:
- r0 i
- r1 twiddle
- r2 pDat_0
- r3 pDat_1
- r4 accu1
- r5 accu2
- r6 accu3
- r7 accu4
- r8 val_tw
- r9 accuX
- */
- PUSH{r4 - r9}
-
- /* 44 cycles for 2 iterations = 22 cycles/iteration */
- dct_IV_loop1_start
- /* First iteration */
- LDR r8,
- [r1],
-# 4 // val_tw = *twiddle++;
- LDR r5,
- [ r2, #0 ] // accu2 = pDat_0[0]
- LDR r4,
- [ r3, #0 ] // accu1 = pDat_1[0]
-
- SMULWT r9,
- r5,
- r8 // accuX = accu2*val_tw.l
- SMULWB r5,
- r5,
- r8 // accu2 = accu2*val_tw.h
- RSB r9,
- r9,
-# 0 // accuX =-accu2*val_tw.l
- SMLAWT r5, r4, r8,
- r5 // accu2 = accu2*val_tw.h + accu1*val_tw.l
- SMLAWB r4,
- r4, r8,
- r9 // accu1 = accu1*val_tw.h - accu2*val_tw.l
-
- LDR r8,
- [r1],
-# 4 // val_tw = *twiddle++;
- LDR r7,
- [ r3, # - 4 ] // accu4 = pDat_1[-1]
- LDR r6,
- [ r2, #4 ] // accu3 = pDat_0[1]
-
- SMULWB r9,
- r7,
- r8 // accuX = accu4*val_tw.h
- SMULWT r7,
- r7,
- r8 // accu4 = accu4*val_tw.l
- RSB r9,
- r9,
-# 0 // accuX =-accu4*val_tw.h
- SMLAWB r7, r6, r8,
- r7 // accu4 = accu4*val_tw.l+accu3*val_tw.h
- SMLAWT r6,
- r6, r8,
- r9 // accu3 = accu3*val_tw.l-accu4*val_tw.h
-
- STR r5,
- [r2],
-# 4 // *pDat_0++ = accu2
- STR r4, [r2],
-# 4 // *pDat_0++ = accu1
- STR r6, [r3],
-#- 4 // *pDat_1-- = accu3
- STR r7, [r3],
-#- 4 // *pDat_1-- = accu4
-
- /* Second iteration */
- LDR r8, [r1],
-# 4 // val_tw = *twiddle++;
- LDR r5,
- [ r2, #0 ] // accu2 = pDat_0[0]
- LDR r4,
- [ r3, #0 ] // accu1 = pDat_1[0]
-
- SMULWT r9,
- r5,
- r8 // accuX = accu2*val_tw.l
- SMULWB r5,
- r5,
- r8 // accu2 = accu2*val_tw.h
- RSB r9,
- r9,
-# 0 // accuX =-accu2*val_tw.l
- SMLAWT r5, r4, r8,
- r5 // accu2 = accu2*val_tw.h + accu1*val_tw.l
- SMLAWB r4,
- r4, r8,
- r9 // accu1 = accu1*val_tw.h - accu2*val_tw.l
-
- LDR r8,
- [r1],
-# 4 // val_tw = *twiddle++;
- LDR r7,
- [ r3, # - 4 ] // accu4 = pDat_1[-1]
- LDR r6,
- [ r2, #4 ] // accu3 = pDat_0[1]
-
- SMULWB r9,
- r7,
- r8 // accuX = accu4*val_tw.h
- SMULWT r7,
- r7,
- r8 // accu4 = accu4*val_tw.l
- RSB r9,
- r9,
-# 0 // accuX =-accu4*val_tw.h
- SMLAWB r7, r6, r8,
- r7 // accu4 = accu4*val_tw.l+accu3*val_tw.h
- SMLAWT r6,
- r6, r8,
- r9 // accu3 = accu3*val_tw.l-accu4*val_tw.h
-
- STR r5,
- [r2],
-# 4 // *pDat_0++ = accu2
- STR r4, [r2],
-# 4 // *pDat_0++ = accu1
- STR r6, [r3],
-#- 4 // *pDat_1-- = accu3
- STR r7, [r3],
-#- 4 // *pDat_1-- = accu4
-
- SUBS r0, r0,
-# 1 BNE dct_IV_loop1_start
-
- POP { r4 - r9 }
-
- BX lr
-}
-
-#endif /* FUNCTION_dct_IV_func1 */
-
-#ifdef FUNCTION_dct_IV_func2
-
-/* __attribute__((noinline)) */
-static inline void dct_IV_func2(int i, const FIXP_SPK *twiddle,
- FIXP_DBL *pDat_0, FIXP_DBL *pDat_1, int inc) {
- FIXP_DBL accu1, accu2, accu3, accu4, accuX;
- LONG val_tw;
-
- accu1 = pDat_1[-2];
- accu2 = pDat_1[-1];
-
- *--pDat_1 = -(pDat_0[1] >> 1);
- *pDat_0++ = (pDat_0[0] >> 1);
-
- twiddle += inc;
-
- __asm {
- LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
- B dct_IV_loop2_2nd_part
-
- /* 42 cycles for 2 iterations = 21 cycles/iteration */
-dct_IV_loop2:
- SMULWT accuX, accu2, val_tw
- SMULWB accu2, accu2, val_tw
- RSB accuX, accuX, #0
- SMLAWB accuX, accu1, val_tw, accuX
- SMLAWT accu2, accu1, val_tw, accu2
- STR accuX, [pDat_0], #4
- STR accu2, [pDat_1, #-4] !
-
- LDR accu4, [pDat_0, #4]
- LDR accu3, [pDat_0]
- SMULWB accuX, accu4, val_tw
- SMULWT accu4, accu4, val_tw
- RSB accuX, accuX, #0
- SMLAWT accuX, accu3, val_tw, accuX
- SMLAWB accu4, accu3, val_tw, accu4
-
- LDR accu1, [pDat_1, #-8]
- LDR accu2, [pDat_1, #-4]
-
- LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
-
- STR accuX, [pDat_1, #-4] !
- STR accu4, [pDat_0], #4
-
-dct_IV_loop2_2nd_part:
- SMULWT accuX, accu2, val_tw
- SMULWB accu2, accu2, val_tw
- RSB accuX, accuX, #0
- SMLAWB accuX, accu1, val_tw, accuX
- SMLAWT accu2, accu1, val_tw, accu2
- STR accuX, [pDat_0], #4
- STR accu2, [pDat_1, #-4] !
-
- LDR accu4, [pDat_0, #4]
- LDR accu3, [pDat_0]
- SMULWB accuX, accu4, val_tw
- SMULWT accu4, accu4, val_tw
- RSB accuX, accuX, #0
- SMLAWT accuX, accu3, val_tw, accuX
- SMLAWB accu4, accu3, val_tw, accu4
-
- LDR accu1, [pDat_1, #-8]
- LDR accu2, [pDat_1, #-4]
-
- STR accuX, [pDat_1, #-4] !
- STR accu4, [pDat_0], #4
-
- LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
-
- SUBS i, i, #1
- BNE dct_IV_loop2
- }
-
- /* Last Sin and Cos value pair are the same */
- accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
- accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
-
- *--pDat_1 = accu1 + accu2;
- *pDat_0++ = accu1 - accu2;
-}
-#endif /* FUNCTION_dct_IV_func2 */
-
-#ifdef FUNCTION_dst_IV_func1
-
-__asm void dst_IV_func1(int i, const FIXP_SPK *twiddle, FIXP_DBL *pDat_0,
- FIXP_DBL *pDat_1) {
- /* Register map:
- r0 i
- r1 twiddle
- r2 pDat_0
- r3 pDat_1
- r4 accu1
- r5 accu2
- r6 accu3
- r7 accu4
- r8 val_tw
- r9 accuX
- */
- PUSH{r4 - r9}
-
- dst_IV_loop1 LDR r8,
- [r1],
-# 4 // val_tw = *twiddle++
- LDR r5,
- [r2] // accu2 = pDat_0[0]
- LDR r6,
- [ r2, #4 ] // accu3 = pDat_0[1]
- RSB r5,
- r5,
-# 0 // accu2 = -accu2
- SMULWT r9, r5,
- r8 // accuX = (-accu2)*val_tw.l
- LDR r4,
- [ r3, # - 4 ] // accu1 = pDat_1[-1]
- RSB r9,
- r9,
-# 0 // accuX = -(-accu2)*val_tw.l
- SMLAWB r9, r4, r8,
- r9 // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
- SMULWT r4,
- r4,
- r8 // accu1 = accu1*val_tw.l
- LDR r7,
- [ r3, # - 8 ] // accu4 = pDat_1[-2]
- SMLAWB r5,
- r5, r8,
- r4 // accu2 = (-accu2)*val_tw.t+accu1*val_tw.l
- LDR r8,
- [r1],
-# 4 // val_tw = *twiddle++
- STR r5, [r2],
-# 4 // *pDat_0++ = accu2
- STR r9, [r2],
-# 4 // *pDat_0++ = accu1 (accuX)
- RSB r7, r7,
-# 0 // accu4 = -accu4
- SMULWB r5, r7,
- r8 // accu2 = (-accu4)*val_tw.h
- SMULWB r4,
- r6,
- r8 // accu1 = (-accu4)*val_tw.l
- RSB r5,
- r5,
-# 0 // accu2 = -(-accu4)*val_tw.h
- SMLAWT r6, r6, r8,
- r5 // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
- SMLAWT r7,
- r7, r8,
- r4 // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
- STR r6,
- [ r3, # - 4 ] ! // *--pDat_1 = accu3
- STR r7,
- [ r3, # - 4 ] ! // *--pDat_1 = accu4
-
- LDR r8,
- [r1],
-# 4 // val_tw = *twiddle++
- LDR r5,
- [r2] // accu2 = pDat_0[0]
- LDR r6,
- [ r2, #4 ] // accu3 = pDat_0[1]
- RSB r5,
- r5,
-# 0 // accu2 = -accu2
- SMULWT r9, r5,
- r8 // accuX = (-accu2)*val_tw.l
- LDR r4,
- [ r3, # - 4 ] // accu1 = pDat_1[-1]
- RSB r9,
- r9,
-# 0 // accuX = -(-accu2)*val_tw.l
- SMLAWB r9, r4, r8,
- r9 // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
- SMULWT r4,
- r4,
- r8 // accu1 = accu1*val_tw.l
- LDR r7,
- [ r3, # - 8 ] // accu4 = pDat_1[-2]
- SMLAWB r5,
- r5, r8,
- r4 // accu2 = (-accu2)*val_tw.t+accu1*val_tw.l
- LDR r8,
- [r1],
-# 4 // val_tw = *twiddle++
- STR r5, [r2],
-# 4 // *pDat_0++ = accu2
- STR r9, [r2],
-# 4 // *pDat_0++ = accu1 (accuX)
- RSB r7, r7,
-# 0 // accu4 = -accu4
- SMULWB r5, r7,
- r8 // accu2 = (-accu4)*val_tw.h
- SMULWB r4,
- r6,
- r8 // accu1 = (-accu4)*val_tw.l
- RSB r5,
- r5,
-# 0 // accu2 = -(-accu4)*val_tw.h
- SMLAWT r6, r6, r8,
- r5 // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
- SMLAWT r7,
- r7, r8,
- r4 // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
- STR r6,
- [ r3, # - 4 ] ! // *--pDat_1 = accu3
- STR r7,
- [ r3, # - 4 ] ! // *--pDat_1 = accu4
-
- SUBS r0,
- r0,
-# 4 // i-= 4
- BNE dst_IV_loop1
-
- POP{r4 - r9} BX lr
-}
-#endif /* FUNCTION_dst_IV_func1 */
-
-#ifdef FUNCTION_dst_IV_func2
-
-/* __attribute__((noinline)) */
-static inline void dst_IV_func2(int i, const FIXP_SPK *twiddle,
- FIXP_DBL *RESTRICT pDat_0,
- FIXP_DBL *RESTRICT pDat_1, int inc) {
- FIXP_DBL accu1, accu2, accu3, accu4;
- LONG val_tw;
-
- accu4 = pDat_0[0];
- accu3 = pDat_0[1];
- accu4 >>= 1;
- accu3 >>= 1;
- accu4 = -accu4;
-
- accu1 = pDat_1[-1];
- accu2 = pDat_1[0];
-
- *pDat_0++ = accu3;
- *pDat_1-- = accu4;
-
- __asm {
- B dst_IV_loop2_2nd_part
-
- /* 50 cycles for 2 iterations = 25 cycles/iteration */
-
-dst_IV_loop2:
-
- LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
-
- RSB accu2, accu2, #0 // accu2 = -accu2
- RSB accu1, accu1, #0 // accu1 = -accu1
- SMULWT accu3, accu2, val_tw // accu3 = (-accu2)*val_tw.l
- SMULWT accu4, accu1, val_tw // accu4 = (-accu1)*val_tw.l
- RSB accu3, accu3, #0 // accu3 = -accu2*val_tw.l
- SMLAWB accu1, accu1, val_tw, accu3 // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
- SMLAWB accu2, accu2, val_tw, accu4 // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
- STR accu1, [pDat_1], #-4 // *pDat_1-- = accu1
- STR accu2, [pDat_0], #4 // *pDat_0++ = accu2
-
- LDR accu4, [pDat_0] // accu4 = pDat_0[0]
- LDR accu3, [pDat_0, #4] // accu3 = pDat_0[1]
-
- RSB accu4, accu4, #0 // accu4 = -accu4
- RSB accu3, accu3, #0 // accu3 = -accu3
-
- SMULWB accu1, accu3, val_tw // accu1 = (-accu3)*val_tw.h
- SMULWT accu2, accu3, val_tw // accu2 = (-accu3)*val_tw.l
- RSB accu1, accu1, #0 // accu1 = -(-accu3)*val_tw.h
- SMLAWT accu3, accu4, val_tw, accu1 // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
- SMLAWB accu4, accu4, val_tw, accu2 // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
-
- LDR accu1, [pDat_1, #-4] // accu1 = pDat_1[-1]
- LDR accu2, [pDat_1] // accu2 = pDat_1[0]
-
- STR accu3, [pDat_0], #4 // *pDat_0++ = accu3
- STR accu4, [pDat_1], #-4 // *pDat_1-- = accu4
-
-dst_IV_loop2_2nd_part:
-
- LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
-
- RSB accu2, accu2, #0 // accu2 = -accu2
- RSB accu1, accu1, #0 // accu1 = -accu1
- SMULWT accu3, accu2, val_tw // accu3 = (-accu2)*val_tw.l
- SMULWT accu4, accu1, val_tw // accu4 = (-accu1)*val_tw.l
- RSB accu3, accu3, #0 // accu3 = -accu2*val_tw.l
- SMLAWB accu1, accu1, val_tw, accu3 // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
- SMLAWB accu2, accu2, val_tw, accu4 // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
- STR accu1, [pDat_1], #-4 // *pDat_1-- = accu1
- STR accu2, [pDat_0], #4 // *pDat_0++ = accu2
-
- LDR accu4, [pDat_0] // accu4 = pDat_0[0]
- LDR accu3, [pDat_0, #4] // accu3 = pDat_0[1]
-
- RSB accu4, accu4, #0 // accu4 = -accu4
- RSB accu3, accu3, #0 // accu3 = -accu3
-
- SMULWB accu1, accu3, val_tw // accu1 = (-accu3)*val_tw.h
- SMULWT accu2, accu3, val_tw // accu2 = (-accu3)*val_tw.l
- RSB accu1, accu1, #0 // accu1 = -(-accu3)*val_tw.h
- SMLAWT accu3, accu4, val_tw, accu1 // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
- SMLAWB accu4, accu4, val_tw, accu2 // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
-
- LDR accu1, [pDat_1, #-4] // accu1 = pDat_1[-1]
- LDR accu2, [pDat_1] // accu2 = pDat_1[0]
-
- STR accu3, [pDat_0], #4 // *pDat_0++ = accu3
- STR accu4, [pDat_1], #-4 // *pDat_1-- = accu4
-
- SUBS i, i, #1
- BNE dst_IV_loop2
- }
-
- /* Last Sin and Cos value pair are the same */
- accu1 = fMultDiv2(-accu1, WTC(0x5a82799a));
- accu2 = fMultDiv2(-accu2, WTC(0x5a82799a));
-
- *pDat_0 = accu1 + accu2;
- *pDat_1 = accu1 - accu2;
-}
-#endif /* FUNCTION_dst_IV_func2 */
diff --git a/libFDK/src/dct.cpp b/libFDK/src/dct.cpp
index a451331..776493e 100644
--- a/libFDK/src/dct.cpp
+++ b/libFDK/src/dct.cpp
@@ -124,10 +124,6 @@ amm-info@iis.fraunhofer.de
#include "FDK_tools_rom.h"
#include "fft.h"
-#if defined(__arm__)
-#include "arm/dct_arm.cpp"
-#endif
-
void dct_getTables(const FIXP_WTP **ptwiddle, const FIXP_STP **sin_twiddle,
int *sin_step, int length) {
const FIXP_WTP *twiddle;
@@ -387,12 +383,6 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
dct_getTables(&twiddle, &sin_twiddle, &sin_step, L);
-#ifdef FUNCTION_dct_IV_func1
- if (M >= 4 && (M & 3) == 0) {
- /* ARM926: 44 cycles for 2 iterations = 22 cycles/iteration */
- dct_IV_func1(M >> 2, twiddle, &pDat[0], &pDat[L - 1]);
- } else
-#endif /* FUNCTION_dct_IV_func1 */
{
FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
@@ -410,10 +400,10 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]);
- pDat_0[0] = accu2;
- pDat_0[1] = accu1;
- pDat_1[0] = accu4;
- pDat_1[1] = -accu3;
+ pDat_0[0] = accu2 >> 1;
+ pDat_0[1] = accu1 >> 1;
+ pDat_1[0] = accu4 >> 1;
+ pDat_1[1] = -(accu3 >> 1);
}
if (M & 1) {
FIXP_DBL accu1, accu2;
@@ -423,19 +413,13 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
- pDat_0[0] = accu2;
- pDat_0[1] = accu1;
+ pDat_0[0] = accu2 >> 1;
+ pDat_0[1] = accu1 >> 1;
}
}
fft(M, pDat, pDat_e);
-#ifdef FUNCTION_dct_IV_func2
- if (M >= 4 && (M & 3) == 0) {
- /* ARM926: 42 cycles for 2 iterations = 21 cycles/iteration */
- dct_IV_func2(M >> 2, sin_twiddle, &pDat[0], &pDat[L], sin_step);
- } else
-#endif /* FUNCTION_dct_IV_func2 */
{
FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
@@ -446,20 +430,19 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
accu1 = pDat_1[0];
accu2 = pDat_1[1];
- pDat_1[1] = -(pDat_0[1] >> 1);
- pDat_0[0] = (pDat_0[0] >> 1);
+ pDat_1[1] = -pDat_0[1];
/* 28 cycles for ARM926 */
for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) {
FIXP_STP twd = sin_twiddle[idx];
- cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd);
+ cplxMult(&accu3, &accu4, accu1, accu2, twd);
pDat_0[1] = accu3;
pDat_1[0] = accu4;
pDat_0 += 2;
pDat_1 -= 2;
- cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
+ cplxMult(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
accu1 = pDat_1[0];
accu2 = pDat_1[1];
@@ -470,8 +453,8 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
if ((M & 1) == 0) {
/* Last Sin and Cos value pair are the same */
- accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
- accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
+ accu1 = fMult(accu1, WTC(0x5a82799a));
+ accu2 = fMult(accu2, WTC(0x5a82799a));
pDat_1[0] = accu1 + accu2;
pDat_0[1] = accu1 - accu2;
@@ -497,11 +480,6 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
dct_getTables(&twiddle, &sin_twiddle, &sin_step, L);
-#ifdef FUNCTION_dst_IV_func1
- if ((M >= 4) && ((M & 3) == 0)) {
- dst_IV_func1(M, twiddle, &pDat[0], &pDat[L]);
- } else
-#endif
{
FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
@@ -519,10 +497,10 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]);
- pDat_0[0] = accu2;
- pDat_0[1] = accu1;
- pDat_1[0] = accu4;
- pDat_1[1] = -accu3;
+ pDat_0[0] = accu2 >> 1;
+ pDat_0[1] = accu1 >> 1;
+ pDat_1[0] = accu4 >> 1;
+ pDat_1[1] = -(accu3 >> 1);
}
if (M & 1) {
FIXP_DBL accu1, accu2;
@@ -532,19 +510,13 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
- pDat_0[0] = accu2;
- pDat_0[1] = accu1;
+ pDat_0[0] = accu2 >> 1;
+ pDat_0[1] = accu1 >> 1;
}
}
fft(M, pDat, pDat_e);
-#ifdef FUNCTION_dst_IV_func2
- if ((M >= 4) && ((M & 3) == 0)) {
- dst_IV_func2(M >> 2, sin_twiddle + sin_step, &pDat[0], &pDat[L - 1],
- sin_step);
- } else
-#endif /* FUNCTION_dst_IV_func2 */
{
FIXP_DBL *RESTRICT pDat_0;
FIXP_DBL *RESTRICT pDat_1;
@@ -558,20 +530,20 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
accu1 = pDat_1[0];
accu2 = pDat_1[1];
- pDat_1[1] = -(pDat_0[0] >> 1);
- pDat_0[0] = (pDat_0[1] >> 1);
+ pDat_1[1] = -pDat_0[0];
+ pDat_0[0] = pDat_0[1];
for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) {
FIXP_STP twd = sin_twiddle[idx];
- cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd);
+ cplxMult(&accu3, &accu4, accu1, accu2, twd);
pDat_1[0] = -accu3;
pDat_0[1] = -accu4;
pDat_0 += 2;
pDat_1 -= 2;
- cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
+ cplxMult(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
accu1 = pDat_1[0];
accu2 = pDat_1[1];
@@ -582,8 +554,8 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
if ((M & 1) == 0) {
/* Last Sin and Cos value pair are the same */
- accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
- accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
+ accu1 = fMult(accu1, WTC(0x5a82799a));
+ accu2 = fMult(accu2, WTC(0x5a82799a));
pDat_0[1] = -accu1 - accu2;
pDat_1[0] = accu2 - accu1;