From 46ba3676b854acbc69a4c7845f578d4c2886377b Mon Sep 17 00:00:00 2001 From: Jean-Michel Trivi Date: Fri, 8 Apr 2016 10:52:42 -0700 Subject: AAC/SBR encoder improvements * AAC-Encoder - AAC-ELD core encoder audio quality tuning. Update tuning tables, configure bitreservoir size and adapt afterburner iteration value. Modified file(s): libAACenc/src/aacenc.h libAACenc/src/aacenc_lib.cpp libAACenc/src/adj_thr.cpp libAACenc/src/adj_thr.h libAACenc/src/adj_thr_data.h libAACenc/src/bandwidth.cpp libAACenc/src/pnsparam.cpp libAACenc/src/qc_main.cpp - Introduce dead zone quantizer for ELD to improve audio quality at certain configurations. Modified file(s): libAACenc/src/aacenc_lib.cpp libAACenc/src/adj_thr.cpp libAACenc/src/adj_thr.h libAACenc/src/qc_data.h libAACenc/src/qc_main.cpp libAACenc/src/quantize.cpp libAACenc/src/quantize.h libAACenc/src/sf_estim.cpp libAACenc/src/sf_estim.h - Revise TNS module to improve ELD audio quality. - Use new window function and separate prediction gain according to TNS filters. - Add missing memory initialization to TNS configuration. Modified file(s): libAACenc/src/aacenc_lib.cpp libAACenc/src/aacenc_tns.cpp libAACenc/src/aacenc_tns.h libAACenc/src/psy_main.cpp libAACenc/src/tns_func.h * SBR-Encoder - Revise frequency resolution calculation and handle differently depending on number of envelopes and split frames decision. - Add and adjust ELD SBR tuning tables. Modified file(s): libSBRenc/include/sbr_encoder.h libSBRenc/src/bit_sbr.h libSBRenc/src/env_est.cpp libSBRenc/src/fram_gen.cpp libSBRenc/src/fram_gen.h libSBRenc/src/mh_det.cpp libSBRenc/src/sbr_def.h libSBRenc/src/sbr_encoder.cpp libSBRenc/src/sbr_rom.cpp libSBRenc/src/tran_det.cpp - Replace ELD transient detector with fast implementation. 
Modified file(s): libSBRenc/src/env_est.cpp libSBRenc/src/env_est.h libSBRenc/src/fram_gen.cpp libSBRenc/src/sbr_def.h libSBRenc/src/sbr_encoder.cpp libSBRenc/src/tran_det.cpp libSBRenc/src/tran_det.h * FDK-Library - Introduce generic compare function in tools library. Modified file(s): libFDK/include/fixpoint_math.h libFDK/src/FDK_core.cpp * SBR-Encoder - Revise ELD frame splitter to improve bit distribution. Modified file(s): libSBRenc/include/sbr_encoder.h libSBRenc/src/bit_sbr.h libSBRenc/src/env_est.cpp libSBRenc/src/fram_gen.cpp libSBRenc/src/fram_gen.h libSBRenc/src/sbr_encoder.cpp libSBRenc/src/tran_det.cpp libSBRenc/src/tran_det.h - Configure amplitude resolution according to the tonality of the audio signal. Modified file(s): libSBRenc/include/sbr_encoder.h libSBRenc/src/bit_sbr.h libSBRenc/src/env_est.cpp libSBRenc/src/nf_est.cpp libSBRenc/src/nf_est.h libSBRenc/src/sbr_def.h libSBRenc/src/sbr_encoder.cpp libSBRenc/src/ton_corr.cpp libSBRenc/src/ton_corr.h libSBRenc/src/tran_det.cpp libSBRenc/src/tran_det.h Change-Id: Ie0672b989a06ee63b50240616b8d1d4b790b6cb2 --- libSBRenc/include/sbr_encoder.h | 19 +- libSBRenc/src/bit_sbr.h | 8 +- libSBRenc/src/env_est.cpp | 175 ++++++++++++++- libSBRenc/src/env_est.h | 3 +- libSBRenc/src/fram_gen.cpp | 82 ++++--- libSBRenc/src/fram_gen.h | 30 +-- libSBRenc/src/mh_det.cpp | 32 ++- libSBRenc/src/nf_est.cpp | 2 +- libSBRenc/src/nf_est.h | 4 +- libSBRenc/src/sbr_def.h | 14 +- libSBRenc/src/sbr_encoder.cpp | 113 +++++++++- libSBRenc/src/sbr_rom.cpp | 11 +- libSBRenc/src/ton_corr.cpp | 8 +- libSBRenc/src/ton_corr.h | 4 +- libSBRenc/src/tran_det.cpp | 487 +++++++++++++++++++++++++++++++++++----- libSBRenc/src/tran_det.h | 61 ++++- 16 files changed, 895 insertions(+), 158 deletions(-) (limited to 'libSBRenc') diff --git a/libSBRenc/include/sbr_encoder.h b/libSBRenc/include/sbr_encoder.h index 93dc46d..aec0398 100644 --- a/libSBRenc/include/sbr_encoder.h +++ b/libSBRenc/include/sbr_encoder.h @@ -2,7 +2,7 @@ /* 
----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. +© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. INTRODUCTION @@ -135,6 +135,12 @@ enum SBR_SYNTAX_DRM_CRC = 0x0008 }; +typedef enum +{ + FREQ_RES_LOW = 0, + FREQ_RES_HIGH +} FREQ_RES; + typedef struct { CODEC_TYPE coreCoder; /*!< LC or ELD */ @@ -168,8 +174,9 @@ typedef struct sbrConfiguration INT dynBwSupported; /*!< Flag: support for dynamic bandwidth in this combination. */ INT parametricCoding; /*!< Flag: usage of parametric coding tool. */ INT downSampleFactor; /*!< Sampling rate relation between the SBR and the core encoder. */ - int freq_res_fixfix[3]; /*!< Frequency resolution of envelopes in frame class FIXFIX - 0=1 Env; 1=2 Env; 2=4 Env; */ + FREQ_RES freq_res_fixfix[2];/*!< Frequency resolution of envelopes in frame class FIXFIX, for non-split case and split case */ + UCHAR fResTransIsLow; /*!< Frequency resolution of envelopes in transient frames: low (0) or variable (1) */ + /* core coder dependent tuning parameters */ @@ -221,6 +228,8 @@ typedef struct sbrConfiguration INT sbr_interpol_freq; /*!< Flag: use interpolation in freq. direction. */ INT sbr_smoothing_length; /*!< Flag: choose length 4 or 0 (=on, off). */ UCHAR init_amp_res_FF; + FIXP_DBL threshold_AmpRes_FF_m; + SCHAR threshold_AmpRes_FF_e; } sbrConfiguration, *sbrConfigurationPtr ; typedef struct SBR_CONFIG_DATA @@ -237,7 +246,7 @@ typedef struct SBR_CONFIG_DATA INT noQmfBands; /**< Number of QMF frequency bands. */ INT noQmfSlots; /**< Number of QMF slots. */ - UCHAR *freqBandTable[2]; /**< Frequency table for low and hires, only MAX_FREQ_COEFFS/2 +1 coeefs actually needed for lowres. 
*/ + UCHAR *freqBandTable[2]; /**< Frequency table for low and hires, only MAX_FREQ_COEFFS/2 +1 coeffs actually needed for lowres. */ UCHAR *v_k_master; /**< Master BandTable where freqBandTable is derived from. */ @@ -249,6 +258,8 @@ typedef struct SBR_CONFIG_DATA INT xposCtrlSwitch; /**< Flag indicates whether to switch xpos ctrl on the fly. */ INT switchTransposers; /**< Flag indicates whether to switch xpos on the fly . */ UCHAR initAmpResFF; + FIXP_DBL thresholdAmpResFF_m; + SCHAR thresholdAmpResFF_e; } SBR_CONFIG_DATA, *HANDLE_SBR_CONFIG_DATA; typedef struct { diff --git a/libSBRenc/src/bit_sbr.h b/libSBRenc/src/bit_sbr.h index 1ce2c1e..de4ac89 100644 --- a/libSBRenc/src/bit_sbr.h +++ b/libSBRenc/src/bit_sbr.h @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. +© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. 
INTRODUCTION @@ -141,8 +141,8 @@ struct SBR_ENV_DATA { INT sbr_xpos_ctrl; - INT freq_res_fixfix; - + FREQ_RES freq_res_fixfix[2]; + UCHAR fResTransIsLow; INVF_MODE sbr_invf_mode; INVF_MODE sbr_invf_mode_vec[MAX_NUM_NOISE_VALUES]; @@ -205,6 +205,8 @@ struct SBR_ENV_DATA INT balance; AMP_RES init_sbr_amp_res; AMP_RES currentAmpResFF; + FIXP_DBL ton_HF[SBR_GLOBAL_TONALITY_VALUES]; /* tonality is scaled by 2^19/0.524288f (fract part of RELAXATION) */ + FIXP_DBL global_tonality; /* extended data */ INT extended_data; diff --git a/libSBRenc/src/env_est.cpp b/libSBRenc/src/env_est.cpp index 929f229..4fcda51 100644 --- a/libSBRenc/src/env_est.cpp +++ b/libSBRenc/src/env_est.cpp @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. +© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. INTRODUCTION @@ -103,6 +103,114 @@ static const UCHAR panTable[2][10] = { { 0, 2, 4, 6, 8,12,16,20,24}, static const UCHAR maxIndex[2] = {9, 5}; +/****************************************************************************** + Functionname: FDKsbrEnc_GetTonality +******************************************************************************/ +/***************************************************************************/ +/*! + + \brief Calculates complete energy per band from the energy values + of the QMF subsamples. 
+ + \brief quotaMatrix - calculated in FDKsbrEnc_CalculateTonalityQuotas() + \brief noEstPerFrame - number of estimations per frame + \brief startIndex - start index for the quota matrix + \brief Energies - energy matrix + \brief startBand - start band + \brief stopBand - number of QMF bands + \brief numberCols - number of QMF subsamples + + \return mean tonality of the 5 bands with the highest energy + scaled by 2^(RELAXATION_SHIFT+2)*RELAXATION_FRACT + +****************************************************************************/ +static FIXP_DBL FDKsbrEnc_GetTonality( + const FIXP_DBL *const *quotaMatrix, + const INT noEstPerFrame, + const INT startIndex, + const FIXP_DBL *const *Energies, + const UCHAR startBand, + const INT stopBand, + const INT numberCols + ) +{ + UCHAR b, e, k; + INT no_enMaxBand[SBR_MAX_ENERGY_VALUES] = { -1, -1, -1, -1, -1 }; + FIXP_DBL energyMax[SBR_MAX_ENERGY_VALUES] = { FL2FXCONST_DBL(0.0f), FL2FXCONST_DBL(0.0f), FL2FXCONST_DBL(0.0f), FL2FXCONST_DBL(0.0f), FL2FXCONST_DBL(0.0f) }; + FIXP_DBL energyMaxMin = MAXVAL_DBL; /* min. energy in energyMax array */ + UCHAR posEnergyMaxMin = 0; /* min. energy in energyMax array position */ + FIXP_DBL tonalityBand[SBR_MAX_ENERGY_VALUES] = { FL2FXCONST_DBL(0.0f), FL2FXCONST_DBL(0.0f), FL2FXCONST_DBL(0.0f), FL2FXCONST_DBL(0.0f), FL2FXCONST_DBL(0.0f) }; + FIXP_DBL globalTonality = FL2FXCONST_DBL(0.0f); + FIXP_DBL energyBand[QMF_CHANNELS]; + INT maxNEnergyValues; /* max. number of max. energy values */ + + /*** Sum up energies for each band ***/ + FDK_ASSERT(numberCols==15||numberCols==16); + /* numberCols is always 15 or 16 for ELD. In case of 16 bands, the + energyBands are initialized with the [15]th column. + The rest of the column energies are added in the next step. 
*/ + if (numberCols==15) { + for (b=startBand; b>4; + } + } + + for (k=0; k<15; k++) { + for (b=startBand; b>4; + } + } + + /*** Determine 5 highest band-energies ***/ + maxNEnergyValues = fMin(SBR_MAX_ENERGY_VALUES, stopBand-startBand); + + /* Get min. value in energyMax array */ + energyMaxMin = energyMax[0] = energyBand[startBand]; + no_enMaxBand[0] = startBand; + posEnergyMaxMin = 0; + for (k=1; k energyMax[k]) { + energyMaxMin = energyMax[k]; + posEnergyMaxMin = k; + } + } + + for (b=startBand+maxNEnergyValues; b energyMaxMin) { + energyMax[posEnergyMaxMin] = energyBand[b]; + no_enMaxBand[posEnergyMaxMin] = b; + + /* Again, get min. value in energyMax array */ + energyMaxMin = energyMax[0]; + posEnergyMaxMin = 0; + for (k=1; k energyMax[k]) { + energyMaxMin = energyMax[k]; + posEnergyMaxMin = k; + } + } + } + } + /*** End determine 5 highest band-energies ***/ + + /* Get tonality values for 5 highest energies */ + for (e=0; e> 1; + } + globalTonality += tonalityBand[e] >> 2; /* headroom of 2+1 (max. 5 additions) */ + } + + return globalTonality; +} + /***************************************************************************/ /*! 
@@ -919,10 +1027,42 @@ FDKsbrEnc_extractSbrEnvelope1 ( hEnvChan->qmfScale); + if(h_con->sbrSyntaxFlags & SBR_SYNTAX_LOW_DELAY) { + FIXP_DBL tonality = FDKsbrEnc_GetTonality ( + hEnvChan->TonCorr.quotaMatrix, + hEnvChan->TonCorr.numberOfEstimatesPerFrame, + hEnvChan->TonCorr.startIndexMatrix, + sbrExtrEnv->YBuffer + sbrExtrEnv->YBufferWriteOffset, + h_con->freqBandTable[HI][0]+1, + h_con->noQmfBands, + sbrExtrEnv->no_cols + ); + + hEnvChan->encEnvData.ton_HF[1] = hEnvChan->encEnvData.ton_HF[0]; + hEnvChan->encEnvData.ton_HF[0] = tonality; + + /* tonality is scaled by 2^19/0.524288f (fract part of RELAXATION) */ + hEnvChan->encEnvData.global_tonality = (hEnvChan->encEnvData.ton_HF[0]>>1) + (hEnvChan->encEnvData.ton_HF[1]>>1); + } + + /* Transient detection COEFF Transform OK */ + if(h_con->sbrSyntaxFlags & SBR_SYNTAX_LOW_DELAY) + { + FDKsbrEnc_fastTransientDetect( + &hEnvChan->sbrFastTransientDetector, + sbrExtrEnv->YBuffer, + sbrExtrEnv->YBufferScale, + sbrExtrEnv->YBufferWriteOffset, + eData->transient_info + ); + + } + else + { FDKsbrEnc_transientDetect(&hEnvChan->sbrTransientDetector, sbrExtrEnv->YBuffer, sbrExtrEnv->YBufferScale, @@ -931,6 +1071,7 @@ FDKsbrEnc_extractSbrEnvelope1 ( sbrExtrEnv->YBufferSzShift, sbrExtrEnv->time_step, hEnvChan->SbrEnvFrame.frameMiddleSlot); + } @@ -951,7 +1092,8 @@ FDKsbrEnc_extractSbrEnvelope1 ( sbrExtrEnv->YBufferSzShift, h_con->nSfb[1], sbrExtrEnv->time_step, - sbrExtrEnv->no_cols); + sbrExtrEnv->no_cols, + &hEnvChan->encEnvData.global_tonality); } @@ -1128,12 +1270,26 @@ FDKsbrEnc_extractSbrEnvelope2 ( && ( ed->nEnvelopes == 1 ) ) { - if (hEnvChan->encEnvData.ldGrid) - hEnvChan->encEnvData.currentAmpResFF = (AMP_RES)h_con->initAmpResFF; - else + if (h_con->sbrSyntaxFlags & SBR_SYNTAX_LOW_DELAY) + { + /* Note: global_tonaliy_float_value == ((float)hEnvChan->encEnvData.global_tonality/((INT64)(1)<<(31-(19+2)))/0.524288*(2.0/3.0))); + threshold_float_value == 
((float)h_con->thresholdAmpResFF_m/((INT64)(1)<<(31-(h_con->thresholdAmpResFF_e)))/0.524288*(2.0/3.0))); */ + /* decision of SBR_AMP_RES */ + if (fIsLessThan( /* global_tonality > threshold ? */ + h_con->thresholdAmpResFF_m, h_con->thresholdAmpResFF_e, + hEnvChan->encEnvData.global_tonality, RELAXATION_SHIFT+2 ) + ) + { + hEnvChan->encEnvData.currentAmpResFF = SBR_AMP_RES_1_5; + } + else { + hEnvChan->encEnvData.currentAmpResFF = SBR_AMP_RES_3_0; + } + } else { hEnvChan->encEnvData.currentAmpResFF = SBR_AMP_RES_1_5; + } - if ( hEnvChan->encEnvData.currentAmpResFF != hEnvChan->encEnvData.init_sbr_amp_res) { + if ( hEnvChan->encEnvData.currentAmpResFF != hEnvChan->encEnvData.init_sbr_amp_res) { FDKsbrEnc_InitSbrHuffmanTables(&hEnvChan->encEnvData, &hEnvChan->sbrCodeEnvelope, @@ -1172,7 +1328,12 @@ FDKsbrEnc_extractSbrEnvelope2 ( } /* Low energy in low band fix */ - if ( hEnvChan->sbrTransientDetector.prevLowBandEnergy < hEnvChan->sbrTransientDetector.prevHighBandEnergy && hEnvChan->sbrTransientDetector.prevHighBandEnergy > FL2FX_DBL(0.03)) + if ( hEnvChan->sbrTransientDetector.prevLowBandEnergy < hEnvChan->sbrTransientDetector.prevHighBandEnergy + && hEnvChan->sbrTransientDetector.prevHighBandEnergy > FL2FX_DBL(0.03) + /* The fix needs the non-fast transient detector running. + It sets prevLowBandEnergy and prevHighBandEnergy. */ + && !(h_con->sbrSyntaxFlags & SBR_SYNTAX_LOW_DELAY) + ) { int i; diff --git a/libSBRenc/src/env_est.h b/libSBRenc/src/env_est.h index 5e632a4..e17a974 100644 --- a/libSBRenc/src/env_est.h +++ b/libSBRenc/src/env_est.h @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. +© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. 
INTRODUCTION @@ -127,6 +127,7 @@ typedef SBR_EXTRACT_ENVELOPE *HANDLE_SBR_EXTRACT_ENVELOPE; struct ENV_CHANNEL { + FAST_TRAN_DETECTOR sbrFastTransientDetector; SBR_TRANSIENT_DETECTOR sbrTransientDetector; SBR_CODE_ENVELOPE sbrCodeEnvelope; SBR_CODE_ENVELOPE sbrCodeNoiseFloor; diff --git a/libSBRenc/src/fram_gen.cpp b/libSBRenc/src/fram_gen.cpp index 86c3c81..9a35111 100644 --- a/libSBRenc/src/fram_gen.cpp +++ b/libSBRenc/src/fram_gen.cpp @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. +© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. INTRODUCTION @@ -266,7 +266,7 @@ static void calcCtrlSignal (HANDLE_SBR_GRID hSbrGrid, FRAME_CLASS frameClass, static void ctrlSignal2FrameInfo (HANDLE_SBR_GRID hSbrGrid, HANDLE_SBR_FRAME_INFO hFrameInfo, - INT freq_res_fixfix); + FREQ_RES *freq_res_fixfix); /* table for 8 time slot index */ @@ -341,8 +341,9 @@ static const FREQ_RES freqRes_table_16[16] = { static void generateFixFixOnly ( HANDLE_SBR_FRAME_INFO hSbrFrameInfo, HANDLE_SBR_GRID hSbrGrid, int tranPosInternal, - int numberTimeSlots - ); + int numberTimeSlots, + UCHAR fResTransIsLow + ); /*! 
@@ -402,11 +403,10 @@ FDKsbrEnc_frameInfoGenerator (HANDLE_SBR_ENVELOPE_FRAME hSbrEnvFrame, const int *v_tuningFreq = v_tuning + 3; hSbrEnvFrame->v_tuningSegm = v_tuningSegm; - INT freq_res_fixfix = hSbrEnvFrame->freq_res_fixfix; if (ldGrid) { /* in case there was a transient at the very end of the previous frame, start with a transient envelope */ - if(v_transient_info_pre[1] && (numberTimeSlots - v_transient_info_pre[0] < minFrameTranDistance)){ + if ( !tranFlag && v_transient_info_pre[1] && (numberTimeSlots - v_transient_info_pre[0] < minFrameTranDistance) ){ tranFlag = 1; tranPos = 0; } @@ -529,7 +529,8 @@ FDKsbrEnc_frameInfoGenerator (HANDLE_SBR_ENVELOPE_FRAME hSbrEnvFrame, generateFixFixOnly ( &(hSbrEnvFrame->SbrFrameInfo), &(hSbrEnvFrame->SbrGrid), tranPosInternal, - numberTimeSlots + numberTimeSlots, + hSbrEnvFrame->fResTransIsLow ); return &(hSbrEnvFrame->SbrFrameInfo); @@ -677,7 +678,7 @@ FDKsbrEnc_frameInfoGenerator (HANDLE_SBR_ENVELOPE_FRAME hSbrEnvFrame, ---------------------------------------------------------------------------*/ ctrlSignal2FrameInfo (&hSbrEnvFrame->SbrGrid, &hSbrEnvFrame->SbrFrameInfo, - freq_res_fixfix); + hSbrEnvFrame->freq_res_fixfix); return &hSbrEnvFrame->SbrFrameInfo; } @@ -692,7 +693,8 @@ FDKsbrEnc_frameInfoGenerator (HANDLE_SBR_ENVELOPE_FRAME hSbrEnvFrame, static void generateFixFixOnly ( HANDLE_SBR_FRAME_INFO hSbrFrameInfo, HANDLE_SBR_GRID hSbrGrid, int tranPosInternal, - int numberTimeSlots + int numberTimeSlots, + UCHAR fResTransIsLow ) { int nEnv, i, k=0, tranIdx; @@ -727,8 +729,12 @@ static void generateFixFixOnly ( HANDLE_SBR_FRAME_INFO hSbrFrameInfo, /* adjust segment-frequency-resolution according to the segment-length */ for (i=0; iborders[i+1] - hSbrFrameInfo->borders[i]; - hSbrFrameInfo->freqRes[i] = freqResTable[k]; - hSbrGrid->v_f[i] = freqResTable[k]; + if (!fResTransIsLow) + hSbrFrameInfo->freqRes[i] = freqResTable[k]; + else + hSbrFrameInfo->freqRes[i] = FREQ_RES_LOW; + + hSbrGrid->v_f[i] = 
hSbrFrameInfo->freqRes[i]; } hSbrFrameInfo->nEnvelopes = nEnv; @@ -765,15 +771,16 @@ static void generateFixFixOnly ( HANDLE_SBR_FRAME_INFO hSbrFrameInfo, *******************************************************************************/ void -FDKsbrEnc_initFrameInfoGenerator (HANDLE_SBR_ENVELOPE_FRAME hSbrEnvFrame, - INT allowSpread, - INT numEnvStatic, - INT staticFraming, - INT timeSlots, - INT freq_res_fixfix - ,int ldGrid - ) - +FDKsbrEnc_initFrameInfoGenerator ( + HANDLE_SBR_ENVELOPE_FRAME hSbrEnvFrame, + INT allowSpread, + INT numEnvStatic, + INT staticFraming, + INT timeSlots, + const FREQ_RES* freq_res_fixfix + ,UCHAR fResTransIsLow, + INT ldGrid + ) { /* FH 00-06-26 */ FDKmemclear(hSbrEnvFrame,sizeof(SBR_ENVELOPE_FRAME )); @@ -786,7 +793,9 @@ FDKsbrEnc_initFrameInfoGenerator (HANDLE_SBR_ENVELOPE_FRAME hSbrEnvFrame, hSbrEnvFrame->allowSpread = allowSpread; hSbrEnvFrame->numEnvStatic = numEnvStatic; hSbrEnvFrame->staticFraming = staticFraming; - hSbrEnvFrame->freq_res_fixfix = freq_res_fixfix; + hSbrEnvFrame->freq_res_fixfix[0] = freq_res_fixfix[0]; + hSbrEnvFrame->freq_res_fixfix[1] = freq_res_fixfix[1]; + hSbrEnvFrame->fResTransIsLow = fResTransIsLow; hSbrEnvFrame->length_v_bord = 0; hSbrEnvFrame->length_v_bordFollow = 0; @@ -804,6 +813,7 @@ FDKsbrEnc_initFrameInfoGenerator (HANDLE_SBR_ENVELOPE_FRAME hSbrEnvFrame, hSbrEnvFrame->dmin = 2; hSbrEnvFrame->dmax = 16; hSbrEnvFrame->frameMiddleSlot = FRAME_MIDDLE_SLOT_512LD; + hSbrEnvFrame->SbrGrid.bufferFrameStart = 0; } else switch(timeSlots){ case NUMBER_TIME_SLOTS_1920: @@ -1862,19 +1872,28 @@ createDefFrameInfo(HANDLE_SBR_FRAME_INFO hSbrFrameInfo, INT nEnv, INT nTimeSlots Functionname: ctrlSignal2FrameInfo ******************************************************************************* - Description: Calculates frame_info struct from control signal. + Description: Convert "clear-text" sbr_grid() to "frame info" used by the + envelope and noise floor estimators. 
+ This is basically (except for "low level" calculations) the + bitstream decoder defined in the MPEG-4 standard, sub clause + 4.6.18.3.3, Time / Frequency Grid. See inline comments for + explanation of the shorten and noise border algorithms. Arguments: hSbrGrid - source hSbrFrameInfo - destination + freq_res_fixfix - frequency resolution for FIXFIX frames Return: void; hSbrFrameInfo contains the updated FRAME_INFO struct *******************************************************************************/ static void -ctrlSignal2FrameInfo (HANDLE_SBR_GRID hSbrGrid, - HANDLE_SBR_FRAME_INFO hSbrFrameInfo, - INT freq_res_fixfix) +ctrlSignal2FrameInfo ( + HANDLE_SBR_GRID hSbrGrid, /* input : the grid handle */ + HANDLE_SBR_FRAME_INFO hSbrFrameInfo, /* output: the frame info handle */ + FREQ_RES *freq_res_fixfix /* in/out: frequency resolution for FIXFIX frames */ + ) { + INT frameSplit = 0; INT nEnv = 0, border = 0, i, k, p /*?*/; INT *v_r = hSbrGrid->bs_rel_bord; INT *v_f = hSbrGrid->v_f; @@ -1887,17 +1906,10 @@ ctrlSignal2FrameInfo (HANDLE_SBR_GRID hSbrGrid, case FIXFIX: createDefFrameInfo(hSbrFrameInfo, hSbrGrid->bs_num_env, numberTimeSlots); - /* At this point all frequency resolutions are set to FREQ_RES_HIGH, so - * only if freq_res_fixfix is set to FREQ_RES_LOW, they all have to be - * changed. 
- * snd */ - if (freq_res_fixfix == FREQ_RES_LOW) { - for (i = 0; i < hSbrFrameInfo->nEnvelopes; i++) { - hSbrFrameInfo->freqRes[i] = FREQ_RES_LOW; - } + frameSplit = (hSbrFrameInfo->nEnvelopes > 1); + for (i = 0; i < hSbrFrameInfo->nEnvelopes; i++) { + hSbrGrid->v_f[i] = hSbrFrameInfo->freqRes[i] = freq_res_fixfix[frameSplit]; } - /* ELD: store current frequency resolution */ - hSbrGrid->v_f[0] = hSbrFrameInfo->freqRes[0]; break; case FIXVAR: diff --git a/libSBRenc/src/fram_gen.h b/libSBRenc/src/fram_gen.h index 3769266..00473d4 100644 --- a/libSBRenc/src/fram_gen.h +++ b/libSBRenc/src/fram_gen.h @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. +© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. INTRODUCTION @@ -89,6 +89,7 @@ amm-info@iis.fraunhofer.de #define _FRAM_GEN_H #include "sbr_def.h" /* for MAX_ENVELOPES and MAX_NOISE_ENVELOPES in struct FRAME_INFO and CODEC_TYPE */ +#include "sbr_encoder.h" /* for FREQ_RES */ #define MAX_ENVELOPES_VARVAR MAX_ENVELOPES /*!< worst case number of envelopes in a VARVAR frame */ #define MAX_ENVELOPES_FIXVAR_VARFIX 4 /*!< worst case number of envelopes in VARFIX and FIXVAR frames */ @@ -114,7 +115,7 @@ typedef enum { #define NUMBER_TIME_SLOTS_1920 15 #define LD_PRETRAN_OFF 3 -#define FRAME_MIDDLE_SLOT_512LD 0 +#define FRAME_MIDDLE_SLOT_512LD 4 #define NUMBER_TIME_SLOTS_512LD 8 #define TRANSIENT_OFFSET_LD 0 @@ -248,9 +249,10 @@ typedef struct INT frameMiddleSlot; /*!< transient detector offset in SBR timeslots */ /* basic tuning parameters */ - INT staticFraming; /*!< 1: run static framing in time, i.e. 
exclusive use of bs_frame_class = FIXFIX */ - INT numEnvStatic; /*!< number of envelopes per frame for static framing */ - INT freq_res_fixfix; /*!< envelope frequency resolution to use for bs_frame_class = FIXFIX */ + INT staticFraming; /*!< 1: run static framing in time, i.e. exclusive use of bs_frame_class = FIXFIX */ + INT numEnvStatic; /*!< number of envelopes per frame for static framing */ + FREQ_RES freq_res_fixfix[2]; /*!< envelope frequency resolution to use for bs_frame_class = FIXFIX; single env and split */ + UCHAR fResTransIsLow; /*!< frequency resolution for transient frames - always low (0) or according to table (1) */ /* expert tuning parameters */ const int *v_tuningSegm; /*!< segment lengths to use around transient */ @@ -286,14 +288,16 @@ typedef SBR_ENVELOPE_FRAME *HANDLE_SBR_ENVELOPE_FRAME; void -FDKsbrEnc_initFrameInfoGenerator (HANDLE_SBR_ENVELOPE_FRAME hSbrEnvFrame, - INT allowSpread, - INT numEnvStatic, - INT staticFraming, - INT timeSlots, - INT freq_res_fixfix - ,int ldGrid - ); +FDKsbrEnc_initFrameInfoGenerator ( + HANDLE_SBR_ENVELOPE_FRAME hSbrEnvFrame, + INT allowSpread, + INT numEnvStatic, + INT staticFraming, + INT timeSlots, + const FREQ_RES* freq_res_fixfix + ,UCHAR fResTransIsLow, + INT ldGrid + ); HANDLE_SBR_FRAME_INFO FDKsbrEnc_frameInfoGenerator (HANDLE_SBR_ENVELOPE_FRAME hSbrEnvFrame, diff --git a/libSBRenc/src/mh_det.cpp b/libSBRenc/src/mh_det.cpp index 73d1b8b..bc80a15 100644 --- a/libSBRenc/src/mh_det.cpp +++ b/libSBRenc/src/mh_det.cpp @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. +© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. 
INTRODUCTION @@ -663,10 +663,27 @@ static void transientCleanUp(FIXP_DBL **quotaBuffer, } -/**************************************************************************/ +/*****************************************************************************/ /*! - \brief Do detection for one tonality estimate. + \brief Detection for one tonality estimate. + + This is the actual missing harmonics detection, using information from the + previous detection. + + If a missing harmonic was detected (in a previous frame) due to too high + tonality differences, but there was not enough tonality difference in the + current frame, the detection algorithm still continues to trace the strongest + tone in the scalefactor band (assuming that this is the tone that is going to + be replaced in the decoder). This is done to avoid abrupt endings of sines + fading out (e.g. in the glockenspiel). + + The function also tries to estimate where one sine is going to be replaced + with multiple sines (due to the patching). This is done by comparing the + tonality flatness measure of the original and the SBR signal. + The function also tries to estimate (for the scalefactor bands only + containing one qmf subband) when a strong tone in the original will be + replaced by a strong tone in the adjacent QMF subband. \return none. @@ -694,10 +711,10 @@ static void detection(FIXP_DBL *quotaBuffer, for(i=0;i thresTemp){ pHarmVec[i] = 1; @@ -813,8 +830,11 @@ static void detectionWithPrediction(FIXP_DBL **quotaBuffer, if(newDetectionAllowed){ + /* Since we don't want to use the transient region for detection (since the tonality values + tend to be a bit unreliable for this region) the guide-values are copied to the current + starting point. 
*/ if(totNoEst > 1){ - start = detectionStart; + start = detectionStart+1; if (start != 0) { FDKmemcpy(guideVectors[start].guideVectorDiff,guideVectors[0].guideVectorDiff,nSfb*sizeof(FIXP_DBL)); diff --git a/libSBRenc/src/nf_est.cpp b/libSBRenc/src/nf_est.cpp index 385a043..a4c5574 100644 --- a/libSBRenc/src/nf_est.cpp +++ b/libSBRenc/src/nf_est.cpp @@ -298,7 +298,7 @@ FDKsbrEnc_sbrNoiseFloorEstimateQmf(HANDLE_SBR_NOISE_FLOOR_ESTIMATE h_sbrNoiseFlo SCHAR *indexVector, /*!< Index vector to obtain the patched data. */ INT missingHarmonicsFlag, /*!< Flag indicating if a strong tonal component will be missing. */ INT startIndex, /*!< Start index. */ - int numberOfEstimatesPerFrame, /*!< The number of tonality estimates per frame. */ + UINT numberOfEstimatesPerFrame, /*!< The number of tonality estimates per frame. */ int transientFrame, /*!< A flag indicating if a transient is present. */ INVF_MODE* pInvFiltLevels, /*!< Pointer to the vector holding the inverse filtering levels. */ UINT sbrSyntaxFlags diff --git a/libSBRenc/src/nf_est.h b/libSBRenc/src/nf_est.h index d407274..f26f74f 100644 --- a/libSBRenc/src/nf_est.h +++ b/libSBRenc/src/nf_est.h @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. +© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. INTRODUCTION @@ -119,7 +119,7 @@ FDKsbrEnc_sbrNoiseFloorEstimateQmf(HANDLE_SBR_NOISE_FLOOR_ESTIMATE h_sbrNoiseFlo SCHAR* indexVector, /*!< Index vector to obtain the patched data. */ INT missingHarmonicsFlag, /*!< Flag indicating if a strong tonal component will be missing. */ INT startIndex, /*!< Start index. */ - int numberOfEstimatesPerFrame, /*!< The number of tonality estimates per frame. 
*/ + UINT numberOfEstimatesPerFrame, /*!< The number of tonality estimates per frame. */ INT transientFrame, /*!< A flag indicating if a transient is present. */ INVF_MODE* pInvFiltLevels, /*!< Pointer to the vector holding the inverse filtering levels. */ UINT sbrSyntaxFlags diff --git a/libSBRenc/src/sbr_def.h b/libSBRenc/src/sbr_def.h index 8b7cfc6..85ac587 100644 --- a/libSBRenc/src/sbr_def.h +++ b/libSBRenc/src/sbr_def.h @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. +© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. INTRODUCTION @@ -122,6 +122,8 @@ amm-info@iis.fraunhofer.de /************ Definitions ***************/ #define SBR_COMP_MODE_DELTA 0 #define SBR_COMP_MODE_CTS 1 +#define SBR_MAX_ENERGY_VALUES 5 +#define SBR_GLOBAL_TONALITY_VALUES 2 #define MAX_NUM_CHANNELS 2 @@ -232,6 +234,8 @@ amm-info@iis.fraunhofer.de #define FREQ 0 #define TIME 1 +/* qmf data scaling */ +#define QMF_SCALE_OFFSET 7 /* huffman tables */ #define CODE_BOOK_SCF_LAV00 60 @@ -268,12 +272,4 @@ typedef enum } INVF_MODE; -typedef enum -{ - FREQ_RES_LOW = 0, - FREQ_RES_HIGH -} -FREQ_RES; - - #endif diff --git a/libSBRenc/src/sbr_encoder.cpp b/libSBRenc/src/sbr_encoder.cpp index 9bb98c8..90b19cf 100644 --- a/libSBRenc/src/sbr_encoder.cpp +++ b/libSBRenc/src/sbr_encoder.cpp @@ -103,7 +103,7 @@ amm-info@iis.fraunhofer.de #define SBRENCODER_LIB_VL0 3 #define SBRENCODER_LIB_VL1 3 -#define SBRENCODER_LIB_VL2 8 +#define SBRENCODER_LIB_VL2 12 @@ -412,6 +412,23 @@ FDKsbrEnc_AdjustSbrSettings (const sbrConfigurationPtr config, /*! 
output, modif config->codecSettings.transFac = transFac; config->codecSettings.standardBitrate = standardBitrate; + if (bitRate < 28000) { + config->threshold_AmpRes_FF_m = (FIXP_DBL)MAXVAL_DBL; + config->threshold_AmpRes_FF_e = 7; + } + else if (bitRate >= 28000 && bitRate <= 48000) { + /* The float threshold is 75 + 0.524288f is fractional part of RELAXATION, the quotaMatrix and therefore tonality are scaled by this + 2/3 is because the original implementation divides the tonality values by 3, here it's divided by 2 + 128 compensates the necessary shiftfactor of 7 */ + config->threshold_AmpRes_FF_m = FL2FXCONST_DBL(75.0f*0.524288f/(2.0f/3.0f)/128.0f); + config->threshold_AmpRes_FF_e = 7; + } + else if (bitRate > 48000) { + config->threshold_AmpRes_FF_m = FL2FXCONST_DBL(0); + config->threshold_AmpRes_FF_e = 0; + } + if (bitRate==0) { /* map vbr quality to bitrate */ if (vbrMode < 30) @@ -467,6 +484,57 @@ FDKsbrEnc_AdjustSbrSettings (const sbrConfigurationPtr config, /*! output, modif config->stereoMode = sbrTuningTable[idx].stereoMode ; config->freqScale = sbrTuningTable[idx].freqScale ; + if (numChannels == 1) { + /* stereo case */ + switch (core) { + case AOT_AAC_LC: + if (bitRate <= (useSpeechConfig?24000U:20000U)) { + config->freq_res_fixfix[0] = FREQ_RES_LOW; /* set low frequency resolution for non-split frames */ + config->freq_res_fixfix[1] = FREQ_RES_LOW; /* set low frequency resolution for split frames */ + } + break; + case AOT_ER_AAC_ELD: + if (bitRate < 36000) + config->freq_res_fixfix[1] = FREQ_RES_LOW; /* set low frequency resolution for split frames */ + if (bitRate < 26000) { + config->freq_res_fixfix[0] = FREQ_RES_LOW; /* set low frequency resolution for non-split frames */ + config->fResTransIsLow = 1; /* for transient frames, set low frequency resolution */ + } + break; + default: + break; + } + } + else { + /* stereo case */ + switch (core) { + case AOT_AAC_LC: + if (bitRate <= 28000) { + config->freq_res_fixfix[0] = FREQ_RES_LOW; /* set low 
frequency resolution for non-split frames */ + config->freq_res_fixfix[1] = FREQ_RES_LOW; /* set low frequency resolution for split frames */ + } + break; + case AOT_ER_AAC_ELD: + if (bitRate < 72000) { + config->freq_res_fixfix[1] = FREQ_RES_LOW; /* set low frequency resolution for split frames */ + } + if (bitRate < 52000) { + config->freq_res_fixfix[0] = FREQ_RES_LOW; /* set low frequency resolution for non-split frames */ + config->fResTransIsLow = 1; /* for transient frames, set low frequency resolution */ + } + break; + default: + break; + } + if (bitRate <= 28000) { + /* + additionally restrict frequency resolution in FIXFIX frames + to further reduce SBR payload size */ + config->freq_res_fixfix[0] = FREQ_RES_LOW; + config->freq_res_fixfix[1] = FREQ_RES_LOW; + } + } + /* adjust usage of parametric coding dependent on bitrate and speech config flag */ if (useSpeechConfig) config->parametricCoding = 0; @@ -515,6 +583,7 @@ static UINT FDKsbrEnc_InitializeSbrDefaults (sbrConfigurationPtr config, INT downSampleFactor, UINT codecGranuleLen + ,const INT isLowDelay ) { if ( (downSampleFactor < 1 || downSampleFactor > 2) || @@ -525,7 +594,11 @@ FDKsbrEnc_InitializeSbrDefaults (sbrConfigurationPtr config, config->useWaveCoding = 0; config->crcSbr = 0; config->dynBwSupported = 1; - config->tran_thr = 13000; + if (isLowDelay) + config->tran_thr = 6000; + else + config->tran_thr = 13000; + config->parametricCoding = 1; config->sbrFrameSize = codecGranuleLen * downSampleFactor; @@ -558,7 +631,9 @@ FDKsbrEnc_InitializeSbrDefaults (sbrConfigurationPtr config, config->noiseFloorOffset = 0; config->startFreq = 5; /* 5.9 respectively 6.0 kHz at fs = 44.1/48 kHz */ config->stopFreq = 9; /* 16.2 respectively 16.8 kHz at fs = 44.1/48 kHz */ - + config->freq_res_fixfix[0] = FREQ_RES_HIGH; /* non-split case */ + config->freq_res_fixfix[1] = FREQ_RES_HIGH; /* split case */ + config->fResTransIsLow = 0; /* for transient frames, set variable frequency resolution according to 
freqResTable */ /* header_extra_1 */ config->freqScale = SBR_FREQ_SCALE_DEFAULT; @@ -1206,7 +1281,10 @@ initEnvChannel ( HANDLE_SBR_CONFIG_DATA sbrConfigData, FDK_ASSERT(params->e >= 0); - hEnv->encEnvData.freq_res_fixfix = 1; + hEnv->encEnvData.freq_res_fixfix[0] = params->freq_res_fixfix[0]; + hEnv->encEnvData.freq_res_fixfix[1] = params->freq_res_fixfix[1]; + hEnv->encEnvData.fResTransIsLow = params->fResTransIsLow; + hEnv->fLevelProtect = 0; hEnv->encEnvData.ldGrid = (sbrConfigData->sbrSyntaxFlags & SBR_SYNTAX_LOW_DELAY) ? 1 : 0; @@ -1348,11 +1426,29 @@ initEnvChannel ( HANDLE_SBR_CONFIG_DATA sbrConfigData, e, params->stat, timeSlots, - hEnv->encEnvData.freq_res_fixfix - ,hEnv->encEnvData.ldGrid + hEnv->encEnvData.freq_res_fixfix, + hEnv->encEnvData.fResTransIsLow, + hEnv->encEnvData.ldGrid ); + if(sbrConfigData->sbrSyntaxFlags & SBR_SYNTAX_LOW_DELAY) + { + INT bandwidth_qmf_slot = (sbrConfigData->sampleFreq>>1) / (sbrConfigData->noQmfBands); + if(FDKsbrEnc_InitSbrFastTransientDetector( + &hEnv->sbrFastTransientDetector, + sbrConfigData->noQmfSlots, + bandwidth_qmf_slot, + sbrConfigData->noQmfBands, + sbrConfigData->freqBandTable[0][0] + )) + return(1); + } + + /* The transient detector has to be initialized also if the fast transient + detector was active, because the values from the transient detector + structure are used. 
*/ if(FDKsbrEnc_InitSbrTransientDetector (&hEnv->sbrTransientDetector, + sbrConfigData->sbrSyntaxFlags, sbrConfigData->frameSize, sbrConfigData->sampleFreq, params, @@ -1658,6 +1754,8 @@ INT FDKsbrEnc_EnvInit ( /* other switches */ hSbrElement->sbrConfigData.useWaveCoding = params->useWaveCoding; hSbrElement->sbrConfigData.useParametricCoding = params->parametricCoding; + hSbrElement->sbrConfigData.thresholdAmpResFF_m = params->threshold_AmpRes_FF_m; + hSbrElement->sbrConfigData.thresholdAmpResFF_e = params->threshold_AmpRes_FF_e; /* init freq band table */ if(updateFreqBandTable(&hSbrElement->sbrConfigData, @@ -1999,7 +2097,8 @@ INT sbrEncoder_Init( */ if ( ! FDKsbrEnc_InitializeSbrDefaults ( &sbrConfig[el], *downSampleFactor, - coreFrameLength + coreFrameLength, + IS_LOWDELAY(aot) ) ) { error = 1; diff --git a/libSBRenc/src/sbr_rom.cpp b/libSBRenc/src/sbr_rom.cpp index a2b6527..7a51668 100644 --- a/libSBRenc/src/sbr_rom.cpp +++ b/libSBRenc/src/sbr_rom.cpp @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. +© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. 
INTRODUCTION @@ -684,6 +684,9 @@ const sbrTuningTable_t sbrTuningTable[] = /** AAC LOW DELAY SECTION **/ + /* 24 kHz dual rate - 12kHz singlerate is not allowed (deactivated in FDKsbrEnc_IsSbrSettingAvail()) */ + { CODEC_AACLD, 8000, 32000, 12000, 1, 1, 1, 0, 0, 1, 0, 6, SBR_MONO, 3 }, /* nominal: 8 kbit/s */ + /*** mono ***/ /* 16/32 kHz dual rate not yet tuned ->alb copied from non LD tables*/ { CODEC_AACLD, 16000, 18000, 16000, 1, 4, 5, 9, 7, 1, 0, 6, SBR_MONO, 3 }, /* nominal: 16 kbit/s wrr: tuned */ @@ -702,10 +705,10 @@ const sbrTuningTable_t sbrTuningTable[] = { CODEC_AACLD, 52000, 64001, 22050, 1, 13,11,11,10, 2, 0, 3, SBR_MONO, 1 }, /* nominal: 56 kbit/s */ /* 24/48 kHz dual rate */ - { CODEC_AACLD, 20000, 22000, 24000, 1, 4, 1, 8, 4, 2, 3, 6, SBR_MONO, 2 }, /* nominal: 20 kbit/s */ + { CODEC_AACLD, 20000, 22000, 24000, 1, 3, 4, 8, 8, 2, 0, 6, SBR_MONO, 2 }, /* nominal: 20 kbit/s */ { CODEC_AACLD, 22000, 28000, 24000, 1, 3, 8, 8, 7, 2, 0, 3, SBR_MONO, 2 }, /* nominal: 24 kbit/s */ { CODEC_AACLD, 28000, 36000, 24000, 1, 4, 8, 8, 7, 2, 0, 3, SBR_MONO, 2 }, /* nominal: 32 kbit/s */ - { CODEC_AACLD, 36000, 56000, 24000, 1, 8, 9, 9, 9, 2, 0, 3, SBR_MONO, 1 }, /* nominal: 40 kbit/s */ + { CODEC_AACLD, 36000, 56000, 24000, 1, 8, 9, 9, 8, 2, 0, 3, SBR_MONO, 1 }, /* nominal: 40 kbit/s */ { CODEC_AACLD, 56000, 64001, 24000, 1, 13,11,11,10, 2, 0, 3, SBR_MONO, 1 }, /* nominal: 64 kbit/s */ /* 32/64 kHz dual rate */ /* placebo settings */ /*jgr: new, copy from CODEC_AAC */ @@ -722,7 +725,7 @@ const sbrTuningTable_t sbrTuningTable[] = { CODEC_AACLD, 100000,160001, 44100, 1, 13,13,11,11, 2, 0, 3, SBR_MONO, 1 }, /* nominal: 128 */ /* 48/96 kHz dual rate */ /* 32 and 40kbps line tuned for dual-rate SBR */ - { CODEC_AACLD, 36000, 60000, 48000, 1, 8, 7, 6, 9, 2, 0, 3, SBR_MONO, 2 }, /* nominal: 40 */ + { CODEC_AACLD, 36000, 60000, 48000, 1, 4, 7, 4, 4, 2, 0, 3, SBR_MONO, 3 }, /* nominal: 40 */ { CODEC_AACLD, 60000, 72000, 48000, 1, 9, 9,10,10, 2, 0, 3, SBR_MONO, 1 }, /* 
nominal: 64 */ { CODEC_AACLD, 72000,100000, 48000, 1, 11,11,11,11, 2, 0, 3, SBR_MONO, 1 }, /* nominal: 80 */ { CODEC_AACLD, 100000,160001, 48000, 1, 13,13,11,11, 2, 0, 3, SBR_MONO, 1 }, /* nominal: 128 */ diff --git a/libSBRenc/src/ton_corr.cpp b/libSBRenc/src/ton_corr.cpp index 224da11..af5afba 100644 --- a/libSBRenc/src/ton_corr.cpp +++ b/libSBRenc/src/ton_corr.cpp @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. +© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. INTRODUCTION @@ -682,7 +682,7 @@ FDKsbrEnc_InitTonCorrParamExtr (INT frameSize, /*!< Current /* Reset the patching and allocate memory for the quota matrix. - Assing parameters for the LPC analysis. + Assuming parameters for the LPC analysis. 
*/ if (sbrCfg->sbrSyntaxFlags & SBR_SYNTAX_LOW_DELAY) { switch (timeSlots) { @@ -690,7 +690,7 @@ FDKsbrEnc_InitTonCorrParamExtr (INT frameSize, /*!< Current hTonCorr->lpcLength[0] = 8 - LPC_ORDER; hTonCorr->lpcLength[1] = 7 - LPC_ORDER; hTonCorr->numberOfEstimates = NO_OF_ESTIMATES_LD; - hTonCorr->numberOfEstimatesPerFrame = sbrCfg->noQmfSlots / 7; + hTonCorr->numberOfEstimatesPerFrame = 2; /* sbrCfg->noQmfSlots / 7 */ hTonCorr->frameStartIndexInvfEst = 0; hTonCorr->transientPosOffset = FRAME_MIDDLE_SLOT_512LD; break; @@ -698,7 +698,7 @@ FDKsbrEnc_InitTonCorrParamExtr (INT frameSize, /*!< Current hTonCorr->lpcLength[0] = 8 - LPC_ORDER; hTonCorr->lpcLength[1] = 8 - LPC_ORDER; hTonCorr->numberOfEstimates = NO_OF_ESTIMATES_LD; - hTonCorr->numberOfEstimatesPerFrame = sbrCfg->noQmfSlots / 8; + hTonCorr->numberOfEstimatesPerFrame = 2; /* sbrCfg->noQmfSlots / 8 */ hTonCorr->frameStartIndexInvfEst = 0; hTonCorr->transientPosOffset = FRAME_MIDDLE_SLOT_512LD; break; diff --git a/libSBRenc/src/ton_corr.h b/libSBRenc/src/ton_corr.h index 8c8425c..504ab03 100644 --- a/libSBRenc/src/ton_corr.h +++ b/libSBRenc/src/ton_corr.h @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. +© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. INTRODUCTION @@ -118,7 +118,7 @@ typedef struct INT bufferLength; /*!< Length of the r and i buffers. */ INT stepSize; /*!< Stride for the lpc estimate. 
*/ INT numberOfEstimates; /*!< The total number of estiamtes, available in the quotaMatrix.*/ - INT numberOfEstimatesPerFrame; /*!< The number of estimates per frame available in the quotaMatrix.*/ + UINT numberOfEstimatesPerFrame; /*!< The number of estimates per frame available in the quotaMatrix.*/ INT lpcLength[2]; /*!< Segment length used for second order LPC analysis.*/ INT nextSample; /*!< Where to start the LPC analysis of the current frame.*/ INT move; /*!< How many estimates to move in the quotaMatrix, when buffering. */ diff --git a/libSBRenc/src/tran_det.cpp b/libSBRenc/src/tran_det.cpp index 6c62b4c..33ea60e 100644 --- a/libSBRenc/src/tran_det.cpp +++ b/libSBRenc/src/tran_det.cpp @@ -89,7 +89,7 @@ amm-info@iis.fraunhofer.de #include "genericStds.h" -#define NORM_QMF_ENERGY 5.684341886080801486968994140625e-14 /* 2^-44 */ +#define NORM_QMF_ENERGY 9.31322574615479E-10 /* 2^-30 */ /* static FIXP_DBL ABS_THRES = fixMax( FL2FXCONST_DBL(1.28e5 * NORM_QMF_ENERGY), (FIXP_DBL)1) Minimum threshold for detecting changes */ #define ABS_THRES ((FIXP_DBL)16) @@ -106,22 +106,30 @@ amm-info@iis.fraunhofer.de \return calculated value *******************************************************************************/ +#define NRG_SHIFT 3 /* for energy summation */ + static FIXP_DBL spectralChange(FIXP_DBL Energies[NUMBER_TIME_SLOTS_2304][MAX_FREQ_COEFFS], INT *scaleEnergies, FIXP_DBL EnergyTotal, INT nSfb, INT start, INT border, - INT stop) + INT YBufferWriteOffset, + INT stop, + INT *result_e) { INT i,j; INT len1,len2; - FIXP_DBL delta,tmp0,tmp1,tmp2; - FIXP_DBL accu1,accu2,delta_sum,result; + SCHAR energies_e_diff[NUMBER_TIME_SLOTS_2304], energies_e, energyTotal_e=19, energies_e_add; + SCHAR prevEnergies_e_diff, newEnergies_e_diff; + FIXP_DBL tmp0,tmp1; + FIXP_DBL accu1,accu2,accu1_init,accu2_init; + FIXP_DBL delta, delta_sum; + INT accu_e, tmp_e; - FDK_ASSERT(scaleEnergies[0] >= 0); + delta_sum = FL2FXCONST_DBL(0.0f); + *result_e = 0; - /* equal for aac (would be not 
equal for mp3) */ len1 = border-start; len2 = stop-border; @@ -130,43 +138,91 @@ static FIXP_DBL spectralChange(FIXP_DBL Energies[NUMBER_TIME_SLOTS_2304][MAX_FRE pos_weight = FL2FXCONST_DBL(0.5f) - (len1*GetInvInt(len1+len2)); pos_weight = /*FL2FXCONST_DBL(1.0)*/ (FIXP_DBL)MAXVAL_DBL - (fMult(pos_weight, pos_weight)<<2); - delta_sum = FL2FXCONST_DBL(0.0f); + /*** Calc scaling for energies ***/ + FDK_ASSERT(scaleEnergies[0] >= 0); + FDK_ASSERT(scaleEnergies[1] >= 0); + + energies_e = 19 - FDKmin(scaleEnergies[0], scaleEnergies[1]); + + /* limit shift for energy accumulation, energies_e can be -10 min. */ + if (energies_e < -10) { + energies_e_add = -10 - energies_e; + energies_e = -10; + } else if (energies_e > 17) { + energies_e_add = energies_e - 17; + energies_e = 17; + } else { + energies_e_add = 0; + } + + /* compensate scaling differences between scaleEnergies[0] and scaleEnergies[1] */ + prevEnergies_e_diff = scaleEnergies[0] - FDKmin(scaleEnergies[0], scaleEnergies[1]) + energies_e_add + NRG_SHIFT; + newEnergies_e_diff = scaleEnergies[1] - FDKmin(scaleEnergies[0], scaleEnergies[1]) + energies_e_add + NRG_SHIFT; + + prevEnergies_e_diff = fMin(prevEnergies_e_diff, DFRACT_BITS-1); + newEnergies_e_diff = fMin(newEnergies_e_diff, DFRACT_BITS-1); + + for (i=start; i>NRG_SCALE; /* complex init for compare with original version */ - accu2 = ((FL2FXCONST_DBL((1.0e6*NORM_QMF_ENERGY*8.0/32))) << fixMin(scaleEnergies[0],25))>>NRG_SCALE; /* can be simplified in dsp implementation */ + + accu1 = accu1_init; + accu2 = accu2_init; + accu_e = energies_e+3; /* Sum up energies in first half */ for (i=start; i>NRG_SCALE); + accu1 += scaleValue(Energies[i][j], -energies_e_diff[i]); } /* Sum up energies in second half */ for (i=border; i>NRG_SCALE); + accu2 += scaleValue(Energies[i][j], -energies_e_diff[i]); } /* Energy change in current band */ - tmp0 = CalcLdData(accu2); - tmp1 = CalcLdData(accu1); - tmp2 = (tmp0 - tmp1 + CalcLdData(len1)-CalcLdData(len2)); - delta = 
fixp_abs(fMult(tmp2, FL2FXCONST_DBL(0.6931471806f))); + #define LN2 FL2FXCONST_DBL(0.6931471806f) /* ln(2) */ + tmp0 = fLog2(accu2, accu_e) - fLog2(accu1, accu_e); + tmp1 = fLog2((FIXP_DBL)len1, 31) - fLog2((FIXP_DBL)len2, 31); + delta = fMult(LN2, (tmp0 + tmp1)); + delta = (FIXP_DBL)FDKabs( delta ); /* Weighting with amplitude ratio of this band */ - result = (EnergyTotal == FL2FXCONST_DBL(0.0f)) - ? FL2FXCONST_DBL(0.f) - : FDKsbrEnc_LSI_divide_scale_fract( (accu1+accu2), - (EnergyTotal>>NRG_SCALE)+(FIXP_DBL)1, - (FIXP_DBL)MAXVAL_DBL >> fixMin(scaleEnergies[0],(DFRACT_BITS-1)) ); + accu_e++; + accu1>>=1; + accu2>>=1; + if (accu_e & 1) { + accu_e++; + accu1>>=1; + accu2>>=1; + } - delta_sum += (FIXP_DBL)(fMult(sqrtFixp(result), delta)); + delta_sum += fMult(sqrtFixp(accu1+accu2), delta); + *result_e = ((accu_e>>1) + LD_DATA_SHIFT); } + energyTotal_e+=1; /* for a defined square result exponent, the exponent has to be even */ + EnergyTotal<<=1; + delta_sum = fMult(delta_sum, invSqrtNorm2(EnergyTotal, &tmp_e)); + *result_e = *result_e + (tmp_e-(energyTotal_e>>1)); + return fMult(delta_sum, pos_weight); + } @@ -175,9 +231,12 @@ static FIXP_DBL spectralChange(FIXP_DBL Energies[NUMBER_TIME_SLOTS_2304][MAX_FRE ******************************************************************************* \brief Calculates total lowband energy - The return value nrgTotal is scaled by the factor (1/32.0) + The input values Energies[0] (low-band) are scaled by the factor + 2^(14-*scaleEnergies[0]) + The input values Energies[1] (high-band) are scaled by the factor + 2^(14-*scaleEnergies[1]) - \return total energy in the lowband + \return total energy in the lowband, scaled by the factor 2^19 *******************************************************************************/ static FIXP_DBL addLowbandEnergies(FIXP_DBL **Energies, int *scaleEnergies, @@ -194,6 +253,7 @@ static FIXP_DBL addLowbandEnergies(FIXP_DBL **Energies, int ts,k; /* Sum up lowband energy from one frame at offset tran_off */ 
+ /* freqBandTable[LORES] has MAX_FREQ_COEFFS/2 +1 coeefs max. */ for (ts=tran_offdiv2; ts> 6; @@ -201,12 +261,12 @@ static FIXP_DBL addLowbandEnergies(FIXP_DBL **Energies, } for (; ts>nrgSzShift); ts++) { for (k = 0; k < freqBandTable[0]; k++) { - accu2 += Energies[ts][k] >> 6; + accu2 += Energies[ts][k] >> 9; } } - nrgTotal = ( (accu1 >> fixMin(scaleEnergies[0],(DFRACT_BITS-1))) - + (accu2 >> fixMin(scaleEnergies[1],(DFRACT_BITS-1))) ) << (2); + nrgTotal = ( scaleValueSaturate(accu1, 1-scaleEnergies[0]) ) + + ( scaleValueSaturate(accu2, 4-scaleEnergies[1]) ); return(nrgTotal); } @@ -222,21 +282,23 @@ static FIXP_DBL addLowbandEnergies(FIXP_DBL **Energies, is 1 SBR-band. Therefore the data to be fed into the spectralChange function is reduced. - The values EnergiesM are scaled by the factor (1/32.0) and scaleEnergies[0] - The return value nrgTotal is scaled by the factor (1/32.0) + The values EnergiesM are scaled by the factor (2^19-scaleEnergies[0]) for + slots=YBufferWriteOffset. - \return total energy in the highband + \return total energy in the highband, scaled by factor 2^19 *******************************************************************************/ static FIXP_DBL addHighbandEnergies(FIXP_DBL **RESTRICT Energies, /*!< input */ INT *scaleEnergies, + INT YBufferWriteOffset, FIXP_DBL EnergiesM[NUMBER_TIME_SLOTS_2304][MAX_FREQ_COEFFS], /*!< Combined output */ UCHAR *RESTRICT freqBandTable, INT nSfb, INT sbrSlots, INT timeStep) { - INT i,j,k,slotIn,slotOut,scale; + INT i,j,k,slotIn,slotOut,scale[2]; INT li,ui; FIXP_DBL nrgTotal; FIXP_DBL accu = FL2FXCONST_DBL(0.0f); @@ -245,7 +307,7 @@ static FIXP_DBL addHighbandEnergies(FIXP_DBL **RESTRICT Energies, /*!< input */ combine QMF-bands to SBR-bands, combine Left and Right channel */ for (slotOut=0; slotOut (DFRACT_BITS-1) ) + if ((scaleEnergies[0]-scale[0]) > (DFRACT_BITS-1) || (scaleEnergies[1]-scale[0]) > (DFRACT_BITS-1)) nrgTotal = FL2FXCONST_DBL(0.0f); else { /* Now add all energies */ accu = 
FL2FXCONST_DBL(0.0f); - for (slotOut=0; slotOut> scale); + accu += (EnergiesM[slotOut][j] >> scale[0]); } } - nrgTotal = accu >> (scaleEnergies[0]-scale); + nrgTotal = accu >> (scaleEnergies[0]-scale[0]); + + for (slotOut=YBufferWriteOffset; slotOut> scale[0]); + } + } + nrgTotal = accu >> (scaleEnergies[1]-scale[1]); } return(nrgTotal); @@ -299,18 +371,23 @@ FDKsbrEnc_frameSplitter(FIXP_DBL **Energies, int YBufferSzShift, int nSfb, int timeStep, - int no_cols) + int no_cols, + FIXP_DBL* tonality) { if (tran_vector[1]==0) /* no transient was detected */ { FIXP_DBL delta; - FIXP_DBL EnergiesM[NUMBER_TIME_SLOTS_2304][MAX_FREQ_COEFFS]; + INT delta_e; + FIXP_DBL (*EnergiesM)[MAX_FREQ_COEFFS]; FIXP_DBL EnergyTotal,newLowbandEnergy,newHighbandEnergy; INT border; INT sbrSlots = fMultI(GetInvInt(timeStep),no_cols); + C_ALLOC_SCRATCH_START(_EnergiesM, FIXP_DBL, NUMBER_TIME_SLOTS_2304*MAX_FREQ_COEFFS) FDK_ASSERT( sbrSlots * timeStep == no_cols ); + EnergiesM = (FIXP_DBL(*)[MAX_FREQ_COEFFS])_EnergiesM; + /* Get Lowband-energy over a range of 2 frames (Look half a frame back and ahead). */ @@ -324,16 +401,13 @@ FDKsbrEnc_frameSplitter(FIXP_DBL **Energies, newHighbandEnergy = addHighbandEnergies(Energies, scaleEnergies, + YBufferWriteOffset, EnergiesM, freqBandTable, nSfb, sbrSlots, timeStep); - if ( h_sbrTransientDetector->frameShift != 0 ) { - if (tran_vector[1]==0) - tran_vector[0] = 0; - } else { /* prevLowBandEnergy: Corresponds to 1 frame, starting with half a frame look-behind newLowbandEnergy: Corresponds to 1 frame, starting in the middle of the current frame */ @@ -343,23 +417,39 @@ FDKsbrEnc_frameSplitter(FIXP_DBL **Energies, of a FIXFIX-frame with 2 envelopes. */ border = (sbrSlots+1) >> 1; + if ( (INT)EnergyTotal&0xffffffe0 && (scaleEnergies[0]<32 || scaleEnergies[1]<32) ) /* i.e. 
> 31 */ { delta = spectralChange(EnergiesM, scaleEnergies, EnergyTotal, nSfb, 0, border, - sbrSlots); + YBufferWriteOffset, + sbrSlots, + &delta_e + ); + } else { + delta = FL2FXCONST_DBL(0.0f); + delta_e = 0; + + /* set tonality to 0 when energy is very low, since the amplitude + resolution should then be low as well */ + *tonality = FL2FXCONST_DBL(0.0f); + } + - if (delta > (h_sbrTransientDetector->split_thr >> LD_DATA_SHIFT)) /* delta scaled by 1/64 */ + if ( fIsLessThan(h_sbrTransientDetector->split_thr_m, h_sbrTransientDetector->split_thr_e, delta, delta_e) ) { tran_vector[0] = 1; /* Set flag for splitting */ - else + } else { tran_vector[0] = 0; + } + } /* Update prevLowBandEnergy */ h_sbrTransientDetector->prevLowBandEnergy = newLowbandEnergy; h_sbrTransientDetector->prevHighBandEnergy = newHighbandEnergy; + C_ALLOC_SCRATCH_END(_EnergiesM, FIXP_DBL, NUMBER_TIME_SLOTS_2304*MAX_FREQ_COEFFS) } } @@ -636,6 +726,7 @@ FDKsbrEnc_transientDetect(HANDLE_SBR_TRANSIENT_DETECTOR h_sbrTran, int FDKsbrEnc_InitSbrTransientDetector(HANDLE_SBR_TRANSIENT_DETECTOR h_sbrTransientDetector, + UINT sbrSyntaxFlags, /* SBR syntax flags derived from AOT. 
*/ INT frameSize, INT sampleFreq, sbrConfigurationPtr params, @@ -649,8 +740,8 @@ FDKsbrEnc_InitSbrTransientDetector(HANDLE_SBR_TRANSIENT_DETECTOR h_sbrTransientD { INT totalBitrate = params->codecSettings.standardBitrate * params->codecSettings.nChannels; INT codecBitrate = params->codecSettings.bitRate; - FIXP_DBL bitrateFactor_fix, framedur_fix; - INT scale_0, scale_1; + FIXP_DBL bitrateFactor_m, framedur_fix; + INT bitrateFactor_e, tmp_e; FDKmemclear(h_sbrTransientDetector,sizeof(SBR_TRANSIENT_DETECTOR)); @@ -658,11 +749,12 @@ FDKsbrEnc_InitSbrTransientDetector(HANDLE_SBR_TRANSIENT_DETECTOR h_sbrTransientD h_sbrTransientDetector->tran_off = tran_off; if(codecBitrate) { - bitrateFactor_fix = fDivNorm((FIXP_DBL)totalBitrate, (FIXP_DBL)(codecBitrate<<2),&scale_0); + bitrateFactor_m = fDivNorm((FIXP_DBL)totalBitrate, (FIXP_DBL)(codecBitrate<<2),&bitrateFactor_e); + bitrateFactor_e += 2; } else { - bitrateFactor_fix = FL2FXCONST_DBL(1.0/4.0); - scale_0 = 0; + bitrateFactor_m = FL2FXCONST_DBL(1.0/4.0); + bitrateFactor_e = 2; } framedur_fix = fDivNorm(frameSize, sampleFreq); @@ -674,9 +766,13 @@ FDKsbrEnc_InitSbrTransientDetector(HANDLE_SBR_TRANSIENT_DETECTOR h_sbrTransientD FIXP_DBL tmp = framedur_fix - FL2FXCONST_DBL(0.010); tmp = fixMax(tmp, FL2FXCONST_DBL(0.0001)); - tmp = fDivNorm(FL2FXCONST_DBL(0.000075), fPow2(tmp), &scale_1); + tmp = fDivNorm(FL2FXCONST_DBL(0.000075), fPow2(tmp), &tmp_e); - scale_1 = (scale_1 + scale_0 + 2); + bitrateFactor_e = (tmp_e + bitrateFactor_e); + + if(sbrSyntaxFlags & SBR_SYNTAX_LOW_DELAY) { + bitrateFactor_e--; /* divide by 2 */ + } FDK_ASSERT(no_cols <= QMF_MAX_TIME_SLOTS); FDK_ASSERT(no_rows <= QMF_CHANNELS); @@ -684,7 +780,8 @@ FDKsbrEnc_InitSbrTransientDetector(HANDLE_SBR_TRANSIENT_DETECTOR h_sbrTransientD h_sbrTransientDetector->no_cols = no_cols; h_sbrTransientDetector->tran_thr = (FIXP_DBL)((params->tran_thr << (32-24-1)) / no_rows); h_sbrTransientDetector->tran_fc = tran_fc; - h_sbrTransientDetector->split_thr = 
scaleValueSaturate(fMult(tmp, bitrateFactor_fix), scale_1); + h_sbrTransientDetector->split_thr_m = fMult(tmp, bitrateFactor_m); + h_sbrTransientDetector->split_thr_e = bitrateFactor_e; h_sbrTransientDetector->no_rows = no_rows; h_sbrTransientDetector->mode = params->tran_det_mode; h_sbrTransientDetector->prevLowBandEnergy = FL2FXCONST_DBL(0.0f); @@ -692,3 +789,281 @@ FDKsbrEnc_InitSbrTransientDetector(HANDLE_SBR_TRANSIENT_DETECTOR h_sbrTransientD return (0); } + +#define ENERGY_SCALING_SIZE 32 + +INT FDKsbrEnc_InitSbrFastTransientDetector( + HANDLE_FAST_TRAN_DET h_sbrFastTransientDetector, + const INT time_slots_per_frame, + const INT bandwidth_qmf_slot, + const INT no_qmf_channels, + const INT sbr_qmf_1st_band + ) +{ + + int i, e; + int buff_size; + FIXP_DBL myExp; + FIXP_DBL myExpSlot; + + h_sbrFastTransientDetector->lookahead = TRAN_DET_LOOKAHEAD; + h_sbrFastTransientDetector->nTimeSlots = time_slots_per_frame; + + buff_size = h_sbrFastTransientDetector->nTimeSlots + h_sbrFastTransientDetector->lookahead; + + for(i=0; i< buff_size; i++) { + h_sbrFastTransientDetector->delta_energy[i] = FL2FXCONST_DBL(0.0f); + h_sbrFastTransientDetector->energy_timeSlots[i] = FL2FXCONST_DBL(0.0f); + h_sbrFastTransientDetector->lowpass_energy[i] = FL2FXCONST_DBL(0.0f); + h_sbrFastTransientDetector->transientCandidates[i] = 0; + } + + FDK_ASSERT(bandwidth_qmf_slot > 0.f); + h_sbrFastTransientDetector->stopBand = fMin(TRAN_DET_STOP_FREQ/bandwidth_qmf_slot, no_qmf_channels); + h_sbrFastTransientDetector->startBand = fMin(sbr_qmf_1st_band, h_sbrFastTransientDetector->stopBand - TRAN_DET_MIN_QMFBANDS); + + FDK_ASSERT(h_sbrFastTransientDetector->startBand < no_qmf_channels); + FDK_ASSERT(h_sbrFastTransientDetector->startBand < h_sbrFastTransientDetector->stopBand); + FDK_ASSERT(h_sbrFastTransientDetector->startBand > 1); + FDK_ASSERT(h_sbrFastTransientDetector->stopBand > 1); + + /* the energy weighting and adding up has a headroom of 6 Bits, + so up to 64 bands can be added without 
potential overflow. */ + FDK_ASSERT(h_sbrFastTransientDetector->stopBand - h_sbrFastTransientDetector->startBand <= 64); + + /* QMF_HP_dB_SLOPE_FIX says that we want a 20 dB per 16 kHz HP filter. + The following lines map this to the QMF bandwidth. */ + #define EXP_E 7 /* QMF_CHANNELS (=64) multiplications max, max. allowed sum is 0.5 */ + myExp = fMultNorm(QMF_HP_dBd_SLOPE_FIX, (FIXP_DBL)bandwidth_qmf_slot, &e); + myExp = scaleValueSaturate(myExp, e+0+DFRACT_BITS-1-EXP_E); + myExpSlot = myExp; + + for(i=0; idBf_m[i] = dBf_m; + h_sbrFastTransientDetector->dBf_e[i] = dBf_e; + + } + + /* Make sure that dBf is greater than 1.0 (because it should be a highpass) */ + /* ... */ + + return 0; +} + +void FDKsbrEnc_fastTransientDetect( + const HANDLE_FAST_TRAN_DET h_sbrFastTransientDetector, + const FIXP_DBL *const *Energies, + const int *const scaleEnergies, + const INT YBufferWriteOffset, + UCHAR *const tran_vector + ) +{ + int timeSlot, band; + + FIXP_DBL max_delta_energy; /* helper to store maximum energy ratio */ + int max_delta_energy_scale; /* helper to store scale of maximum energy ratio */ + int ind_max = 0; /* helper to store index of maximum energy ratio */ + int isTransientInFrame = 0; + + const int nTimeSlots = h_sbrFastTransientDetector->nTimeSlots; + const int lookahead = h_sbrFastTransientDetector->lookahead; + const int startBand = h_sbrFastTransientDetector->startBand; + const int stopBand = h_sbrFastTransientDetector->stopBand; + + int * transientCandidates = h_sbrFastTransientDetector->transientCandidates; + + FIXP_DBL * energy_timeSlots = h_sbrFastTransientDetector->energy_timeSlots; + int * energy_timeSlots_scale = h_sbrFastTransientDetector->energy_timeSlots_scale; + + FIXP_DBL * delta_energy = h_sbrFastTransientDetector->delta_energy; + int * delta_energy_scale = h_sbrFastTransientDetector->delta_energy_scale; + + const FIXP_DBL thr = TRAN_DET_THRSHLD; + const INT thr_scale = TRAN_DET_THRSHLD_SCALE; + + /*reset transient info*/ + tran_vector[2] = 0; 
+ + /* reset transient candidates */ + FDKmemclear(transientCandidates+lookahead, nTimeSlots*sizeof(int)); + + for(timeSlot = lookahead; timeSlot < nTimeSlots + lookahead; timeSlot++) { + int i, norm; + FIXP_DBL tmpE = FL2FXCONST_DBL(0.0f); + int headroomEnSlot = DFRACT_BITS-1; + + FIXP_DBL smallNRG = FL2FXCONST_DBL(1e-2f); + FIXP_DBL denominator; + INT denominator_scale; + + /* determine minimum headroom of energy values for this timeslot */ + for(band = startBand; band < stopBand; band++) { + int tmp_headroom = fNormz(Energies[timeSlot][band])-1; + if(tmp_headroom < headroomEnSlot){ + headroomEnSlot = tmp_headroom; + } + } + + for(i = 0, band = startBand; band < stopBand; band++, i++) { + /* energy is weighted by weightingfactor stored in dBf_m array */ + /* dBf_m index runs from 0 to stopBand-startband */ + /* energy shifted by calculated headroom for maximum precision */ + FIXP_DBL weightedEnergy = fMult(Energies[timeSlot][band]<dBf_m[i]); + + /* energy is added up */ + /* shift by 6 to have a headroom for maximum 64 additions */ + /* shift by dBf_e to handle weighting factor dependent scale factors */ + tmpE += weightedEnergy >> (6 + (10 - h_sbrFastTransientDetector->dBf_e[i])); + } + + /* store calculated energy for timeslot */ + energy_timeSlots[timeSlot] = tmpE; + + /* calculate overall scale factor for energy of this timeslot */ + /* = original scale factor of energies (-scaleEnergies[0]+2*QMF_SCALE_OFFSET or -scaleEnergies[1]+2*QMF_SCALE_OFFSET */ + /* depending on YBufferWriteOffset) */ + /* + weighting factor scale (10) */ + /* + adding up scale factor ( 6) */ + /* - headroom of energy value (headroomEnSlot) */ + if(timeSlot < YBufferWriteOffset){ + energy_timeSlots_scale[timeSlot] = (-scaleEnergies[0]+2*QMF_SCALE_OFFSET) + (10+6) - headroomEnSlot; + } else { + energy_timeSlots_scale[timeSlot] = (-scaleEnergies[1]+2*QMF_SCALE_OFFSET) + (10+6) - headroomEnSlot; + } + + /* Add a small energy to the denominator, thus making the transient + detection 
energy-dependent. Loud transients are being detected, + silent ones not. */ + + /* make sure that smallNRG does not overflow */ + if ( -energy_timeSlots_scale[timeSlot-1] + 1 > 5 ) + { + denominator = smallNRG; + denominator_scale = 0; + } else { + /* Leave an additional headroom of 1 bit for this addition. */ + smallNRG = scaleValue(smallNRG, -(energy_timeSlots_scale[timeSlot-1] + 1)); + denominator = (energy_timeSlots[timeSlot-1]>>1) + smallNRG; + denominator_scale = energy_timeSlots_scale[timeSlot-1]+1; + } + + delta_energy[timeSlot] = fDivNorm(energy_timeSlots[timeSlot], denominator, &norm); + delta_energy_scale[timeSlot] = energy_timeSlots_scale[timeSlot] - denominator_scale + norm; + } + + /*get transient candidates*/ + /* For every timeslot, check if delta(E) exceeds the threshold. If it did, + it could potentially be marked as a transient candidate. However, the 2 + slots before the current one must not be transients with an energy higher + than 1.4*E(current). If both aren't transients or if the energy of the + current timesolot is more than 1.4 times higher than the energy in the + last or the one before the last slot, it is marked as a transient.*/ + + FDK_ASSERT(lookahead >= 2); + for(timeSlot = lookahead; timeSlot < nTimeSlots + lookahead; timeSlot++) { + FIXP_DBL energy_cur_slot_weighted = fMult(energy_timeSlots[timeSlot],FL2FXCONST_DBL(1.0f/1.4f)); + if( !fIsLessThan(delta_energy[timeSlot], delta_energy_scale[timeSlot], thr, thr_scale) && + ( ((transientCandidates[timeSlot-2]==0) && (transientCandidates[timeSlot-1]==0)) || + !fIsLessThan(energy_cur_slot_weighted, energy_timeSlots_scale[timeSlot], energy_timeSlots[timeSlot-1], energy_timeSlots_scale[timeSlot-1] ) || + !fIsLessThan(energy_cur_slot_weighted, energy_timeSlots_scale[timeSlot], energy_timeSlots[timeSlot-2], energy_timeSlots_scale[timeSlot-2] ) + ) + ) +{ + /* in case of strong transients, subsequent + * qmf slots might be recognized as transients. 
*/ + transientCandidates[timeSlot] = 1; + } + } + + /*get transient with max energy*/ + max_delta_energy = FL2FXCONST_DBL(0.0f); + max_delta_energy_scale = 0; + ind_max = 0; + isTransientInFrame = 0; + for(timeSlot = 0; timeSlot < nTimeSlots; timeSlot++) { + int scale = fMax(delta_energy_scale[timeSlot], max_delta_energy_scale); + if(transientCandidates[timeSlot] && ( (delta_energy[timeSlot] >> (scale - delta_energy_scale[timeSlot])) > (max_delta_energy >> (scale - max_delta_energy_scale)) ) ) { + max_delta_energy = delta_energy[timeSlot]; + max_delta_energy_scale = scale; + ind_max = timeSlot; + isTransientInFrame = 1; + } + } + + /*from all transient candidates take the one with the biggest energy*/ + if(isTransientInFrame) { + tran_vector[0] = ind_max; + tran_vector[1] = 1; + } else { + /*reset transient info*/ + tran_vector[0] = tran_vector[1] = 0; + } + + /*check for transients in lookahead*/ + for(timeSlot = nTimeSlots; timeSlot < nTimeSlots + lookahead; timeSlot++) { + if(transientCandidates[timeSlot]) { + tran_vector[2] = 1; + } + } + + /*update buffers*/ + for(timeSlot = 0; timeSlot < lookahead; timeSlot++) { + transientCandidates[timeSlot] = transientCandidates[nTimeSlots + timeSlot]; + + /* fixpoint stuff */ + energy_timeSlots[timeSlot] = energy_timeSlots[nTimeSlots + timeSlot]; + energy_timeSlots_scale[timeSlot] = energy_timeSlots_scale[nTimeSlots + timeSlot]; + + delta_energy[timeSlot] = delta_energy[nTimeSlots + timeSlot]; + delta_energy_scale[timeSlot] = delta_energy_scale[nTimeSlots + timeSlot]; + } +} + diff --git a/libSBRenc/src/tran_det.h b/libSBRenc/src/tran_det.h index 95b5d2e..6fe1023 100644 --- a/libSBRenc/src/tran_det.h +++ b/libSBRenc/src/tran_det.h @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------------------------------------------- Software License for The Fraunhofer FDK AAC Codec Library for Android -© Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. 
+© Copyright 1995 - 2015 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. All rights reserved. 1. INTRODUCTION @@ -96,7 +96,8 @@ typedef struct FIXP_DBL transients[QMF_MAX_TIME_SLOTS+(QMF_MAX_TIME_SLOTS/2)]; FIXP_DBL thresholds[QMF_CHANNELS]; FIXP_DBL tran_thr; /* Master threshold for transient signals */ - FIXP_DBL split_thr; /* Threshold for splitting FIXFIX-frames into 2 env */ + FIXP_DBL split_thr_m; /* Threshold for splitting FIXFIX-frames into 2 env */ + INT split_thr_e; /* Scale for splitting threshold */ FIXP_DBL prevLowBandEnergy; /* Energy of low band */ FIXP_DBL prevHighBandEnergy; /* Energy of high band */ INT tran_fc; /* Number of lowband subbands to discard */ @@ -112,6 +113,57 @@ SBR_TRANSIENT_DETECTOR; typedef SBR_TRANSIENT_DETECTOR *HANDLE_SBR_TRANSIENT_DETECTOR; +#define TRAN_DET_LOOKAHEAD 2 +#define TRAN_DET_START_FREQ 4500 /*start frequency for transient detection*/ +#define TRAN_DET_STOP_FREQ 13500 /*stop frequency for transient detection*/ +#define TRAN_DET_MIN_QMFBANDS 4 /* minimum qmf bands for transient detection */ +#define QMF_HP_dBd_SLOPE_FIX FL2FXCONST_DBL(0.00075275f) /* 0.002266f/10 * log2(10) */ +#define TRAN_DET_THRSHLD FL2FXCONST_DBL(3.2f/4.f) +#define TRAN_DET_THRSHLD_SCALE (2) + +typedef struct +{ + INT transientCandidates[QMF_MAX_TIME_SLOTS + TRAN_DET_LOOKAHEAD]; + INT nTimeSlots; + INT lookahead; + INT startBand; + INT stopBand; + + FIXP_DBL dBf_m[QMF_CHANNELS]; + INT dBf_e[QMF_CHANNELS]; + + FIXP_DBL energy_timeSlots[QMF_MAX_TIME_SLOTS + TRAN_DET_LOOKAHEAD]; + INT energy_timeSlots_scale[QMF_MAX_TIME_SLOTS + TRAN_DET_LOOKAHEAD]; + + FIXP_DBL delta_energy[QMF_MAX_TIME_SLOTS + TRAN_DET_LOOKAHEAD]; + INT delta_energy_scale[QMF_MAX_TIME_SLOTS + TRAN_DET_LOOKAHEAD]; + + FIXP_DBL lowpass_energy[QMF_MAX_TIME_SLOTS + TRAN_DET_LOOKAHEAD]; + INT lowpass_energy_scale[QMF_MAX_TIME_SLOTS + TRAN_DET_LOOKAHEAD]; +#if defined (FTD_LOG) + FDKFILE *ftd_log; +#endif +} +FAST_TRAN_DETECTOR; +typedef FAST_TRAN_DETECTOR 
*HANDLE_FAST_TRAN_DET; + + +INT FDKsbrEnc_InitSbrFastTransientDetector( + HANDLE_FAST_TRAN_DET h_sbrFastTransientDetector, + const INT time_slots_per_frame, + const INT bandwidth_qmf_slot, + const INT no_qmf_channels, + const INT sbr_qmf_1st_band + ); + +void FDKsbrEnc_fastTransientDetect( + const HANDLE_FAST_TRAN_DET h_sbrFastTransientDetector, + const FIXP_DBL *const *Energies, + const int *const scaleEnergies, + const INT YBufferWriteOffset, + UCHAR *const tran_vector + ); + void FDKsbrEnc_transientDetect(HANDLE_SBR_TRANSIENT_DETECTOR h_sbrTransientDetector, FIXP_DBL **Energies, @@ -124,6 +176,7 @@ FDKsbrEnc_transientDetect(HANDLE_SBR_TRANSIENT_DETECTOR h_sbrTransientDetector, int FDKsbrEnc_InitSbrTransientDetector (HANDLE_SBR_TRANSIENT_DETECTOR h_sbrTransientDetector, + UINT sbrSyntaxFlags, /* SBR syntax flags derived from AOT. */ INT frameSize, INT sampleFreq, sbrConfigurationPtr params, @@ -145,6 +198,6 @@ FDKsbrEnc_frameSplitter(FIXP_DBL **Energies, int YBufferSzShift, int nSfb, int timeStep, - int no_cols); - + int no_cols, + FIXP_DBL* tonality); #endif -- cgit v1.2.3