diff options
author | Matthias P. Braendli <matthias.braendli@mpb.li> | 2016-02-15 02:44:20 +0100 |
---|---|---|
committer | Matthias P. Braendli <matthias.braendli@mpb.li> | 2016-02-15 02:44:20 +0100 |
commit | 22f1fce330059ef8a383cf327a023d6a9da5ad3e (patch) | |
tree | 6893f158dcaaaa1b9f1317923c32a841ba31f768 /libtoolame-dab/psycho_3.c | |
parent | 891bb2592944aa2be2d81e1583e73e632e70537f (diff) | |
download | fdk-aac-22f1fce330059ef8a383cf327a023d6a9da5ad3e.tar.gz fdk-aac-22f1fce330059ef8a383cf327a023d6a9da5ad3e.tar.bz2 fdk-aac-22f1fce330059ef8a383cf327a023d6a9da5ad3e.zip |
Include toolame-dab as library
Diffstat (limited to 'libtoolame-dab/psycho_3.c')
-rw-r--r-- | libtoolame-dab/psycho_3.c | 539 |
1 files changed, 539 insertions, 0 deletions
diff --git a/libtoolame-dab/psycho_3.c b/libtoolame-dab/psycho_3.c new file mode 100644 index 0000000..3dbd462 --- /dev/null +++ b/libtoolame-dab/psycho_3.c @@ -0,0 +1,539 @@ +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> +#include "common.h" +#include "options.h" +#include "encoder.h" +#include "mem.h" +#include "fft.h" +#include "ath.h" +#define OLDTHRESHx +#include "psycho_3.h" +#include "psycho_3priv.h" + +/* This is a reimplementation of psy model 1 using the ISO11172 standard. + I found the original dist10 code (which is full of pointers) to be + a horrible thing to try and understand and debug. + This implementation is not built for speed, but is rather meant to + clearly outline the steps specified by the standard (still, it's only + a tiny fraction slower than the dist10 code, and nothing has been optimized) + MFC Feb 2003 */ + +/* Keep a table to fudge the adding of dB */ +#define DBTAB 1000 +static double dbtable[DBTAB]; + +#define CRITBANDMAX 32 /* this is much higher than it needs to be. really only about 24 */ +int cbands=0; /* How many critical bands there really are */ +int cbandindex[CRITBANDMAX]; /* The spectral line index of the start of + each critical band */ + +#define SUBSIZE 136 +int freq_subset[SUBSIZE]; +FLOAT bark[HBLKSIZE], ath[HBLKSIZE]; + +int *numlines; +FLOAT *cbval; +int partition[HBLKSIZE]; +static D1408 *fft_buf; + +frame_header *header; + + +double psycho_3_add_db (double a, double b) +{ + /* MFC - if the difference between a and b is large (>99), then just return the + largest one. (about 10% of the time) + - For differences between 0 and 99, return the largest value, but add + in a pre-calculated difference value. + - the value 99 was chosen arbitarily. + - maximum (a-b) i've seen is 572 */ + FLOAT fdiff; + int idiff; + fdiff = (10.0 * (a - b)); + + if (fdiff > 990.0) { + return a; + } + if (fdiff < -990.0) { + return (b); + } + + idiff = (int) fdiff; + if (idiff >= 0) { + return (a + dbtable[idiff]); + } + + return (b + dbtable[-idiff]); +} + +void psycho_3 (short buffer[2][1152], double scale[2][SBLIMIT], + double ltmin[2][SBLIMIT], frame_info * frame, options *glopts) +{ + int nch = frame->nch; + int sblimit = frame->sblimit; + int k, i; + static char init = 0; + static int off[2] = { 256, 256 }; + FLOAT sample[BLKSIZE]; + + FLOAT energy[BLKSIZE]; + FLOAT power[HBLKSIZE]; + FLOAT Xtm[HBLKSIZE], Xnm[HBLKSIZE]; + int tonelabel[HBLKSIZE], noiselabel[HBLKSIZE]; + FLOAT LTg[HBLKSIZE]; + double Lsb[SBLIMIT]; + + header = frame->header; + + if (init==0) { + psycho_3_init(glopts); + init++; + } + + + for (k = 0; k < nch; k++) { + int ok = off[k] % 1408; + for (i = 0; i < 1152; i++) { + fft_buf[k][ok++] = (FLOAT) buffer[k][i] / SCALE; + if (ok >= 1408) + ok = 0; + } + ok = (off[k] + 1216) % 1408; + for (i = 0; i < BLKSIZE; i++) { + sample[i] = fft_buf[k][ok++]; + if (ok >= 1408) + ok = 0; + } + + off[k] += 1152; + off[k] %= 1408; + + psycho_3_fft(sample, energy); + psycho_3_powerdensityspectrum(energy, power); + psycho_3_spl(Lsb, power, &scale[k][0]); + psycho_3_tonal_label (power, tonelabel, Xtm); + psycho_3_noise_label (power, energy, tonelabel, noiselabel, Xnm); + if (glopts->verbosity > 20) + psycho_3_dump(tonelabel, Xtm, noiselabel, Xnm); + psycho_3_decimation(ath, tonelabel, Xtm, noiselabel, Xnm, bark); + psycho_3_threshold(LTg, tonelabel, Xtm, noiselabel, Xnm, bark, ath, bitrate[header->version][header->bitrate_index] / nch, freq_subset); + psycho_3_minimummasking(LTg, <min[k][0], freq_subset); + psycho_3_smr(<min[k][0], Lsb); + } +} + +/* ISO11172 Sec D.1 Step 1 - Window with HANN and then perform the FFT */ +void psycho_3_fft(FLOAT sample[BLKSIZE], FLOAT energy[BLKSIZE]) +{ + FLOAT x_real[BLKSIZE]; + int i; + static int init = 0; + static FLOAT *window; + + if (!init) { /* calculate window function for the Fourier transform */ + window = (FLOAT *) mem_alloc (sizeof (DFFT), "window"); + register FLOAT sqrt_8_over_3 = pow (8.0 / 3.0, 0.5); + for (i = 0; i < BLKSIZE; i++) { + window[i] = sqrt_8_over_3 * 0.5 * (1 - cos (2.0 * PI * i / (BLKSIZE))) / BLKSIZE; + } + init++; + } + + /* convolve the samples with the hann window */ + for (i = 0; i < BLKSIZE; i++) + x_real[i] = (FLOAT) (sample[i] * window[i]); + /* do the FFT */ + psycho_1_fft (x_real, energy, BLKSIZE); +} + +/* Sect D.1 Step 1 - convert the energies into dB */ +void psycho_3_powerdensityspectrum(FLOAT energy[BLKSIZE], FLOAT power[HBLKSIZE]) { + int i; + for (i=1;i<HBLKSIZE;i++) { + if (energy[i] < 1E-20) + power[i] = -200.0 + POWERNORM; + else + power[i] = 10 * log10 (energy[i]) + POWERNORM; + } +} + +/* Sect D.1 Step 2 - Determine the sound pressure level in each subband */ +void psycho_3_spl(double *Lsb, FLOAT *power, double *scale) { + int i; + FLOAT Xmax[SBLIMIT]; + + for (i=0;i<SBLIMIT;i++) { + Xmax[i] = DBMIN; + } + /* Find the maximum SPL in the power spectrum */ + for (i=1;i<HBLKSIZE;i++) { + int index = i>>4; + if (Xmax[index] < power[i]) + Xmax[index] = power[i]; + } + + /* Compare it to the sound pressure based upon the scale for this subband + and pick the maximum one */ + for (i=0;i<SBLIMIT;i++) { + double val = 20 * log10 (scale[i] * 32768) - 10; + Lsb[i] = MAX(Xmax[i], val); + } +} + +/* Sect D.1 Step 4 Label the Tonal Components */ +void psycho_3_tonal_label (FLOAT power[HBLKSIZE], int *tonelabel, FLOAT Xtm[HBLKSIZE]) +{ + int i; + int maxima[HBLKSIZE]; + + /* Find the maxima as per ISO11172 D.1.4.a */ + maxima[0]=maxima[HBLKSIZE-1]=0; + tonelabel[0]=tonelabel[HBLKSIZE-1]=0; + Xtm[0] = Xtm[HBLKSIZE-1] = DBMIN; + for (i=1;i<HBLKSIZE-1;i++) { + tonelabel[i] = 0; + Xtm[i] = DBMIN; + if (power[i]>power[i-1] && power[i]>power[i+1]) /* The first criteria for a maximum */ + maxima[i]=1; + else + maxima[i]=0; + } + + { + /* Now find the tones as per ISO11172 D.1 Step4.b */ + /* The standard is a bit vague (surprise surprise). + So I'm going to assume that + - a tone must be 7dB greater than *all* the relevant neighbours + - once a tone is found, the neighbours are immediately set to -inf dB + */ + + psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 2, 63, 2); + psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 63,127,3); + psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 127,255,6); + psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 255,500,12); + + } +} + +/* Sect D.1 Step4b + A tone within the range (start -> end), must be 7.0 dB greater than + all it's neighbours within +/- srange. Don't count its immediate neighbours. */ +void psycho_3_tonal_label_range(FLOAT *power, int *tonelabel, int *maxima, FLOAT *Xtm, int start, int end, int srange) { + int j,k; + + for (k=start;k<end;k++) /* Search for all the maxima in this range */ + if (maxima[k] == 1) { + tonelabel[k] = TONE; /* assume it's a TONE and then prove otherwise */ + for (j=-srange;j<=+srange;j++) /* Check the neighbours within +/- srange */ + if (abs(j) > 1) /* Don't count the immediate neighbours, or itself */ + if ((power[k] - power[k+j]) < 7.0) + tonelabel[k] = 0; /* Not greater by 7dB, therefore not a tone */ + if (tonelabel[k] == TONE) { + /* Calculate the sound pressure level for this tone by summing + the adjacent spectral lines + Xtm[k] = 10 * log10( pow(10.0, 0.1*power[k-1]) + pow(10.0, 0.1*power[k]) + + pow(10.0, 0.1*power[k+1]) ); */ + double temp = psycho_3_add_db(power[k-1], power[k]); + Xtm[k] = psycho_3_add_db(temp, power[k+1]); + + /* *ALL* spectral lines within +/- srange are set to -inf dB + So that when we do the noise calculate, they are not counted */ + for (j=-srange;j<=+srange;j++) + power[k+j] = DBMIN; + } + } +} + +void psycho_3_init_add_db (void) +{ + int i; + double x; + for (i = 0; i < DBTAB; i++) { + x = (double) i / 10.0; + dbtable[i] = 10 * log10 (1 + pow (10.0, x / 10.0)) - x; + } +} + +/* D.1 Step 4.c Labelling non-tonal (noise) components + Sum the energies in each critical band (the tone energies have been removed + during the tone labelling). + Find the "geometric mean" of these energies - i.e. find the best spot to put the + sum of energies within this critical band. */ +void psycho_3_noise_label (FLOAT power[HBLKSIZE], FLOAT energy[BLKSIZE], int *tonelabel, int *noiselabel, FLOAT Xnm[HBLKSIZE]) { + int i,j; + + Xnm[0] = DBMIN; + for (i=0;i<cbands;i++) { + /* for each critical band */ + double sum = DBMIN; + double esum=0; + double centreweight = 0; + int centre; + for (j=cbandindex[i]; j<cbandindex[i+1]; j++) { + Xnm[j] = DBMIN; + /* go through all the spectral lines within the critical band, + adding the energies. The tone energies have already been removed */ + if (power[j] != DBMIN) { + /* Found a noise energy, add it to the sum */ + sum = psycho_3_add_db(power[j], sum); + + /* calculations for the geometric mean + FIXME MFC Feb 2003: Would it just be easier to + do the *whole* of psycho_1 in the energy domain rather than + in the dB domain? + FIXME: This is just a lazy arsed arithmetic mean. Don't know + if it's really going to make that much difference */ + esum += energy[j]; /* Calculate the sum of energies */ + centreweight += (j - cbandindex[i]) * energy[j]; /* And the energy moment */ + } + } + + if (sum<=DBMIN) + /* If the energy sum is really small, just pretend the noise occurs + in the centre frequency line */ + centre = (cbandindex[i] + cbandindex[i+1])/2; + else + /* Otherwise, work out the mean position of the noise, and put it there. */ + centre = cbandindex[i] + (int)(centreweight/esum); + + Xnm[centre] = sum; + noiselabel[centre] = NOISE; + } +} + +/* ISO11172 D.1 Step 5 + Get rid of noise/tones that aren't greater than the ATH + If two tones are within 0.5bark, then delete the tone with the lower energy */ +void psycho_3_decimation(FLOAT *ath, int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm, FLOAT *bark) { + int i; + + /* Delete components which aren't above the ATH */ + for (i=1;i<HBLKSIZE;i++) { + if (noiselabel[i]==NOISE) { + if (Xnm[i] < ath[i]) { + /* this masker isn't above the ATH : delete it */ + Xnm[i] = DBMIN; + noiselabel[i]=0; + } + } + if (tonelabel[i] == TONE) { + if (Xtm[i] < ath[i]) { + Xtm[i] = DBMIN; + tonelabel[i]=0; + } + } + } + /* Search for tones that are within 0.5 bark */ + /* MFC FIXME Feb 2003: haven't done this yet */ + +} + +/* ISO11172 Sect D.1 Step 6 + Calculation of individual masking thresholds + Work out how each of the tones&noises maskes other frequencies + NOTE: Only a subset of other frequencies is checked. According to the + standard different subbands are subsampled to different amounts. + See psycho_3_init and freq_subset */ +void psycho_3_threshold(FLOAT *LTg, int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm, FLOAT *bark, FLOAT *ath, int bit_rate, int *freq_subset) { + int i,j,k; + FLOAT LTtm[SUBSIZE]; + FLOAT LTnm[SUBSIZE]; + + for (i=0;i<SUBSIZE;i++) { + LTtm[i] = DBMIN; + LTnm[i] = DBMIN; + } + /* Loop over the entire spectrum and find every noise and tone + And then with each noise/tone work out how it masks + the spectral lines around it */ + for (k=1;k<HBLKSIZE;k++) { + /* Find every tone */ + if (tonelabel[k]==TONE) { + for (j=0;j<SUBSIZE;j++) { + /* figure out how it masks the levels around it */ + FLOAT dz = bark[freq_subset[j]] - bark[k]; + if (dz >= -3.0 && dz < 8.0) { + FLOAT vf; + FLOAT av = -1.525 - 0.275 * bark[k] - 4.5 + Xtm[k]; + /* masking function for lower & upper slopes */ + if (dz < -1) + vf = 17 * (dz + 1) - (0.4 * Xtm[k] + 6); + else if (dz < 0) + vf = (0.4 * Xtm[k] + 6) * dz; + else if (dz < 1) + vf = (-17 * dz); + else + vf = -(dz - 1) * (17 - 0.15 * Xtm[k]) - 17; + LTtm[j] = psycho_3_add_db (LTtm[j], av + vf); + } + } + } + + /* find every noise label */ + if (noiselabel[k]==NOISE) { + for (j=0;j<SUBSIZE;j++) { + /* figure out how it masks the levels around it */ + FLOAT dz = bark[freq_subset[j]] - bark[k]; + if (dz >= -3.0 && dz < 8.0) { + FLOAT vf; + FLOAT av = -1.525 - 0.175 * bark[k] - 0.5 + Xnm[k]; + /* masking function for lower & upper slopes */ + if (dz < -1) + vf = 17 * (dz + 1) - (0.4 * Xnm[k] + 6); + else if (dz < 0) + vf = (0.4 * Xnm[k] + 6) * dz; + else if (dz < 1) + vf = (-17 * dz); + else + vf = -(dz - 1) * (17 - 0.15 * Xnm[k]) - 17; + LTnm[j] = psycho_3_add_db (LTnm[j], av + vf); + } + } + } + } + + /* ISO11172 D.1 Step 7 + Calculate the global masking threhold */ + for (i=0;i<SUBSIZE;i++) { + LTg[i] = psycho_3_add_db(LTnm[i], LTtm[i]); + if (bit_rate < 96) + LTg[i] = psycho_3_add_db(ath[freq_subset[i]], LTg[i]); + else + LTg[i] = psycho_3_add_db(ath[freq_subset[i]]-12.0, LTg[i]); + } +} + + /* Find the minimum LTg for each subband. ISO11172 Sec D.1 Step 8 */ +void psycho_3_minimummasking(FLOAT *LTg, double *LTmin, int *freq_subset) { + int i; + + for (i=0;i<SBLIMIT;i++) + LTmin[i] = 999999.9; + + for (i=0;i<SUBSIZE;i++) { + int index = freq_subset[i]>>4; + if (LTmin[index] > LTg[i]) { + LTmin[index] = LTg[i]; + } + } +} + +/* ISO11172 Sect D.1 Step 9 + Calculate the signal-to-mask ratio + MFC FIXME Feb 2003 for better calling from + toolame, add a "float SMR[]" array and return it */ +void psycho_3_smr(double *LTmin, double *Lsb) { + int i; + for (i=0;i<SBLIMIT;i++) { + LTmin[i] = Lsb[i] - LTmin[i]; + } +} + +void psycho_3_init(options *glopts) { + int i; + int cbase = 0; /* current base index for the bark range calculation */ + + fft_buf = (D1408 *) mem_alloc ((long) sizeof (D1408) * 2, "fft_buf"); + + /* Initialise the tables for the adding dB */ + psycho_3_init_add_db(); + + /* For each spectral line calculate the bark and the ATH (in dB) */ + FLOAT sfreq = (FLOAT) s_freq[header->version][header->sampling_frequency] * 1000; + for (i=1;i<HBLKSIZE; i++) { + FLOAT freq = i * sfreq/BLKSIZE; + bark[i] = freq2bark(freq); + ath[i] = ATH_dB(freq,glopts->athlevel); + } + + { /* Work out the critical bands + Starting from line 0, all lines within 1 bark of the starting + bark are added to the same critical band. When a line is greater + by 1.0 of a bark, start a new critical band. */ + + numlines = (int *)calloc(HBLKSIZE, sizeof(int)); + cbval = (float *)calloc(HBLKSIZE, sizeof(float)); + cbandindex[0] = 1; + for (i=1;i<HBLKSIZE;i++) { + if ((bark[i] - bark[cbase]) > 1.0) { /* 1 critical band? 1 bark? */ + /* this frequency line is too different from the starting line, + (in terms of the bark distance) + so make this spectral line the first member of the next critical band */ + cbase = i; /* Start the new critical band from this frequency line */ + cbands++; + cbandindex[cbands] = cbase; + } + /* partition[i] tells us which critical band the i'th frequency line is in */ + partition[i] = cbands; + /* keep a count of how many frequency lines are in each partition */ + numlines[cbands]++; + } + + cbands++; + cbandindex[cbands] = 513; /* Set the top of the last critical band */ + + /* For each crtical band calculate the average bark value + cbval [central bark value] */ + for (i=1;i<HBLKSIZE;i++) + cbval[partition[i]] += bark[i]; /* sum up all the bark values */ + for (i=1;i<CBANDS;i++) { + if (numlines[i] != 0) + cbval[i] /= numlines[i]; /* divide by the number of values */ + else { + cbval[i]=0; /* this isn't a partition */ + } + } + } + + { + /* For Step6 - For the calculation of individual masking thresholds + the spectral lines are subsampled + i.e. no need to work out the masking for every single spectral line. + Depending upon which subband the calculation is for, you + can skip a number of lines + There are 16 lines per subband -> 32 * 16 = 512 + Subband 0-2 : Every line (3 * 16 = 48 lines) + Subband 3-5 : Every Second line (3 * 16/2 = 24 lines) + Subband 6-11 : Every 4th line (6 * 16/4 = 24 lines) + Subband 12-31 : Every 12th line (20 * 16/8 = 40 lines) + + create this subset of frequencies (freq_subset) */ + int freq_index=0; + for (i=1;i<(3*16)+1;i++) + freq_subset[freq_index++] = i; + for (;i<(6*16)+1;i+=2) + freq_subset[freq_index++] = i; + for (;i<(12*16)+1;i+=4) + freq_subset[freq_index++] = i; + for (;i<(32*16)+1;i+=8) + freq_subset[freq_index++] = i; + } + + if (glopts->verbosity > 4) { + fprintf(stdout,"%i critical bands\n",cbands); + for (i=0;i<cbands;i++) + fprintf(stdout,"cband %i spectral line index %i\n",i,cbandindex[i]); + fprintf(stdout,"%i Subsampled spectral lines\n",SUBSIZE); + for (i=0;i<SUBSIZE;i++) + fprintf(stdout,"%i Spectral line %i Bark %.2f\n",i,freq_subset[i], bark[freq_subset[i]]); + } +} + +void psycho_3_dump(int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm) { + int i; + fprintf(stdout,"3 Ton:"); + for (i=1;i<HAN_SIZE;i++) { + if (tonelabel[i] == TONE) + fprintf(stdout,"[%i] %3.0f ",i,Xtm[i]); + } + fprintf(stdout,"\n"); + + fprintf(stdout,"3 Nos:"); + for (i=1;i<HAN_SIZE;i++) { + if (noiselabel[i] == NOISE) + fprintf(stdout,"[%i] %3.0f ",i,Xnm[i]); + } + fprintf(stdout,"\n"); +} |