From 22f1fce330059ef8a383cf327a023d6a9da5ad3e Mon Sep 17 00:00:00 2001 From: "Matthias P. Braendli" Date: Mon, 15 Feb 2016 02:44:20 +0100 Subject: Include toolame-dab as library --- libtoolame-dab/psycho_3.c | 539 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 539 insertions(+) create mode 100644 libtoolame-dab/psycho_3.c (limited to 'libtoolame-dab/psycho_3.c') diff --git a/libtoolame-dab/psycho_3.c b/libtoolame-dab/psycho_3.c new file mode 100644 index 0000000..3dbd462 --- /dev/null +++ b/libtoolame-dab/psycho_3.c @@ -0,0 +1,539 @@ +#include +#include +#include +#include +#include "common.h" +#include "options.h" +#include "encoder.h" +#include "mem.h" +#include "fft.h" +#include "ath.h" +#define OLDTHRESHx +#include "psycho_3.h" +#include "psycho_3priv.h" + +/* This is a reimplementation of psy model 1 using the ISO11172 standard. + I found the original dist10 code (which is full of pointers) to be + a horrible thing to try and understand and debug. + This implementation is not built for speed, but is rather meant to + clearly outline the steps specified by the standard (still, it's only + a tiny fraction slower than the dist10 code, and nothing has been optimized) + MFC Feb 2003 */ + +/* Keep a table to fudge the adding of dB */ +#define DBTAB 1000 +static double dbtable[DBTAB]; + +#define CRITBANDMAX 32 /* this is much higher than it needs to be. really only about 24 */ +int cbands=0; /* How many critical bands there really are */ +int cbandindex[CRITBANDMAX]; /* The spectral line index of the start of + each critical band */ + +#define SUBSIZE 136 +int freq_subset[SUBSIZE]; +FLOAT bark[HBLKSIZE], ath[HBLKSIZE]; + +int *numlines; +FLOAT *cbval; +int partition[HBLKSIZE]; +static D1408 *fft_buf; + +frame_header *header; + + +double psycho_3_add_db (double a, double b) +{ + /* MFC - if the difference between a and b is large (>99), then just return the + largest one. (about 10% of the time) + - For differences between 0 and 99, return the largest value, but add + in a pre-calculated difference value. + - the value 99 was chosen arbitarily. + - maximum (a-b) i've seen is 572 */ + FLOAT fdiff; + int idiff; + fdiff = (10.0 * (a - b)); + + if (fdiff > 990.0) { + return a; + } + if (fdiff < -990.0) { + return (b); + } + + idiff = (int) fdiff; + if (idiff >= 0) { + return (a + dbtable[idiff]); + } + + return (b + dbtable[-idiff]); +} + +void psycho_3 (short buffer[2][1152], double scale[2][SBLIMIT], + double ltmin[2][SBLIMIT], frame_info * frame, options *glopts) +{ + int nch = frame->nch; + int sblimit = frame->sblimit; + int k, i; + static char init = 0; + static int off[2] = { 256, 256 }; + FLOAT sample[BLKSIZE]; + + FLOAT energy[BLKSIZE]; + FLOAT power[HBLKSIZE]; + FLOAT Xtm[HBLKSIZE], Xnm[HBLKSIZE]; + int tonelabel[HBLKSIZE], noiselabel[HBLKSIZE]; + FLOAT LTg[HBLKSIZE]; + double Lsb[SBLIMIT]; + + header = frame->header; + + if (init==0) { + psycho_3_init(glopts); + init++; + } + + + for (k = 0; k < nch; k++) { + int ok = off[k] % 1408; + for (i = 0; i < 1152; i++) { + fft_buf[k][ok++] = (FLOAT) buffer[k][i] / SCALE; + if (ok >= 1408) + ok = 0; + } + ok = (off[k] + 1216) % 1408; + for (i = 0; i < BLKSIZE; i++) { + sample[i] = fft_buf[k][ok++]; + if (ok >= 1408) + ok = 0; + } + + off[k] += 1152; + off[k] %= 1408; + + psycho_3_fft(sample, energy); + psycho_3_powerdensityspectrum(energy, power); + psycho_3_spl(Lsb, power, &scale[k][0]); + psycho_3_tonal_label (power, tonelabel, Xtm); + psycho_3_noise_label (power, energy, tonelabel, noiselabel, Xnm); + if (glopts->verbosity > 20) + psycho_3_dump(tonelabel, Xtm, noiselabel, Xnm); + psycho_3_decimation(ath, tonelabel, Xtm, noiselabel, Xnm, bark); + psycho_3_threshold(LTg, tonelabel, Xtm, noiselabel, Xnm, bark, ath, bitrate[header->version][header->bitrate_index] / nch, freq_subset); + psycho_3_minimummasking(LTg, <min[k][0], freq_subset); + psycho_3_smr(<min[k][0], Lsb); + } +} + +/* ISO11172 Sec D.1 Step 1 - Window with HANN and then perform the FFT */ +void psycho_3_fft(FLOAT sample[BLKSIZE], FLOAT energy[BLKSIZE]) +{ + FLOAT x_real[BLKSIZE]; + int i; + static int init = 0; + static FLOAT *window; + + if (!init) { /* calculate window function for the Fourier transform */ + window = (FLOAT *) mem_alloc (sizeof (DFFT), "window"); + register FLOAT sqrt_8_over_3 = pow (8.0 / 3.0, 0.5); + for (i = 0; i < BLKSIZE; i++) { + window[i] = sqrt_8_over_3 * 0.5 * (1 - cos (2.0 * PI * i / (BLKSIZE))) / BLKSIZE; + } + init++; + } + + /* convolve the samples with the hann window */ + for (i = 0; i < BLKSIZE; i++) + x_real[i] = (FLOAT) (sample[i] * window[i]); + /* do the FFT */ + psycho_1_fft (x_real, energy, BLKSIZE); +} + +/* Sect D.1 Step 1 - convert the energies into dB */ +void psycho_3_powerdensityspectrum(FLOAT energy[BLKSIZE], FLOAT power[HBLKSIZE]) { + int i; + for (i=1;i>4; + if (Xmax[index] < power[i]) + Xmax[index] = power[i]; + } + + /* Compare it to the sound pressure based upon the scale for this subband + and pick the maximum one */ + for (i=0;ipower[i-1] && power[i]>power[i+1]) /* The first criteria for a maximum */ + maxima[i]=1; + else + maxima[i]=0; + } + + { + /* Now find the tones as per ISO11172 D.1 Step4.b */ + /* The standard is a bit vague (surprise surprise). + So I'm going to assume that + - a tone must be 7dB greater than *all* the relevant neighbours + - once a tone is found, the neighbours are immediately set to -inf dB + */ + + psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 2, 63, 2); + psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 63,127,3); + psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 127,255,6); + psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 255,500,12); + + } +} + +/* Sect D.1 Step4b + A tone within the range (start -> end), must be 7.0 dB greater than + all it's neighbours within +/- srange. Don't count its immediate neighbours. */ +void psycho_3_tonal_label_range(FLOAT *power, int *tonelabel, int *maxima, FLOAT *Xtm, int start, int end, int srange) { + int j,k; + + for (k=start;k 1) /* Don't count the immediate neighbours, or itself */ + if ((power[k] - power[k+j]) < 7.0) + tonelabel[k] = 0; /* Not greater by 7dB, therefore not a tone */ + if (tonelabel[k] == TONE) { + /* Calculate the sound pressure level for this tone by summing + the adjacent spectral lines + Xtm[k] = 10 * log10( pow(10.0, 0.1*power[k-1]) + pow(10.0, 0.1*power[k]) + + pow(10.0, 0.1*power[k+1]) ); */ + double temp = psycho_3_add_db(power[k-1], power[k]); + Xtm[k] = psycho_3_add_db(temp, power[k+1]); + + /* *ALL* spectral lines within +/- srange are set to -inf dB + So that when we do the noise calculate, they are not counted */ + for (j=-srange;j<=+srange;j++) + power[k+j] = DBMIN; + } + } +} + +void psycho_3_init_add_db (void) +{ + int i; + double x; + for (i = 0; i < DBTAB; i++) { + x = (double) i / 10.0; + dbtable[i] = 10 * log10 (1 + pow (10.0, x / 10.0)) - x; + } +} + +/* D.1 Step 4.c Labelling non-tonal (noise) components + Sum the energies in each critical band (the tone energies have been removed + during the tone labelling). + Find the "geometric mean" of these energies - i.e. find the best spot to put the + sum of energies within this critical band. */ +void psycho_3_noise_label (FLOAT power[HBLKSIZE], FLOAT energy[BLKSIZE], int *tonelabel, int *noiselabel, FLOAT Xnm[HBLKSIZE]) { + int i,j; + + Xnm[0] = DBMIN; + for (i=0;i= -3.0 && dz < 8.0) { + FLOAT vf; + FLOAT av = -1.525 - 0.275 * bark[k] - 4.5 + Xtm[k]; + /* masking function for lower & upper slopes */ + if (dz < -1) + vf = 17 * (dz + 1) - (0.4 * Xtm[k] + 6); + else if (dz < 0) + vf = (0.4 * Xtm[k] + 6) * dz; + else if (dz < 1) + vf = (-17 * dz); + else + vf = -(dz - 1) * (17 - 0.15 * Xtm[k]) - 17; + LTtm[j] = psycho_3_add_db (LTtm[j], av + vf); + } + } + } + + /* find every noise label */ + if (noiselabel[k]==NOISE) { + for (j=0;j= -3.0 && dz < 8.0) { + FLOAT vf; + FLOAT av = -1.525 - 0.175 * bark[k] - 0.5 + Xnm[k]; + /* masking function for lower & upper slopes */ + if (dz < -1) + vf = 17 * (dz + 1) - (0.4 * Xnm[k] + 6); + else if (dz < 0) + vf = (0.4 * Xnm[k] + 6) * dz; + else if (dz < 1) + vf = (-17 * dz); + else + vf = -(dz - 1) * (17 - 0.15 * Xnm[k]) - 17; + LTnm[j] = psycho_3_add_db (LTnm[j], av + vf); + } + } + } + } + + /* ISO11172 D.1 Step 7 + Calculate the global masking threhold */ + for (i=0;i>4; + if (LTmin[index] > LTg[i]) { + LTmin[index] = LTg[i]; + } + } +} + +/* ISO11172 Sect D.1 Step 9 + Calculate the signal-to-mask ratio + MFC FIXME Feb 2003 for better calling from + toolame, add a "float SMR[]" array and return it */ +void psycho_3_smr(double *LTmin, double *Lsb) { + int i; + for (i=0;iversion][header->sampling_frequency] * 1000; + for (i=1;iathlevel); + } + + { /* Work out the critical bands + Starting from line 0, all lines within 1 bark of the starting + bark are added to the same critical band. When a line is greater + by 1.0 of a bark, start a new critical band. */ + + numlines = (int *)calloc(HBLKSIZE, sizeof(int)); + cbval = (float *)calloc(HBLKSIZE, sizeof(float)); + cbandindex[0] = 1; + for (i=1;i 1.0) { /* 1 critical band? 1 bark? */ + /* this frequency line is too different from the starting line, + (in terms of the bark distance) + so make this spectral line the first member of the next critical band */ + cbase = i; /* Start the new critical band from this frequency line */ + cbands++; + cbandindex[cbands] = cbase; + } + /* partition[i] tells us which critical band the i'th frequency line is in */ + partition[i] = cbands; + /* keep a count of how many frequency lines are in each partition */ + numlines[cbands]++; + } + + cbands++; + cbandindex[cbands] = 513; /* Set the top of the last critical band */ + + /* For each crtical band calculate the average bark value + cbval [central bark value] */ + for (i=1;i 32 * 16 = 512 + Subband 0-2 : Every line (3 * 16 = 48 lines) + Subband 3-5 : Every Second line (3 * 16/2 = 24 lines) + Subband 6-11 : Every 4th line (6 * 16/4 = 24 lines) + Subband 12-31 : Every 12th line (20 * 16/8 = 40 lines) + + create this subset of frequencies (freq_subset) */ + int freq_index=0; + for (i=1;i<(3*16)+1;i++) + freq_subset[freq_index++] = i; + for (;i<(6*16)+1;i+=2) + freq_subset[freq_index++] = i; + for (;i<(12*16)+1;i+=4) + freq_subset[freq_index++] = i; + for (;i<(32*16)+1;i+=8) + freq_subset[freq_index++] = i; + } + + if (glopts->verbosity > 4) { + fprintf(stdout,"%i critical bands\n",cbands); + for (i=0;i