aboutsummaryrefslogtreecommitdiffstats
path: root/libtoolame-dab/psycho_3.c
diff options
context:
space:
mode:
Diffstat (limited to 'libtoolame-dab/psycho_3.c')
-rw-r--r--libtoolame-dab/psycho_3.c539
1 files changed, 539 insertions, 0 deletions
diff --git a/libtoolame-dab/psycho_3.c b/libtoolame-dab/psycho_3.c
new file mode 100644
index 0000000..3dbd462
--- /dev/null
+++ b/libtoolame-dab/psycho_3.c
@@ -0,0 +1,539 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include "common.h"
+#include "options.h"
+#include "encoder.h"
+#include "mem.h"
+#include "fft.h"
+#include "ath.h"
+#define OLDTHRESHx
+#include "psycho_3.h"
+#include "psycho_3priv.h"
+
+/* This is a reimplementation of psy model 1 using the ISO11172 standard.
+ I found the original dist10 code (which is full of pointers) to be
+ a horrible thing to try and understand and debug.
+ This implementation is not built for speed, but is rather meant to
+ clearly outline the steps specified by the standard (still, it's only
+ a tiny fraction slower than the dist10 code, and nothing has been optimized)
+ MFC Feb 2003 */
+
+/* Keep a table to fudge the adding of dB.
+   It is filled in by psycho_3_init_add_db() and read by psycho_3_add_db(). */
+#define DBTAB 1000
+static double dbtable[DBTAB];
+
+#define CRITBANDMAX 32 /* this is much higher than it needs to be. really only about 24 */
+int cbands=0; /* How many critical bands there really are */
+int cbandindex[CRITBANDMAX]; /* The spectral line index of the start of
+ each critical band */
+
+#define SUBSIZE 136 /* number of subsampled spectral lines held in freq_subset[] */
+int freq_subset[SUBSIZE]; /* spectral line indices of the subsampled lines, filled in by psycho_3_init() */
+FLOAT bark[HBLKSIZE], ath[HBLKSIZE]; /* per spectral line: bark value and ATH (in dB), filled in by psycho_3_init() */
+
+int *numlines; /* how many spectral lines fall in each critical band (allocated in psycho_3_init()) */
+FLOAT *cbval; /* per critical band bark value computed in psycho_3_init() */
+int partition[HBLKSIZE]; /* which critical band each spectral line is in */
+static D1408 *fft_buf; /* two circular sample buffers, one per channel, written by psycho_3() */
+
+frame_header *header; /* header of the current frame; reassigned at the top of every psycho_3() call */
+
+
+/* Approximate the sum of two levels a and b that are expressed in dB.
+   The larger of the two is returned, plus a correction looked up in
+   dbtable[].  The table is indexed by the difference between the levels in
+   tenths of a dB, truncated towards zero.
+   - When the levels are more than 99 dB apart the larger one is returned
+     unchanged (the value 99 was chosen arbitrarily).
+   Returns the combined level in dB. */
+double psycho_3_add_db (double a, double b)
+{
+  FLOAT tenths = (10.0 * (a - b));
+  int slot;
+
+  /* Far apart: the smaller level contributes nothing worth adding */
+  if (tenths > 990.0)
+    return a;
+  if (tenths < -990.0)
+    return (b);
+
+  /* Close enough: larger level plus the tabulated correction */
+  slot = (int) tenths;
+  return (slot >= 0) ? (a + dbtable[slot]) : (b + dbtable[-slot]);
+}
+
+/* Entry point of psychoacoustic model 3.
+   For every channel of the frame the 1152 new samples in buffer[ch][] are
+   appended to that channel's circular buffer, a BLKSIZE sample window is
+   read back out of it, and the model steps are run in order on that window.
+   buffer - the new input samples for each channel
+   scale  - passed to psycho_3_spl for each channel
+   ltmin  - output; psycho_3_minimummasking fills it and psycho_3_smr then
+            replaces each entry with (Lsb - ltmin)
+   frame  - supplies the channel count and the frame header
+   glopts - supplies the verbosity level; handed to psycho_3_init on the
+            first call */
+void psycho_3 (short buffer[2][1152], double scale[2][SBLIMIT],
+               double ltmin[2][SBLIMIT], frame_info * frame, options *glopts)
+{
+  static char init = 0;
+  static int off[2] = { 256, 256 };
+  int nch = frame->nch;
+  int sblimit = frame->sblimit;
+  int ch, n, pos;
+
+  FLOAT sample[BLKSIZE];
+  FLOAT energy[BLKSIZE];
+  FLOAT power[HBLKSIZE];
+  FLOAT Xtm[HBLKSIZE], Xnm[HBLKSIZE];
+  FLOAT LTg[HBLKSIZE];
+  int tonelabel[HBLKSIZE], noiselabel[HBLKSIZE];
+  double Lsb[SBLIMIT];
+
+  /* psycho_3_init reads the global header, so set it first */
+  header = frame->header;
+
+  if (init == 0) {
+    psycho_3_init(glopts);
+    init++;
+  }
+
+  for (ch = 0; ch < nch; ch++) {
+    /* Append this frame's samples to the 1408 entry circular buffer */
+    pos = off[ch] % 1408;
+    for (n = 0; n < 1152; n++) {
+      fft_buf[ch][pos] = (FLOAT) buffer[ch][n] / SCALE;
+      pos++;
+      if (pos >= 1408)
+        pos = 0;
+    }
+
+    /* Read the analysis window back out, wrapping around the buffer end */
+    pos = (off[ch] + 1216) % 1408;
+    for (n = 0; n < BLKSIZE; n++) {
+      sample[n] = fft_buf[ch][pos];
+      pos++;
+      if (pos >= 1408)
+        pos = 0;
+    }
+
+    /* Move the write position on for the next frame */
+    off[ch] = (off[ch] + 1152) % 1408;
+
+    /* Run the steps of the model on this window */
+    psycho_3_fft(sample, energy);
+    psycho_3_powerdensityspectrum(energy, power);
+    psycho_3_spl(Lsb, power, &scale[ch][0]);
+    psycho_3_tonal_label (power, tonelabel, Xtm);
+    psycho_3_noise_label (power, energy, tonelabel, noiselabel, Xnm);
+    if (glopts->verbosity > 20)
+      psycho_3_dump(tonelabel, Xtm, noiselabel, Xnm);
+    psycho_3_decimation(ath, tonelabel, Xtm, noiselabel, Xnm, bark);
+    psycho_3_threshold(LTg, tonelabel, Xtm, noiselabel, Xnm, bark, ath, bitrate[header->version][header->bitrate_index] / nch, freq_subset);
+    psycho_3_minimummasking(LTg, &ltmin[ch][0], freq_subset);
+    psycho_3_smr(&ltmin[ch][0], Lsb);
+  }
+}
+
+/* ISO11172 Sec D.1 Step 1 - Window with HANN and then perform the FFT
+   sample - the BLKSIZE input samples
+   energy - output, filled in by psycho_1_fft
+   The window coefficients are computed on the first call only and kept in
+   a static buffer for all later calls. */
+void psycho_3_fft(FLOAT sample[BLKSIZE], FLOAT energy[BLKSIZE])
+{
+  static FLOAT *window;
+  static int init = 0;
+  FLOAT x_real[BLKSIZE];
+  int n;
+
+  if (init == 0) {
+    /* First call: work out the (scaled) hann window coefficients */
+    register FLOAT sqrt_8_over_3 = pow (8.0 / 3.0, 0.5);
+    window = (FLOAT *) mem_alloc (sizeof (DFFT), "window");
+    for (n = 0; n < BLKSIZE; n++)
+      window[n] = sqrt_8_over_3 * 0.5 * (1 - cos (2.0 * PI * n / (BLKSIZE))) / BLKSIZE;
+    init++;
+  }
+
+  /* multiply each sample by its window coefficient */
+  for (n = 0; n < BLKSIZE; n++)
+    x_real[n] = (FLOAT) (sample[n] * window[n]);
+
+  /* and transform the windowed block */
+  psycho_1_fft (x_real, energy, BLKSIZE);
+}
+
+/* Sect D.1 Step 1 - convert the energies into dB
+   energy - input energies, one per spectral line
+   power  - output: 10*log10(energy) + POWERNORM for every line from 0 to
+            HBLKSIZE-1.  Energies below 1E-20 are floored at -200 dB
+            (before the POWERNORM offset) so that log10 is never handed
+            zero.
+   Line 0 is converted as well: the tonal labelling step compares lines 1
+   and 2 against power[0], so it must not be left uninitialised. */
+void psycho_3_powerdensityspectrum(FLOAT energy[BLKSIZE], FLOAT power[HBLKSIZE]) {
+  int i;
+  for (i=0;i<HBLKSIZE;i++) {
+    if (energy[i] < 1E-20)
+      power[i] = -200.0 + POWERNORM;
+    else
+      power[i] = 10 * log10 (energy[i]) + POWERNORM;
+  }
+}
+
+/* Sect D.1 Step 2 - Determine the sound pressure level in each subband
+   Lsb   - output, one level per subband (SBLIMIT entries)
+   power - the power spectrum in dB; lines 1 to HBLKSIZE-1 are examined
+   scale - one scale value per subband (SBLIMIT entries)
+   Each subband's level is the larger of the biggest spectral line that
+   falls in it and the level worked out from the subband's scale value. */
+void psycho_3_spl(double *Lsb, FLOAT *power, double *scale) {
+  int i;
+  FLOAT Xmax[SBLIMIT];
+
+  for (i=0;i<SBLIMIT;i++) {
+    Xmax[i] = DBMIN;
+  }
+  /* Find the maximum SPL in the power spectrum.
+     Spectral lines map onto subbands 16 at a time (i>>4). */
+  for (i=1;i<HBLKSIZE;i++) {
+    int index = i>>4;
+    /* The topmost line would map one past the last subband (with the
+       usual HBLKSIZE of 513 and SBLIMIT of 32, line 512 gives index 32),
+       which is outside Xmax[].  Count it in the last subband instead. */
+    if (index >= SBLIMIT)
+      index = SBLIMIT - 1;
+    if (Xmax[index] < power[i])
+      Xmax[index] = power[i];
+  }
+
+  /* Compare it to the sound pressure based upon the scale for this subband
+     and pick the maximum one */
+  for (i=0;i<SBLIMIT;i++) {
+    double val = 20 * log10 (scale[i] * 32768) - 10;
+    Lsb[i] = MAX(Xmax[i], val);
+  }
+}
+
+/* Sect D.1 Step 4 Label the Tonal Components
+   power     - the power spectrum in dB.  It is modified: the lines around
+               every accepted tone are overwritten with DBMIN by
+               psycho_3_tonal_label_range.
+   tonelabel - output: TONE for each line accepted as a tone, otherwise 0
+   Xtm       - output: the level of each tone, DBMIN everywhere else */
+void psycho_3_tonal_label (FLOAT power[HBLKSIZE], int *tonelabel, FLOAT Xtm[HBLKSIZE])
+{
+  int line;
+  int maxima[HBLKSIZE];
+
+  /* Nothing is a tone until proven otherwise */
+  for (line = 0; line < HBLKSIZE; line++) {
+    tonelabel[line] = 0;
+    Xtm[line] = DBMIN;
+  }
+
+  /* Find the maxima as per ISO11172 D.1.4.a : a line that is greater than
+     both of its immediate neighbours.  The two end lines only have one
+     neighbour each, so they can never qualify. */
+  maxima[0] = 0;
+  maxima[HBLKSIZE-1] = 0;
+  for (line = 1; line < HBLKSIZE-1; line++)
+    maxima[line] = (power[line]>power[line-1] && power[line]>power[line+1]) ? 1 : 0;
+
+  /* Now find the tones as per ISO11172 D.1 Step4.b
+     The standard is a bit vague, so the assumptions made here are
+     - a tone must be 7dB greater than *all* the relevant neighbours
+     - once a tone is found, the neighbours are immediately set to -inf dB
+     The width of the neighbourhood grows with the spectral line number. */
+  psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 2, 63, 2);
+  psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 63,127,3);
+  psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 127,255,6);
+  psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 255,500,12);
+}
+
+/* Sect D.1 Step4b
+   Examine every local maximum on the lines start .. end-1.  A maximum is a
+   tone when it is at least 7.0 dB above every line that is between 2 and
+   srange lines away from it, on both sides (the immediate neighbours are
+   not compared).
+   For each tone found: tonelabel[k] becomes TONE, Xtm[k] becomes the dB sum
+   of the line and its two immediate neighbours, and power[k-srange] ..
+   power[k+srange] are overwritten with DBMIN.
+   A maximum that fails the test gets tonelabel[k] = 0.  Lines that are not
+   maxima are left untouched. */
+void psycho_3_tonal_label_range(FLOAT *power, int *tonelabel, int *maxima, FLOAT *Xtm, int start, int end, int srange) {
+  int k, dist;
+
+  for (k = start; k < end; k++) {
+    int is_tone = 1; /* assume it's a tone and then prove otherwise */
+
+    if (maxima[k] != 1)
+      continue;
+
+    /* Look at the lines 2..srange away on either side */
+    for (dist = 2; dist <= srange; dist++) {
+      if ((power[k] - power[k-dist]) < 7.0 || (power[k] - power[k+dist]) < 7.0) {
+        is_tone = 0; /* Not greater by 7dB, therefore not a tone */
+        break;
+      }
+    }
+
+    if (!is_tone) {
+      tonelabel[k] = 0;
+      continue;
+    }
+    tonelabel[k] = TONE;
+
+    /* The sound pressure level of the tone is the sum of the line and its
+       two adjacent lines, i.e.
+       Xtm[k] = 10 * log10( pow(10.0, 0.1*power[k-1]) + pow(10.0, 0.1*power[k])
+                            + pow(10.0, 0.1*power[k+1]) );
+       done here with the dB adding table */
+    Xtm[k] = psycho_3_add_db(psycho_3_add_db(power[k-1], power[k]), power[k+1]);
+
+    /* *ALL* spectral lines within +/- srange are set to -inf dB
+       so that they are not counted when the noise is worked out */
+    for (dist = -srange; dist <= srange; dist++)
+      power[k+dist] = DBMIN;
+  }
+}
+
+/* Fill in the table used by psycho_3_add_db.
+   Entry n holds the correction, in dB, to add to the larger of two levels
+   that are n/10 dB apart in order to get the level of their sum. */
+void psycho_3_init_add_db (void)
+{
+  int slot = 0;
+
+  while (slot < DBTAB) {
+    double x = (double) slot / 10.0; /* level difference in dB */
+    dbtable[slot] = 10 * log10 (1 + pow (10.0, x / 10.0)) - x;
+    slot++;
+  }
+}
+
+/* D.1 Step 4.c Labelling non-tonal (noise) components
+   Sum the energies in each critical band (the tone energies have been removed
+   during the tone labelling) and place one noise component per band.
+   The component is placed at the energy weighted mean line of the band.
+   power      - power spectrum in dB, with the lines belonging to tones
+                already set to DBMIN
+   energy     - the energies that the power spectrum was worked out from
+   tonelabel  - not used by this function
+   noiselabel - output: NOISE on the one line per critical band that
+                carries the noise component, 0 on every other line
+   Xnm        - output: the summed noise level on that line, DBMIN on every
+                other line */
+void psycho_3_noise_label (FLOAT power[HBLKSIZE], FLOAT energy[BLKSIZE], int *tonelabel, int *noiselabel, FLOAT Xnm[HBLKSIZE]) {
+  int i,j;
+
+  /* Clear the outputs for every spectral line first.  The caller hands in
+     arrays that have not been initialised, and only one line per band is
+     labelled below, so anything not cleared here would be read back as
+     garbage by the later steps. */
+  for (j=0;j<HBLKSIZE;j++) {
+    Xnm[j] = DBMIN;
+    noiselabel[j] = 0;
+  }
+
+  for (i=0;i<cbands;i++) {
+    /* for each critical band */
+    double sum = DBMIN;
+    double esum=0;
+    double centreweight = 0;
+    int centre;
+    for (j=cbandindex[i]; j<cbandindex[i+1]; j++) {
+      /* go through all the spectral lines within the critical band,
+         adding the energies. The tone energies have already been removed */
+      if (power[j] != DBMIN) {
+        /* Found a noise energy, add it to the sum */
+        sum = psycho_3_add_db(power[j], sum);
+
+        /* calculations for the mean position
+           FIXME MFC Feb 2003: Would it just be easier to
+           do the *whole* of psycho_1 in the energy domain rather than
+           in the dB domain?
+           FIXME: The standard asks for the geometric mean; this is just
+           an arithmetic mean. Don't know if it's really going to make
+           that much difference */
+        esum += energy[j]; /* Calculate the sum of energies */
+        centreweight += (j - cbandindex[i]) * energy[j]; /* And the energy moment */
+      }
+    }
+
+    if (sum<=DBMIN || !(esum > 0.0))
+      /* If the energy sum is really small, just pretend the noise occurs
+         in the centre frequency line.
+         The esum test matters for silent input: every energy in the band
+         can be exactly zero while sum is still above DBMIN, and then
+         centreweight/esum is 0/0.  Converting that to an int is undefined
+         and would give a wild index into Xnm[] and noiselabel[]. */
+      centre = (cbandindex[i] + cbandindex[i+1])/2;
+    else
+      /* Otherwise, work out the mean position of the noise, and put it there. */
+      centre = cbandindex[i] + (int)(centreweight/esum);
+
+    Xnm[centre] = sum;
+    noiselabel[centre] = NOISE;
+  }
+}
+
+/* ISO11172 D.1 Step 5
+   Get rid of noise/tones that aren't greater than the ATH.
+   A component whose level is below ath[] on its own spectral line has its
+   label reset to 0 and its level reset to DBMIN.
+   The standard also says that when two tones are within 0.5 bark the one
+   with the lower energy should be deleted.  That part is not implemented,
+   so the bark argument is not used. */
+void psycho_3_decimation(FLOAT *ath, int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm, FLOAT *bark) {
+  int line;
+
+  /* Delete components which aren't above the ATH */
+  for (line = 1; line < HBLKSIZE; line++) {
+    if (noiselabel[line] == NOISE && Xnm[line] < ath[line]) {
+      /* this noise masker isn't above the ATH : delete it */
+      Xnm[line] = DBMIN;
+      noiselabel[line] = 0;
+    }
+    if (tonelabel[line] == TONE && Xtm[line] < ath[line]) {
+      /* neither is this tone */
+      Xtm[line] = DBMIN;
+      tonelabel[line] = 0;
+    }
+  }
+
+  /* Search for tones that are within 0.5 bark */
+  /* MFC FIXME Feb 2003: haven't done this yet */
+}
+
+/* Helper for psycho_3_threshold.
+   Add the masking caused by a single masker into LT[0..SUBSIZE-1].
+   level     - the masker's level in dB
+   z         - the masker's bark value
+   av_slope, av_offset - the two constants of the masking index that differ
+               between tonal and noise maskers
+   Only the subsampled lines lying from 3 bark below to (just under) 8 bark
+   above the masker are affected. */
+static void psycho_3_threshold_spread(FLOAT *LT, FLOAT level, FLOAT z, double av_slope, double av_offset, FLOAT *bark, int *freq_subset) {
+  int j;
+
+  for (j = 0; j < SUBSIZE; j++) {
+    FLOAT dz = bark[freq_subset[j]] - z;
+    FLOAT av, vf;
+
+    if (!(dz >= -3.0 && dz < 8.0))
+      continue; /* too far away to be masked by this one */
+
+    /* masking index */
+    av = -1.525 - av_slope * z - av_offset + level;
+
+    /* masking function for lower & upper slopes */
+    if (dz >= 1)
+      vf = -(dz - 1) * (17 - 0.15 * level) - 17;
+    else if (dz >= 0)
+      vf = (-17 * dz);
+    else if (dz >= -1)
+      vf = (0.4 * level + 6) * dz;
+    else
+      vf = 17 * (dz + 1) - (0.4 * level + 6);
+
+    LT[j] = psycho_3_add_db (LT[j], av + vf);
+  }
+}
+
+/* ISO11172 Sect D.1 Step 6
+   Calculation of individual masking thresholds
+   Work out how each of the tones & noises masks other frequencies
+   NOTE: Only a subset of other frequencies is checked. According to the
+   standard different subbands are subsampled to different amounts.
+   See psycho_3_init and freq_subset
+   LTg       - output: the global masking threshold for each of the SUBSIZE
+               subsampled lines
+   tonelabel, Xtm   - the tonal maskers and their levels
+   noiselabel, Xnm  - the noise maskers and their levels
+   bark, ath - bark value and ATH of each spectral line
+   bit_rate  - below 96 the ATH is added in as it is, otherwise it is
+               lowered by 12 dB first
+   freq_subset - the spectral line index of each subsampled line */
+void psycho_3_threshold(FLOAT *LTg, int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm, FLOAT *bark, FLOAT *ath, int bit_rate, int *freq_subset) {
+  int n, line;
+  FLOAT LTtm[SUBSIZE];
+  FLOAT LTnm[SUBSIZE];
+
+  for (n = 0; n < SUBSIZE; n++)
+    LTtm[n] = LTnm[n] = DBMIN;
+
+  /* Loop over the entire spectrum and find every noise and tone
+     And then with each noise/tone work out how it masks
+     the spectral lines around it */
+  for (line = 1; line < HBLKSIZE; line++) {
+    if (tonelabel[line] == TONE)
+      psycho_3_threshold_spread(LTtm, Xtm[line], bark[line], 0.275, 4.5, bark, freq_subset);
+    if (noiselabel[line] == NOISE)
+      psycho_3_threshold_spread(LTnm, Xnm[line], bark[line], 0.175, 0.5, bark, freq_subset);
+  }
+
+  /* ISO11172 D.1 Step 7
+     Calculate the global masking threshold : the noise and tone thresholds
+     added together, and then the threshold in quiet added to that */
+  for (n = 0; n < SUBSIZE; n++) {
+    LTg[n] = psycho_3_add_db(LTnm[n], LTtm[n]);
+    if (bit_rate >= 96)
+      LTg[n] = psycho_3_add_db(ath[freq_subset[n]]-12.0, LTg[n]);
+    else
+      LTg[n] = psycho_3_add_db(ath[freq_subset[n]], LTg[n]);
+  }
+}
+
+/* ISO11172 Sec D.1 Step 8 - Find the minimum LTg for each subband.
+   LTg         - the global masking threshold of each subsampled line
+   LTmin       - output: for each subband, the smallest LTg among the
+                 subsampled lines that fall in it (16 lines per subband)
+   freq_subset - the spectral line index of each subsampled line */
+void psycho_3_minimummasking(FLOAT *LTg, double *LTmin, int *freq_subset) {
+  int sb, n;
+
+  /* start well above any real threshold */
+  for (sb = 0; sb < SBLIMIT; sb++)
+    LTmin[sb] = 999999.9;
+
+  for (n = 0; n < SUBSIZE; n++) {
+    sb = freq_subset[n]>>4;
+    if (LTmin[sb] > LTg[n])
+      LTmin[sb] = LTg[n];
+  }
+}
+
+/* ISO11172 Sect D.1 Step 9
+   Calculate the signal-to-mask ratio
+   On entry LTmin[] holds the minimum masking threshold of each subband.
+   On return each entry has been replaced, in place, with Lsb[] minus that
+   threshold.
+   MFC FIXME Feb 2003 for better calling from
+   toolame, add a "float SMR[]" array and return it */
+void psycho_3_smr(double *LTmin, double *Lsb) {
+  int sb = 0;
+
+  while (sb < SBLIMIT) {
+    LTmin[sb] = Lsb[sb] - LTmin[sb];
+    sb++;
+  }
+}
+
+/* One-time set up of the tables used by this psychoacoustic model.
+   The sampling frequency is read through the global header pointer, so
+   that must be set before this is called (psycho_3 does so).
+   glopts supplies athlevel, which is handed to ATH_dB, and verbosity:
+   the tables are printed on stdout when it is greater than 4.
+   Sets up fft_buf, the dB adding table, bark[] and ath[], the critical
+   band tables (cbands, cbandindex[], partition[], numlines[], cbval[])
+   and freq_subset[].
+   Exits the program if the critical band tables cannot be allocated. */
+void psycho_3_init(options *glopts) {
+  int i;
+  int cbase = 0; /* current base index for the bark range calculation */
+  FLOAT sfreq;
+
+  fft_buf = (D1408 *) mem_alloc ((long) sizeof (D1408) * 2, "fft_buf");
+
+  /* Initialise the tables for the adding dB */
+  psycho_3_init_add_db();
+
+  /* For each spectral line calculate the bark and the ATH (in dB) */
+  sfreq = (FLOAT) s_freq[header->version][header->sampling_frequency] * 1000;
+  for (i=1;i<HBLKSIZE; i++) {
+    FLOAT freq = i * sfreq/BLKSIZE;
+    bark[i] = freq2bark(freq);
+    ath[i] = ATH_dB(freq,glopts->athlevel);
+  }
+
+  { /* Work out the critical bands
+       Starting from line 0, all lines within 1 bark of the starting
+       bark are added to the same critical band. When a line is greater
+       by 1.0 of a bark, start a new critical band. */
+
+    /* Size each table from the type it really holds.  cbval is an array
+       of FLOAT, which need not be the same size as a float. */
+    numlines = calloc(HBLKSIZE, sizeof *numlines);
+    cbval = calloc(HBLKSIZE, sizeof *cbval);
+    if (numlines == NULL || cbval == NULL) {
+      fprintf(stderr,"psycho_3_init: unable to allocate the critical band tables\n");
+      exit(1);
+    }
+
+    cbandindex[0] = 1;
+    for (i=1;i<HBLKSIZE;i++) {
+      /* A new band is only started while there is still room for it in
+         cbandindex[], bearing in mind that one more entry is needed after
+         this loop for the top of the last band.  Should the room ever run
+         out, the remaining lines simply join the last band. */
+      if ((bark[i] - bark[cbase]) > 1.0 && cbands < CRITBANDMAX - 2) { /* 1 critical band? 1 bark? */
+        /* this frequency line is too different from the starting line,
+           (in terms of the bark distance)
+           so make this spectral line the first member of the next critical band */
+        cbase = i; /* Start the new critical band from this frequency line */
+        cbands++;
+        cbandindex[cbands] = cbase;
+      }
+      /* partition[i] tells us which critical band the i'th frequency line is in */
+      partition[i] = cbands;
+      /* keep a count of how many frequency lines are in each partition */
+      numlines[cbands]++;
+    }
+
+    cbands++;
+    cbandindex[cbands] = HBLKSIZE; /* Set the top of the last critical band:
+                                      one past the last spectral line */
+
+    /* For each critical band calculate the average bark value
+       cbval [central bark value] */
+    for (i=1;i<HBLKSIZE;i++)
+      cbval[partition[i]] += bark[i]; /* sum up all the bark values */
+    /* Every band from 0 to cbands-1 has to be averaged, the first one
+       included.  Entries past the last band were zeroed by calloc. */
+    for (i=0;i<cbands;i++) {
+      if (numlines[i] != 0)
+        cbval[i] /= numlines[i]; /* divide by the number of values */
+      else {
+        cbval[i]=0; /* this isn't a partition */
+      }
+    }
+  }
+
+  {
+    /* For Step6 - For the calculation of individual masking thresholds
+       the spectral lines are subsampled
+       i.e. no need to work out the masking for every single spectral line.
+       Depending upon which subband the calculation is for, you
+       can skip a number of lines
+       There are 16 lines per subband -> 32 * 16 = 512
+       Subband 0-2   : Every line        (3 * 16   = 48 lines)
+       Subband 3-5   : Every Second line (3 * 16/2 = 24 lines)
+       Subband 6-11  : Every 4th line    (6 * 16/4 = 24 lines)
+       Subband 12-31 : Every 8th line    (20 * 16/8 = 40 lines)
+       which comes to 48 + 24 + 24 + 40 = 136 lines, i.e. SUBSIZE
+
+       create this subset of frequencies (freq_subset) */
+    int freq_index=0;
+    for (i=1;i<(3*16)+1;i++)
+      freq_subset[freq_index++] = i;
+    for (;i<(6*16)+1;i+=2)
+      freq_subset[freq_index++] = i;
+    for (;i<(12*16)+1;i+=4)
+      freq_subset[freq_index++] = i;
+    for (;i<(32*16)+1;i+=8)
+      freq_subset[freq_index++] = i;
+  }
+
+  if (glopts->verbosity > 4) {
+    fprintf(stdout,"%i critical bands\n",cbands);
+    for (i=0;i<cbands;i++)
+      fprintf(stdout,"cband %i spectral line index %i\n",i,cbandindex[i]);
+    fprintf(stdout,"%i Subsampled spectral lines\n",SUBSIZE);
+    for (i=0;i<SUBSIZE;i++)
+      fprintf(stdout,"%i Spectral line %i Bark %.2f\n",i,freq_subset[i], bark[freq_subset[i]]);
+  }
+}
+
+/* Debugging aid: print the labelled components on stdout.
+   One line lists every spectral line labelled TONE together with its Xtm
+   level, and a second line lists every spectral line labelled NOISE
+   together with its Xnm level.  Lines 1 to HAN_SIZE-1 are looked at. */
+void psycho_3_dump(int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm) {
+  int line;
+
+  /* the tones */
+  fprintf(stdout,"3 Ton:");
+  for (line = 1; line < HAN_SIZE; line++)
+    if (tonelabel[line] == TONE)
+      fprintf(stdout,"[%i] %3.0f ",line,Xtm[line]);
+  fprintf(stdout,"\n");
+
+  /* the noises */
+  fprintf(stdout,"3 Nos:");
+  for (line = 1; line < HAN_SIZE; line++)
+    if (noiselabel[line] == NOISE)
+      fprintf(stdout,"[%i] %3.0f ",line,Xnm[line]);
+  fprintf(stdout,"\n");
+}