diff options
Diffstat (limited to 'src/FIRFilter.cpp')
| -rw-r--r-- | src/FIRFilter.cpp | 311 | 
1 files changed, 311 insertions, 0 deletions
| diff --git a/src/FIRFilter.cpp b/src/FIRFilter.cpp new file mode 100644 index 0000000..2ba4294 --- /dev/null +++ b/src/FIRFilter.cpp @@ -0,0 +1,311 @@ +/* +   Copyright (C) 2007, 2008, 2009, 2010, 2011 Her Majesty the Queen in +   Right of Canada (Communications Research Center Canada) + +   Written by +   2012, Matthias P. Braendli, matthias.braendli@mpb.li + +   This block implements a FIR filter. The real filter taps are given +   as floats, and the block can take advantage of SSE. +   For better performance, filtering is done in another thread, leading +   to a pipeline delay of two calls to FIRFilter::process + */ +/* +   This file is part of CRC-DADMOD. + +   CRC-DADMOD is free software: you can redistribute it and/or modify +   it under the terms of the GNU General Public License as +   published by the Free Software Foundation, either version 3 of the +   License, or (at your option) any later version. + +   CRC-DADMOD is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +   GNU General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with CRC-DADMOD.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include "FIRFilter.h" +#include "PcDebug.h" + +#include <stdio.h> +#include <stdexcept> + +#include <iostream> +#include <fstream> + +#ifdef __SSE__ +#   include <xmmintrin.h> +#endif + + +#include <sys/time.h> + +void FIRFilterWorker::process(struct FIRFilterWorkerData *fwd) +{ +    size_t i; +    struct timespec time_start; +    struct timespec time_end; + +    // This thread creates the dataOut buffer, and deletes +    // the incoming buffer + +    while(running) { +        Buffer* dataIn; +        fwd->input_queue.wait_and_pop(dataIn); + +        Buffer* dataOut; +        dataOut = new Buffer(); +        dataOut->setLength(dataIn->getLength()); + +        PDEBUG("FIRFilterWorker: dataIn->getLength() %d\n", dataIn->getLength()); + +#if __SSE__ +        // The SSE accelerated version cannot work on the complex values, +        // it is necessary to do the convolution on the real and imaginary +        // parts separately. Thankfully, the taps are real, simplifying the +        // procedure. + +        const float* in = reinterpret_cast<const float*>(dataIn->getData()); +        float* out      = reinterpret_cast<float*>(dataOut->getData()); +        size_t sizeIn   = dataIn->getLength() / sizeof(float); + +        if ((uintptr_t)(&out[0]) % 16 != 0) { +            fprintf(stderr, "FIRFilterWorker: out not aligned %p ", out); +            throw std::runtime_error("FIRFilterWorker: out not aligned"); +        } +             +        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &time_start); + +        __m128 SSEout; +        __m128 SSEtaps; +        __m128 SSEin; +        for (i = 0; i < sizeIn - 2*fwd->n_taps; i += 4) { +            SSEout = _mm_setr_ps(0,0,0,0); + +            for (int j = 0; j < fwd->n_taps; j++) { +                if ((uintptr_t)(&in[i+2*j]) % 16 == 0) { +                    SSEin = _mm_load_ps(&in[i+2*j]); //faster when aligned +                } +                else { +                    SSEin = _mm_loadu_ps(&in[i+2*j]); +                } + +                SSEtaps = _mm_load1_ps(&fwd->taps[j]); + +                SSEout = _mm_add_ps(SSEout, _mm_mul_ps(SSEin, SSEtaps)); +            } +            _mm_store_ps(&out[i], SSEout); +        } + +        for (; i < sizeIn; i++) { +            out[i] = 0.0; +            for (int j = 0; i+2*j < sizeIn; j++) { +                out[i] += in[i+2*j] * fwd->taps[j]; +            } +        } +        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &time_end); + +#else +        // No SSE ? Loop unrolling should make this faster. As for the SSE, +        // the real and imaginary parts are calculated separately. +        const float* in = reinterpret_cast<const float*>(dataIn->getData()); +        float* out      = reinterpret_cast<float*>(dataOut->getData()); +        size_t sizeIn   = dataIn->getLength() / sizeof(float); + +        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &time_start); + +        // Convolve by aligning both frame and taps at zero. +        for (i = 0; i < sizeIn - 2*fwd->n_taps; i += 4) { +            out[i]    = 0.0; +            out[i+1]  = 0.0; +            out[i+2]  = 0.0; +            out[i+3]  = 0.0; + +            for (int j = 0; j < fwd->n_taps; j++) { +                out[i]   += in[i   + 2*j] * fwd->taps[j]; +                out[i+1] += in[i+1 + 2*j] * fwd->taps[j]; +                out[i+2] += in[i+2 + 2*j] * fwd->taps[j]; +                out[i+3] += in[i+3 + 2*j] * fwd->taps[j]; +            } +        } + +        // At the end of the frame, we cut the convolution off. +        // The beginning of the next frame starts with a NULL symbol +        // anyway. +        for (; i < sizeIn; i++) { +            out[i] = 0.0; +            for (int j = 0; i+2*j < sizeIn; j++) { +                out[i] += in[i+2*j] * fwd->taps[j]; +            } +        } + +        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &time_end); + + +#endif + +        // The following implementations are for debugging only. +#if 0 +        // Same thing as above, without loop unrolling. For debugging. +        const float* in = reinterpret_cast<const float*>(dataIn->getData()); +        float* out      = reinterpret_cast<float*>(dataOut->getData()); +        size_t sizeIn   = dataIn->getLength() / sizeof(float); + +        for (i = 0; i < sizeIn - 2*fwd->n_taps; i += 1) { +            out[i]  = 0.0; + +            for (int j = 0; j < fwd->n_taps; j++) { +                out[i]  += in[i+2*j] * fwd->taps[j]; +            } +        } + +        for (; i < sizeIn; i++) { +            out[i] = 0.0; +            for (int j = 0; i+2*j < sizeIn; j++) { +                out[i] += in[i+2*j] * fwd->taps[j]; +            } +        } + +#elif 0 +        // An unrolled loop, but this time, the input data is cast to complex float. +        // Makes indices more natural. For debugging. +        const complexf* in = reinterpret_cast<const complexf*>(dataIn->getData()); +        complexf* out      = reinterpret_cast<complexf*>(dataOut->getData()); +        size_t sizeIn      = dataIn->getLength() / sizeof(complexf); + +        for (i = 0; i < sizeIn - fwd->n_taps; i += 4) { +            out[i]   = 0.0; +            out[i+1] = 0.0; +            out[i+2] = 0.0; +            out[i+3] = 0.0; + +            for (int j = 0; j < fwd->n_taps; j++) { +                out[i]   += in[i+j  ] * fwd->taps[j]; +                out[i+1] += in[i+1+j] * fwd->taps[j]; +                out[i+2] += in[i+2+j] * fwd->taps[j]; +                out[i+3] += in[i+3+j] * fwd->taps[j]; +            } +        } + +        for (; i < sizeIn; i++) { +            out[i] = 0.0; +            for (int j = 0; j+i < sizeIn; j++) { +                out[i] += in[i+j] * fwd->taps[j]; +            } +        } + +#elif 0 +        // Simple implementation. Slow. For debugging. +        const complexf* in = reinterpret_cast<const complexf*>(dataIn->getData()); +        complexf* out      = reinterpret_cast<complexf*>(dataOut->getData()); +        size_t sizeIn      = dataIn->getLength() / sizeof(complexf); + +        for (i = 0; i < sizeIn - fwd->n_taps; i += 1) { +            out[i]   = 0.0; + +            for (int j = 0; j < fwd->n_taps; j++) { +                out[i]  += in[i+j  ] * fwd->taps[j]; +            } +        } + +        for (; i < sizeIn; i++) { +            out[i] = 0.0; +            for (int j = 0; j+i < sizeIn; j++) { +                out[i] += in[i+j] * fwd->taps[j]; +            } +        } +#endif +         +        calculationTime += (time_end.tv_sec - time_start.tv_sec) * 1000000000L + +            time_end.tv_nsec - time_start.tv_nsec; +        fwd->output_queue.push(dataOut); +        delete dataIn; +    } +} + + +FIRFilter::FIRFilter(char* taps_file) : +    ModCodec(ModFormat(sizeof(complexf)), ModFormat(sizeof(complexf))) +{ +    PDEBUG("FIRFilter::FIRFilter(%s) @ %p\n", +            taps_file, this); + +    number_of_runs = 0; + +    std::ifstream taps_fstream(taps_file); +    if(!taps_fstream) {  +        fprintf(stderr, "FIRFilter: file %s could not be opened !\n", taps_file); +        throw std::runtime_error("FIRFilter: Could not open file with taps! "); +    } +    int n_taps; +    taps_fstream >> n_taps; + +    my_Ntaps = n_taps; + +    fprintf(stderr, "FIRFilter: Reading %d taps...\n", my_Ntaps); + +    myFilter = new float[my_Ntaps]; + +    int n; +    for (n = 0; n < n_taps; n++) { +        taps_fstream >> myFilter[n]; +        PDEBUG("FIRFilter: tap: %f\n",  myFilter[n] ); +        if (taps_fstream.eof()) { +            fprintf(stderr, "FIRFilter: file %s should contains %d taps, but EOF reached "\ +                    "after %d taps !\n", taps_file, n_taps, n); +            throw std::runtime_error("FIRFilter: filtertaps file invalid ! "); +        } +    } + +    firwd.taps = myFilter; +    firwd.n_taps = my_Ntaps; + +    PDEBUG("FIRFilter: Starting worker\n" ); +    worker.start(&firwd); +} + + +FIRFilter::~FIRFilter() +{ +    PDEBUG("FIRFilter::~FIRFilter() @ %p\n", this); + +    worker.stop(); + +    if (myFilter != NULL) { +        delete[] myFilter; +    } +} + + +int FIRFilter::process(Buffer* const dataIn, Buffer* dataOut) +{ +    PDEBUG("FIRFilter::process(dataIn: %p, dataOut: %p)\n", +            dataIn, dataOut); + +    // This thread creates the dataIn buffer, and deletes +    // the outgoing buffer + +    Buffer* inbuffer = new Buffer(dataIn->getLength(), dataIn->getData()); + +    firwd.input_queue.push(inbuffer); + +    if (number_of_runs > 2) { +        Buffer* outbuffer; +        firwd.output_queue.wait_and_pop(outbuffer); + +        dataOut->setData(outbuffer->getData(), outbuffer->getLength()); + +        delete outbuffer; +    } +    else { +        dataOut->setLength(dataIn->getLength()); +        memset(dataOut->getData(), 0, dataOut->getLength()); +        number_of_runs++; +    } + +    return dataOut->getLength(); + +} | 
