summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matthias P. Braendli <matthias.braendli@mpb.li> 2016-12-11 15:49:15 +0100
committer: Matthias P. Braendli <matthias.braendli@mpb.li> 2016-12-11 15:49:15 +0100
commit: e181b4b9fed8d811a3d9b22d249a1b939ed9f3f8 (patch)
tree: a8cc562c27c875de59599c3af321ddef744e2cb8
parent: e17bed6d75bdcdfd949e84217143096bd626ec95 (diff)
download: dabmod-e181b4b9fed8d811a3d9b22d249a1b939ed9f3f8.tar.gz
dabmod-e181b4b9fed8d811a3d9b22d249a1b939ed9f3f8.tar.bz2
dabmod-e181b4b9fed8d811a3d9b22d249a1b939ed9f3f8.zip
Remove unused AVX implementation of FIRFilter
-rw-r--r-- src/FIRFilter.cpp 74
1 files changed, 3 insertions, 71 deletions
diff --git a/src/FIRFilter.cpp b/src/FIRFilter.cpp
index 77e2305..86f558b 100644
--- a/src/FIRFilter.cpp
+++ b/src/FIRFilter.cpp
@@ -38,12 +38,8 @@
#include <fstream>
#include <memory>
-#ifdef __AVX__
-# include <immintrin.h>
-#else
-# ifdef __SSE__
-# include <xmmintrin.h>
-# endif
+#ifdef __SSE__
+# include <xmmintrin.h>
#endif
using namespace std;
@@ -71,67 +67,7 @@ void FIRFilterWorker::process(struct FIRFilterWorkerData *fwd)
PDEBUG("FIRFilterWorker: dataIn->getLength() %zu\n", dataIn->getLength());
-#if __AVX__
-#define _mm256_load1_ps(x) _mm256_set_ps(x, x, x, x, x, x, x, x)
-#warning FIRFilter uses experimental AVX code
-
- // The AVX accelerated version cannot work on the complex values,
- // it is necessary to do the convolution on the real and imaginary
- // parts separately. Thankfully, the taps are real, simplifying the
- // procedure.
- //
- // The AVX version is not enabled by default, because the performance
- // on my test machine (sandy bridge i7) is slightly worse with AVX than
- // with SSE. TODO: Try with Ivy Bridge or newer.
- //
- // Interesting links:
- // http://software.intel.com/en-us/forums/topic/283753
-
- const float* in = reinterpret_cast<const float*>(dataIn->getData());
- float* out = reinterpret_cast<float*>(dataOut->getData());
- size_t sizeIn = dataIn->getLength() / sizeof(float);
-
- if ((uintptr_t)(&out[0]) % 32 != 0) {
- fprintf(stderr, "FIRFilterWorker: out not aligned %p ", out);
- throw std::runtime_error("FIRFilterWorker: out not aligned");
- }
-
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &time_start);
-
- __m256 AVXout;
- __m256 AVXtaps;
- __m256 AVXin;
- {
- boost::mutex::scoped_lock lock(fwd->taps_mutex);
-
- for (i = 0; i < sizeIn - 2*fwd->taps.size(); i += 8) {
- AVXout = _mm256_setr_ps(0,0,0,0,0,0,0,0);
-
- for (size_t j = 0; j < fwd->taps.size; j++) {
- if ((uintptr_t)(&in[i+2*j]) % 32 == 0) {
- AVXin = _mm256_load_ps(&in[i+2*j]); //faster when aligned
- }
- else {
- AVXin = _mm256_loadu_ps(&in[i+2*j]);
- }
-
- AVXtaps = _mm256_load1_ps(fwd->taps[j]);
-
- AVXout = _mm256_add_ps(AVXout, _mm256_mul_ps(AVXin, AVXtaps));
- }
- _mm256_store_ps(&out[i], AVXout);
- }
-
- for (; i < sizeIn; i++) {
- out[i] = 0.0;
- for (int j = 0; i+2*j < sizeIn; j++) {
- out[i] += in[i+2*j] * fwd->taps[j];
- }
- }
- }
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &time_end);
-
-#elif __SSE__
+#if __SSE__
// The SSE accelerated version cannot work on the complex values,
// it is necessary to do the convolution on the real and imaginary
// parts separately. Thankfully, the taps are real, simplifying the
@@ -317,10 +253,6 @@ FIRFilter::FIRFilter(std::string& taps_file) :
load_filter_taps(myTapsFile);
-#if __AVX__
- fprintf(stderr, "FIRFilter: WARNING: using experimental AVX code !\n");
-#endif
-
PDEBUG("FIRFilter: Starting worker\n" );
worker.start(&firwd);
}