summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJosh Blum <josh@joshknows.com>2010-06-22 15:41:59 -0700
committerJosh Blum <josh@joshknows.com>2010-06-28 11:12:19 -0700
commit45dc078069798d14f33518033e58d9aa7ace99f1 (patch)
tree34ffe74addb173141d88bcba37027547eeaba9c3
parentf0005a27c3c0ff3148bab53eefdafcad799f03cc (diff)
downloaduhd-45dc078069798d14f33518033e58d9aa7ace99f1.tar.gz
uhd-45dc078069798d14f33518033e58d9aa7ace99f1.tar.bz2
uhd-45dc078069798d14f33518033e58d9aa7ace99f1.zip
uhd: implemented complex float <-> item32 conversion with sse2
-rw-r--r--host/lib/transport/CMakeLists.txt10
-rwxr-xr-xhost/lib/transport/gen_convert_types.py74
2 files changed, 84 insertions, 0 deletions
diff --git a/host/lib/transport/CMakeLists.txt b/host/lib/transport/CMakeLists.txt
index 872865d6c..a2bd17b01 100644
--- a/host/lib/transport/CMakeLists.txt
+++ b/host/lib/transport/CMakeLists.txt
@@ -18,6 +18,16 @@
#This file will be included by cmake, use absolute paths!
########################################################################
+# Check for SIMD headers
+########################################################################
+INCLUDE(CheckIncludeFileCXX)
+CHECK_INCLUDE_FILE_CXX(emmintrin.h HAVE_EMMINTRIN_H)
+
+IF(HAVE_EMMINTRIN_H)
+ ADD_DEFINITIONS(-DHAVE_EMMINTRIN_H)
+ENDIF(HAVE_EMMINTRIN_H)
+
+########################################################################
# Setup defines for interface address discovery
########################################################################
MESSAGE(STATUS "Configuring interface address discovery...")
diff --git a/host/lib/transport/gen_convert_types.py b/host/lib/transport/gen_convert_types.py
index e81bf7330..6b87bf134 100755
--- a/host/lib/transport/gen_convert_types.py
+++ b/host/lib/transport/gen_convert_types.py
@@ -30,6 +30,9 @@ TMPL_TEXT = """
\#include <stdexcept>
\#include <cstring>
\#include <complex>
+\#include <iostream>
+
+\#define USE_EMMINTRIN_H true
\#ifdef BOOST_BIG_ENDIAN
static const bool is_big_endian = true;
@@ -101,6 +104,39 @@ static UHD_INLINE void fc32_to_item32_nswap(
}
}
+\#if defined(HAVE_EMMINTRIN_H) && USE_EMMINTRIN_H
+\#include <emmintrin.h>
+
+/*!
+ * Convert a buffer of complex floats into byte-swapped item32 words (SSE2).
+ *
+ * Each sample is scaled by shorts_per_float, converted to two 16-bit
+ * integers, and stored with the bytes of each 16-bit half swapped.
+ * Four samples are handled per vector iteration; any samples left over
+ * (nsamps not a multiple of 4) go through the scalar helper.
+ *
+ * NOTE(review): the vector path reinterprets fc32_t as two consecutive
+ * floats and item32_t as a 32-bit word; both types are declared outside
+ * this hunk -- confirm. _mm_cvtps_epi32 rounds per the current MXCSR mode
+ * and _mm_packs_epi32 saturates, which may differ from the scalar
+ * fc32_to_item32 helper on half-way or out-of-range values.
+ *
+ * \param input pointer to nsamps complex float samples (may be unaligned)
+ * \param output pointer to nsamps item32 words (may be unaligned)
+ * \param nsamps number of samples to convert
+ */
+static UHD_INLINE void fc32_to_item32_bswap(
+    const fc32_t *input, item32_t *output, size_t nsamps
+){
+    const __m128 scalar = _mm_set_ps1(shorts_per_float);
+
+    //number of samples covered by whole 4-sample vector iterations
+    //(was nsamps/4, which left ~3/4 of the buffer to the scalar loop)
+    const size_t nsimd = nsamps & ~static_cast<size_t>(3);
+
+    //convert samples with intrinsics, four at a time
+    size_t i = 0; for (; i < nsimd; i+=4){
+        //load from input: two complex samples (4 floats) per register
+        __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0));
+        __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2));
+
+        //convert and scale
+        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
+        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
+
+        //pack + byteswap -> byteswap 32 bit words
+        //pack narrows 8 x int32 to 8 x int16 (re0, im0, re1, im1, ...),
+        //then each 16-bit lane has its two bytes exchanged
+        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
+        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
+
+        //store to output: four item32 words
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);
+    }
+
+    //convert remainder (at most 3 samples)
+    for (; i < nsamps; i++){
+        output[i] = uhd::byteswap(fc32_to_item32(input[i]));
+    }
+}
+
+\#else
static UHD_INLINE void fc32_to_item32_bswap(
const fc32_t *input, item32_t *output, size_t nsamps
){
@@ -109,6 +145,8 @@ static UHD_INLINE void fc32_to_item32_bswap(
}
}
+\#endif
+
/***********************************************************************
* Convert items32 buffer to complex float
**********************************************************************/
@@ -129,6 +167,40 @@ static UHD_INLINE void item32_to_fc32_nswap(
}
}
+\#if defined(HAVE_EMMINTRIN_H) && USE_EMMINTRIN_H
+\#include <emmintrin.h>
+
+/*!
+ * Convert a buffer of byte-swapped item32 words into complex floats (SSE2).
+ *
+ * The bytes of each 16-bit half are swapped, each half is widened to a
+ * 32-bit integer, converted to float and scaled so that the result equals
+ * the 16-bit value times floats_per_short. Four samples are handled per
+ * vector iteration; any samples left over go through the scalar helper.
+ *
+ * NOTE(review): the vector path reinterprets fc32_t as two consecutive
+ * floats and item32_t as a 32-bit word; both types are declared outside
+ * this hunk -- confirm.
+ *
+ * \param input pointer to nsamps item32 words (may be unaligned)
+ * \param output pointer to nsamps complex float samples (may be unaligned)
+ * \param nsamps number of samples to convert
+ */
+static UHD_INLINE void item32_to_fc32_bswap(
+    const item32_t *input, fc32_t *output, size_t nsamps
+){
+    //the 16-bit value is placed in the upper half of a 32-bit lane below,
+    //so the scale factor carries an extra 1/65536
+    const __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16));
+    const __m128i zeroi = _mm_setzero_si128();
+
+    //number of samples covered by whole 4-sample vector iterations
+    //(was nsamps/4, which left ~3/4 of the buffer to the scalar loop)
+    const size_t nsimd = nsamps & ~static_cast<size_t>(3);
+
+    //convert samples with intrinsics, four at a time
+    size_t i = 0; for (; i < nsimd; i+=4){
+        //load from input: four item32 words
+        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
+
+        //byteswap + unpack -> byteswap 32 bit words
+        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
+        //interleave with zero so each value sits alone in the upper 16 bits;
+        //duplicating the value into the lower half (as before) added up to
+        //one LSB of error after scaling
+        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi);
+        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
+
+        //convert and scale
+        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);
+        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);
+
+        //store to output: two complex samples (4 floats) per register
+        _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo);
+        _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi);
+    }
+
+    //convert remainder (at most 3 samples)
+    for (; i < nsamps; i++){
+        output[i] = item32_to_fc32(uhd::byteswap(input[i]));
+    }
+}
+
+\#else
static UHD_INLINE void item32_to_fc32_bswap(
const item32_t *input, fc32_t *output, size_t nsamps
){
@@ -137,6 +209,8 @@ static UHD_INLINE void item32_to_fc32_bswap(
}
}
+\#endif
+
/***********************************************************************
* Sample-buffer converters
**********************************************************************/