diff options
author | Josh Blum <josh@joshknows.com> | 2010-06-22 15:41:59 -0700 |
---|---|---|
committer | Josh Blum <josh@joshknows.com> | 2010-06-28 11:12:19 -0700 |
commit | 45dc078069798d14f33518033e58d9aa7ace99f1 (patch) | |
tree | 34ffe74addb173141d88bcba37027547eeaba9c3 | |
parent | f0005a27c3c0ff3148bab53eefdafcad799f03cc (diff) | |
download | uhd-45dc078069798d14f33518033e58d9aa7ace99f1.tar.gz uhd-45dc078069798d14f33518033e58d9aa7ace99f1.tar.bz2 uhd-45dc078069798d14f33518033e58d9aa7ace99f1.zip |
uhd: implemented complex float <-> item32 conversion with sse2
-rw-r--r-- | host/lib/transport/CMakeLists.txt | 10 | ||||
-rwxr-xr-x | host/lib/transport/gen_convert_types.py | 74 |
2 files changed, 84 insertions, 0 deletions
diff --git a/host/lib/transport/CMakeLists.txt b/host/lib/transport/CMakeLists.txt index 872865d6c..a2bd17b01 100644 --- a/host/lib/transport/CMakeLists.txt +++ b/host/lib/transport/CMakeLists.txt @@ -18,6 +18,16 @@ #This file will be included by cmake, use absolute paths! ######################################################################## +# Check for SIMD headers +######################################################################## +INCLUDE(CheckIncludeFileCXX) +CHECK_INCLUDE_FILE_CXX(emmintrin.h HAVE_EMMINTRIN_H) + +IF(HAVE_EMMINTRIN_H) + ADD_DEFINITIONS(-DHAVE_EMMINTRIN_H) +ENDIF(HAVE_EMMINTRIN_H) + +######################################################################## # Setup defines for interface address discovery ######################################################################## MESSAGE(STATUS "Configuring interface address discovery...") diff --git a/host/lib/transport/gen_convert_types.py b/host/lib/transport/gen_convert_types.py index e81bf7330..6b87bf134 100755 --- a/host/lib/transport/gen_convert_types.py +++ b/host/lib/transport/gen_convert_types.py @@ -30,6 +30,9 @@ TMPL_TEXT = """ \#include <stdexcept> \#include <cstring> \#include <complex> +\#include <iostream> + +\#define USE_EMMINTRIN_H true \#ifdef BOOST_BIG_ENDIAN static const bool is_big_endian = true; @@ -101,6 +104,39 @@ static UHD_INLINE void fc32_to_item32_nswap( } } +\#if defined(HAVE_EMMINTRIN_H) && USE_EMMINTRIN_H +\#include <emmintrin.h> + +static UHD_INLINE void fc32_to_item32_bswap( + const fc32_t *input, item32_t *output, size_t nsamps +){ + __m128 scalar = _mm_set_ps1(shorts_per_float); + + //convert samples with intrinsics pairs at a time + size_t i = 0; for (; i < nsamps/4; i+=4){ + //load from input + __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); + __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); + + //convert and scale + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); + + //pack + byteswap -> byteswap 32 bit words + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); + + //store to output + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = uhd::byteswap(fc32_to_item32(input[i])); + } +} + +\#else static UHD_INLINE void fc32_to_item32_bswap( const fc32_t *input, item32_t *output, size_t nsamps ){ @@ -109,6 +145,8 @@ static UHD_INLINE void fc32_to_item32_bswap( } } +\#endif + /*********************************************************************** * Convert items32 buffer to complex float **********************************************************************/ @@ -129,6 +167,40 @@ static UHD_INLINE void item32_to_fc32_nswap( } } +\#if defined(HAVE_EMMINTRIN_H) && USE_EMMINTRIN_H +\#include <emmintrin.h> + +static UHD_INLINE void item32_to_fc32_bswap( + const item32_t *input, fc32_t *output, size_t nsamps +){ + __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16)); + + //convert samples with intrinsics pairs at a time + size_t i = 0; for (; i < nsamps/4; i+=4){ + //load from input + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); + + //byteswap + unpack -> byteswap 32 bit words + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); + __m128i tmpilo = _mm_unpacklo_epi16(tmpi, tmpi); + __m128i tmpihi = _mm_unpackhi_epi16(tmpi, tmpi); + + //convert and scale + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); + + //store to output + _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); + _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = item32_to_fc32(uhd::byteswap(input[i])); + } +} + +\#else static UHD_INLINE void item32_to_fc32_bswap( const item32_t *input, fc32_t *output, size_t nsamps ){ @@ -137,6 +209,8 @@ static UHD_INLINE void item32_to_fc32_bswap( } } +\#endif + /*********************************************************************** * Sample-buffer converters **********************************************************************/ |