diff options
| author | Josh Blum <josh@joshknows.com> | 2010-08-24 17:26:07 -0700 | 
|---|---|---|
| committer | Josh Blum <josh@joshknows.com> | 2010-08-24 17:26:07 -0700 | 
| commit | 8c872ffb2e4f24927b6ec9de825a31c5eda014b8 (patch) | |
| tree | 400202c72cc21708838ce3f78baaeac0022a41e6 | |
| parent | 48ad3b734314bdec2128dacb20b09fd4cf1f5979 (diff) | |
| download | uhd-8c872ffb2e4f24927b6ec9de825a31c5eda014b8.tar.gz uhd-8c872ffb2e4f24927b6ec9de825a31c5eda014b8.tar.bz2 uhd-8c872ffb2e4f24927b6ec9de825a31c5eda014b8.zip | |
uhd: convert types corrected for little endian, created SSE2 float/short conversion for no-swap case
| -rw-r--r-- | host/lib/transport/convert_types_impl.hpp | 141 | ||||
| -rw-r--r-- | host/test/convert_types_test.cpp | 139 | 
2 files changed, 202 insertions, 78 deletions
| diff --git a/host/lib/transport/convert_types_impl.hpp b/host/lib/transport/convert_types_impl.hpp index 5958b08cb..641029795 100644 --- a/host/lib/transport/convert_types_impl.hpp +++ b/host/lib/transport/convert_types_impl.hpp @@ -28,6 +28,13 @@      #define USE_EMMINTRIN_H //use sse2 intrinsics  #endif +#if defined(USE_EMMINTRIN_H) +    #include <emmintrin.h> +#endif + +//! shortcut for a byteswap16 with casting +#define BSWAP16_C(num) uhd::byteswap(boost::uint16_t(num)) +  /***********************************************************************   * Typedefs   **********************************************************************/ @@ -47,9 +54,10 @@ static UHD_INLINE void sc16_to_item32_nswap(  static UHD_INLINE void sc16_to_item32_bswap(      const sc16_t *input, item32_t *output, size_t nsamps  ){ -    const item32_t *item32_input = (const item32_t *)input;      for (size_t i = 0; i < nsamps; i++){ -        output[i] = uhd::byteswap(item32_input[i]); +        boost::uint16_t real = BSWAP16_C(input[i].real()); +        boost::uint16_t imag = BSWAP16_C(input[i].imag()); +        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);      }  } @@ -65,34 +73,71 @@ static UHD_INLINE void item32_to_sc16_nswap(  static UHD_INLINE void item32_to_sc16_bswap(      const item32_t *input, sc16_t *output, size_t nsamps  ){ -    item32_t *item32_output = (item32_t *)output;      for (size_t i = 0; i < nsamps; i++){ -        item32_output[i] = uhd::byteswap(input[i]); +        boost::int16_t real = BSWAP16_C(input[i] >> 0); +        boost::int16_t imag = BSWAP16_C(input[i] >> 16); +        output[i] = sc16_t(real, imag);      }  }  /*********************************************************************** - * Convert complex float buffer to items32 + * Convert complex float buffer to items32 (no swap)   **********************************************************************/  static const float shorts_per_float = float(32767); -static UHD_INLINE item32_t fc32_to_item32(fc32_t num){ -    boost::uint16_t real = boost::int16_t(num.real()*shorts_per_float); -    boost::uint16_t imag = boost::int16_t(num.imag()*shorts_per_float); -    return (item32_t(real) << 16) | (item32_t(imag) << 0); +#define FC32_TO_SC16_C(num) boost::int16_t(num*shorts_per_float) + +//////////////////////////////////// +// none-swap +//////////////////////////////////// +#if defined(USE_EMMINTRIN_H) +static UHD_INLINE void fc32_to_item32_nswap( +    const fc32_t *input, item32_t *output, size_t nsamps +){ +    __m128 scalar = _mm_set_ps1(shorts_per_float); + +    //convert blocks of samples with intrinsics +    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ +        //load from input +        __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); +        __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); + +        //convert and scale +        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); +        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); + +        //pack +        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); + +        //store to output +        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); +    } + +    //convert remainder +    for (; i < nsamps; i++){ +        boost::uint16_t real = FC32_TO_SC16_C(input[i].real()); +        boost::uint16_t imag = FC32_TO_SC16_C(input[i].imag()); +        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); +    }  } +#else  static UHD_INLINE void fc32_to_item32_nswap(      const fc32_t *input, item32_t *output, size_t nsamps  ){      for (size_t i = 0; i < nsamps; i++){ -        output[i] = fc32_to_item32(input[i]); +        boost::uint16_t real = FC32_TO_SC16_C(input[i].real()); +        boost::uint16_t imag = FC32_TO_SC16_C(input[i].imag()); +        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);      }  } -#if defined(USE_EMMINTRIN_H) -#include <emmintrin.h> +#endif +//////////////////////////////////// +// byte-swap +//////////////////////////////////// +#if defined(USE_EMMINTRIN_H)  static UHD_INLINE void fc32_to_item32_bswap(      const fc32_t *input, item32_t *output, size_t nsamps  ){ @@ -108,7 +153,7 @@ static UHD_INLINE void fc32_to_item32_bswap(          __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));          __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); -        //pack + byteswap -> byteswap 32 bit words +        //pack + byteswap -> byteswap 16 bit words          __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);          tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); @@ -118,7 +163,9 @@ static UHD_INLINE void fc32_to_item32_bswap(      //convert remainder      for (; i < nsamps; i++){ -        output[i] = uhd::byteswap(fc32_to_item32(input[i])); +        boost::uint16_t real = BSWAP16_C(FC32_TO_SC16_C(input[i].real())); +        boost::uint16_t imag = BSWAP16_C(FC32_TO_SC16_C(input[i].imag())); +        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);      }  } @@ -127,7 +174,9 @@ static UHD_INLINE void fc32_to_item32_bswap(      const fc32_t *input, item32_t *output, size_t nsamps  ){      for (size_t i = 0; i < nsamps; i++){ -        output[i] = uhd::byteswap(fc32_to_item32(input[i])); +        boost::uint16_t real = BSWAP16_C(FC32_TO_SC16_C(input[i].real())); +        boost::uint16_t imag = BSWAP16_C(FC32_TO_SC16_C(input[i].imag())); +        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);      }  } @@ -138,24 +187,60 @@ static UHD_INLINE void fc32_to_item32_bswap(   **********************************************************************/  static const float floats_per_short = float(1.0/shorts_per_float); -static UHD_INLINE fc32_t item32_to_fc32(item32_t item){ -    return fc32_t( -        float(boost::int16_t(item >> 16)*floats_per_short), -        float(boost::int16_t(item >> 0)*floats_per_short) -    ); +#define I16_TO_FC32_C(num) (boost::int16_t(num)*floats_per_short) + +//////////////////////////////////// +// none-swap +//////////////////////////////////// +#if defined(USE_EMMINTRIN_H) +static UHD_INLINE void item32_to_fc32_nswap( +    const item32_t *input, fc32_t *output, size_t nsamps +){ +    __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16)); +    __m128i zeroi = _mm_setzero_si128(); + +    //convert blocks of samples with intrinsics +    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ +        //load from input +        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); + +        //unpack +        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits +        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); + +        //convert and scale +        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); +        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); + +        //store to output +        _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); +        _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); +    } + +    //convert remainder +    for (; i < nsamps; i++){ +        float real = I16_TO_FC32_C(input[i] >> 0); +        float imag = I16_TO_FC32_C(input[i] >> 16); +        output[i] = fc32_t(real, imag); +    }  } +#else  static UHD_INLINE void item32_to_fc32_nswap(      const item32_t *input, fc32_t *output, size_t nsamps  ){      for (size_t i = 0; i < nsamps; i++){ -        output[i] = item32_to_fc32(input[i]); +        float real = I16_TO_FC32_C(input[i] >> 0); +        float imag = I16_TO_FC32_C(input[i] >> 16); +        output[i] = fc32_t(real, imag);      }  } +#endif +//////////////////////////////////// +// byte-swap +////////////////////////////////////  #if defined(USE_EMMINTRIN_H) -#include <emmintrin.h> -  static UHD_INLINE void item32_to_fc32_bswap(      const item32_t *input, fc32_t *output, size_t nsamps  ){ @@ -167,7 +252,7 @@ static UHD_INLINE void item32_to_fc32_bswap(          //load from input          __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); -        //byteswap + unpack -> byteswap 32 bit words +        //byteswap + unpack -> byteswap 16 bit words          tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));          __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits          __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); @@ -183,7 +268,9 @@ static UHD_INLINE void item32_to_fc32_bswap(      //convert remainder      for (; i < nsamps; i++){ -        output[i] = item32_to_fc32(uhd::byteswap(input[i])); +        float real = I16_TO_FC32_C(BSWAP16_C(input[i] >> 0)); +        float imag = I16_TO_FC32_C(BSWAP16_C(input[i] >> 16)); +        output[i] = fc32_t(real, imag);      }  } @@ -192,7 +279,9 @@ static UHD_INLINE void item32_to_fc32_bswap(      const item32_t *input, fc32_t *output, size_t nsamps  ){      for (size_t i = 0; i < nsamps; i++){ -        output[i] = item32_to_fc32(uhd::byteswap(input[i])); +        float real = I16_TO_FC32_C(BSWAP16_C(input[i] >> 0)); +        float imag = I16_TO_FC32_C(BSWAP16_C(input[i] >> 16)); +        output[i] = fc32_t(real, imag);      }  } diff --git a/host/test/convert_types_test.cpp b/host/test/convert_types_test.cpp index 1587be57f..d132a708b 100644 --- a/host/test/convert_types_test.cpp +++ b/host/test/convert_types_test.cpp @@ -17,109 +17,144 @@  #include <uhd/transport/convert_types.hpp>  #include <boost/test/unit_test.hpp> +#include <boost/foreach.hpp>  #include <boost/cstdint.hpp> +#include <boost/asio/buffer.hpp>  #include <complex> +#include <vector> +#include <cstdlib>  using namespace uhd; -template <typename host_type, typename dev_type, size_t nsamps> -void loopback( +//typedefs for complex types +typedef std::complex<boost::uint16_t> sc16_t; +typedef std::complex<float> fc32_t; + +//extract pointer to POD since using &vector.front() throws in MSVC +template <typename T> void * pod2ptr(T &pod){ +    return boost::asio::buffer_cast<void *>(boost::asio::buffer(pod)); +} +template <typename T> const void * pod2ptr(const T &pod){ +    return boost::asio::buffer_cast<const void *>(boost::asio::buffer(pod)); +} + +/*********************************************************************** + * Loopback runner: + *    convert input buffer into intermediate buffer + *    convert intermediate buffer into output buffer + **********************************************************************/ +template <typename Range> static void loopback( +    size_t nsamps,      const io_type_t &io_type,      const otw_type_t &otw_type, -    const host_type *input, -    host_type *output +    const Range &input, +    Range &output  ){ -    dev_type dev[nsamps]; +    //item32 is largest device type +    std::vector<boost::uint32_t> dev(nsamps);      //convert to dev type      transport::convert_io_type_to_otw_type( -        input, io_type, -        dev, otw_type, +        pod2ptr(input), io_type, +        pod2ptr(dev), otw_type,          nsamps      );      //convert back to host type      transport::convert_otw_type_to_io_type( -        dev, otw_type, -        output, io_type, +        pod2ptr(dev), otw_type, +        pod2ptr(output), io_type,          nsamps      );  } -typedef std::complex<boost::uint16_t> sc16_t; +/*********************************************************************** + * Test short conversion + **********************************************************************/ +static void test_convert_types_sc16( +    size_t nsamps, +    const io_type_t &io_type, +    const otw_type_t &otw_type +){ +    //fill the input samples +    std::vector<sc16_t> input(nsamps), output(nsamps); +    BOOST_FOREACH(sc16_t &in, input) in = sc16_t( +        std::rand()-(RAND_MAX/2), +        std::rand()-(RAND_MAX/2) +    ); -BOOST_AUTO_TEST_CASE(test_convert_types_be_sc16){ -    sc16_t in_sc16[] = { -        sc16_t(0, -1234), sc16_t(4321, 1234), -        sc16_t(9876, -4567), sc16_t(8912, 0) -    }, out_sc16[4]; +    //run the loopback and test +    loopback(nsamps, io_type, otw_type, input, output); +    BOOST_CHECK_EQUAL_COLLECTIONS(input.begin(), input.end(), output.begin(), output.end()); +} +BOOST_AUTO_TEST_CASE(test_convert_types_be_sc16){      io_type_t io_type(io_type_t::COMPLEX_INT16);      otw_type_t otw_type;      otw_type.byteorder = otw_type_t::BO_BIG_ENDIAN;      otw_type.width = 16; -    loopback<sc16_t, boost::uint32_t, 4>(io_type, otw_type, in_sc16, out_sc16); -    BOOST_CHECK_EQUAL_COLLECTIONS(in_sc16, in_sc16+4, out_sc16, out_sc16+4); +    //try various lengths to test edge cases +    for (size_t nsamps = 0; nsamps < 16; nsamps++){ +        test_convert_types_sc16(nsamps, io_type, otw_type); +    }  }  BOOST_AUTO_TEST_CASE(test_convert_types_le_sc16){ -    sc16_t in_sc16[] = { -        sc16_t(0, -1234), sc16_t(4321, 1234), -        sc16_t(9876, -4567), sc16_t(8912, 0) -    }, out_sc16[4]; -      io_type_t io_type(io_type_t::COMPLEX_INT16);      otw_type_t otw_type;      otw_type.byteorder = otw_type_t::BO_LITTLE_ENDIAN;      otw_type.width = 16; -    loopback<sc16_t, boost::uint32_t, 4>(io_type, otw_type, in_sc16, out_sc16); -    BOOST_CHECK_EQUAL_COLLECTIONS(in_sc16, in_sc16+4, out_sc16, out_sc16+4); +    //try various lengths to test edge cases +    for (size_t nsamps = 0; nsamps < 16; nsamps++){ +        test_convert_types_sc16(nsamps, io_type, otw_type); +    }  } -typedef std::complex<float> fc32_t; - -#define BOOST_CHECK_CLOSE_COMPLEX(a1, a2, p) \ -    BOOST_CHECK_CLOSE(a1.real(), a2.real(), p); \ -    BOOST_CHECK_CLOSE(a1.imag(), a2.imag(), p); +/*********************************************************************** + * Test float conversion + **********************************************************************/ +static void test_convert_types_fc32( +    size_t nsamps, +    const io_type_t &io_type, +    const otw_type_t &otw_type +){ +    //fill the input samples +    std::vector<fc32_t> input(nsamps), output(nsamps); +    BOOST_FOREACH(fc32_t &in, input) in = fc32_t( +        (std::rand()/float(RAND_MAX/2)) - 1, +        (std::rand()/float(RAND_MAX/2)) - 1 +    ); -static const float tolerance = float(0.1); +    //run the loopback and test +    loopback(nsamps, io_type, otw_type, input, output); +    for (size_t i = 0; i < nsamps; i++){ +        BOOST_CHECK_CLOSE_FRACTION(input[i].real(), output[i].real(), float(0.01)); +        BOOST_CHECK_CLOSE_FRACTION(input[i].imag(), output[i].imag(), float(0.01)); +    } +}  BOOST_AUTO_TEST_CASE(test_convert_types_be_fc32){ -    fc32_t in_fc32[] = { -        fc32_t(float(0), float(-0.2)), fc32_t(float(0.03), float(-0.16)), -        fc32_t(float(1.0), float(.45)), fc32_t(float(0.09), float(0)) -    }, out_fc32[4]; -      io_type_t io_type(io_type_t::COMPLEX_FLOAT32);      otw_type_t otw_type;      otw_type.byteorder = otw_type_t::BO_BIG_ENDIAN;      otw_type.width = 16; -    loopback<fc32_t, boost::uint32_t, 4>(io_type, otw_type, in_fc32, out_fc32); - -    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[0], out_fc32[0], tolerance); -    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[1], out_fc32[1], tolerance); -    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[2], out_fc32[2], tolerance); -    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[3], out_fc32[3], tolerance); +    //try various lengths to test edge cases +    for (size_t nsamps = 0; nsamps < 16; nsamps++){ +        test_convert_types_fc32(nsamps, io_type, otw_type); +    }  }  BOOST_AUTO_TEST_CASE(test_convert_types_le_fc32){ -    fc32_t in_fc32[] = { -        fc32_t(float(0), float(-0.2)), fc32_t(float(0.03), float(-0.16)), -        fc32_t(float(1.0), float(.45)), fc32_t(float(0.09), float(0)) -    }, out_fc32[4]; -      io_type_t io_type(io_type_t::COMPLEX_FLOAT32);      otw_type_t otw_type;      otw_type.byteorder = otw_type_t::BO_LITTLE_ENDIAN;      otw_type.width = 16; -    loopback<fc32_t, boost::uint32_t, 4>(io_type, otw_type, in_fc32, out_fc32); - -    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[0], out_fc32[0], tolerance); -    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[1], out_fc32[1], tolerance); -    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[2], out_fc32[2], tolerance); -    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[3], out_fc32[3], tolerance); +    //try various lengths to test edge cases +    for (size_t nsamps = 0; nsamps < 16; nsamps++){ +        test_convert_types_fc32(nsamps, io_type, otw_type); +    }  } | 
