aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Josh Blum <josh@joshknows.com> 2010-08-24 17:26:07 -0700
committer Josh Blum <josh@joshknows.com> 2010-08-24 17:26:07 -0700
commit 8c872ffb2e4f24927b6ec9de825a31c5eda014b8 (patch)
tree 400202c72cc21708838ce3f78baaeac0022a41e6
parent 48ad3b734314bdec2128dacb20b09fd4cf1f5979 (diff)
download uhd-8c872ffb2e4f24927b6ec9de825a31c5eda014b8.tar.gz
uhd-8c872ffb2e4f24927b6ec9de825a31c5eda014b8.tar.bz2
uhd-8c872ffb2e4f24927b6ec9de825a31c5eda014b8.zip
uhd: convert types corrected for little endian, created SSE2 float/short conversion for no-swap case
-rw-r--r-- host/lib/transport/convert_types_impl.hpp 141
-rw-r--r-- host/test/convert_types_test.cpp 139
2 files changed, 202 insertions, 78 deletions
diff --git a/host/lib/transport/convert_types_impl.hpp b/host/lib/transport/convert_types_impl.hpp
index 5958b08cb..641029795 100644
--- a/host/lib/transport/convert_types_impl.hpp
+++ b/host/lib/transport/convert_types_impl.hpp
@@ -28,6 +28,13 @@
#define USE_EMMINTRIN_H //use sse2 intrinsics
#endif
+#if defined(USE_EMMINTRIN_H)
+ #include <emmintrin.h>
+#endif
+
+//! shortcut for a 16-bit byteswap: casts num to boost::uint16_t, then passes it to uhd::byteswap
+#define BSWAP16_C(num) uhd::byteswap(boost::uint16_t(num))
+
/***********************************************************************
* Typedefs
**********************************************************************/
@@ -47,9 +54,10 @@ static UHD_INLINE void sc16_to_item32_nswap(
static UHD_INLINE void sc16_to_item32_bswap(
const sc16_t *input, item32_t *output, size_t nsamps
){
- const item32_t *item32_input = (const item32_t *)input;
for (size_t i = 0; i < nsamps; i++){
- output[i] = uhd::byteswap(item32_input[i]);
+ boost::uint16_t real = BSWAP16_C(input[i].real()); //each 16-bit component is byteswapped on its own (the removed code byteswapped the whole 32-bit word)
+ boost::uint16_t imag = BSWAP16_C(input[i].imag());
+ output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); //swapped real goes in bits 0-15, swapped imag in bits 16-31
}
}
@@ -65,34 +73,71 @@ static UHD_INLINE void item32_to_sc16_nswap(
static UHD_INLINE void item32_to_sc16_bswap(
const item32_t *input, sc16_t *output, size_t nsamps
){
- item32_t *item32_output = (item32_t *)output;
for (size_t i = 0; i < nsamps; i++){
- item32_output[i] = uhd::byteswap(input[i]);
+ boost::int16_t real = BSWAP16_C(input[i] >> 0); //bits 0-15 of the item, byteswapped, become the real part
+ boost::int16_t imag = BSWAP16_C(input[i] >> 16); //bits 16-31 of the item, byteswapped, become the imag part
+ output[i] = sc16_t(real, imag); //mirror of the packing done by sc16_to_item32_bswap
}
}
/***********************************************************************
- * Convert complex float buffer to items32
+ * Convert complex float buffer to items32 (no swap)
**********************************************************************/
static const float shorts_per_float = float(32767);
-static UHD_INLINE item32_t fc32_to_item32(fc32_t num){
- boost::uint16_t real = boost::int16_t(num.real()*shorts_per_float);
- boost::uint16_t imag = boost::int16_t(num.imag()*shorts_per_float);
- return (item32_t(real) << 16) | (item32_t(imag) << 0);
+#define FC32_TO_SC16_C(num) boost::int16_t(num*shorts_per_float) //scales a float by shorts_per_float, then casts to int16; NOTE(review): num is not parenthesized, so pass only simple expressions
+
+////////////////////////////////////
+// no-swap
+////////////////////////////////////
+#if defined(USE_EMMINTRIN_H)
+static UHD_INLINE void fc32_to_item32_nswap( //SSE2 variant: scales complex floats by shorts_per_float and packs them into 16-bit pairs without byteswapping
+ const fc32_t *input, item32_t *output, size_t nsamps
+){
+ __m128 scalar = _mm_set_ps1(shorts_per_float); //scale factor replicated into all four float lanes
+
+ //convert blocks of four samples with intrinsics; (nsamps & ~0x3) is nsamps rounded down to a multiple of 4
+ size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
+ //unaligned loads of four floats each: samples i..i+1 and samples i+2..i+3 (assumes fc32_t is two packed floats -- TODO confirm)
+ __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0));
+ __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2));
+
+ //scale, then convert to 32-bit integers; NOTE(review): _mm_cvtps_epi32 rounds per the current rounding mode while the remainder loop below truncates through a cast -- confirm the difference is acceptable
+ __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
+ __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
+
+ //pack the eight 32-bit integers into eight 16-bit integers with signed saturation, keeping the real, imag order
+ __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
+
+ //unaligned 128-bit store covering output[i] through output[i+3]
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);
+ }
+
+ //convert the samples left over after the last full block of four, one at a time
+ for (; i < nsamps; i++){
+ boost::uint16_t real = FC32_TO_SC16_C(input[i].real());
+ boost::uint16_t imag = FC32_TO_SC16_C(input[i].imag());
+ output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); //real in bits 0-15, imag in bits 16-31
+ }
}
+#else
static UHD_INLINE void fc32_to_item32_nswap(
const fc32_t *input, item32_t *output, size_t nsamps
){
for (size_t i = 0; i < nsamps; i++){
- output[i] = fc32_to_item32(input[i]);
+ boost::uint16_t real = FC32_TO_SC16_C(input[i].real()); //variant used when USE_EMMINTRIN_H is not defined: scale and cast one sample at a time
+ boost::uint16_t imag = FC32_TO_SC16_C(input[i].imag());
+ output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); //real in bits 0-15, imag in bits 16-31
}
}
-#if defined(USE_EMMINTRIN_H)
-#include <emmintrin.h>
+#endif
+////////////////////////////////////
+// byte-swap
+////////////////////////////////////
+#if defined(USE_EMMINTRIN_H)
static UHD_INLINE void fc32_to_item32_bswap(
const fc32_t *input, item32_t *output, size_t nsamps
){
@@ -108,7 +153,7 @@ static UHD_INLINE void fc32_to_item32_bswap(
__m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
__m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
- //pack + byteswap -> byteswap 32 bit words
+ //pack + byteswap -> byteswap 16 bit words
__m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
@@ -118,7 +163,9 @@ static UHD_INLINE void fc32_to_item32_bswap(
//convert remainder
for (; i < nsamps; i++){
- output[i] = uhd::byteswap(fc32_to_item32(input[i]));
+ boost::uint16_t real = BSWAP16_C(FC32_TO_SC16_C(input[i].real()));
+ boost::uint16_t imag = BSWAP16_C(FC32_TO_SC16_C(input[i].imag()));
+ output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);
}
}
@@ -127,7 +174,9 @@ static UHD_INLINE void fc32_to_item32_bswap(
const fc32_t *input, item32_t *output, size_t nsamps
){
for (size_t i = 0; i < nsamps; i++){
- output[i] = uhd::byteswap(fc32_to_item32(input[i]));
+ boost::uint16_t real = BSWAP16_C(FC32_TO_SC16_C(input[i].real()));
+ boost::uint16_t imag = BSWAP16_C(FC32_TO_SC16_C(input[i].imag()));
+ output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);
}
}
@@ -138,24 +187,60 @@ static UHD_INLINE void fc32_to_item32_bswap(
**********************************************************************/
static const float floats_per_short = float(1.0/shorts_per_float);
-static UHD_INLINE fc32_t item32_to_fc32(item32_t item){
- return fc32_t(
- float(boost::int16_t(item >> 16)*floats_per_short),
- float(boost::int16_t(item >> 0)*floats_per_short)
- );
+#define I16_TO_FC32_C(num) (boost::int16_t(num)*floats_per_short) //casts num to int16, then scales it to float by floats_per_short
+
+////////////////////////////////////
+// no-swap
+////////////////////////////////////
+#if defined(USE_EMMINTRIN_H)
+static UHD_INLINE void item32_to_fc32_nswap( //SSE2 variant: expands 16-bit pairs into complex floats scaled by floats_per_short, without byteswapping
+ const item32_t *input, fc32_t *output, size_t nsamps
+){
+ __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16)); //the extra 1/(1 << 16) compensates for each value sitting in the upper 16 bits of its 32-bit lane after the unpack below
+ __m128i zeroi = _mm_setzero_si128(); //zeros interleaved into the lower 16 bits of each lane
+
+ //convert blocks of four samples with intrinsics; (nsamps & ~0x3) is nsamps rounded down to a multiple of 4
+ size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
+ //unaligned 128-bit load covering input[i] through input[i+3]
+ __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
+
+ //interleave zeros with the 16-bit values so each becomes a 32-bit lane; the sign bit lands in bit 31, so the lane stays correctly signed
+ __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
+ __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
+
+ //convert the signed 32-bit lanes to float, then scale
+ __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);
+ __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);
+
+ //unaligned stores of four floats each: samples i..i+1 and samples i+2..i+3 (assumes fc32_t is two packed floats -- TODO confirm)
+ _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo);
+ _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi);
+ }
+
+ //convert the samples left over after the last full block of four, one at a time
+ for (; i < nsamps; i++){
+ float real = I16_TO_FC32_C(input[i] >> 0); //bits 0-15 -> real
+ float imag = I16_TO_FC32_C(input[i] >> 16); //bits 16-31 -> imag
+ output[i] = fc32_t(real, imag);
+ }
}
+#else
static UHD_INLINE void item32_to_fc32_nswap(
const item32_t *input, fc32_t *output, size_t nsamps
){
for (size_t i = 0; i < nsamps; i++){
- output[i] = item32_to_fc32(input[i]);
+ float real = I16_TO_FC32_C(input[i] >> 0); //variant used when USE_EMMINTRIN_H is not defined: bits 0-15 -> real
+ float imag = I16_TO_FC32_C(input[i] >> 16); //bits 16-31 -> imag
+ output[i] = fc32_t(real, imag);
}
}
+#endif
+////////////////////////////////////
+// byte-swap
+////////////////////////////////////
#if defined(USE_EMMINTRIN_H)
-#include <emmintrin.h>
-
static UHD_INLINE void item32_to_fc32_bswap(
const item32_t *input, fc32_t *output, size_t nsamps
){
@@ -167,7 +252,7 @@ static UHD_INLINE void item32_to_fc32_bswap(
//load from input
__m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
- //byteswap + unpack -> byteswap 32 bit words
+ //byteswap + unpack -> byteswap 16 bit words
tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
__m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
__m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
@@ -183,7 +268,9 @@ static UHD_INLINE void item32_to_fc32_bswap(
//convert remainder
for (; i < nsamps; i++){
- output[i] = item32_to_fc32(uhd::byteswap(input[i]));
+ float real = I16_TO_FC32_C(BSWAP16_C(input[i] >> 0));
+ float imag = I16_TO_FC32_C(BSWAP16_C(input[i] >> 16));
+ output[i] = fc32_t(real, imag);
}
}
@@ -192,7 +279,9 @@ static UHD_INLINE void item32_to_fc32_bswap(
const item32_t *input, fc32_t *output, size_t nsamps
){
for (size_t i = 0; i < nsamps; i++){
- output[i] = item32_to_fc32(uhd::byteswap(input[i]));
+ float real = I16_TO_FC32_C(BSWAP16_C(input[i] >> 0));
+ float imag = I16_TO_FC32_C(BSWAP16_C(input[i] >> 16));
+ output[i] = fc32_t(real, imag);
}
}
diff --git a/host/test/convert_types_test.cpp b/host/test/convert_types_test.cpp
index 1587be57f..d132a708b 100644
--- a/host/test/convert_types_test.cpp
+++ b/host/test/convert_types_test.cpp
@@ -17,109 +17,144 @@
#include <uhd/transport/convert_types.hpp>
#include <boost/test/unit_test.hpp>
+#include <boost/foreach.hpp>
#include <boost/cstdint.hpp>
+#include <boost/asio/buffer.hpp>
#include <complex>
+#include <vector>
+#include <cstdlib>
using namespace uhd;
-template <typename host_type, typename dev_type, size_t nsamps>
-void loopback(
+//typedefs for complex types
+typedef std::complex<boost::uint16_t> sc16_t;
+typedef std::complex<float> fc32_t;
+
+//extract a raw void pointer to a container's storage by wrapping it in a boost::asio buffer; used since &vector.front() throws in MSVC (presumably on an empty vector -- TODO confirm)
+template <typename T> void * pod2ptr(T &pod){
+ return boost::asio::buffer_cast<void *>(boost::asio::buffer(pod));
+}
+template <typename T> const void * pod2ptr(const T &pod){ //const overload: same lookup, returns a pointer to const
+ return boost::asio::buffer_cast<const void *>(boost::asio::buffer(pod));
+}
+
+/***********************************************************************
+ * Loopback runner:
+ * convert input buffer into intermediate buffer
+ * convert intermediate buffer into output buffer
+ *
+ * nsamps: number of samples passed to both conversion calls
+ * io_type, otw_type: host-side and over-the-wire formats, used
+ * unchanged for both directions
+ * input, output: host-side ranges, turned into raw pointers with
+ * pod2ptr; output is overwritten by the second conversion
+ **********************************************************************/
+template <typename Range> static void loopback(
+ size_t nsamps,
const io_type_t &io_type,
const otw_type_t &otw_type,
- const host_type *input,
- host_type *output
+ const Range &input,
+ Range &output
){
- dev_type dev[nsamps];
+ //intermediate device-side buffer, one 32-bit item per sample (item32 is largest device type)
+ std::vector<boost::uint32_t> dev(nsamps);
//convert to dev type
transport::convert_io_type_to_otw_type(
- input, io_type,
- dev, otw_type,
+ pod2ptr(input), io_type,
+ pod2ptr(dev), otw_type,
nsamps
);
//convert back to host type
transport::convert_otw_type_to_io_type(
- dev, otw_type,
- output, io_type,
+ pod2ptr(dev), otw_type,
+ pod2ptr(output), io_type,
nsamps
);
}
-typedef std::complex<boost::uint16_t> sc16_t;
+/***********************************************************************
+ * Test short conversion
+ *
+ * Fills nsamps sc16 samples from std::rand, runs them through
+ * loopback with the given io_type and otw_type, and requires the
+ * output to equal the input element for element.
+ **********************************************************************/
+static void test_convert_types_sc16(
+ size_t nsamps,
+ const io_type_t &io_type,
+ const otw_type_t &otw_type
+){
+ //fill the input samples; output starts out value-initialized
+ std::vector<sc16_t> input(nsamps), output(nsamps);
+ BOOST_FOREACH(sc16_t &in, input) in = sc16_t( //each component is std::rand() shifted down by RAND_MAX/2, then narrowed to the 16-bit component type of sc16_t
+ std::rand()-(RAND_MAX/2),
+ std::rand()-(RAND_MAX/2)
+ );
-BOOST_AUTO_TEST_CASE(test_convert_types_be_sc16){
- sc16_t in_sc16[] = {
- sc16_t(0, -1234), sc16_t(4321, 1234),
- sc16_t(9876, -4567), sc16_t(8912, 0)
- }, out_sc16[4];
+ //run the loopback and require an exact match, since no scaling is expected for 16-bit integer samples
+ loopback(nsamps, io_type, otw_type, input, output);
+ BOOST_CHECK_EQUAL_COLLECTIONS(input.begin(), input.end(), output.begin(), output.end());
+}
BOOST_AUTO_TEST_CASE(test_convert_types_be_sc16){
io_type_t io_type(io_type_t::COMPLEX_INT16);
otw_type_t otw_type;
otw_type.byteorder = otw_type_t::BO_BIG_ENDIAN;
otw_type.width = 16;
- loopback<sc16_t, boost::uint32_t, 4>(io_type, otw_type, in_sc16, out_sc16);
- BOOST_CHECK_EQUAL_COLLECTIONS(in_sc16, in_sc16+4, out_sc16, out_sc16+4);
+ //try every length from 0 to 15 to test edge cases such as the empty buffer
+ for (size_t nsamps = 0; nsamps < 16; nsamps++){
+ test_convert_types_sc16(nsamps, io_type, otw_type);
+ }
}
BOOST_AUTO_TEST_CASE(test_convert_types_le_sc16){
- sc16_t in_sc16[] = {
- sc16_t(0, -1234), sc16_t(4321, 1234),
- sc16_t(9876, -4567), sc16_t(8912, 0)
- }, out_sc16[4];
-
io_type_t io_type(io_type_t::COMPLEX_INT16);
otw_type_t otw_type;
otw_type.byteorder = otw_type_t::BO_LITTLE_ENDIAN;
otw_type.width = 16;
- loopback<sc16_t, boost::uint32_t, 4>(io_type, otw_type, in_sc16, out_sc16);
- BOOST_CHECK_EQUAL_COLLECTIONS(in_sc16, in_sc16+4, out_sc16, out_sc16+4);
+ //try every length from 0 to 15 to test edge cases such as the empty buffer
+ for (size_t nsamps = 0; nsamps < 16; nsamps++){
+ test_convert_types_sc16(nsamps, io_type, otw_type);
+ }
}
-typedef std::complex<float> fc32_t;
-
-#define BOOST_CHECK_CLOSE_COMPLEX(a1, a2, p) \
- BOOST_CHECK_CLOSE(a1.real(), a2.real(), p); \
- BOOST_CHECK_CLOSE(a1.imag(), a2.imag(), p);
+/***********************************************************************
+ * Test float conversion
+ *
+ * Fills nsamps fc32 samples from std::rand, runs them through
+ * loopback with the given io_type and otw_type, and checks each
+ * real and imag component of the output against the input with a
+ * relative tolerance.
+ **********************************************************************/
+static void test_convert_types_fc32(
+ size_t nsamps,
+ const io_type_t &io_type,
+ const otw_type_t &otw_type
+){
+ //fill the input samples; output starts out value-initialized
+ std::vector<fc32_t> input(nsamps), output(nsamps);
+ BOOST_FOREACH(fc32_t &in, input) in = fc32_t( //each component is std::rand() divided by RAND_MAX/2 and shifted down by 1, i.e. roughly in the range -1 to 1
+ (std::rand()/float(RAND_MAX/2)) - 1,
+ (std::rand()/float(RAND_MAX/2)) - 1
+ );
-static const float tolerance = float(0.1);
+ //run the loopback and compare component-wise; NOTE(review): the 0.01 tolerance is a relative fraction, so inputs very close to zero may fail after 16-bit quantization -- confirm
+ loopback(nsamps, io_type, otw_type, input, output);
+ for (size_t i = 0; i < nsamps; i++){
+ BOOST_CHECK_CLOSE_FRACTION(input[i].real(), output[i].real(), float(0.01));
+ BOOST_CHECK_CLOSE_FRACTION(input[i].imag(), output[i].imag(), float(0.01));
+ }
+}
BOOST_AUTO_TEST_CASE(test_convert_types_be_fc32){
- fc32_t in_fc32[] = {
- fc32_t(float(0), float(-0.2)), fc32_t(float(0.03), float(-0.16)),
- fc32_t(float(1.0), float(.45)), fc32_t(float(0.09), float(0))
- }, out_fc32[4];
-
io_type_t io_type(io_type_t::COMPLEX_FLOAT32);
otw_type_t otw_type;
otw_type.byteorder = otw_type_t::BO_BIG_ENDIAN;
otw_type.width = 16;
- loopback<fc32_t, boost::uint32_t, 4>(io_type, otw_type, in_fc32, out_fc32);
-
- BOOST_CHECK_CLOSE_COMPLEX(in_fc32[0], out_fc32[0], tolerance);
- BOOST_CHECK_CLOSE_COMPLEX(in_fc32[1], out_fc32[1], tolerance);
- BOOST_CHECK_CLOSE_COMPLEX(in_fc32[2], out_fc32[2], tolerance);
- BOOST_CHECK_CLOSE_COMPLEX(in_fc32[3], out_fc32[3], tolerance);
+ //try every length from 0 to 15 to test edge cases: empty input, lengths below 4, multiples of 4 and lengths with a remainder
+ for (size_t nsamps = 0; nsamps < 16; nsamps++){
+ test_convert_types_fc32(nsamps, io_type, otw_type);
+ }
}
BOOST_AUTO_TEST_CASE(test_convert_types_le_fc32){
- fc32_t in_fc32[] = {
- fc32_t(float(0), float(-0.2)), fc32_t(float(0.03), float(-0.16)),
- fc32_t(float(1.0), float(.45)), fc32_t(float(0.09), float(0))
- }, out_fc32[4];
-
io_type_t io_type(io_type_t::COMPLEX_FLOAT32);
otw_type_t otw_type;
otw_type.byteorder = otw_type_t::BO_LITTLE_ENDIAN;
otw_type.width = 16;
- loopback<fc32_t, boost::uint32_t, 4>(io_type, otw_type, in_fc32, out_fc32);
-
- BOOST_CHECK_CLOSE_COMPLEX(in_fc32[0], out_fc32[0], tolerance);
- BOOST_CHECK_CLOSE_COMPLEX(in_fc32[1], out_fc32[1], tolerance);
- BOOST_CHECK_CLOSE_COMPLEX(in_fc32[2], out_fc32[2], tolerance);
- BOOST_CHECK_CLOSE_COMPLEX(in_fc32[3], out_fc32[3], tolerance);
+ //try every length from 0 to 15 to test edge cases: empty input, lengths below 4, multiples of 4 and lengths with a remainder
+ for (size_t nsamps = 0; nsamps < 16; nsamps++){
+ test_convert_types_fc32(nsamps, io_type, otw_type);
+ }
}