summary | refs | log | tree | commit | diff | stats
path: root/host
diff options
context:
space:
mode:
author Josh Blum <josh@joshknows.com> 2010-06-26 00:33:37 -0700
committer Josh Blum <josh@joshknows.com> 2010-06-28 11:12:19 -0700
commit a094f2a6373552c74657c73d048a938bcdeb6907 (patch)
tree dd6f9150d8bc8e999215f1ddabc9ece565f6ee7b /host
parent a9a47d0a69419e862657567a4228e8de0f4b8342 (diff)
download uhd-a094f2a6373552c74657c73d048a938bcdeb6907.tar.gz
uhd-a094f2a6373552c74657c73d048a938bcdeb6907.tar.bz2
uhd-a094f2a6373552c74657c73d048a938bcdeb6907.zip
uhd: moron alert, used incorrect bounds in simd loop, the remainder loop was doing 3/4 the work
Diffstat (limited to 'host')
-rw-r--r-- host/lib/transport/convert_types_impl.hpp 4
-rwxr-xr-x host/lib/transport/gen_convert_types.py 19
2 files changed, 11 insertions, 12 deletions
diff --git a/host/lib/transport/convert_types_impl.hpp b/host/lib/transport/convert_types_impl.hpp
index ca1be175c..5958b08cb 100644
--- a/host/lib/transport/convert_types_impl.hpp
+++ b/host/lib/transport/convert_types_impl.hpp
@@ -99,7 +99,7 @@ static UHD_INLINE void fc32_to_item32_bswap(
__m128 scalar = _mm_set_ps1(shorts_per_float);
//convert blocks of samples with intrinsics
- size_t i = 0; for (; i < nsamps/4; i+=4){
+ size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
//load from input
__m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0));
__m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2));
@@ -163,7 +163,7 @@ static UHD_INLINE void item32_to_fc32_bswap(
__m128i zeroi = _mm_setzero_si128();
//convert blocks of samples with intrinsics
- size_t i = 0; for (; i < nsamps/4; i+=4){
+ size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
//load from input
__m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
diff --git a/host/lib/transport/gen_convert_types.py b/host/lib/transport/gen_convert_types.py
index b37fe242b..951b634d9 100755
--- a/host/lib/transport/gen_convert_types.py
+++ b/host/lib/transport/gen_convert_types.py
@@ -29,12 +29,6 @@ TMPL_TEXT = """
\#include <stdexcept>
\#include "convert_types_impl.hpp"
-\#ifdef BOOST_BIG_ENDIAN
- static const bool is_big_endian = true;
-\#else
- static const bool is_big_endian = false;
-\#endif
-
using namespace uhd;
/***********************************************************************
@@ -47,15 +41,20 @@ UHD_INLINE boost::uint8_t get_pred(
boost::uint8_t pred = 0;
switch(otw_type.byteorder){
- case otw_type_t::BO_BIG_ENDIAN: pred |= (is_big_endian)? $ph.nswap_p : $ph.bswap_p; break;
- case otw_type_t::BO_LITTLE_ENDIAN: pred |= (is_big_endian)? $ph.bswap_p : $ph.nswap_p; break;
+ \#ifdef BOOST_BIG_ENDIAN
+ case otw_type_t::BO_BIG_ENDIAN: pred |= $ph.nswap_p; break;
+ case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.bswap_p; break;
+ \#else
+ case otw_type_t::BO_BIG_ENDIAN: pred |= $ph.bswap_p; break;
+ case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.nswap_p; break;
+ \#endif
case otw_type_t::BO_NATIVE: pred |= $ph.nswap_p; break;
- default: throw std::runtime_error("unhandled byteorder type");
+ default: throw std::runtime_error("unhandled otw byteorder type");
}
switch(otw_type.get_sample_size()){
case sizeof(boost::uint32_t): pred |= $ph.item32_p; break;
- default: throw std::runtime_error("unhandled bit width");
+ default: throw std::runtime_error("unhandled otw sample size");
}
switch(io_type.tid){