diff options
Diffstat (limited to 'host/lib/transport')
-rw-r--r-- | host/lib/transport/CMakeLists.txt | 20 | ||||
-rw-r--r-- | host/lib/transport/convert_types_impl.hpp | 201 | ||||
-rwxr-xr-x | host/lib/transport/gen_convert_types.py | 122 | ||||
-rwxr-xr-x | host/lib/transport/gen_vrt.py | 23 | ||||
-rw-r--r-- | host/lib/transport/if_addrs.cpp | 6 | ||||
-rw-r--r-- | host/lib/transport/udp_zero_copy_asio.cpp | 9 |
6 files changed, 283 insertions, 98 deletions
diff --git a/host/lib/transport/CMakeLists.txt b/host/lib/transport/CMakeLists.txt index 872865d6c..70cf6312d 100644 --- a/host/lib/transport/CMakeLists.txt +++ b/host/lib/transport/CMakeLists.txt @@ -18,6 +18,16 @@ #This file will be included by cmake, use absolute paths! ######################################################################## +# Check for SIMD headers +######################################################################## +INCLUDE(CheckIncludeFileCXX) +CHECK_INCLUDE_FILE_CXX(emmintrin.h HAVE_EMMINTRIN_H) + +IF(HAVE_EMMINTRIN_H) + ADD_DEFINITIONS(-DHAVE_EMMINTRIN_H) +ENDIF(HAVE_EMMINTRIN_H) + +######################################################################## # Setup defines for interface address discovery ######################################################################## MESSAGE(STATUS "Configuring interface address discovery...") @@ -49,6 +59,16 @@ LIBUHD_PYTHON_GEN_SOURCE( ${CMAKE_BINARY_DIR}/lib/transport/convert_types.cpp ) +# append this directory to the include path so the generated convert types +# can include the implementation convert types file in the source directory +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/lib/transport) + +# make the generated convert types depend on the implementation header +SET_SOURCE_FILES_PROPERTIES( + ${CMAKE_BINARY_DIR}/lib/transport/convert_types.cpp PROPERTIES + OBJECT_DEPENDS ${CMAKE_SOURCE_DIR}/lib/transport/convert_types_impl.hpp +) + LIBUHD_APPEND_SOURCES( ${CMAKE_SOURCE_DIR}/lib/transport/if_addrs.cpp ${CMAKE_SOURCE_DIR}/lib/transport/udp_simple.cpp diff --git a/host/lib/transport/convert_types_impl.hpp b/host/lib/transport/convert_types_impl.hpp new file mode 100644 index 000000000..5958b08cb --- /dev/null +++ b/host/lib/transport/convert_types_impl.hpp @@ -0,0 +1,201 @@ +// +// Copyright 2010 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// + +#ifndef INCLUDED_LIBUHD_TRANSPORT_CONVERT_TYPES_IMPL_HPP +#define INCLUDED_LIBUHD_TRANSPORT_CONVERT_TYPES_IMPL_HPP + +#include <uhd/config.hpp> +#include <uhd/utils/byteswap.hpp> +#include <boost/cstdint.hpp> +#include <cstring> +#include <complex> + +#ifdef HAVE_EMMINTRIN_H + #define USE_EMMINTRIN_H //use sse2 intrinsics +#endif + +/*********************************************************************** + * Typedefs + **********************************************************************/ +typedef std::complex<float> fc32_t; +typedef std::complex<boost::int16_t> sc16_t; +typedef boost::uint32_t item32_t; + +/*********************************************************************** + * Convert complex short buffer to items32 + **********************************************************************/ +static UHD_INLINE void sc16_to_item32_nswap( + const sc16_t *input, item32_t *output, size_t nsamps +){ + std::memcpy(output, input, nsamps*sizeof(item32_t)); +} + +static UHD_INLINE void sc16_to_item32_bswap( + const sc16_t *input, item32_t *output, size_t nsamps +){ + const item32_t *item32_input = (const item32_t *)input; + for (size_t i = 0; i < nsamps; i++){ + output[i] = uhd::byteswap(item32_input[i]); + } +} + +/*********************************************************************** + * Convert items32 buffer to complex short + **********************************************************************/ +static UHD_INLINE void item32_to_sc16_nswap( + const item32_t *input, sc16_t *output, size_t nsamps +){ + std::memcpy(output, input, nsamps*sizeof(item32_t)); +} + +static UHD_INLINE void item32_to_sc16_bswap( + const item32_t *input, sc16_t *output, size_t nsamps +){ + item32_t *item32_output = (item32_t *)output; + for (size_t i = 0; i < nsamps; i++){ + item32_output[i] = uhd::byteswap(input[i]); + } +} + +/*********************************************************************** + * Convert complex float buffer to items32 + **********************************************************************/ +static const float shorts_per_float = float(32767); + +static UHD_INLINE item32_t fc32_to_item32(fc32_t num){ + boost::uint16_t real = boost::int16_t(num.real()*shorts_per_float); + boost::uint16_t imag = boost::int16_t(num.imag()*shorts_per_float); + return (item32_t(real) << 16) | (item32_t(imag) << 0); +} + +static UHD_INLINE void fc32_to_item32_nswap( + const fc32_t *input, item32_t *output, size_t nsamps +){ + for (size_t i = 0; i < nsamps; i++){ + output[i] = fc32_to_item32(input[i]); + } +} + +#if defined(USE_EMMINTRIN_H) +#include <emmintrin.h> + +static UHD_INLINE void fc32_to_item32_bswap( + const fc32_t *input, item32_t *output, size_t nsamps +){ + __m128 scalar = _mm_set_ps1(shorts_per_float); + + //convert blocks of samples with intrinsics + size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ + //load from input + __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); + __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); + + //convert and scale + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); + + //pack + byteswap -> byteswap 32 bit words + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); + + //store to output + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = uhd::byteswap(fc32_to_item32(input[i])); + } +} + +#else +static UHD_INLINE void fc32_to_item32_bswap( + const fc32_t *input, item32_t *output, size_t nsamps +){ + for (size_t i = 0; i < nsamps; i++){ + output[i] = uhd::byteswap(fc32_to_item32(input[i])); + } +} + +#endif + +/*********************************************************************** + * Convert items32 buffer to complex float + **********************************************************************/ +static const float floats_per_short = float(1.0/shorts_per_float); + +static UHD_INLINE fc32_t item32_to_fc32(item32_t item){ + return fc32_t( + float(boost::int16_t(item >> 16)*floats_per_short), + float(boost::int16_t(item >> 0)*floats_per_short) + ); +} + +static UHD_INLINE void item32_to_fc32_nswap( + const item32_t *input, fc32_t *output, size_t nsamps +){ + for (size_t i = 0; i < nsamps; i++){ + output[i] = item32_to_fc32(input[i]); + } +} + +#if defined(USE_EMMINTRIN_H) +#include <emmintrin.h> + +static UHD_INLINE void item32_to_fc32_bswap( + const item32_t *input, fc32_t *output, size_t nsamps +){ + __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16)); + __m128i zeroi = _mm_setzero_si128(); + + //convert blocks of samples with intrinsics + size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ + //load from input + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); + + //byteswap + unpack -> byteswap 32 bit words + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); + + //convert and scale + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); + + //store to output + _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); + _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = item32_to_fc32(uhd::byteswap(input[i])); + } +} + +#else +static UHD_INLINE void item32_to_fc32_bswap( + const item32_t *input, fc32_t *output, size_t nsamps +){ + for (size_t i = 0; i < nsamps; i++){ + output[i] = item32_to_fc32(uhd::byteswap(input[i])); + } +} + +#endif + +#endif /* INCLUDED_LIBUHD_TRANSPORT_CONVERT_TYPES_IMPL_HPP */ diff --git a/host/lib/transport/gen_convert_types.py b/host/lib/transport/gen_convert_types.py index af2bcc7cb..951b634d9 100755 --- a/host/lib/transport/gen_convert_types.py +++ b/host/lib/transport/gen_convert_types.py @@ -24,66 +24,15 @@ TMPL_TEXT = """ \#include <uhd/config.hpp> \#include <uhd/transport/convert_types.hpp> -\#include <uhd/utils/byteswap.hpp> \#include <boost/cstdint.hpp> \#include <boost/detail/endian.hpp> \#include <stdexcept> -\#include <complex> - -//define the endian macros to convert integers -\#ifdef BOOST_BIG_ENDIAN - \#define BE_MACRO(x) x - \#define LE_MACRO(x) uhd::byteswap(x) - static const bool is_big_endian = true; -\#else - \#define BE_MACRO(x) uhd::byteswap(x) - \#define LE_MACRO(x) x - static const bool is_big_endian = false; -\#endif +\#include "convert_types_impl.hpp" using namespace uhd; /*********************************************************************** - * Constants - **********************************************************************/ -typedef std::complex<float> fc32_t; -typedef std::complex<boost::int16_t> sc16_t; -typedef boost::uint32_t item32_t; - -static const float shorts_per_float = float(32767); -static const float floats_per_short = float(1.0/shorts_per_float); - -/*********************************************************************** - * Single-sample converters - **********************************************************************/ -static UHD_INLINE item32_t sc16_to_item32(sc16_t num){ - boost::uint16_t real = boost::int16_t(num.real()); - boost::uint16_t imag = boost::int16_t(num.imag()); - return (item32_t(real) << 16) | (item32_t(imag) << 0); -} - -static UHD_INLINE sc16_t item32_to_sc16(item32_t item){ - return sc16_t( - boost::uint16_t(item >> 16), - boost::uint16_t(item >> 0) - ); -} - -static UHD_INLINE item32_t fc32_to_item32(fc32_t num){ - boost::uint16_t real = boost::int16_t(num.real()*shorts_per_float); - boost::uint16_t imag = boost::int16_t(num.imag()*shorts_per_float); - return (item32_t(real) << 16) | (item32_t(imag) << 0); -} - -static UHD_INLINE fc32_t item32_to_fc32(item32_t item){ - return fc32_t( - float(boost::int16_t(item >> 16)*floats_per_short), - float(boost::int16_t(item >> 0)*floats_per_short) - ); -} - -/*********************************************************************** - * Sample-buffer converters + * Generate predicate for jump table **********************************************************************/ UHD_INLINE boost::uint8_t get_pred( const io_type_t &io_type, @@ -92,27 +41,34 @@ UHD_INLINE boost::uint8_t get_pred( boost::uint8_t pred = 0; switch(otw_type.byteorder){ - case otw_type_t::BO_BIG_ENDIAN: pred |= $ph.be_p; break; - case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.le_p; break; - ##let the compiler determine the native byte order (we could use python sys.byteorder) - case otw_type_t::BO_NATIVE: pred |= (is_big_endian)? $ph.be_p : $ph.le_p; break; - default: throw std::runtime_error("unhandled byteorder type"); + \#ifdef BOOST_BIG_ENDIAN + case otw_type_t::BO_BIG_ENDIAN: pred |= $ph.nswap_p; break; + case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.bswap_p; break; + \#else + case otw_type_t::BO_BIG_ENDIAN: pred |= $ph.bswap_p; break; + case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.nswap_p; break; + \#endif + case otw_type_t::BO_NATIVE: pred |= $ph.nswap_p; break; + default: throw std::runtime_error("unhandled otw byteorder type"); } - switch(otw_type.width){ - case 16: pred |= $ph.w16_p; break; - default: throw std::runtime_error("unhandled bit width"); + switch(otw_type.get_sample_size()){ + case sizeof(boost::uint32_t): pred |= $ph.item32_p; break; + default: throw std::runtime_error("unhandled otw sample size"); } switch(io_type.tid){ - case io_type_t::COMPLEX_INT16: pred |= $ph.sc16_p; break; case io_type_t::COMPLEX_FLOAT32: pred |= $ph.fc32_p; break; + case io_type_t::COMPLEX_INT16: pred |= $ph.sc16_p; break; default: throw std::runtime_error("unhandled io type id"); } return pred; } +/*********************************************************************** + * Convert host type to device type + **********************************************************************/ void transport::convert_io_type_to_otw_type( const void *io_buff, const io_type_t &io_type, void *otw_buff, const otw_type_t &otw_type, @@ -123,16 +79,16 @@ void transport::convert_io_type_to_otw_type( case $pred: #set $out_type = $ph.get_dev_type($pred) #set $in_type = $ph.get_host_type($pred) - #set $converter = $in_type+"_to_"+$out_type - #set $xe_macro = $ph.get_xe_macro($pred) - for (size_t i = 0; i < num_samps; i++){ - (($(out_type)_t *)otw_buff)[i] = $(xe_macro)($(converter)(((const $(in_type)_t *)io_buff)[i])); - } + #set $converter = '_'.join([$in_type, 'to', $out_type, $ph.get_swap_type($pred)]) + $(converter)((const $(in_type)_t *)io_buff, ($(out_type)_t *)otw_buff, num_samps); break; #end for } } +/*********************************************************************** + * Convert device type to host type + **********************************************************************/ void transport::convert_otw_type_to_io_type( const void *otw_buff, const otw_type_t &otw_type, void *io_buff, const io_type_t &io_type, @@ -143,11 +99,8 @@ void transport::convert_otw_type_to_io_type( case $pred: #set $out_type = $ph.get_host_type($pred) #set $in_type = $ph.get_dev_type($pred) - #set $converter = $in_type+"_to_"+$out_type - #set $xe_macro = $ph.get_xe_macro($pred) - for (size_t i = 0; i < num_samps; i++){ - (($(out_type)_t *)io_buff)[i] = $(converter)($(xe_macro)(((const $(in_type)_t *)otw_buff)[i])); - } + #set $converter = '_'.join([$in_type, 'to', $out_type, $ph.get_swap_type($pred)]) + $(converter)((const $(in_type)_t *)otw_buff, ($(out_type)_t *)io_buff, num_samps); break; #end for } @@ -160,29 +113,32 @@ def parse_tmpl(_tmpl_text, **kwargs): return str(Template(_tmpl_text, kwargs)) class ph: - be_p = 0b00001 - le_p = 0b00000 - w16_p = 0b00000 - sc16_p = 0b00010 - fc32_p = 0b00000 + bswap_p = 0b00001 + nswap_p = 0b00000 + item32_p = 0b00000 + sc16_p = 0b00010 + fc32_p = 0b00000 nbits = 2 #see above @staticmethod - def get_xe_macro(pred): - if (pred & ph.be_p) == ph.be_p: return 'BE_MACRO' - if (pred & ph.le_p) == ph.le_p: return 'LE_MACRO' + def has(pred, flag): return (pred & flag) == flag + + @staticmethod + def get_swap_type(pred): + if ph.has(pred, ph.bswap_p): return 'bswap' + if ph.has(pred, ph.nswap_p): return 'nswap' raise NotImplementedError @staticmethod def get_dev_type(pred): - if (pred & ph.w16_p) == ph.w16_p: return 'item32' + if ph.has(pred, ph.item32_p): return 'item32' raise NotImplementedError @staticmethod def get_host_type(pred): - if (pred & ph.sc16_p) == ph.sc16_p: return 'sc16' - if (pred & ph.fc32_p) == ph.fc32_p: return 'fc32' + if ph.has(pred, ph.sc16_p): return 'sc16' + if ph.has(pred, ph.fc32_p): return 'fc32' raise NotImplementedError if __name__ == '__main__': diff --git a/host/lib/transport/gen_vrt.py b/host/lib/transport/gen_vrt.py index 6cdd6645d..8e0fce9ff 100755 --- a/host/lib/transport/gen_vrt.py +++ b/host/lib/transport/gen_vrt.py @@ -97,7 +97,7 @@ void vrt::pack_$(suffix)( #end if ########## Integer Time ########## #if $pred & $tsi_p - header_buff[$num_header_words] = $(XE_MACRO)(metadata.time_spec.secs); + header_buff[$num_header_words] = $(XE_MACRO)(boost::uint32_t(metadata.time_spec.get_full_secs())); #set $num_header_words += 1 #set $flags |= (0x3 << 22); #end if @@ -105,7 +105,7 @@ void vrt::pack_$(suffix)( #if $pred & $tsf_p header_buff[$num_header_words] = 0; #set $num_header_words += 1 - header_buff[$num_header_words] = $(XE_MACRO)(metadata.time_spec.get_ticks(tick_rate)); + header_buff[$num_header_words] = $(XE_MACRO)(boost::uint32_t(metadata.time_spec.get_tick_count(tick_rate))); #set $num_header_words += 1 #set $flags |= (0x1 << 20); #end if @@ -147,6 +147,7 @@ void vrt::unpack_$(suffix)( ){ //clear the metadata metadata = rx_metadata_t(); + boost::uint32_t secs = 0, ticks = 0; //extract vrt header boost::uint32_t vrt_hdr_word = $(XE_MACRO)(header_buff[0]); @@ -169,7 +170,7 @@ void vrt::unpack_$(suffix)( switch(pred){ #for $pred in range(2**5) case $pred: - #set $set_has_time_spec = False + #set $has_time_spec = False #set $num_header_words = 1 ########## Stream ID ########## #if $pred & $sid_p @@ -184,21 +185,21 @@ void vrt::unpack_$(suffix)( #end if ########## Integer Time ########## #if $pred & $tsi_p - metadata.has_time_spec = true; - #set $set_has_time_spec = True - metadata.time_spec.secs = $(XE_MACRO)(header_buff[$num_header_words]); + #set $has_time_spec = True + secs = $(XE_MACRO)(header_buff[$num_header_words]); #set $num_header_words += 1 #end if ########## Fractional Time ########## #if $pred & $tsf_p - #if not $set_has_time_spec - metadata.has_time_spec = true; - #set $set_has_time_spec = True - #end if + #set $has_time_spec = True #set $num_header_words += 1 - metadata.time_spec.set_ticks($(XE_MACRO)(header_buff[$num_header_words]), tick_rate); + ticks = $(XE_MACRO)(header_buff[$num_header_words]); #set $num_header_words += 1 #end if + #if $has_time_spec + metadata.has_time_spec = true; + metadata.time_spec = time_spec_t(secs, ticks, tick_rate); + #end if ########## Trailer ########## #if $pred & $tlr_p #set $num_trailer_words = 1; diff --git a/host/lib/transport/if_addrs.cpp b/host/lib/transport/if_addrs.cpp index 5c8c8a176..ad9a2325b 100644 --- a/host/lib/transport/if_addrs.cpp +++ b/host/lib/transport/if_addrs.cpp @@ -27,7 +27,7 @@ uhd::transport::if_addrs_t::if_addrs_t(void){ /*********************************************************************** * Interface address discovery through ifaddrs api **********************************************************************/ -#ifdef HAVE_IFADDRS_H +#if defined(HAVE_IFADDRS_H) #include <ifaddrs.h> static boost::asio::ip::address_v4 sockaddr_to_ip_addr(sockaddr *addr){ @@ -59,9 +59,9 @@ std::vector<uhd::transport::if_addrs_t> uhd::transport::get_if_addrs(void){ } /*********************************************************************** - * Interface address discovery through windows api (TODO) + * Interface address discovery through windows api **********************************************************************/ -#elif HAVE_WINSOCK2_H +#elif defined(HAVE_WINSOCK2_H) #include <winsock2.h> std::vector<uhd::transport::if_addrs_t> uhd::transport::get_if_addrs(void){ diff --git a/host/lib/transport/udp_zero_copy_asio.cpp b/host/lib/transport/udp_zero_copy_asio.cpp index c3c02707e..7f9292d24 100644 --- a/host/lib/transport/udp_zero_copy_asio.cpp +++ b/host/lib/transport/udp_zero_copy_asio.cpp @@ -27,7 +27,8 @@ using namespace uhd::transport; /*********************************************************************** * Constants **********************************************************************/ -static const size_t MIN_SOCK_BUFF_SIZE = size_t(100e3); +//enough buffering for half a second of samples at full rate on usrp2 +static const size_t MIN_SOCK_BUFF_SIZE = size_t(sizeof(boost::uint32_t) * 25e6 * 0.5); static const size_t MAX_DGRAM_SIZE = 1500; //assume max size on send and recv static const double RECV_TIMEOUT = 0.1; //100 ms @@ -159,6 +160,12 @@ template<typename Opt> static void resize_buff_helper( //otherwise, ensure that the buffer is at least the minimum size else if (udp_trans->get_buff_size<Opt>() < MIN_SOCK_BUFF_SIZE){ resize_buff_helper<Opt>(udp_trans, MIN_SOCK_BUFF_SIZE, name); + if (udp_trans->get_buff_size<Opt>() < MIN_SOCK_BUFF_SIZE){ + std::cerr << boost::format( + "Warning: the %s buffer size is smaller than the recommended size of %d bytes.\n" + " See the USRP2 application notes on buffer resizing." + ) % name % MIN_SOCK_BUFF_SIZE << std::endl; + } } } |