diff options
-rw-r--r-- | host/examples/CMakeLists.txt | 2 | ||||
-rw-r--r-- | host/examples/benchmark_rate.cpp | 236 | ||||
-rw-r--r-- | host/examples/benchmark_rx_rate.cpp | 167 | ||||
-rw-r--r-- | host/lib/convert/convert_with_sse2.cpp | 204 | ||||
-rw-r--r-- | host/lib/transport/libusb1_zero_copy.cpp | 5 | ||||
-rw-r--r-- | host/lib/usrp/usrp2/io_impl.cpp | 5 | ||||
-rwxr-xr-x | host/utils/usrp2_card_burner.py | 28 |
7 files changed, 378 insertions, 269 deletions
diff --git a/host/examples/CMakeLists.txt b/host/examples/CMakeLists.txt index fe9e6409e..10d1fddc3 100644 --- a/host/examples/CMakeLists.txt +++ b/host/examples/CMakeLists.txt @@ -19,7 +19,7 @@ # example applications ######################################################################## SET(example_sources - benchmark_rx_rate.cpp + benchmark_rate.cpp rx_multi_samples.cpp rx_samples_to_file.cpp rx_samples_to_udp.cpp diff --git a/host/examples/benchmark_rate.cpp b/host/examples/benchmark_rate.cpp new file mode 100644 index 000000000..6927b512b --- /dev/null +++ b/host/examples/benchmark_rate.cpp @@ -0,0 +1,236 @@ +// +// Copyright 2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// + +#include <uhd/utils/thread_priority.hpp> +#include <uhd/utils/safe_main.hpp> +#include <uhd/usrp/multi_usrp.hpp> +#include <boost/program_options.hpp> +#include <boost/format.hpp> +#include <boost/thread/thread.hpp> +#include <boost/math/special_functions/round.hpp> +#include <iostream> +#include <complex> + +namespace po = boost::program_options; + +unsigned long long num_overflows = 0; +unsigned long long num_underflows = 0; +unsigned long long num_rx_samps = 0; +unsigned long long num_tx_samps = 0; +unsigned long long num_dropped_samps = 0; + +/*********************************************************************** + * Benchmark RX Rate + **********************************************************************/ +void benchmark_rx_rate(uhd::usrp::multi_usrp::sptr usrp){ + uhd::set_thread_priority_safe(); + + //print pre-test summary + std::cout << boost::format( + "Testing receive rate %f Msps" + ) % (usrp->get_rx_rate()/1e6) << std::endl; + + //setup variables and allocate buffer + uhd::rx_metadata_t md; + const size_t max_samps_per_packet = usrp->get_device()->get_max_recv_samps_per_packet(); + std::vector<std::complex<float> > buff(max_samps_per_packet); + bool had_an_overflow = false; + uhd::time_spec_t last_time; + const double rate = usrp->get_rx_rate(); + + usrp->issue_stream_cmd(uhd::stream_cmd_t::STREAM_MODE_START_CONTINUOUS); + while (not boost::this_thread::interruption_requested()){ + num_rx_samps += usrp->get_device()->recv( + &buff.front(), buff.size(), md, + uhd::io_type_t::COMPLEX_FLOAT32, + uhd::device::RECV_MODE_ONE_PACKET + ); + + //handle the error codes + switch(md.error_code){ + case uhd::rx_metadata_t::ERROR_CODE_NONE: + if (had_an_overflow){ + had_an_overflow = false; + num_dropped_samps += boost::math::iround((md.time_spec - last_time).get_real_secs()*rate); + } + break; + + case uhd::rx_metadata_t::ERROR_CODE_OVERFLOW: + had_an_overflow = true; + last_time = md.time_spec; + num_overflows++; + break; + + default: + std::cerr << "Error code: " << md.error_code << std::endl; + std::cerr << "Unexpected error on recv, exit test..." << std::endl; + goto loop_done; + } + + } loop_done: + usrp->issue_stream_cmd(uhd::stream_cmd_t::STREAM_MODE_STOP_CONTINUOUS); +} + +/*********************************************************************** + * Benchmark TX Rate + **********************************************************************/ +void benchmark_tx_rate(uhd::usrp::multi_usrp::sptr usrp){ + uhd::set_thread_priority_safe(); + + //print pre-test summary + std::cout << boost::format( + "Testing transmit rate %f Msps" + ) % (usrp->get_tx_rate()/1e6) << std::endl; + + //setup variables and allocate buffer + uhd::tx_metadata_t md; + md.has_time_spec = false; + const size_t max_samps_per_packet = usrp->get_device()->get_max_send_samps_per_packet(); + std::vector<std::complex<float> > buff(max_samps_per_packet); + + while (not boost::this_thread::interruption_requested()){ + num_tx_samps += usrp->get_device()->send( + &buff.front(), buff.size(), md, + uhd::io_type_t::COMPLEX_FLOAT32, + uhd::device::SEND_MODE_ONE_PACKET + ); + } + + //send a mini EOB packet + md.end_of_burst = true; + usrp->get_device()->send("", 0, md, + uhd::io_type_t::COMPLEX_FLOAT32, + uhd::device::SEND_MODE_FULL_BUFF + ); +} + +void benchmark_tx_rate_async_helper(uhd::usrp::multi_usrp::sptr usrp){ + //setup variables and allocate buffer + uhd::async_metadata_t async_md; + + while (true){ + + if (not usrp->get_device()->recv_async_msg(async_md)){ + if (boost::this_thread::interruption_requested()) return; + } + + //handle the error codes + switch(async_md.event_code){ + case uhd::async_metadata_t::EVENT_CODE_BURST_ACK: + return; + + case uhd::async_metadata_t::EVENT_CODE_UNDERFLOW: + case uhd::async_metadata_t::EVENT_CODE_UNDERFLOW_IN_PACKET: + num_underflows++; + break; + + default: + std::cerr << "Event code: " << async_md.event_code << std::endl; + std::cerr << "Unexpected event on async recv, exit test..." << std::endl; + return; + } + } +} + +/*********************************************************************** + * Main code + dispatcher + **********************************************************************/ +int UHD_SAFE_MAIN(int argc, char *argv[]){ + + //variables to be set by po + std::string args; + double duration; + double rx_rate, tx_rate; + + //setup the program options + po::options_description desc("Allowed options"); + desc.add_options() + ("help", "help message") + ("args", po::value<std::string>(&args)->default_value(""), "single uhd device address args") + ("duration", po::value<double>(&duration)->default_value(10.0), "duration for the test in seconds") + ("rx_rate", po::value<double>(&rx_rate), "specify to perform a RX rate test (sps)") + ("tx_rate", po::value<double>(&tx_rate), "specify to perform a TX rate test (sps)") + ; + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + + //print the help message + if (vm.count("help")){ + std::cout << boost::format("UHD Benchmark Rate %s") % desc << std::endl; + std::cout << + " Specify --rx_rate for a receive-only test.\n" + " Specify --tx_rate for a transmit-only test.\n" + " Specify both options for a full-duplex test.\n" + << std::endl; + return ~0; + } + + //create a usrp device + std::cout << std::endl; + uhd::device_addrs_t device_addrs = uhd::device::find(args); + if (device_addrs.empty()){ + std::cerr << "Could not find any devices for: " << args << std::endl; + return ~0; + } + if (device_addrs.at(0).get("type", "") == "usrp1"){ + std::cerr << "*** Warning! ***" << std::endl; + std::cerr << "Benchmark results will be inaccurate on USRP1 due to insufficient features.\n" << std::endl; + } + std::cout << boost::format("Creating the usrp device with: %s...") % args << std::endl; + uhd::usrp::multi_usrp::sptr usrp = uhd::usrp::multi_usrp::make(device_addrs.at(0)); + std::cout << boost::format("Using Device: %s") % usrp->get_pp_string() << std::endl; + + boost::thread_group thread_group; + + //spawn the receive test thread + if (vm.count("rx_rate")){ + usrp->set_rx_rate(rx_rate); + thread_group.create_thread(boost::bind(&benchmark_rx_rate, usrp)); + } + + //spawn the transmit test thread + if (vm.count("tx_rate")){ + usrp->set_tx_rate(tx_rate); + thread_group.create_thread(boost::bind(&benchmark_tx_rate, usrp)); + thread_group.create_thread(boost::bind(&benchmark_tx_rate_async_helper, usrp)); + } + + //sleep for the required duration + const long secs = long(duration); + const long usecs = long((duration - secs)*1e6); + boost::this_thread::sleep(boost::posix_time::seconds(secs) + boost::posix_time::microseconds(usecs)); + + //interrupt and join the threads + thread_group.interrupt_all(); + thread_group.join_all(); + + //print summary + std::cout << std::endl << boost::format( + "Benchmark rate summary:\n" + " Num received samples: %u\n" + " Num dropped samples: %u\n" + " Num overflows detected: %u\n" + " Num transmitted samples: %u\n" + " Num underflows detected: %u\n" + ) % num_rx_samps % num_dropped_samps % num_overflows % num_tx_samps % num_underflows << std::endl; + + //finished + std::cout << std::endl << "Done!" << std::endl << std::endl; + + return 0; +} diff --git a/host/examples/benchmark_rx_rate.cpp b/host/examples/benchmark_rx_rate.cpp deleted file mode 100644 index 50af1b98b..000000000 --- a/host/examples/benchmark_rx_rate.cpp +++ /dev/null @@ -1,167 +0,0 @@ -// -// Copyright 2010-2011 Ettus Research LLC -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see <http://www.gnu.org/licenses/>. -// - -#include <uhd/utils/thread_priority.hpp> -#include <uhd/utils/safe_main.hpp> -#include <uhd/usrp/multi_usrp.hpp> -#include <boost/math/special_functions/round.hpp> -#include <boost/program_options.hpp> -#include <boost/format.hpp> -#include <iostream> -#include <complex> - -namespace po = boost::program_options; - -static inline void test_device( - uhd::usrp::multi_usrp::sptr usrp, - double rx_rate_sps, - double duration_secs -){ - const size_t max_samps_per_packet = usrp->get_device()->get_max_recv_samps_per_packet(); - std::cout << boost::format("Testing receive rate %f Msps (%f second run)") % (rx_rate_sps/1e6) % duration_secs << std::endl; - - //allocate recv buffer and metatdata - uhd::rx_metadata_t md; - std::vector<std::complex<float> > buff(max_samps_per_packet); - - //flush the buffers in the recv path - while(usrp->get_device()->recv( - &buff.front(), buff.size(), md, - uhd::io_type_t::COMPLEX_FLOAT32, - uhd::device::RECV_MODE_ONE_PACKET - )){ - /* NOP */ - }; - - //declare status variables - bool got_first_packet = false; - size_t total_recv_packets = 0; - size_t total_lost_samples = 0; - size_t total_recv_samples = 0; - uhd::time_spec_t initial_time_spec; - uhd::time_spec_t next_expected_time_spec; - - usrp->issue_stream_cmd(uhd::stream_cmd_t::STREAM_MODE_START_CONTINUOUS); - do { - size_t num_rx_samps = usrp->get_device()->recv( - &buff.front(), buff.size(), md, - uhd::io_type_t::COMPLEX_FLOAT32, - uhd::device::RECV_MODE_ONE_PACKET - ); - - //handle the error codes - switch(md.error_code){ - case uhd::rx_metadata_t::ERROR_CODE_NONE: - case uhd::rx_metadata_t::ERROR_CODE_OVERFLOW: - break; - - default: - std::cerr << "Error code: " << md.error_code << std::endl; - std::cerr << "Unexpected error on recv, exit test..." << std::endl; - return; - } - - if (not md.has_time_spec){ - std::cerr << "Metadata missing time spec, exit test..." << std::endl; - return; - } - - total_recv_samples += num_rx_samps; - total_recv_packets++; - - if (not got_first_packet){ - initial_time_spec = md.time_spec; - next_expected_time_spec = initial_time_spec; - got_first_packet = true; - } - - double approx_lost_samps = rx_rate_sps*(md.time_spec - next_expected_time_spec).get_real_secs(); - total_lost_samples += std::max(0, boost::math::iround(approx_lost_samps)); - next_expected_time_spec = md.time_spec + uhd::time_spec_t(0, num_rx_samps, rx_rate_sps); - - } while((next_expected_time_spec - initial_time_spec) < uhd::time_spec_t(duration_secs)); - usrp->issue_stream_cmd(uhd::stream_cmd_t::STREAM_MODE_STOP_CONTINUOUS); - - //print a summary - std::cout << std::endl; //go to newline, recv may spew SXSYSZ... - std::cout << boost::format(" Received packets: %d") % total_recv_packets << std::endl; - std::cout << boost::format(" Received samples: %d") % total_recv_samples << std::endl; - std::cout << boost::format(" Lost samples: %d") % total_lost_samples << std::endl; - size_t packets_lost = boost::math::iround(double(total_lost_samples)/max_samps_per_packet); - std::cout << boost::format(" Lost packets: %d (approximate)") % packets_lost << std::endl; - double actual_rx_rate_sps = (total_recv_samples*rx_rate_sps)/(total_recv_samples+total_lost_samples); - std::cout << boost::format(" Sustained receive rate: %f Msps") % (actual_rx_rate_sps/1e6) << std::endl; - std::cout << std::endl << std::endl; -} - -int UHD_SAFE_MAIN(int argc, char *argv[]){ - uhd::set_thread_priority_safe(); - - //variables to be set by po - std::string args; - double duration; - double rate; - - //setup the program options - po::options_description desc("Allowed options"); - desc.add_options() - ("help", "help message") - ("args", po::value<std::string>(&args)->default_value(""), "single uhd device address args") - ("duration", po::value<double>(&duration)->default_value(10.0), "duration for each test in seconds") - ("rate", po::value<double>(&rate), "specify to perform a single test as this rate (sps)") - ; - po::variables_map vm; - po::store(po::parse_command_line(argc, argv, desc), vm); - po::notify(vm); - - //print the help message - if (vm.count("help")){ - std::cout << boost::format("UHD Benchmark RX Rate %s") % desc << std::endl; - return ~0; - } - - //verify that rate was specified - if (not vm.count("rate")){ - std::cerr << "Please specify the sample rate with --rate" << std::endl; - return ~0; - } - - //create a usrp device - std::cout << std::endl; - uhd::device_addrs_t device_addrs = uhd::device::find(args); - if (device_addrs.empty()){ - std::cerr << "Could not find any devices for: " << args << std::endl; - return ~0; - } - if (device_addrs.at(0).get("type", "") == "usrp1"){ - std::cerr << "*** Warning! ***" << std::endl; - std::cerr << "Benchmark RX results will be inaccurate on USRP1 due to soft-time control.\n" << std::endl; - } - std::cout << boost::format("Creating the usrp device with: %s...") % args << std::endl; - uhd::usrp::multi_usrp::sptr usrp = uhd::usrp::multi_usrp::make(device_addrs.at(0)); - std::cout << boost::format("Using Device: %s") % usrp->get_pp_string() << std::endl; - - //start the test - usrp->set_rx_rate(rate); - rate = usrp->get_rx_rate(); - test_device(usrp, rate, duration); - - //finished - std::cout << std::endl << "Done!" << std::endl << std::endl; - - return 0; -} diff --git a/host/lib/convert/convert_with_sse2.cpp b/host/lib/convert/convert_with_sse2.cpp index 52beea24a..9772028dc 100644 --- a/host/lib/convert/convert_with_sse2.cpp +++ b/host/lib/convert/convert_with_sse2.cpp @@ -25,25 +25,37 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){ const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); - __m128 scalar = _mm_set_ps1(float(scale_factor)); - - //convert blocks of samples with intrinsics - size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ - //load from input - __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); - __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); - - //convert and scale - __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); - __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); - - //pack + swap 16-bit pairs - __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); - tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); - tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); - - //store to output - _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); + const __m128 scalar = _mm_set_ps1(float(scale_factor)); + + #define convert_fc32_1_to_item32_1_nswap_guts(_al_) \ + for (; i < (nsamps & ~0x3); i+=4){ \ + /* load from input */ \ + __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \ + __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \ + \ + /* convert and scale */ \ + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \ + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \ + \ + /* pack + swap 16-bit pairs */ \ + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + switch (size_t(input) & 0xf){ + case 0x8: + output[i] = fc32_to_item32(input[i], float(scale_factor)); i++; + case 0x0: + convert_fc32_1_to_item32_1_nswap_guts() + break; + default: convert_fc32_1_to_item32_1_nswap_guts(u) } //convert remainder @@ -56,24 +68,36 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){ const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); - __m128 scalar = _mm_set_ps1(float(scale_factor)); - - //convert blocks of samples with intrinsics - size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ - //load from input - __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); - __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); - - //convert and scale - __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); - __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); - - //pack + byteswap -> byteswap 16 bit words - __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); - tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); - - //store to output - _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); + const __m128 scalar = _mm_set_ps1(float(scale_factor)); + + #define convert_fc32_1_to_item32_1_bswap_guts(_al_) \ + for (; i < (nsamps & ~0x3); i+=4){ \ + /* load from input */ \ + __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \ + __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \ + \ + /* convert and scale */ \ + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \ + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \ + \ + /* pack + byteswap -> byteswap 16 bit words */ \ + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + switch (size_t(input) & 0xf){ + case 0x8: + output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor))); i++; + case 0x0: + convert_fc32_1_to_item32_1_bswap_guts() + break; + default: convert_fc32_1_to_item32_1_bswap_guts(u) } //convert remainder @@ -86,27 +110,39 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){ const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); - __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); - __m128i zeroi = _mm_setzero_si128(); - - //convert blocks of samples with intrinsics - size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ - //load from input - __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); - - //unpack + swap 16-bit pairs - tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); - tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); - __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits - __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); - - //convert and scale - __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); - __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); - - //store to output - _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); - _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); + const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); + const __m128i zeroi = _mm_setzero_si128(); + + #define convert_item32_1_to_fc32_1_nswap_guts(_al_) \ + for (; i < (nsamps & ~0x3); i+=4){ \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ + \ + /* unpack + swap 16-bit pairs */ \ + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ + \ + /* convert and scale */ \ + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \ + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \ + \ + /* store to output */ \ + _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \ + _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + switch (size_t(output) & 0xf){ + case 0x8: + output[i] = item32_to_fc32(input[i], float(scale_factor)); i++; + case 0x0: + convert_item32_1_to_fc32_1_nswap_guts() + break; + default: convert_item32_1_to_fc32_1_nswap_guts(u) } //convert remainder @@ -119,26 +155,38 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){ const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); - __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); - __m128i zeroi = _mm_setzero_si128(); - - //convert blocks of samples with intrinsics - size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ - //load from input - __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); - - //byteswap + unpack -> byteswap 16 bit words - tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); - __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits - __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); - - //convert and scale - __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); - __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); - - //store to output - _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); - _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); + const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); + const __m128i zeroi = _mm_setzero_si128(); + + #define convert_item32_1_to_fc32_1_bswap_guts(_al_) \ + for (; i < (nsamps & ~0x3); i+=4){ \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ + \ + /* byteswap + unpack -> byteswap 16 bit words */ \ + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ + \ + /* convert and scale */ \ + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \ + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \ + \ + /* store to output */ \ + _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \ + _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + switch (size_t(output) & 0xf){ + case 0x8: + output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); i++; + case 0x0: + convert_item32_1_to_fc32_1_bswap_guts() + break; + default: convert_item32_1_to_fc32_1_bswap_guts(u) } //convert remainder diff --git a/host/lib/transport/libusb1_zero_copy.cpp b/host/lib/transport/libusb1_zero_copy.cpp index 19a7a3742..f781f890d 100644 --- a/host/lib/transport/libusb1_zero_copy.cpp +++ b/host/lib/transport/libusb1_zero_copy.cpp @@ -222,7 +222,6 @@ public: } } //shutdown the threads - _threads_running = false; _thread_group.interrupt_all(); _thread_group.join_all(); } @@ -277,15 +276,13 @@ private: //! event handler threads boost::thread_group _thread_group; - bool _threads_running; void run_event_loop(boost::barrier &spawn_barrier){ - _threads_running = true; spawn_barrier.wait(); set_thread_priority_safe(); libusb_context *context = libusb::session::get_global_session()->get_context(); try{ - while(_threads_running){ + while (not boost::this_thread::interruption_requested()){ timeval tv; tv.tv_sec = 0; tv.tv_usec = 100000; //100ms diff --git a/host/lib/usrp/usrp2/io_impl.cpp b/host/lib/usrp/usrp2/io_impl.cpp index ffe9a88e7..df452942c 100644 --- a/host/lib/usrp/usrp2/io_impl.cpp +++ b/host/lib/usrp/usrp2/io_impl.cpp @@ -146,7 +146,6 @@ struct usrp2_impl::io_impl{ } ~io_impl(void){ - recv_pirate_crew_raiding = false; recv_pirate_crew.interrupt_all(); recv_pirate_crew.join_all(); } @@ -185,7 +184,6 @@ struct usrp2_impl::io_impl{ //methods and variables for the pirate crew void recv_pirate_loop(boost::barrier &, usrp2_mboard_impl::sptr, zero_copy_if::sptr, size_t); boost::thread_group recv_pirate_crew; - bool recv_pirate_crew_raiding; bounded_buffer<async_metadata_t> async_msg_fifo; }; @@ -201,14 +199,13 @@ void usrp2_impl::io_impl::recv_pirate_loop( zero_copy_if::sptr err_xport, size_t index ){ - recv_pirate_crew_raiding = true; spawn_barrier.wait(); set_thread_priority_safe(); //store a reference to the flow control monitor (offset by max dsps) flow_control_monitor &fc_mon = *(this->fc_mons[index*usrp2_mboard_impl::MAX_NUM_DSPS]); - while(recv_pirate_crew_raiding){ + while (not boost::this_thread::interruption_requested()){ managed_recv_buffer::sptr buff = err_xport->get_recv_buff(); if (not buff.get()) continue; //ignore timeout/error buffers diff --git a/host/utils/usrp2_card_burner.py b/host/utils/usrp2_card_burner.py index 26adb91c7..43689dd20 100755 --- a/host/utils/usrp2_card_burner.py +++ b/host/utils/usrp2_card_burner.py @@ -50,7 +50,7 @@ def command(*args): stderr=subprocess.STDOUT, ) ret = p.wait() - verbose = p.stdout.read().decode('ascii') + verbose = p.stdout.read().decode() if ret != 0: raise Exception(verbose) return verbose @@ -92,12 +92,12 @@ def get_raw_device_hints(): if in_info: info += '\n'+line.strip() def is_info_valid(info): try: - assert 'link to' in info + if 'link to' not in info: return False #handles two spellings of remov(e)able: - assert 'remov' in info.lower() - if 'size is' in info: assert int(extract_info_value(info, 'size is')) <= MAX_SD_CARD_SIZE - return True + if 'remov' not in info.lower(): return False + if 'size is' in info and int(extract_info_value(info, 'size is')) > MAX_SD_CARD_SIZE: return False except: return False + return True def extract_info_name(info): for key in ('Mounted on', 'link to'): if key in info: return extract_info_value(info, key) @@ -110,13 +110,11 @@ def get_raw_device_hints(): #################################################################### if platform.system() == 'Linux': devs = list() - try: output = open('/proc/partitions', 'r').read().decode('ascii') - except: return devs - for line in output.splitlines(): + for line in command('cat', '/proc/partitions').splitlines(): try: major, minor, blocks, name = line.split() - assert not name[-1].isdigit() or int(minor) == 0 - assert int(blocks)*1024 <= MAX_SD_CARD_SIZE + if not name[-1].isdigit() and int(minor) == 0: continue + if int(blocks)*1024 > MAX_SD_CARD_SIZE: continue except: continue devs.append(os.path.join('/dev', name)) @@ -128,17 +126,17 @@ def get_raw_device_hints(): if platform.system() == 'Darwin': devs = [d.split()[0] for d in [l for l in command('diskutil', 'list').splitlines() if l.startswith('/dev')]] def output_to_info(output): - return dict([list(map(str.strip, pair.lower().split(':'))) for pair in [l for l in output.splitlines() if ':' in l]]) + return dict([list(map(lambda x: x.strip(), pair.lower().split(':'))) for pair in [l for l in output.splitlines() if ':' in l]]) def is_dev_valid(dev): info = output_to_info(command('diskutil', 'info', dev)) try: - if 'internal' in info: assert info['internal'] == 'no' - if 'ejectable' in info: assert info['ejectable'] == 'yes' + if 'internal' in info and info['internal'] == 'yes': return False + if 'ejectable' in info and info['ejectable'] == 'no': return False if 'total size' in info: size_match = re.match('^.*\((\d+)\s*bytes\).*$', info['total size']) - if size_match: assert int(size_match.groups()[0]) <= MAX_SD_CARD_SIZE - return True + if size_match and int(size_match.groups()[0]) > MAX_SD_CARD_SIZE: return False except: return False + return True return sorted(set(filter(is_dev_valid, devs))) |