summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- host/examples/CMakeLists.txt | 2
-rw-r--r-- host/examples/benchmark_rate.cpp | 236
-rw-r--r-- host/examples/benchmark_rx_rate.cpp | 167
-rw-r--r-- host/lib/convert/convert_with_sse2.cpp | 204
-rw-r--r-- host/lib/transport/libusb1_zero_copy.cpp | 5
-rw-r--r-- host/lib/usrp/usrp2/io_impl.cpp | 5
-rwxr-xr-x host/utils/usrp2_card_burner.py | 28
7 files changed, 378 insertions, 269 deletions
diff --git a/host/examples/CMakeLists.txt b/host/examples/CMakeLists.txt
index fe9e6409e..10d1fddc3 100644
--- a/host/examples/CMakeLists.txt
+++ b/host/examples/CMakeLists.txt
@@ -19,7 +19,7 @@
# example applications
########################################################################
SET(example_sources
- benchmark_rx_rate.cpp
+ benchmark_rate.cpp
rx_multi_samples.cpp
rx_samples_to_file.cpp
rx_samples_to_udp.cpp
diff --git a/host/examples/benchmark_rate.cpp b/host/examples/benchmark_rate.cpp
new file mode 100644
index 000000000..6927b512b
--- /dev/null
+++ b/host/examples/benchmark_rate.cpp
@@ -0,0 +1,236 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include <uhd/utils/thread_priority.hpp>
+#include <uhd/utils/safe_main.hpp>
+#include <uhd/usrp/multi_usrp.hpp>
+#include <boost/program_options.hpp>
+#include <boost/format.hpp>
+#include <boost/thread/thread.hpp>
+#include <boost/math/special_functions/round.hpp>
+#include <iostream>
+#include <complex>
+
+namespace po = boost::program_options;
+
+//benchmark counters shared between the worker threads and main:
+//each counter is incremented by exactly one worker function in this file,
+//and main prints them only after thread_group.join_all() has returned
+unsigned long long num_overflows = 0; //incremented in benchmark_rx_rate on ERROR_CODE_OVERFLOW
+unsigned long long num_underflows = 0; //incremented in benchmark_tx_rate_async_helper on underflow events
+unsigned long long num_rx_samps = 0; //sum of the recv() return values in benchmark_rx_rate
+unsigned long long num_tx_samps = 0; //sum of the send() return values in benchmark_tx_rate
+unsigned long long num_dropped_samps = 0; //estimated in benchmark_rx_rate from the time gap after an overflow
+
+/***********************************************************************
+ * Benchmark RX Rate
+ **********************************************************************/
+//Receive worker: streams continuously until the thread is interrupted or an
+//unexpected error code shows up, accumulating num_rx_samps, num_overflows
+//and num_dropped_samps along the way.
+void benchmark_rx_rate(uhd::usrp::multi_usrp::sptr usrp){
+    uhd::set_thread_priority_safe();
+
+    //announce the rate that is about to be exercised
+    std::cout << boost::format(
+        "Testing receive rate %f Msps"
+    ) % (usrp->get_rx_rate()/1e6) << std::endl;
+
+    //one packet worth of samples is requested per recv call
+    uhd::rx_metadata_t md;
+    const size_t samps_per_recv = usrp->get_device()->get_max_recv_samps_per_packet();
+    std::vector<std::complex<float> > samples(samps_per_recv);
+
+    //overflow bookkeeping: the time stamp of the latest overflow is kept so the
+    //gap up to the next good packet can be turned into a dropped sample count
+    bool overflow_pending = false;
+    uhd::time_spec_t overflow_time;
+    const double rate = usrp->get_rx_rate();
+
+    usrp->issue_stream_cmd(uhd::stream_cmd_t::STREAM_MODE_START_CONTINUOUS);
+    while (not boost::this_thread::interruption_requested()){
+        num_rx_samps += usrp->get_device()->recv(
+            &samples.front(), samples.size(), md,
+            uhd::io_type_t::COMPLEX_FLOAT32,
+            uhd::device::RECV_MODE_ONE_PACKET
+        );
+
+        //react to the outcome reported in the metadata
+        if (md.error_code == uhd::rx_metadata_t::ERROR_CODE_NONE){
+            if (overflow_pending){
+                overflow_pending = false;
+                const double gap_secs = (md.time_spec - overflow_time).get_real_secs();
+                num_dropped_samps += boost::math::iround(gap_secs*rate);
+            }
+        }
+        else if (md.error_code == uhd::rx_metadata_t::ERROR_CODE_OVERFLOW){
+            overflow_pending = true;
+            overflow_time = md.time_spec;
+            num_overflows++;
+        }
+        else{
+            std::cerr << "Error code: " << md.error_code << std::endl;
+            std::cerr << "Unexpected error on recv, exit test..." << std::endl;
+            break; //abandon the loop, streaming is still stopped below
+        }
+    }
+    usrp->issue_stream_cmd(uhd::stream_cmd_t::STREAM_MODE_STOP_CONTINUOUS);
+}
+
+/***********************************************************************
+ * Benchmark TX Rate
+ **********************************************************************/
+//Transmit worker: sends one packet worth of value-initialized samples per
+//call until the thread is interrupted, adding each send() result to
+//num_tx_samps, then closes the burst with an empty end-of-burst packet.
+void benchmark_tx_rate(uhd::usrp::multi_usrp::sptr usrp){
+    uhd::set_thread_priority_safe();
+
+    //announce the rate that is about to be exercised
+    std::cout << boost::format(
+        "Testing transmit rate %f Msps"
+    ) % (usrp->get_tx_rate()/1e6) << std::endl;
+
+    //untimed metadata and a buffer sized for a single packet
+    uhd::tx_metadata_t md;
+    md.has_time_spec = false;
+    const size_t samps_per_send = usrp->get_device()->get_max_send_samps_per_packet();
+    std::vector<std::complex<float> > samples(samps_per_send);
+
+    for (;;){
+        if (boost::this_thread::interruption_requested()) break;
+        num_tx_samps += usrp->get_device()->send(
+            &samples.front(), samples.size(), md,
+            uhd::io_type_t::COMPLEX_FLOAT32,
+            uhd::device::SEND_MODE_ONE_PACKET
+        );
+    }
+
+    //send a mini EOB packet (zero samples, end_of_burst flagged)
+    md.end_of_burst = true;
+    usrp->get_device()->send(
+        "", 0, md,
+        uhd::io_type_t::COMPLEX_FLOAT32,
+        uhd::device::SEND_MODE_FULL_BUFF
+    );
+}
+
+//Async message worker for the transmit test: polls the device for
+//asynchronous messages and counts underflow events in num_underflows.
+//Returns when a burst ack is received, when an unexpected event code is
+//seen, or when no message is available and the thread has been interrupted.
+void benchmark_tx_rate_async_helper(uhd::usrp::multi_usrp::sptr usrp){
+    //metadata filled in by recv_async_msg
+    uhd::async_metadata_t async_md;
+
+    while (true){
+
+        //recv_async_msg returned false: no message was delivered, so async_md
+        //must not be inspected (it would hold the previous event, or nothing
+        //at all on the first pass); stop if the test is over, else poll again
+        if (not usrp->get_device()->recv_async_msg(async_md)){
+            if (boost::this_thread::interruption_requested()) return;
+            continue;
+        }
+
+        //handle the event codes
+        switch(async_md.event_code){
+        case uhd::async_metadata_t::EVENT_CODE_BURST_ACK:
+            return;
+
+        case uhd::async_metadata_t::EVENT_CODE_UNDERFLOW:
+        case uhd::async_metadata_t::EVENT_CODE_UNDERFLOW_IN_PACKET:
+            num_underflows++;
+            break;
+
+        default:
+            std::cerr << "Event code: " << async_md.event_code << std::endl;
+            std::cerr << "Unexpected event on async recv, exit test..." << std::endl;
+            return;
+        }
+    }
+}
+
+/***********************************************************************
+ * Main code + dispatcher
+ **********************************************************************/
+//Entry point: parses the options, creates the device, runs the selected
+//RX and/or TX benchmark threads for the requested duration and prints the
+//accumulated counters. Returns ~0 when help is shown (also when no test was
+//selected) or when no device is found, and 0 after a completed run.
+int UHD_SAFE_MAIN(int argc, char *argv[]){
+
+    //variables to be set by po
+    std::string args;
+    double duration;
+    double rx_rate, tx_rate; //only read when the matching option was given
+
+    //setup the program options
+    po::options_description desc("Allowed options");
+    desc.add_options()
+        ("help", "help message")
+        ("args", po::value<std::string>(&args)->default_value(""), "single uhd device address args")
+        ("duration", po::value<double>(&duration)->default_value(10.0), "duration for the test in seconds")
+        ("rx_rate", po::value<double>(&rx_rate), "specify to perform a RX rate test (sps)")
+        ("tx_rate", po::value<double>(&tx_rate), "specify to perform a TX rate test (sps)")
+    ;
+    po::variables_map vm;
+    po::store(po::parse_command_line(argc, argv, desc), vm);
+    po::notify(vm);
+
+    //print the help message, also when neither test was requested:
+    //without --rx_rate or --tx_rate no worker thread would be spawned and
+    //the program would only sleep and report zeros
+    const bool no_test_selected = (vm.count("rx_rate") + vm.count("tx_rate")) == 0;
+    if (vm.count("help") or no_test_selected){
+        std::cout << boost::format("UHD Benchmark Rate %s") % desc << std::endl;
+        std::cout <<
+            " Specify --rx_rate for a receive-only test.\n"
+            " Specify --tx_rate for a transmit-only test.\n"
+            " Specify both options for a full-duplex test.\n"
+        << std::endl;
+        return ~0;
+    }
+
+    //create a usrp device
+    std::cout << std::endl;
+    uhd::device_addrs_t device_addrs = uhd::device::find(args);
+    if (device_addrs.empty()){
+        std::cerr << "Could not find any devices for: " << args << std::endl;
+        return ~0;
+    }
+    if (device_addrs.at(0).get("type", "") == "usrp1"){
+        std::cerr << "*** Warning! ***" << std::endl;
+        std::cerr << "Benchmark results will be inaccurate on USRP1 due to insufficient features.\n" << std::endl;
+    }
+    std::cout << boost::format("Creating the usrp device with: %s...") % args << std::endl;
+    uhd::usrp::multi_usrp::sptr usrp = uhd::usrp::multi_usrp::make(device_addrs.at(0));
+    std::cout << boost::format("Using Device: %s") % usrp->get_pp_string() << std::endl;
+
+    boost::thread_group thread_group;
+
+    //spawn the receive test thread
+    if (vm.count("rx_rate")){
+        usrp->set_rx_rate(rx_rate);
+        thread_group.create_thread(boost::bind(&benchmark_rx_rate, usrp));
+    }
+
+    //spawn the transmit test thread and its async message helper
+    if (vm.count("tx_rate")){
+        usrp->set_tx_rate(tx_rate);
+        thread_group.create_thread(boost::bind(&benchmark_tx_rate, usrp));
+        thread_group.create_thread(boost::bind(&benchmark_tx_rate_async_helper, usrp));
+    }
+
+    //sleep for the required duration (whole seconds plus the fractional part)
+    const long secs = long(duration);
+    const long usecs = long((duration - secs)*1e6);
+    boost::this_thread::sleep(boost::posix_time::seconds(secs) + boost::posix_time::microseconds(usecs));
+
+    //interrupt and join the threads
+    thread_group.interrupt_all();
+    thread_group.join_all();
+
+    //print summary
+    std::cout << std::endl << boost::format(
+        "Benchmark rate summary:\n"
+        " Num received samples: %u\n"
+        " Num dropped samples: %u\n"
+        " Num overflows detected: %u\n"
+        " Num transmitted samples: %u\n"
+        " Num underflows detected: %u\n"
+    ) % num_rx_samps % num_dropped_samps % num_overflows % num_tx_samps % num_underflows << std::endl;
+
+    //finished
+    std::cout << std::endl << "Done!" << std::endl << std::endl;
+
+    return 0;
+}
diff --git a/host/examples/benchmark_rx_rate.cpp b/host/examples/benchmark_rx_rate.cpp
deleted file mode 100644
index 50af1b98b..000000000
--- a/host/examples/benchmark_rx_rate.cpp
+++ /dev/null
@@ -1,167 +0,0 @@
-//
-// Copyright 2010-2011 Ettus Research LLC
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program. If not, see <http://www.gnu.org/licenses/>.
-//
-
-#include <uhd/utils/thread_priority.hpp>
-#include <uhd/utils/safe_main.hpp>
-#include <uhd/usrp/multi_usrp.hpp>
-#include <boost/math/special_functions/round.hpp>
-#include <boost/program_options.hpp>
-#include <boost/format.hpp>
-#include <iostream>
-#include <complex>
-
-namespace po = boost::program_options;
-
-static inline void test_device(
- uhd::usrp::multi_usrp::sptr usrp,
- double rx_rate_sps,
- double duration_secs
-){
- const size_t max_samps_per_packet = usrp->get_device()->get_max_recv_samps_per_packet();
- std::cout << boost::format("Testing receive rate %f Msps (%f second run)") % (rx_rate_sps/1e6) % duration_secs << std::endl;
-
- //allocate recv buffer and metatdata
- uhd::rx_metadata_t md;
- std::vector<std::complex<float> > buff(max_samps_per_packet);
-
- //flush the buffers in the recv path
- while(usrp->get_device()->recv(
- &buff.front(), buff.size(), md,
- uhd::io_type_t::COMPLEX_FLOAT32,
- uhd::device::RECV_MODE_ONE_PACKET
- )){
- /* NOP */
- };
-
- //declare status variables
- bool got_first_packet = false;
- size_t total_recv_packets = 0;
- size_t total_lost_samples = 0;
- size_t total_recv_samples = 0;
- uhd::time_spec_t initial_time_spec;
- uhd::time_spec_t next_expected_time_spec;
-
- usrp->issue_stream_cmd(uhd::stream_cmd_t::STREAM_MODE_START_CONTINUOUS);
- do {
- size_t num_rx_samps = usrp->get_device()->recv(
- &buff.front(), buff.size(), md,
- uhd::io_type_t::COMPLEX_FLOAT32,
- uhd::device::RECV_MODE_ONE_PACKET
- );
-
- //handle the error codes
- switch(md.error_code){
- case uhd::rx_metadata_t::ERROR_CODE_NONE:
- case uhd::rx_metadata_t::ERROR_CODE_OVERFLOW:
- break;
-
- default:
- std::cerr << "Error code: " << md.error_code << std::endl;
- std::cerr << "Unexpected error on recv, exit test..." << std::endl;
- return;
- }
-
- if (not md.has_time_spec){
- std::cerr << "Metadata missing time spec, exit test..." << std::endl;
- return;
- }
-
- total_recv_samples += num_rx_samps;
- total_recv_packets++;
-
- if (not got_first_packet){
- initial_time_spec = md.time_spec;
- next_expected_time_spec = initial_time_spec;
- got_first_packet = true;
- }
-
- double approx_lost_samps = rx_rate_sps*(md.time_spec - next_expected_time_spec).get_real_secs();
- total_lost_samples += std::max(0, boost::math::iround(approx_lost_samps));
- next_expected_time_spec = md.time_spec + uhd::time_spec_t(0, num_rx_samps, rx_rate_sps);
-
- } while((next_expected_time_spec - initial_time_spec) < uhd::time_spec_t(duration_secs));
- usrp->issue_stream_cmd(uhd::stream_cmd_t::STREAM_MODE_STOP_CONTINUOUS);
-
- //print a summary
- std::cout << std::endl; //go to newline, recv may spew SXSYSZ...
- std::cout << boost::format(" Received packets: %d") % total_recv_packets << std::endl;
- std::cout << boost::format(" Received samples: %d") % total_recv_samples << std::endl;
- std::cout << boost::format(" Lost samples: %d") % total_lost_samples << std::endl;
- size_t packets_lost = boost::math::iround(double(total_lost_samples)/max_samps_per_packet);
- std::cout << boost::format(" Lost packets: %d (approximate)") % packets_lost << std::endl;
- double actual_rx_rate_sps = (total_recv_samples*rx_rate_sps)/(total_recv_samples+total_lost_samples);
- std::cout << boost::format(" Sustained receive rate: %f Msps") % (actual_rx_rate_sps/1e6) << std::endl;
- std::cout << std::endl << std::endl;
-}
-
-int UHD_SAFE_MAIN(int argc, char *argv[]){
- uhd::set_thread_priority_safe();
-
- //variables to be set by po
- std::string args;
- double duration;
- double rate;
-
- //setup the program options
- po::options_description desc("Allowed options");
- desc.add_options()
- ("help", "help message")
- ("args", po::value<std::string>(&args)->default_value(""), "single uhd device address args")
- ("duration", po::value<double>(&duration)->default_value(10.0), "duration for each test in seconds")
- ("rate", po::value<double>(&rate), "specify to perform a single test as this rate (sps)")
- ;
- po::variables_map vm;
- po::store(po::parse_command_line(argc, argv, desc), vm);
- po::notify(vm);
-
- //print the help message
- if (vm.count("help")){
- std::cout << boost::format("UHD Benchmark RX Rate %s") % desc << std::endl;
- return ~0;
- }
-
- //verify that rate was specified
- if (not vm.count("rate")){
- std::cerr << "Please specify the sample rate with --rate" << std::endl;
- return ~0;
- }
-
- //create a usrp device
- std::cout << std::endl;
- uhd::device_addrs_t device_addrs = uhd::device::find(args);
- if (device_addrs.empty()){
- std::cerr << "Could not find any devices for: " << args << std::endl;
- return ~0;
- }
- if (device_addrs.at(0).get("type", "") == "usrp1"){
- std::cerr << "*** Warning! ***" << std::endl;
- std::cerr << "Benchmark RX results will be inaccurate on USRP1 due to soft-time control.\n" << std::endl;
- }
- std::cout << boost::format("Creating the usrp device with: %s...") % args << std::endl;
- uhd::usrp::multi_usrp::sptr usrp = uhd::usrp::multi_usrp::make(device_addrs.at(0));
- std::cout << boost::format("Using Device: %s") % usrp->get_pp_string() << std::endl;
-
- //start the test
- usrp->set_rx_rate(rate);
- rate = usrp->get_rx_rate();
- test_device(usrp, rate, duration);
-
- //finished
- std::cout << std::endl << "Done!" << std::endl << std::endl;
-
- return 0;
-}
diff --git a/host/lib/convert/convert_with_sse2.cpp b/host/lib/convert/convert_with_sse2.cpp
index 52beea24a..9772028dc 100644
--- a/host/lib/convert/convert_with_sse2.cpp
+++ b/host/lib/convert/convert_with_sse2.cpp
@@ -25,25 +25,37 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){
const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
- __m128 scalar = _mm_set_ps1(float(scale_factor));
-
- //convert blocks of samples with intrinsics
- size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
- //load from input
- __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0));
- __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2));
-
- //convert and scale
- __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
- __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
-
- //pack + swap 16-bit pairs
- __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
- tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
- tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
-
- //store to output
- _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);
+ const __m128 scalar = _mm_set_ps1(float(scale_factor));
+
+    //SSE2 loop shared by the aligned and unaligned dispatch cases: _al_ is pasted
+    //into the load intrinsic name (empty -> _mm_load_ps, u -> _mm_loadu_ps).
+    //Each pass consumes samples i..i+3, hence the bound i+3 < nsamps: the dispatch
+    //can enter with i == 1, where a bound of (nsamps & ~0x3) lets the last pass
+    //touch one sample past the end of both buffers (e.g. nsamps == 4).
+    //NOTE(review): the 0x8 prologue in the dispatch converts one sample before this
+    //loop without looking at nsamps -- confirm callers never pass nsamps == 0.
+    #define convert_fc32_1_to_item32_1_nswap_guts(_al_) \
+    for (; i+3 < nsamps; i+=4){ \
+        /* load from input */ \
+        __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \
+        __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \
+        \
+        /* convert and scale */ \
+        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \
+        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \
+        \
+        /* pack + swap 16-bit pairs */ \
+        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \
+        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
+        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
+        \
+        /* store to output */ \
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \
+    }
+
+ size_t i = 0;
+
+ //dispatch according to alignment
+ switch (size_t(input) & 0xf){
+ case 0x8:
+ output[i] = fc32_to_item32(input[i], float(scale_factor)); i++;
+ case 0x0:
+ convert_fc32_1_to_item32_1_nswap_guts()
+ break;
+ default: convert_fc32_1_to_item32_1_nswap_guts(u)
}
//convert remainder
@@ -56,24 +68,36 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){
const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
- __m128 scalar = _mm_set_ps1(float(scale_factor));
-
- //convert blocks of samples with intrinsics
- size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
- //load from input
- __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0));
- __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2));
-
- //convert and scale
- __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
- __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
-
- //pack + byteswap -> byteswap 16 bit words
- __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
- tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
-
- //store to output
- _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);
+ const __m128 scalar = _mm_set_ps1(float(scale_factor));
+
+    //SSE2 loop shared by the aligned and unaligned dispatch cases: _al_ is pasted
+    //into the load intrinsic name (empty -> _mm_load_ps, u -> _mm_loadu_ps).
+    //Each pass consumes samples i..i+3, hence the bound i+3 < nsamps: the dispatch
+    //can enter with i == 1, where a bound of (nsamps & ~0x3) lets the last pass
+    //touch one sample past the end of both buffers (e.g. nsamps == 4).
+    //NOTE(review): the 0x8 prologue in the dispatch converts one sample before this
+    //loop without looking at nsamps -- confirm callers never pass nsamps == 0.
+    #define convert_fc32_1_to_item32_1_bswap_guts(_al_) \
+    for (; i+3 < nsamps; i+=4){ \
+        /* load from input */ \
+        __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \
+        __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \
+        \
+        /* convert and scale */ \
+        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \
+        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \
+        \
+        /* pack + byteswap -> byteswap 16 bit words */ \
+        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \
+        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
+        \
+        /* store to output */ \
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \
+    }
+
+ size_t i = 0;
+
+ //dispatch according to alignment
+ switch (size_t(input) & 0xf){
+ case 0x8:
+ output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor))); i++;
+ case 0x0:
+ convert_fc32_1_to_item32_1_bswap_guts()
+ break;
+ default: convert_fc32_1_to_item32_1_bswap_guts(u)
}
//convert remainder
@@ -86,27 +110,39 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){
const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
- __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16));
- __m128i zeroi = _mm_setzero_si128();
-
- //convert blocks of samples with intrinsics
- size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
- //load from input
- __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
-
- //unpack + swap 16-bit pairs
- tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
- tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
- __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
- __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
-
- //convert and scale
- __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);
- __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);
-
- //store to output
- _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo);
- _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi);
+ const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16));
+ const __m128i zeroi = _mm_setzero_si128();
+
+    //SSE2 loop shared by the aligned and unaligned dispatch cases: _al_ is pasted
+    //into the store intrinsic name (empty -> _mm_store_ps, u -> _mm_storeu_ps);
+    //the input is always read with the unaligned load.
+    //Each pass produces samples i..i+3, hence the bound i+3 < nsamps: the dispatch
+    //can enter with i == 1, where a bound of (nsamps & ~0x3) lets the last pass
+    //touch one sample past the end of both buffers (e.g. nsamps == 4).
+    //NOTE(review): the 0x8 prologue in the dispatch converts one sample before this
+    //loop without looking at nsamps -- confirm callers never pass nsamps == 0.
+    #define convert_item32_1_to_fc32_1_nswap_guts(_al_) \
+    for (; i+3 < nsamps; i+=4){ \
+        /* load from input */ \
+        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
+        \
+        /* unpack + swap 16-bit pairs */ \
+        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
+        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
+        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \
+        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \
+        \
+        /* convert and scale */ \
+        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \
+        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \
+        \
+        /* store to output */ \
+        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \
+        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \
+    }
+
+ size_t i = 0;
+
+ //dispatch according to alignment
+ switch (size_t(output) & 0xf){
+ case 0x8:
+ output[i] = item32_to_fc32(input[i], float(scale_factor)); i++;
+ case 0x0:
+ convert_item32_1_to_fc32_1_nswap_guts()
+ break;
+ default: convert_item32_1_to_fc32_1_nswap_guts(u)
}
//convert remainder
@@ -119,26 +155,38 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){
const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
- __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16));
- __m128i zeroi = _mm_setzero_si128();
-
- //convert blocks of samples with intrinsics
- size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
- //load from input
- __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
-
- //byteswap + unpack -> byteswap 16 bit words
- tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
- __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
- __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
-
- //convert and scale
- __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);
- __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);
-
- //store to output
- _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo);
- _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi);
+ const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16));
+ const __m128i zeroi = _mm_setzero_si128();
+
+    //SSE2 loop shared by the aligned and unaligned dispatch cases: _al_ is pasted
+    //into the store intrinsic name (empty -> _mm_store_ps, u -> _mm_storeu_ps);
+    //the input is always read with the unaligned load.
+    //Each pass produces samples i..i+3, hence the bound i+3 < nsamps: the dispatch
+    //can enter with i == 1, where a bound of (nsamps & ~0x3) lets the last pass
+    //touch one sample past the end of both buffers (e.g. nsamps == 4).
+    //NOTE(review): the 0x8 prologue in the dispatch converts one sample before this
+    //loop without looking at nsamps -- confirm callers never pass nsamps == 0.
+    #define convert_item32_1_to_fc32_1_bswap_guts(_al_) \
+    for (; i+3 < nsamps; i+=4){ \
+        /* load from input */ \
+        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
+        \
+        /* byteswap + unpack -> byteswap 16 bit words */ \
+        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
+        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \
+        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \
+        \
+        /* convert and scale */ \
+        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \
+        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \
+        \
+        /* store to output */ \
+        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \
+        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \
+    }
+
+ size_t i = 0;
+
+ //dispatch according to alignment
+ switch (size_t(output) & 0xf){
+ case 0x8:
+ output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); i++;
+ case 0x0:
+ convert_item32_1_to_fc32_1_bswap_guts()
+ break;
+ default: convert_item32_1_to_fc32_1_bswap_guts(u)
}
//convert remainder
diff --git a/host/lib/transport/libusb1_zero_copy.cpp b/host/lib/transport/libusb1_zero_copy.cpp
index 19a7a3742..f781f890d 100644
--- a/host/lib/transport/libusb1_zero_copy.cpp
+++ b/host/lib/transport/libusb1_zero_copy.cpp
@@ -222,7 +222,6 @@ public:
}
}
//shutdown the threads
- _threads_running = false;
_thread_group.interrupt_all();
_thread_group.join_all();
}
@@ -277,15 +276,13 @@ private:
//! event handler threads
boost::thread_group _thread_group;
- bool _threads_running;
void run_event_loop(boost::barrier &spawn_barrier){
- _threads_running = true;
spawn_barrier.wait();
set_thread_priority_safe();
libusb_context *context = libusb::session::get_global_session()->get_context();
try{
- while(_threads_running){
+ while (not boost::this_thread::interruption_requested()){
timeval tv;
tv.tv_sec = 0;
tv.tv_usec = 100000; //100ms
diff --git a/host/lib/usrp/usrp2/io_impl.cpp b/host/lib/usrp/usrp2/io_impl.cpp
index ffe9a88e7..df452942c 100644
--- a/host/lib/usrp/usrp2/io_impl.cpp
+++ b/host/lib/usrp/usrp2/io_impl.cpp
@@ -146,7 +146,6 @@ struct usrp2_impl::io_impl{
}
~io_impl(void){
- recv_pirate_crew_raiding = false;
recv_pirate_crew.interrupt_all();
recv_pirate_crew.join_all();
}
@@ -185,7 +184,6 @@ struct usrp2_impl::io_impl{
//methods and variables for the pirate crew
void recv_pirate_loop(boost::barrier &, usrp2_mboard_impl::sptr, zero_copy_if::sptr, size_t);
boost::thread_group recv_pirate_crew;
- bool recv_pirate_crew_raiding;
bounded_buffer<async_metadata_t> async_msg_fifo;
};
@@ -201,14 +199,13 @@ void usrp2_impl::io_impl::recv_pirate_loop(
zero_copy_if::sptr err_xport,
size_t index
){
- recv_pirate_crew_raiding = true;
spawn_barrier.wait();
set_thread_priority_safe();
//store a reference to the flow control monitor (offset by max dsps)
flow_control_monitor &fc_mon = *(this->fc_mons[index*usrp2_mboard_impl::MAX_NUM_DSPS]);
- while(recv_pirate_crew_raiding){
+ while (not boost::this_thread::interruption_requested()){
managed_recv_buffer::sptr buff = err_xport->get_recv_buff();
if (not buff.get()) continue; //ignore timeout/error buffers
diff --git a/host/utils/usrp2_card_burner.py b/host/utils/usrp2_card_burner.py
index 26adb91c7..43689dd20 100755
--- a/host/utils/usrp2_card_burner.py
+++ b/host/utils/usrp2_card_burner.py
@@ -50,7 +50,7 @@ def command(*args):
stderr=subprocess.STDOUT,
)
ret = p.wait()
- verbose = p.stdout.read().decode('ascii')
+ verbose = p.stdout.read().decode()
if ret != 0: raise Exception(verbose)
return verbose
@@ -92,12 +92,12 @@ def get_raw_device_hints():
if in_info: info += '\n'+line.strip()
def is_info_valid(info):
try:
- assert 'link to' in info
+ if 'link to' not in info: return False
#handles two spellings of remov(e)able:
- assert 'remov' in info.lower()
- if 'size is' in info: assert int(extract_info_value(info, 'size is')) <= MAX_SD_CARD_SIZE
- return True
+ if 'remov' not in info.lower(): return False
+ if 'size is' in info and int(extract_info_value(info, 'size is')) > MAX_SD_CARD_SIZE: return False
except: return False
+ return True
def extract_info_name(info):
for key in ('Mounted on', 'link to'):
if key in info: return extract_info_value(info, key)
@@ -110,13 +110,11 @@ def get_raw_device_hints():
####################################################################
if platform.system() == 'Linux':
devs = list()
- try: output = open('/proc/partitions', 'r').read().decode('ascii')
- except: return devs
- for line in output.splitlines():
+ for line in command('cat', '/proc/partitions').splitlines():
try:
major, minor, blocks, name = line.split()
- assert not name[-1].isdigit() or int(minor) == 0
- assert int(blocks)*1024 <= MAX_SD_CARD_SIZE
+ if not name[-1].isdigit() and int(minor) == 0: continue
+ if int(blocks)*1024 > MAX_SD_CARD_SIZE: continue
except: continue
devs.append(os.path.join('/dev', name))
@@ -128,17 +126,17 @@ def get_raw_device_hints():
if platform.system() == 'Darwin':
devs = [d.split()[0] for d in [l for l in command('diskutil', 'list').splitlines() if l.startswith('/dev')]]
def output_to_info(output):
- return dict([list(map(str.strip, pair.lower().split(':'))) for pair in [l for l in output.splitlines() if ':' in l]])
+ return dict([list(map(lambda x: x.strip(), pair.lower().split(':'))) for pair in [l for l in output.splitlines() if ':' in l]])
def is_dev_valid(dev):
info = output_to_info(command('diskutil', 'info', dev))
try:
- if 'internal' in info: assert info['internal'] == 'no'
- if 'ejectable' in info: assert info['ejectable'] == 'yes'
+ if 'internal' in info and info['internal'] == 'yes': return False
+ if 'ejectable' in info and info['ejectable'] == 'no': return False
if 'total size' in info:
size_match = re.match('^.*\((\d+)\s*bytes\).*$', info['total size'])
- if size_match: assert int(size_match.groups()[0]) <= MAX_SD_CARD_SIZE
- return True
+ if size_match and int(size_match.groups()[0]) > MAX_SD_CARD_SIZE: return False
except: return False
+ return True
return sorted(set(filter(is_dev_valid, devs)))