diff options
| -rw-r--r-- | host/examples/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | host/examples/benchmark_rate.cpp | 236 | ||||
| -rw-r--r-- | host/examples/benchmark_rx_rate.cpp | 167 | ||||
| -rw-r--r-- | host/lib/convert/convert_with_sse2.cpp | 204 | ||||
| -rw-r--r-- | host/lib/transport/libusb1_zero_copy.cpp | 5 | ||||
| -rw-r--r-- | host/lib/usrp/usrp2/io_impl.cpp | 5 | ||||
| -rwxr-xr-x | host/utils/usrp2_card_burner.py | 28 | 
7 files changed, 378 insertions, 269 deletions
| diff --git a/host/examples/CMakeLists.txt b/host/examples/CMakeLists.txt index fe9e6409e..10d1fddc3 100644 --- a/host/examples/CMakeLists.txt +++ b/host/examples/CMakeLists.txt @@ -19,7 +19,7 @@  # example applications  ########################################################################  SET(example_sources -    benchmark_rx_rate.cpp +    benchmark_rate.cpp      rx_multi_samples.cpp      rx_samples_to_file.cpp      rx_samples_to_udp.cpp diff --git a/host/examples/benchmark_rate.cpp b/host/examples/benchmark_rate.cpp new file mode 100644 index 000000000..6927b512b --- /dev/null +++ b/host/examples/benchmark_rate.cpp @@ -0,0 +1,236 @@ +// +// Copyright 2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. +// + +#include <uhd/utils/thread_priority.hpp> +#include <uhd/utils/safe_main.hpp> +#include <uhd/usrp/multi_usrp.hpp> +#include <boost/program_options.hpp> +#include <boost/format.hpp> +#include <boost/thread/thread.hpp> +#include <boost/math/special_functions/round.hpp> +#include <iostream> +#include <complex> + +namespace po = boost::program_options; + +unsigned long long num_overflows = 0; +unsigned long long num_underflows = 0; +unsigned long long num_rx_samps = 0; +unsigned long long num_tx_samps = 0; +unsigned long long num_dropped_samps = 0; + +/*********************************************************************** + * Benchmark RX Rate + **********************************************************************/ +void benchmark_rx_rate(uhd::usrp::multi_usrp::sptr usrp){ +    uhd::set_thread_priority_safe(); + +    //print pre-test summary +    std::cout << boost::format( +        "Testing receive rate %f Msps" +    ) % (usrp->get_rx_rate()/1e6) << std::endl; + +    //setup variables and allocate buffer +    uhd::rx_metadata_t md; +    const size_t max_samps_per_packet = usrp->get_device()->get_max_recv_samps_per_packet(); +    std::vector<std::complex<float> > buff(max_samps_per_packet); +    bool had_an_overflow = false; +    uhd::time_spec_t last_time; +    const double rate = usrp->get_rx_rate(); + +    usrp->issue_stream_cmd(uhd::stream_cmd_t::STREAM_MODE_START_CONTINUOUS); +    while (not boost::this_thread::interruption_requested()){ +        num_rx_samps += usrp->get_device()->recv( +            &buff.front(), buff.size(), md, +            uhd::io_type_t::COMPLEX_FLOAT32, +            uhd::device::RECV_MODE_ONE_PACKET +        ); + +        //handle the error codes +        switch(md.error_code){ +        case uhd::rx_metadata_t::ERROR_CODE_NONE: +            if (had_an_overflow){ +                had_an_overflow = false; +                num_dropped_samps += boost::math::iround((md.time_spec - last_time).get_real_secs()*rate); +            } +            break; + +        case uhd::rx_metadata_t::ERROR_CODE_OVERFLOW: +            had_an_overflow = true; +            last_time = md.time_spec; +            num_overflows++; +            break; + +        default: +            std::cerr << "Error code: " << md.error_code << std::endl; +            std::cerr << "Unexpected error on recv, exit test..." << std::endl; +            goto loop_done; +        } + +    } loop_done: +    usrp->issue_stream_cmd(uhd::stream_cmd_t::STREAM_MODE_STOP_CONTINUOUS); +} + +/*********************************************************************** + * Benchmark TX Rate + **********************************************************************/ +void benchmark_tx_rate(uhd::usrp::multi_usrp::sptr usrp){ +    uhd::set_thread_priority_safe(); + +    //print pre-test summary +    std::cout << boost::format( +        "Testing transmit rate %f Msps" +    ) % (usrp->get_tx_rate()/1e6) << std::endl; + +    //setup variables and allocate buffer +    uhd::tx_metadata_t md; +    md.has_time_spec = false; +    const size_t max_samps_per_packet = usrp->get_device()->get_max_send_samps_per_packet(); +    std::vector<std::complex<float> > buff(max_samps_per_packet); + +    while (not boost::this_thread::interruption_requested()){ +        num_tx_samps += usrp->get_device()->send( +            &buff.front(), buff.size(), md, +            uhd::io_type_t::COMPLEX_FLOAT32, +            uhd::device::SEND_MODE_ONE_PACKET +        ); +    } + +    //send a mini EOB packet +    md.end_of_burst   = true; +    usrp->get_device()->send("", 0, md, +        uhd::io_type_t::COMPLEX_FLOAT32, +        uhd::device::SEND_MODE_FULL_BUFF +    ); +} + +void benchmark_tx_rate_async_helper(uhd::usrp::multi_usrp::sptr usrp){ +    //setup variables and allocate buffer +    uhd::async_metadata_t async_md; + +    while (true){ + +        if (not usrp->get_device()->recv_async_msg(async_md)){ +            if (boost::this_thread::interruption_requested()) return; +        } + +        //handle the error codes +        switch(async_md.event_code){ +        case uhd::async_metadata_t::EVENT_CODE_BURST_ACK: +            return; + +        case uhd::async_metadata_t::EVENT_CODE_UNDERFLOW: +        case uhd::async_metadata_t::EVENT_CODE_UNDERFLOW_IN_PACKET: +            num_underflows++; +            break; + +        default: +            std::cerr << "Event code: " << async_md.event_code << std::endl; +            std::cerr << "Unexpected event on async recv, exit test..." << std::endl; +            return; +        } +    } +} + +/*********************************************************************** + * Main code + dispatcher + **********************************************************************/ +int UHD_SAFE_MAIN(int argc, char *argv[]){ + +    //variables to be set by po +    std::string args; +    double duration; +    double rx_rate, tx_rate; + +    //setup the program options +    po::options_description desc("Allowed options"); +    desc.add_options() +        ("help", "help message") +        ("args", po::value<std::string>(&args)->default_value(""), "single uhd device address args") +        ("duration", po::value<double>(&duration)->default_value(10.0), "duration for the test in seconds") +        ("rx_rate", po::value<double>(&rx_rate), "specify to perform a RX rate test (sps)") +        ("tx_rate", po::value<double>(&tx_rate), "specify to perform a TX rate test (sps)") +    ; +    po::variables_map vm; +    po::store(po::parse_command_line(argc, argv, desc), vm); +    po::notify(vm); + +    //print the help message +    if (vm.count("help")){ +        std::cout << boost::format("UHD Benchmark Rate %s") % desc << std::endl; +        std::cout << +        "    Specify --rx_rate for a receive-only test.\n" +        "    Specify --tx_rate for a transmit-only test.\n" +        "    Specify both options for a full-duplex test.\n" +        << std::endl; +        return ~0; +    } + +    //create a usrp device +    std::cout << std::endl; +    uhd::device_addrs_t device_addrs = uhd::device::find(args); +    if (device_addrs.empty()){ +        std::cerr << "Could not find any devices for: " << args << std::endl; +        return ~0; +    } +    if (device_addrs.at(0).get("type", "") == "usrp1"){ +        std::cerr << "*** Warning! ***" << std::endl; +        std::cerr << "Benchmark results will be inaccurate on USRP1 due to insufficient features.\n" << std::endl; +    } +    std::cout << boost::format("Creating the usrp device with: %s...") % args << std::endl; +    uhd::usrp::multi_usrp::sptr usrp = uhd::usrp::multi_usrp::make(device_addrs.at(0)); +    std::cout << boost::format("Using Device: %s") % usrp->get_pp_string() << std::endl; + +    boost::thread_group thread_group; + +    //spawn the receive test thread +    if (vm.count("rx_rate")){ +        usrp->set_rx_rate(rx_rate); +        thread_group.create_thread(boost::bind(&benchmark_rx_rate, usrp)); +    } + +    //spawn the transmit test thread +    if (vm.count("tx_rate")){ +        usrp->set_tx_rate(tx_rate); +        thread_group.create_thread(boost::bind(&benchmark_tx_rate, usrp)); +        thread_group.create_thread(boost::bind(&benchmark_tx_rate_async_helper, usrp)); +    } + +    //sleep for the required duration +    const long secs = long(duration); +    const long usecs = long((duration - secs)*1e6); +    boost::this_thread::sleep(boost::posix_time::seconds(secs) + boost::posix_time::microseconds(usecs)); + +    //interrupt and join the threads +    thread_group.interrupt_all(); +    thread_group.join_all(); + +    //print summary +    std::cout << std::endl << boost::format( +        "Benchmark rate summary:\n" +        "  Num received samples:    %u\n" +        "  Num dropped samples:     %u\n" +        "  Num overflows detected:  %u\n" +        "  Num transmitted samples: %u\n" +        "  Num underflows detected: %u\n" +    ) % num_rx_samps % num_dropped_samps % num_overflows % num_tx_samps % num_underflows << std::endl; + +    //finished +    std::cout << std::endl << "Done!" << std::endl << std::endl; + +    return 0; +} diff --git a/host/examples/benchmark_rx_rate.cpp b/host/examples/benchmark_rx_rate.cpp deleted file mode 100644 index 50af1b98b..000000000 --- a/host/examples/benchmark_rx_rate.cpp +++ /dev/null @@ -1,167 +0,0 @@ -// -// Copyright 2010-2011 Ettus Research LLC -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program.  If not, see <http://www.gnu.org/licenses/>. -// - -#include <uhd/utils/thread_priority.hpp> -#include <uhd/utils/safe_main.hpp> -#include <uhd/usrp/multi_usrp.hpp> -#include <boost/math/special_functions/round.hpp> -#include <boost/program_options.hpp> -#include <boost/format.hpp> -#include <iostream> -#include <complex> - -namespace po = boost::program_options; - -static inline void test_device( -    uhd::usrp::multi_usrp::sptr usrp, -    double rx_rate_sps, -    double duration_secs -){ -    const size_t max_samps_per_packet = usrp->get_device()->get_max_recv_samps_per_packet(); -    std::cout << boost::format("Testing receive rate %f Msps (%f second run)") % (rx_rate_sps/1e6) % duration_secs << std::endl; - -    //allocate recv buffer and metatdata -    uhd::rx_metadata_t md; -    std::vector<std::complex<float> > buff(max_samps_per_packet); - -    //flush the buffers in the recv path -    while(usrp->get_device()->recv( -        &buff.front(), buff.size(), md, -        uhd::io_type_t::COMPLEX_FLOAT32, -        uhd::device::RECV_MODE_ONE_PACKET -    )){ -        /* NOP */ -    }; - -    //declare status variables -    bool got_first_packet = false; -    size_t total_recv_packets = 0; -    size_t total_lost_samples = 0; -    size_t total_recv_samples = 0; -    uhd::time_spec_t initial_time_spec; -    uhd::time_spec_t next_expected_time_spec; - -    usrp->issue_stream_cmd(uhd::stream_cmd_t::STREAM_MODE_START_CONTINUOUS); -    do { -        size_t num_rx_samps = usrp->get_device()->recv( -            &buff.front(), buff.size(), md, -            uhd::io_type_t::COMPLEX_FLOAT32, -            uhd::device::RECV_MODE_ONE_PACKET -        ); - -        //handle the error codes -        switch(md.error_code){ -        case uhd::rx_metadata_t::ERROR_CODE_NONE: -        case uhd::rx_metadata_t::ERROR_CODE_OVERFLOW: -            break; - -        default: -            std::cerr << "Error code: " << md.error_code << std::endl; -            std::cerr << "Unexpected error on recv, exit test..." << std::endl; -            return; -        } - -        if (not md.has_time_spec){ -            std::cerr << "Metadata missing time spec, exit test..." << std::endl; -            return; -        } - -        total_recv_samples += num_rx_samps; -        total_recv_packets++; - -        if (not got_first_packet){ -            initial_time_spec = md.time_spec; -            next_expected_time_spec = initial_time_spec; -            got_first_packet = true; -        } - -        double approx_lost_samps = rx_rate_sps*(md.time_spec - next_expected_time_spec).get_real_secs(); -        total_lost_samples += std::max(0, boost::math::iround(approx_lost_samps)); -        next_expected_time_spec = md.time_spec + uhd::time_spec_t(0, num_rx_samps, rx_rate_sps); - -    } while((next_expected_time_spec - initial_time_spec) < uhd::time_spec_t(duration_secs)); -    usrp->issue_stream_cmd(uhd::stream_cmd_t::STREAM_MODE_STOP_CONTINUOUS); - -    //print a summary -    std::cout << std::endl; //go to newline, recv may spew SXSYSZ... -    std::cout << boost::format("    Received packets: %d") % total_recv_packets << std::endl; -    std::cout << boost::format("    Received samples: %d") % total_recv_samples << std::endl; -    std::cout << boost::format("    Lost samples: %d") % total_lost_samples << std::endl; -    size_t packets_lost = boost::math::iround(double(total_lost_samples)/max_samps_per_packet); -    std::cout << boost::format("    Lost packets: %d (approximate)") % packets_lost << std::endl; -    double actual_rx_rate_sps = (total_recv_samples*rx_rate_sps)/(total_recv_samples+total_lost_samples); -    std::cout << boost::format("    Sustained receive rate: %f Msps") % (actual_rx_rate_sps/1e6) << std::endl; -    std::cout << std::endl << std::endl; -} - -int UHD_SAFE_MAIN(int argc, char *argv[]){ -    uhd::set_thread_priority_safe(); - -    //variables to be set by po -    std::string args; -    double duration; -    double rate; - -    //setup the program options -    po::options_description desc("Allowed options"); -    desc.add_options() -        ("help", "help message") -        ("args", po::value<std::string>(&args)->default_value(""), "single uhd device address args") -        ("duration", po::value<double>(&duration)->default_value(10.0), "duration for each test in seconds") -        ("rate", po::value<double>(&rate), "specify to perform a single test as this rate (sps)") -    ; -    po::variables_map vm; -    po::store(po::parse_command_line(argc, argv, desc), vm); -    po::notify(vm); - -    //print the help message -    if (vm.count("help")){ -        std::cout << boost::format("UHD Benchmark RX Rate %s") % desc << std::endl; -        return ~0; -    } - -    //verify that rate was specified -    if (not vm.count("rate")){ -        std::cerr << "Please specify the sample rate with --rate" << std::endl; -        return ~0; -    } - -    //create a usrp device -    std::cout << std::endl; -    uhd::device_addrs_t device_addrs = uhd::device::find(args); -    if (device_addrs.empty()){ -        std::cerr << "Could not find any devices for: " << args << std::endl; -        return ~0; -    } -    if (device_addrs.at(0).get("type", "") == "usrp1"){ -        std::cerr << "*** Warning! ***" << std::endl; -        std::cerr << "Benchmark RX results will be inaccurate on USRP1 due to soft-time control.\n" << std::endl; -    } -    std::cout << boost::format("Creating the usrp device with: %s...") % args << std::endl; -    uhd::usrp::multi_usrp::sptr usrp = uhd::usrp::multi_usrp::make(device_addrs.at(0)); -    std::cout << boost::format("Using Device: %s") % usrp->get_pp_string() << std::endl; - -    //start the test -    usrp->set_rx_rate(rate); -    rate = usrp->get_rx_rate(); -    test_device(usrp, rate, duration); - -    //finished -    std::cout << std::endl << "Done!" << std::endl << std::endl; - -    return 0; -} diff --git a/host/lib/convert/convert_with_sse2.cpp b/host/lib/convert/convert_with_sse2.cpp index 52beea24a..9772028dc 100644 --- a/host/lib/convert/convert_with_sse2.cpp +++ b/host/lib/convert/convert_with_sse2.cpp @@ -25,25 +25,37 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){      const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);      item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); -    __m128 scalar = _mm_set_ps1(float(scale_factor)); - -    //convert blocks of samples with intrinsics -    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ -        //load from input -        __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); -        __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); - -        //convert and scale -        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); -        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); - -        //pack + swap 16-bit pairs -        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); -        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); -        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); - -        //store to output -        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); +    const __m128 scalar = _mm_set_ps1(float(scale_factor)); + +    #define convert_fc32_1_to_item32_1_nswap_guts(_al_)                 \ +    for (; i < (nsamps & ~0x3); i+=4){                                  \ +        /* load from input */                                           \ +        __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \ +        __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \ +                                                                        \ +        /* convert and scale */ \ +        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \ +        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));    \ +                                                                        \ +        /* pack + swap 16-bit pairs */                                  \ +        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \ +        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \ +        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \ +                                                                        \ +        /* store to output */                                           \ +        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \ +    }                                                                   \ + +    size_t i = 0; + +    //dispatch according to alignment +    switch (size_t(input) & 0xf){ +    case 0x8: +        output[i] = fc32_to_item32(input[i], float(scale_factor)); i++; +    case 0x0: +        convert_fc32_1_to_item32_1_nswap_guts() +        break; +    default: convert_fc32_1_to_item32_1_nswap_guts(u)      }      //convert remainder @@ -56,24 +68,36 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){      const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);      item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); -    __m128 scalar = _mm_set_ps1(float(scale_factor)); - -    //convert blocks of samples with intrinsics -    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ -        //load from input -        __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); -        __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); - -        //convert and scale -        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); -        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); - -        //pack + byteswap -> byteswap 16 bit words -        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); -        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); - -        //store to output -        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); +    const __m128 scalar = _mm_set_ps1(float(scale_factor)); + +    #define convert_fc32_1_to_item32_1_bswap_guts(_al_)                 \ +    for (; i < (nsamps & ~0x3); i+=4){                                  \ +        /* load from input */                                           \ +        __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \ +        __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \ +                                                                        \ +        /* convert and scale */ \ +        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \ +        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));    \ +                                                                        \ +        /* pack + byteswap -> byteswap 16 bit words */                  \ +        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \ +        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ +                                                                        \ +        /* store to output */                                           \ +        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \ +    }                                                                   \ + +    size_t i = 0; + +    //dispatch according to alignment +    switch (size_t(input) & 0xf){ +    case 0x8: +        output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor))); i++; +    case 0x0: +        convert_fc32_1_to_item32_1_bswap_guts() +        break; +    default: convert_fc32_1_to_item32_1_bswap_guts(u)      }      //convert remainder @@ -86,27 +110,39 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){      const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);      fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); -    __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); -    __m128i zeroi = _mm_setzero_si128(); - -    //convert blocks of samples with intrinsics -    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ -        //load from input -        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); - -        //unpack + swap 16-bit pairs -        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); -        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); -        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits -        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); - -        //convert and scale -        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); -        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); - -        //store to output -        _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); -        _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); +    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); +    const __m128i zeroi = _mm_setzero_si128(); + +    #define convert_item32_1_to_fc32_1_nswap_guts(_al_)                 \ +    for (; i < (nsamps & ~0x3); i+=4){                                  \ +        /* load from input */                                           \ +        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ +                                                                        \ +        /* unpack + swap 16-bit pairs */                                \ +        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \ +        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \ +        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ +        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);               \ +                                                                        \ +        /* convert and scale */                                         \ +        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);     \ +        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);     \ +                                                                        \ +        /* store to output */                                           \ +        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \ +        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \ +    }                                                                   \ + +    size_t i = 0; + +    //dispatch according to alignment +    switch (size_t(output) & 0xf){ +    case 0x8: +        output[i] = item32_to_fc32(input[i], float(scale_factor)); i++; +    case 0x0: +        convert_item32_1_to_fc32_1_nswap_guts() +        break; +    default: convert_item32_1_to_fc32_1_nswap_guts(u)      }      //convert remainder @@ -119,26 +155,38 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){      const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);      fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); -    __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); -    __m128i zeroi = _mm_setzero_si128(); - -    //convert blocks of samples with intrinsics -    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ -        //load from input -        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); - -        //byteswap + unpack -> byteswap 16 bit words -        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); -        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits -        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); - -        //convert and scale -        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); -        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); - -        //store to output -        _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); -        _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); +    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); +    const __m128i zeroi = _mm_setzero_si128(); + +    #define convert_item32_1_to_fc32_1_bswap_guts(_al_)                 \ +    for (; i < (nsamps & ~0x3); i+=4){                                  \ +        /* load from input */                                           \ +        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ +                                                                        \ +        /* byteswap + unpack -> byteswap 16 bit words */                \ +        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ +        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ +        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);               \ +                                                                        \ +        /* convert and scale */                                         \ +        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);     \ +        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);     \ +                                                                        \ +        /* store to output */                                           \ +        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \ +        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \ +    }                                                                   \ + +    size_t i = 0; + +    //dispatch according to alignment +    switch (size_t(output) & 0xf){ +    case 0x8: +        output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); i++; +    case 0x0: +        convert_item32_1_to_fc32_1_bswap_guts() +        break; +    default: convert_item32_1_to_fc32_1_bswap_guts(u)      }      //convert remainder diff --git a/host/lib/transport/libusb1_zero_copy.cpp b/host/lib/transport/libusb1_zero_copy.cpp index 19a7a3742..f781f890d 100644 --- a/host/lib/transport/libusb1_zero_copy.cpp +++ b/host/lib/transport/libusb1_zero_copy.cpp @@ -222,7 +222,6 @@ public:              }          }          //shutdown the threads -        _threads_running = false;          _thread_group.interrupt_all();          _thread_group.join_all();      } @@ -277,15 +276,13 @@ private:      //! event handler threads      boost::thread_group _thread_group; -    bool _threads_running;      void run_event_loop(boost::barrier &spawn_barrier){ -        _threads_running = true;          spawn_barrier.wait();          set_thread_priority_safe();          libusb_context *context = libusb::session::get_global_session()->get_context();          try{ -            while(_threads_running){ +            while (not boost::this_thread::interruption_requested()){                  timeval tv;                  tv.tv_sec = 0;                  tv.tv_usec = 100000; //100ms diff --git a/host/lib/usrp/usrp2/io_impl.cpp b/host/lib/usrp/usrp2/io_impl.cpp index ffe9a88e7..df452942c 100644 --- a/host/lib/usrp/usrp2/io_impl.cpp +++ b/host/lib/usrp/usrp2/io_impl.cpp @@ -146,7 +146,6 @@ struct usrp2_impl::io_impl{      }      ~io_impl(void){ -        recv_pirate_crew_raiding = false;          recv_pirate_crew.interrupt_all();          recv_pirate_crew.join_all();      } @@ -185,7 +184,6 @@ struct usrp2_impl::io_impl{      //methods and variables for the pirate crew      void recv_pirate_loop(boost::barrier &, usrp2_mboard_impl::sptr, zero_copy_if::sptr, size_t);      boost::thread_group recv_pirate_crew; -    bool recv_pirate_crew_raiding;      bounded_buffer<async_metadata_t> async_msg_fifo;  }; @@ -201,14 +199,13 @@ void usrp2_impl::io_impl::recv_pirate_loop(      zero_copy_if::sptr err_xport,      size_t index  ){ -    recv_pirate_crew_raiding = true;      spawn_barrier.wait();      set_thread_priority_safe();      //store a reference to the flow control monitor (offset by max dsps)      flow_control_monitor &fc_mon = *(this->fc_mons[index*usrp2_mboard_impl::MAX_NUM_DSPS]); -    while(recv_pirate_crew_raiding){ +    while (not boost::this_thread::interruption_requested()){          managed_recv_buffer::sptr buff = err_xport->get_recv_buff();          if (not buff.get()) continue; //ignore timeout/error buffers diff --git a/host/utils/usrp2_card_burner.py b/host/utils/usrp2_card_burner.py index 26adb91c7..43689dd20 100755 --- a/host/utils/usrp2_card_burner.py +++ b/host/utils/usrp2_card_burner.py @@ -50,7 +50,7 @@ def command(*args):          stderr=subprocess.STDOUT,      )      ret = p.wait() -    verbose = p.stdout.read().decode('ascii') +    verbose = p.stdout.read().decode()      if ret != 0: raise Exception(verbose)      return verbose @@ -92,12 +92,12 @@ def get_raw_device_hints():                  if in_info: info += '\n'+line.strip()          def is_info_valid(info):              try: -                assert 'link to' in info +                if 'link to' not in info: return False                  #handles two spellings of remov(e)able: -                assert 'remov' in info.lower() -                if 'size is' in info: assert int(extract_info_value(info, 'size is')) <= MAX_SD_CARD_SIZE -                return True +                if 'remov' not in info.lower(): return False +                if 'size is' in info and int(extract_info_value(info, 'size is')) > MAX_SD_CARD_SIZE: return False              except: return False +            return True          def extract_info_name(info):              for key in ('Mounted on', 'link to'):                  if key in info: return extract_info_value(info, key) @@ -110,13 +110,11 @@ def get_raw_device_hints():      ####################################################################      if platform.system() == 'Linux':          devs = list() -        try: output = open('/proc/partitions', 'r').read().decode('ascii') -        except: return devs -        for line in output.splitlines(): +        for line in command('cat', '/proc/partitions').splitlines():              try:                  major, minor, blocks, name = line.split() -                assert not name[-1].isdigit() or int(minor) == 0 -                assert int(blocks)*1024 <= MAX_SD_CARD_SIZE +                if not name[-1].isdigit() and int(minor) == 0: continue +                if int(blocks)*1024 > MAX_SD_CARD_SIZE: continue              except: continue              devs.append(os.path.join('/dev', name)) @@ -128,17 +126,17 @@ def get_raw_device_hints():      if platform.system() == 'Darwin':          devs = [d.split()[0] for d in [l for l in command('diskutil', 'list').splitlines() if l.startswith('/dev')]]          def output_to_info(output): -            return dict([list(map(str.strip, pair.lower().split(':'))) for pair in [l for l in output.splitlines() if ':' in l]]) +            return dict([list(map(lambda x: x.strip(), pair.lower().split(':'))) for pair in [l for l in output.splitlines() if ':' in l]])          def is_dev_valid(dev):              info = output_to_info(command('diskutil', 'info', dev))              try: -                if 'internal' in info: assert info['internal'] == 'no' -                if 'ejectable' in info: assert info['ejectable'] == 'yes' +                if 'internal' in info and info['internal'] == 'yes': return False +                if 'ejectable' in info and info['ejectable'] == 'no': return False                  if 'total size' in info:                      size_match = re.match('^.*\((\d+)\s*bytes\).*$', info['total size']) -                    if size_match: assert int(size_match.groups()[0]) <= MAX_SD_CARD_SIZE -                return True +                    if size_match and int(size_match.groups()[0]) > MAX_SD_CARD_SIZE: return False              except: return False +            return True          return sorted(set(filter(is_dev_valid, devs))) | 
