diff options
| field | value | date |
|---|---|---|
| author | Michael West <michael.west@ettus.com> | 2017-03-29 13:24:32 -0700 |
| committer | Martin Braun <martin.braun@ettus.com> | 2017-06-26 13:23:07 -0700 |
| commit | 2f7f873b7f0299ec1f8ae7c752246cb2f1608c0a (patch) | |
| tree | adaaa9f96542dd201057ab3ef1e9f4b4d5957872 /host/lib | |
| parent | 84f3f9e0db94adfca5ee2d7e31bace9af34d6303 (diff) | |
| download | uhd-2f7f873b7f0299ec1f8ae7c752246cb2f1608c0a.tar.gz uhd-2f7f873b7f0299ec1f8ae7c752246cb2f1608c0a.tar.bz2 uhd-2f7f873b7f0299ec1f8ae7c752246cb2f1608c0a.zip | |
X300: Dual channel TX performance improvements
Diffstat (limited to 'host/lib')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | host/lib/rfnoc/legacy_compat.cpp | 32 |
| -rw-r--r-- | host/lib/usrp/common/async_packet_handler.hpp | 6 |
| -rw-r--r-- | host/lib/usrp/device3/device3_impl.hpp | 3 |
| -rw-r--r-- | host/lib/usrp/device3/device3_io_impl.cpp | 218 |
| -rw-r--r-- | host/lib/usrp/x300/x300_impl.cpp | 14 |
| -rw-r--r-- | host/lib/usrp/x300/x300_impl.hpp | 2 |
6 files changed, 157 insertions, 118 deletions
diff --git a/host/lib/rfnoc/legacy_compat.cpp b/host/lib/rfnoc/legacy_compat.cpp index 7acaa898c..bf653a89a 100644 --- a/host/lib/rfnoc/legacy_compat.cpp +++ b/host/lib/rfnoc/legacy_compat.cpp @@ -45,6 +45,7 @@ using uhd::stream_cmd_t; ***********************************************************************/ static const std::string RADIO_BLOCK_NAME = "Radio"; static const std::string DFIFO_BLOCK_NAME = "DmaFIFO"; +static const std::string SFIFO_BLOCK_NAME = "FIFO"; static const std::string DDC_BLOCK_NAME = "DDC"; static const std::string DUC_BLOCK_NAME = "DUC"; static const size_t MAX_BYTES_PER_HEADER = @@ -131,6 +132,7 @@ public: _has_ducs(not args.has_key("skip_duc") and not device->find_blocks(DUC_BLOCK_NAME).empty()), _has_ddcs(not args.has_key("skip_ddc") and not device->find_blocks(DDC_BLOCK_NAME).empty()), _has_dmafifo(not args.has_key("skip_dram") and not device->find_blocks(DFIFO_BLOCK_NAME).empty()), + _has_sramfifo(not args.has_key("skip_sram") and not device->find_blocks(SFIFO_BLOCK_NAME).empty()), _num_mboards(_tree->list("/mboards").size()), _num_radios_per_board(device->find_blocks<radio_ctrl>("0/Radio").size()), // These might throw, maybe we catch that and provide a nicer error message. _num_tx_chans_per_radio( @@ -165,8 +167,12 @@ public: } if (args.has_key("skip_dram")) { UHD_LEGACY_LOG() << "[legacy_compat] Skipping DRAM by user request." << std::endl; - } else if (not _has_dmafifo) { - UHD_MSG(warning) << "[legacy_compat] No DMA FIFO detected. You will only be able to transmit at slow rates." << std::endl; + } + if (args.has_key("skip_sram")) { + UHD_LEGACY_LOG() << "[legacy_compat] Skipping SRAM by user request." << std::endl; + } + if (not _has_dmafifo and not _has_sramfifo) { + UHD_MSG(warning) << "[legacy_compat] No FIFO detected. Higher transmit rates may encounter errors." 
<< std::endl; } for (size_t mboard = 0; mboard < _num_mboards; mboard++) { @@ -522,7 +528,9 @@ private: // methods size_t &port_index ) { if (dir == uhd::TX_DIRECTION) { - if (_has_dmafifo) { + if (_has_sramfifo) { + return block_id_t(mboard_idx, SFIFO_BLOCK_NAME, radio_index).to_string(); + } else if (_has_dmafifo) { port_index = radio_index; return block_id_t(mboard_idx, DFIFO_BLOCK_NAME, 0).to_string(); } else { @@ -736,7 +744,15 @@ private: // methods block_id_t(mboard, RADIO_BLOCK_NAME, radio), chan, tx_bpp ); - if (_has_dmafifo) { + // Prioritize SRAM over DRAM for performance + if (_has_sramfifo) { + // We have SRAM FIFO *and* DUCs + _graph->connect( + block_id_t(mboard, SFIFO_BLOCK_NAME, radio), chan, + block_id_t(mboard, DUC_BLOCK_NAME, radio), chan, + tx_bpp + ); + } else if (_has_dmafifo) { // We have DMA FIFO *and* DUCs _graph->connect( block_id_t(mboard, DFIFO_BLOCK_NAME, 0), radio, @@ -744,6 +760,13 @@ private: // methods tx_bpp ); } + } else if (_has_sramfifo) { + // We have SRAM FIFO, *no* DUCs + _graph->connect( + block_id_t(mboard, SFIFO_BLOCK_NAME, radio), radio, + block_id_t(mboard, RADIO_BLOCK_NAME, radio), chan, + tx_bpp + ); } else if (_has_dmafifo) { // We have DMA FIFO, *no* DUCs _graph->connect( @@ -843,6 +866,7 @@ private: // attributes const bool _has_ducs; const bool _has_ddcs; const bool _has_dmafifo; + const bool _has_sramfifo; const size_t _num_mboards; const size_t _num_radios_per_board; const size_t _num_tx_chans_per_radio; diff --git a/host/lib/usrp/common/async_packet_handler.hpp b/host/lib/usrp/common/async_packet_handler.hpp index 20409c77a..4b162677b 100644 --- a/host/lib/usrp/common/async_packet_handler.hpp +++ b/host/lib/usrp/common/async_packet_handler.hpp @@ -40,7 +40,11 @@ namespace uhd{ namespace usrp{ //load into metadata metadata.channel = channel; metadata.has_time_spec = if_packet_info.has_tsf; - metadata.time_spec = time_spec_t::from_ticks(if_packet_info.tsf, tick_rate); + if (tick_rate == 0.0) { + 
metadata.time_spec = 0.0; + } else { + metadata.time_spec = time_spec_t::from_ticks(if_packet_info.tsf, tick_rate); + } metadata.event_code = async_metadata_t::event_code_t(to_host(payload[0]) & 0xff); //load user payload diff --git a/host/lib/usrp/device3/device3_impl.hpp b/host/lib/usrp/device3/device3_impl.hpp index 117e4af1c..22c93f25f 100644 --- a/host/lib/usrp/device3/device3_impl.hpp +++ b/host/lib/usrp/device3/device3_impl.hpp @@ -57,7 +57,8 @@ public: enum xport_type_t { CTRL = 0, TX_DATA, - RX_DATA + RX_DATA, + ASYNC_TX_MSG }; enum xport_t {AXI, ETH, PCIE}; diff --git a/host/lib/usrp/device3/device3_io_impl.cpp b/host/lib/usrp/device3/device3_io_impl.cpp index 199cb2786..dc4aacff8 100644 --- a/host/lib/usrp/device3/device3_io_impl.cpp +++ b/host/lib/usrp/device3/device3_io_impl.cpp @@ -298,23 +298,17 @@ static void handle_rx_flowctrl( /*********************************************************************** * TX Flow Control Functions **********************************************************************/ +#define DEVICE3_ASYNC_EVENT_CODE_FLOW_CTRL 0 + //! Stores the state of TX flow control struct tx_fc_cache_t { - tx_fc_cache_t(void): - stream_channel(0), - device_channel(0), - last_seq_out(0), + tx_fc_cache_t(size_t capacity): last_seq_ack(0), - last_seq_ack_cache(0) {} + space(capacity) {} - size_t stream_channel; - size_t device_channel; - size_t last_seq_out; - boost::atomic_size_t last_seq_ack; - size_t last_seq_ack_cache; - boost::shared_ptr<device3_impl::async_md_type> async_queue; - boost::shared_ptr<device3_impl::async_md_type> old_async_queue; + size_t last_seq_ack; + size_t space; }; /*! Return the size of the flow control window in packets. @@ -340,79 +334,74 @@ static size_t get_tx_flow_control_window( return window_in_pkts; } -// TODO: Remove this function -// This function only exists to make sure the transport is not destroyed -// until it is no longer needed. 
-static managed_send_buffer::sptr get_tx_buff( - zero_copy_if::sptr xport, - const double timeout -){ - return xport->get_send_buff(timeout); -} - static bool tx_flow_ctrl( - task::sptr /*holds ref*/, boost::shared_ptr<tx_fc_cache_t> fc_cache, - size_t fc_window, + zero_copy_if::sptr async_xport, + uint32_t (*endian_conv)(uint32_t), + void (*unpack)(const uint32_t *packet_buff, vrt::if_packet_info_t &), managed_buffer::sptr ) { - bool refresh_cache = false; - - // Busy loop waiting for flow control update. This is necessary because - // at this point there is data trying to be sent and it must be sent as - // quickly as possible when the flow control update arrives to avoid - // underruns at high rates. This is also OK because it only occurs when - // data needs to be sent and flow control is holding it back. while (true) { - if (refresh_cache) - { - // update the cached value from the atomic - fc_cache->last_seq_ack_cache = fc_cache->last_seq_ack; - } - - // delta is the amount of FC credit we've used up - const size_t delta = (fc_cache->last_seq_out & HW_SEQ_NUM_MASK) - - (fc_cache->last_seq_ack_cache & HW_SEQ_NUM_MASK); - // If we want to send another packet, we must have FC credit left - if ((delta & HW_SEQ_NUM_MASK) < fc_window) + // If there is space + if (fc_cache->space) { - // Packet will be sent - fc_cache->last_seq_out++; //update seq + // All is good - packet will be sent + fc_cache->space--; return true; } - else - { - if (refresh_cache) - { - // We have already refreshed the cache and still - // lack flow control permission to send new data. - - // A true busy loop choked out the message handler - // thread on machines with processor limitations - // (too few cores). Yield to allow flow control - // receiver thread to operate. - boost::this_thread::yield(); - } - else - { - // Allow the cache to refresh and try again to - // see if the device has granted flow control permission. 
- refresh_cache = true; - } - } + + // Look for a flow control message to update the space available in the buffer. + // A minimal timeout is used because larger timeouts can cause the thread to be + // scheduled out for too long at high data rates and result in underruns. + managed_recv_buffer::sptr buff = async_xport->get_recv_buff(0.000001); + if (buff) + { + vrt::if_packet_info_t if_packet_info; + if_packet_info.num_packet_words32 = buff->size()/sizeof(uint32_t); + const uint32_t *packet_buff = buff->cast<const uint32_t *>(); + try { + unpack(packet_buff, if_packet_info); + } + catch(const std::exception &ex) + { + UHD_MSG(error) << "Error unpacking async flow control packet: " << ex.what() << std::endl; + continue; + } + + if (if_packet_info.packet_type != vrt::if_packet_info_t::PACKET_TYPE_FC) + { + UHD_MSG(error) << "Unexpected packet type received by flow control handler: " << if_packet_info.packet_type << std::endl; + continue; + } + + // update the amount of space + size_t seq_ack = endian_conv(packet_buff[if_packet_info.num_header_words32+1]); + fc_cache->space += (seq_ack - fc_cache->last_seq_ack) & HW_SEQ_NUM_MASK; + fc_cache->last_seq_ack = seq_ack; + } } return false; } -#define DEVICE3_ASYNC_EVENT_CODE_FLOW_CTRL 0 -/*! Handle incoming messages. If they're flow control, update the TX FC cache. - * Otherwise, send them to the async message queue for the user to poll. +/*********************************************************************** + * TX Async Message Functions + **********************************************************************/ +struct async_tx_info_t +{ + size_t stream_channel; + size_t device_channel; + boost::shared_ptr<device3_impl::async_md_type> async_queue; + boost::shared_ptr<device3_impl::async_md_type> old_async_queue; +}; + +/*! Handle incoming messages. + * Send them to the async message queue for the user to poll. * * This is run inside a uhd::task as long as this streamer lives. 
*/ static void handle_tx_async_msgs( - boost::shared_ptr<tx_fc_cache_t> fc_cache, + boost::shared_ptr<async_tx_info_t> async_info, zero_copy_if::sptr xport, endianness_t endianness, boost::function<double(void)> get_tick_rate @@ -462,31 +451,21 @@ static void handle_tx_async_msgs( if_packet_info, packet_buff, tick_rate, - fc_cache->stream_channel + async_info->stream_channel ); - // TODO: Shouldn't we be polling if_packet_info.packet_type == PACKET_TYPE_FC? - // Thing is, on X300, packet_type == 0, so that wouldn't work. But it seems it should. - //The FC response and the burst ack are two indicators that the radio - //consumed packets. Use them to update the FC metadata - if (metadata.event_code == DEVICE3_ASYNC_EVENT_CODE_FLOW_CTRL) { - fc_cache->last_seq_ack = metadata.user_payload[0]; - } - - //FC responses don't propagate up to the user so filter them here - if (metadata.event_code != DEVICE3_ASYNC_EVENT_CODE_FLOW_CTRL) { - fc_cache->async_queue->push_with_pop_on_full(metadata); - metadata.channel = fc_cache->device_channel; - fc_cache->old_async_queue->push_with_pop_on_full(metadata); + // Filter out any flow control messages and cache the rest + if (metadata.event_code == DEVICE3_ASYNC_EVENT_CODE_FLOW_CTRL) + { + UHD_MSG(error) << "Unexpected flow control message found in async message handling" << std::endl; + } else { + async_info->async_queue->push_with_pop_on_full(metadata); + metadata.channel = async_info->device_channel; + async_info->old_async_queue->push_with_pop_on_full(metadata); standard_async_msg_prints(metadata); } } - - -/*********************************************************************** - * Async Data - **********************************************************************/ bool device3_impl::recv_async_msg( async_metadata_t &async_metadata, double timeout ) @@ -726,6 +705,20 @@ void device3_impl::update_tx_streamers(double /* rate */) } } +// This class manages the lifetime of the TX async message handler task and transports +class 
device3_send_packet_streamer : public sph::send_packet_streamer +{ +public: + device3_send_packet_streamer(const size_t max_num_samps) : sph::send_packet_streamer(max_num_samps) {}; + ~device3_send_packet_streamer() { + _tx_async_msg_task.reset(); // Make sure the async task is destroyed before the transports + }; + + both_xports_t _xport; + both_xports_t _async_xport; + task::sptr _tx_async_msg_task; +}; + tx_streamer::sptr device3_impl::get_tx_stream(const uhd::stream_args_t &args_) { boost::mutex::scoped_lock lock(_transport_setup_mutex); @@ -741,7 +734,7 @@ tx_streamer::sptr device3_impl::get_tx_stream(const uhd::stream_args_t &args_) boost::shared_ptr<async_md_type> async_md(new async_md_type(1000/*messages deep*/)); // II. Iterate over all channels - boost::shared_ptr<sph::send_packet_streamer> my_streamer; + boost::shared_ptr<device3_send_packet_streamer> my_streamer; // The terminator's lifetime is coupled to the streamer. // There is only one terminator. If the streamer has multiple channels, // it will be connected to each downstream block. 
@@ -753,6 +746,7 @@ tx_streamer::sptr device3_impl::get_tx_stream(const uhd::stream_args_t &args_) args.args = chan_args[stream_i]; size_t mb_index = block_id.get_device_no(); size_t suggested_block_port = args.args.cast<size_t>("block_port", rfnoc::ANY_PORT); + uhd::endianness_t endianness = get_transport_endianness(mb_index); // Access to this channel's block control uhd::rfnoc::sink_block_ctrl_base::sptr blk_ctrl = @@ -779,6 +773,7 @@ tx_streamer::sptr device3_impl::get_tx_stream(const uhd::stream_args_t &args_) uhd::sid_t stream_address = blk_ctrl->get_address(block_port); UHD_STREAMER_LOG() << "[TX Streamer] creating tx stream " << tx_hints.to_string() << std::endl; both_xports_t xport = make_transport(stream_address, TX_DATA, tx_hints); + both_xports_t async_xport = make_transport(stream_address, ASYNC_TX_MSG, device_addr_t("")); UHD_STREAMER_LOG() << std::hex << "[TX Streamer] data_sid = " << xport.send_sid << std::dec << std::endl; // To calculate the max number of samples per packet, we assume the maximum header length @@ -790,8 +785,10 @@ tx_streamer::sptr device3_impl::get_tx_stream(const uhd::stream_args_t &args_) //make the new streamer given the samples per packet if (not my_streamer) - my_streamer = boost::make_shared<sph::send_packet_streamer>(spp); + my_streamer = boost::make_shared<device3_send_packet_streamer>(spp); my_streamer->resize(chan_list.size()); + my_streamer->_xport = xport; + my_streamer->_async_xport = async_xport; //init some streamer stuff std::string conv_endianness; @@ -827,29 +824,30 @@ tx_streamer::sptr device3_impl::get_tx_stream(const uhd::stream_args_t &args_) block_port ); - boost::shared_ptr<tx_fc_cache_t> fc_cache(new tx_fc_cache_t()); - fc_cache->stream_channel = stream_i; - fc_cache->device_channel = mb_index; - fc_cache->async_queue = async_md; - fc_cache->old_async_queue = _async_md; + boost::shared_ptr<async_tx_info_t> async_tx_info(new async_tx_info_t()); + async_tx_info->stream_channel = args.channels[stream_i]; + 
async_tx_info->device_channel = mb_index; + async_tx_info->async_queue = async_md; + async_tx_info->old_async_queue = _async_md; boost::function<double(void)> tick_rate_retriever = boost::bind( &rfnoc::tick_node_ctrl::get_tick_rate, send_terminator, std::set< rfnoc::node_ctrl_base::sptr >() // Need to specify default args with bind ); - task::sptr task = task::make( + + my_streamer->_tx_async_msg_task = task::make( boost::bind( &handle_tx_async_msgs, - fc_cache, - xport.recv, - get_transport_endianness(mb_index), + async_tx_info, + my_streamer->_async_xport.recv, + endianness, tick_rate_retriever ) ); blk_ctrl->sr_write(uhd::rfnoc::SR_CLEAR_RX_FC, 0xc1ea12, block_port); - blk_ctrl->sr_write(uhd::rfnoc::SR_RESP_IN_DST_SID, xport.recv_sid.get_dst(), block_port); + blk_ctrl->sr_write(uhd::rfnoc::SR_RESP_IN_DST_SID, my_streamer->_async_xport.recv_sid.get_dst(), block_port); UHD_STREAMER_LOG() << "[TX Streamer] resp_in_dst_sid == " << boost::format("0x%04X") % xport.recv_sid.get_dst() << std::endl; // FIXME: Once there is a better way to map the radio block and port @@ -864,7 +862,7 @@ tx_streamer::sptr device3_impl::get_tx_stream(const uhd::stream_args_t &args_) UHD_STREAMER_LOG() << "[TX Streamer] Number of downstream radio nodes: " << downstream_radio_nodes.size() << std::endl; BOOST_FOREACH(const boost::shared_ptr<uhd::rfnoc::radio_ctrl> &node, downstream_radio_nodes) { if (node->get_block_id() == radio_id) { - node->sr_write(uhd::rfnoc::SR_RESP_IN_DST_SID, xport.send_sid.get_src(), radio_port); + node->sr_write(uhd::rfnoc::SR_RESP_IN_DST_SID, my_streamer->_async_xport.recv_sid.get_dst(), radio_port); } } } else { @@ -877,23 +875,27 @@ tx_streamer::sptr device3_impl::get_tx_stream(const uhd::stream_args_t &args_) std::vector<boost::shared_ptr<uhd::rfnoc::radio_ctrl> > downstream_radio_nodes = blk_ctrl->find_downstream_node<uhd::rfnoc::radio_ctrl>(); UHD_STREAMER_LOG() << "[TX Streamer] Number of downstream radio nodes: " << downstream_radio_nodes.size() << 
std::endl; BOOST_FOREACH(const boost::shared_ptr<uhd::rfnoc::radio_ctrl> &node, downstream_radio_nodes) { - node->sr_write(uhd::rfnoc::SR_RESP_IN_DST_SID, xport.send_sid.get_src(), block_port); + node->sr_write(uhd::rfnoc::SR_RESP_IN_DST_SID, my_streamer->_async_xport.recv_sid.get_dst(), block_port); } } // Add flow control - xport.send = zero_copy_flow_ctrl::make( - xport.send, - boost::bind(&tx_flow_ctrl, task, fc_cache, fc_window, _1), + boost::shared_ptr<tx_fc_cache_t> fc_cache(new tx_fc_cache_t(fc_window)); + my_streamer->_xport.send = zero_copy_flow_ctrl::make( + my_streamer->_xport.send, + boost::bind( + &tx_flow_ctrl, + fc_cache, + my_streamer->_xport.recv, + (endianness == ENDIANNESS_BIG ? uhd::ntohx<uint32_t> : uhd::wtohx<uint32_t>), + (endianness == ENDIANNESS_BIG ? vrt::chdr::if_hdr_unpack_be : vrt::chdr::if_hdr_unpack_le), + _1), NULL); //Give the streamer a functor to get the send buffer - //get_tx_buff is static so bind has no lifetime issues - //xport.send (sptr) is required to add streamer->data-transport lifetime dependency - //task (sptr) is required to add a streamer->async-handler lifetime dependency my_streamer->set_xport_chan_get_buff( stream_i, - boost::bind(&get_tx_buff, xport.send, _1) + boost::bind(&zero_copy_if::get_send_buff, my_streamer->_xport.send, _1) ); //Give the streamer a functor handled received async messages my_streamer->set_async_receiver( diff --git a/host/lib/usrp/x300/x300_impl.cpp b/host/lib/usrp/x300/x300_impl.cpp index 4f3870357..934e2eaa5 100644 --- a/host/lib/usrp/x300/x300_impl.cpp +++ b/host/lib/usrp/x300/x300_impl.cpp @@ -511,6 +511,8 @@ void x300_impl::setup_mb(const size_t mb_i, const uhd::device_addr_t &dev_addr) eth_addrs.push_back(eth0_addr); mb.next_src_addr = 0; //Host source address for blocks + mb.next_tx_src_addr = 0; + mb.next_rx_src_addr = 0; if (dev_addr.has_key("second_addr")) { std::string eth1_addr = dev_addr["second_addr"]; @@ -1136,12 +1138,16 @@ uhd::both_xports_t x300_impl::make_transport( } 
else if (mb.xport_path == "eth") { // Decide on the IP/Interface pair based on the endpoint index - std::string interface_addr = mb.eth_conns[mb.next_src_addr].addr; + size_t &next_src_addr = + xport_type == TX_DATA ? mb.next_tx_src_addr : + xport_type == RX_DATA ? mb.next_rx_src_addr : + mb.next_src_addr; + std::string interface_addr = mb.eth_conns[next_src_addr].addr; const uint32_t xbar_src_addr = - mb.next_src_addr==0 ? X300_SRC_ADDR0 : X300_SRC_ADDR1; + next_src_addr==0 ? X300_SRC_ADDR0 : X300_SRC_ADDR1; const uint32_t xbar_src_dst = - mb.eth_conns[mb.next_src_addr].type==X300_IFACE_ETH0 ? X300_XB_DST_E0 : X300_XB_DST_E1; - mb.next_src_addr = (mb.next_src_addr + 1) % mb.eth_conns.size(); + mb.eth_conns[next_src_addr].type==X300_IFACE_ETH0 ? X300_XB_DST_E0 : X300_XB_DST_E1; + next_src_addr = (next_src_addr + 1) % mb.eth_conns.size(); xports.send_sid = this->allocate_sid(mb, address, xbar_src_addr, xbar_src_dst); xports.recv_sid = xports.send_sid.reversed(); diff --git a/host/lib/usrp/x300/x300_impl.hpp b/host/lib/usrp/x300/x300_impl.hpp index 14120bd1f..8f4f81156 100644 --- a/host/lib/usrp/x300/x300_impl.hpp +++ b/host/lib/usrp/x300/x300_impl.hpp @@ -162,6 +162,8 @@ private: std::vector<x300_eth_conn_t> eth_conns; size_t next_src_addr; + size_t next_tx_src_addr; + size_t next_rx_src_addr; // Discover the ethernet connections per motherboard void discover_eth(const uhd::usrp::mboard_eeprom_t mb_eeprom, |