From 47672ede5034df17bdc2f314f4e3b7afa8622bda Mon Sep 17 00:00:00 2001 From: Patrick Sisterhen Date: Wed, 29 Mar 2017 15:13:46 -0700 Subject: NI-RIO: FIFO management optimizations for PCIe performance Pre-acquiring as much buffer as possible When acquiring during a streaming operation, acquire a minimum amount (frame size) or the amount we know is acquire-able Adding docstrings to nirio_fifo Making const-qualifying consistent --- host/include/uhd/transport/nirio/nirio_fifo.h | 136 +++++++++++++++- host/include/uhd/transport/nirio/nirio_fifo.ipp | 189 ++++++++++++++++++---- host/lib/transport/nirio/niriok_proxy_impl_v2.cpp | 14 +- host/lib/transport/nirio_zero_copy.cpp | 2 + 4 files changed, 295 insertions(+), 46 deletions(-) diff --git a/host/include/uhd/transport/nirio/nirio_fifo.h b/host/include/uhd/transport/nirio/nirio_fifo.h index a0cd7149f..f6eb97ce2 100644 --- a/host/include/uhd/transport/nirio/nirio_fifo.h +++ b/host/include/uhd/transport/nirio/nirio_fifo.h @@ -28,6 +28,7 @@ #include #include #include +#include namespace uhd { namespace niusrprio { @@ -43,29 +44,91 @@ class nirio_fifo : private boost::noncopyable public: typedef boost::shared_ptr< nirio_fifo > sptr; + typedef enum { + MINIMIZE_LATENCY, + MAXIMIZE_THROUGHPUT + } fifo_optimization_option_t; + + /*! + * Initializing constructor + * \param riok_proxy Proxy to the kernel driver + * \param direction Direction of the fifo (INPUT_FIFO [from device] or OUTPUT_FIFO [to device]) + * \param name Name of the fifo + * \param fifo_instance Instance of the fifo as defined by RIO FPGA target + */ nirio_fifo( niriok_proxy::sptr riok_proxy, - fifo_direction_t direction, + const fifo_direction_t direction, const std::string& name, - uint32_t fifo_instance); + const uint32_t fifo_instance); virtual ~nirio_fifo(); + /*! 
+ * Configures the characteristics of this DMA FIFO, allocates DMA buffer and maps it to user mode + * \param requested_depth Desired size of DMA FIFO buffer on host (in elements) + * \param frame_size_in_elements Size of a single frame (smallest transfer block) for this FIFO + * \param actual_depth Receives the size (in elements) of the allocated DMA buffer + * \param actual_size Receives the size (in bytes) of the allocated DMA buffer + * \param fifo_optimization_option FIFO acquire policy (MINIMIZE_LATENCY [fetch what was there last time] or MAXIMIZE_THROUGHPUT [fetch all available]) + * \return status + */ nirio_status initialize( const size_t requested_depth, + const size_t frame_size_in_elements, size_t& actual_depth, - size_t& actual_size); + size_t& actual_size, + const fifo_optimization_option_t fifo_optimization_option = MINIMIZE_LATENCY); + /*! + * Stops FIFO if started, releases any acquired blocks, and unmaps the DMA buffer + */ void finalize(); + /*! + * Name accessor + * \return FIFO name + */ inline const std::string& get_name() const { return _name; } + + /*! + * Channel/Instance accessor + * \return FIFO channel (instance) + */ inline uint32_t get_channel() const { return _fifo_channel; } + + /*! + * Direction accessor + * \return FIFO direction + */ inline fifo_direction_t get_direction() const { return _fifo_direction; } + + /*! + * Type accessor + * \return FIFO element type + */ inline nirio_scalar_type_t get_scalar_type() const { return _datatype_info.scalar_type; } + /*! + * Starts the DMA transfer between host and device, pre-acquires any available blocks + * \return status + */ nirio_status start(); + /*! + * Stops the DMA transfer between host and device, releases any acquired blocks + * \return status + */ nirio_status stop(); + /*! 
+ * Acquires space in the DMA buffer so it can be written by the host (output) or read by the host (input) + * \param elements Receives the address of the acquired block (pointer to mapped zero-copy DMA buffer) + * \param elements_requested Size (in elements) of the block to acquire + * \param timeout The amount of time (in ms) to wait for the elements to become available in the DMA buffer + * \param elements_acquired Receives the number of DMA buffer elements actually acquired + * \param elements_remaining Receives the number of DMA buffer elements available to be acquired by the host + * \return status + */ nirio_status acquire( data_t*& elements, const size_t elements_requested, @@ -73,26 +136,80 @@ public: size_t& elements_acquired, size_t& elements_remaining); + /*! + * Releases space in the DMA buffer so it can be read by the device (output) or written by the device (input) + * \param elements Size (in elements) of the block to release. + * \return status + */ nirio_status release(const size_t elements); + /*! + * Reads data from the DMA FIFO into the provided buffer + * \param buf The buffer into which to read data from the DMA FIFO + * \param num_elements Size (in elements) of the data to read + * \param timeout The amount of time (in ms) to wait for the elements to become available in the DMA buffer + * \param num_read Receives the number of DMA buffer elements actually read + * \param num_remaining Receives the number of DMA buffer elements available to be read by the host + * \return status + */ nirio_status read( data_t* buf, const uint32_t num_elements, - uint32_t timeout, + const uint32_t timeout, uint32_t& num_read, uint32_t& num_remaining); + /*! 
+ * Writes data to the DMA FIFO + * \param buf The buffer containing data to be written to the DMA FIFO + * \param num_elements Size (in elements) of the data to write + * \param timeout The amount of time (in ms) to wait for the elements to become available in the DMA buffer + * \param num_remaining Receives the number of DMA buffer elements available to be written by the host + * \return status + */ nirio_status write( const data_t* buf, const uint32_t num_elements, - uint32_t timeout, + const uint32_t timeout, uint32_t& num_remaining); private: //Methods + + /*! + * datatype info accessor + * \return datatype info + */ datatype_info_t _get_datatype_info(); + + /*! + * Queries the total transfer count so far between host and device + * \param transfer_count Receives the value from total transfer count register + * \return status + */ nirio_status _get_transfer_count(uint64_t& transfer_count); + + /*! + * Sleeps until expected transfer time has elapsed and checks total transfer count to see if transfer completed + * \param timeout_ms The amount of time (in ms) to wait for the elements to become available in the DMA buffer + * \return status + */ nirio_status _ensure_transfer_completed(uint32_t timeout_ms); + + /*! + * Conducts the low level operations to reserve DMA buffer space from RIO kernel driver + * \param elements_requested The minimum number of elements to acquire + * \param timeout_in_ms The amount of time (in ms) to wait for the elements to become available in the DMA buffer + * \param fifo_optimization_option FIFO acquire policy (MINIMIZE_LATENCY [fetch what was there last time] or MAXIMIZE_THROUGHPUT [fetch all available]) + * \param status status chaining variable + * \return Whether acquisition of requested elements (or more) was successful. 
+ */ + bool _acquire_block_from_rio_buffer( + size_t elements_requested, + uint64_t timeout_in_ms, + const fifo_optimization_option_t fifo_optimization_option, + nirio_status& status); + private: //Members enum fifo_state_t { UNMAPPED, MAPPED, STARTED @@ -103,7 +220,8 @@ private: //Members uint32_t _fifo_channel; datatype_info_t _datatype_info; fifo_state_t _state; - size_t _acquired_pending; + size_t _remaining_in_claimed_block; + size_t _remaining_acquirable_elements; nirio_driver_iface::rio_mmap_t _mem_map; boost::recursive_mutex _mutex; niriok_proxy::sptr _riok_proxy_ptr; @@ -111,6 +229,12 @@ private: //Members uint64_t _expected_xfer_count; uint32_t _dma_base_addr; + data_t* _elements_buffer; + size_t _actual_depth_in_elements; + boost::atomic _total_elements_acquired; + size_t _frame_size_in_elements; + fifo_optimization_option_t _fifo_optimization_option; + static const uint32_t FIFO_LOCK_TIMEOUT_IN_MS = 5000; }; diff --git a/host/include/uhd/transport/nirio/nirio_fifo.ipp b/host/include/uhd/transport/nirio/nirio_fifo.ipp index a7e2bb2c5..5669f259f 100644 --- a/host/include/uhd/transport/nirio/nirio_fifo.ipp +++ b/host/include/uhd/transport/nirio/nirio_fifo.ipp @@ -26,19 +26,25 @@ template nirio_fifo::nirio_fifo( niriok_proxy::sptr riok_proxy, - fifo_direction_t direction, + const fifo_direction_t direction, const std::string& name, - uint32_t fifo_instance) : + const uint32_t fifo_instance) : _name(name), _fifo_direction(direction), _fifo_channel(fifo_instance), _datatype_info(_get_datatype_info()), _state(UNMAPPED), - _acquired_pending(0), + _remaining_in_claimed_block(0), + _remaining_acquirable_elements(0), _mem_map(), _riok_proxy_ptr(riok_proxy), _expected_xfer_count(0), - _dma_base_addr(0) + _dma_base_addr(0), + _elements_buffer(NULL), + _actual_depth_in_elements(0), + _total_elements_acquired(0), + _frame_size_in_elements(0), + _fifo_optimization_option(MINIMIZE_LATENCY) { nirio_status status = 0; 
nirio_status_chain(_riok_proxy_ptr->set_attribute(RIO_ADDRESS_SPACE, BUS_INTERFACE), status); @@ -58,8 +64,10 @@ nirio_fifo::~nirio_fifo() template nirio_status nirio_fifo::initialize( const size_t requested_depth, + const size_t frame_size_in_elements, size_t& actual_depth, - size_t& actual_size) + size_t& actual_size, + const fifo_optimization_option_t fifo_optimization_option) { nirio_status status = NiRio_Status_Success; if (!_riok_proxy_ptr) return NiRio_Status_ResourceNotInitialized; @@ -67,6 +75,9 @@ nirio_status nirio_fifo::initialize( if (_state == UNMAPPED) { + _frame_size_in_elements = frame_size_in_elements; + _fifo_optimization_option = fifo_optimization_option; + uint32_t actual_depth_u32 = 0; uint32_t actual_size_u32 = 0; @@ -83,6 +94,7 @@ nirio_status nirio_fifo::initialize( if (nirio_status_fatal(status)) return status; actual_depth = static_cast(actual_depth_u32); + _actual_depth_in_elements = actual_depth; actual_size = static_cast(actual_size_u32); status = _riok_proxy_ptr->map_fifo_memory(_fifo_channel, actual_size, _mem_map); @@ -110,6 +122,89 @@ void nirio_fifo::finalize() } } + +template +bool nirio_fifo::_acquire_block_from_rio_buffer( + size_t elements_requested, + uint64_t timeout_in_ms, + const fifo_optimization_option_t fifo_optimization_option, + nirio_status& status) +{ + uint32_t elements_acquired_u32 = 0; + uint32_t elements_remaining_u32 = 0; + size_t elements_to_request = 0; + void* elements_buffer = NULL; + char context_buffer[64]; + + if (fifo_optimization_option == MAXIMIZE_THROUGHPUT) + { + // We'll maximize throughput by acquiring all the data that is available + // But this comes at the cost of an extra wait_on_fifo in which we query + // the total available by requesting 0. 
+ + // first, see how many are available to acquire + // by trying to acquire 0 + nirio_status_chain(_riok_proxy_ptr->wait_on_fifo( + _fifo_channel, + 0, // elements requested + static_cast(_datatype_info.scalar_type), + _datatype_info.width * 8, + 0, // timeout + _fifo_direction == OUTPUT_FIFO, + elements_buffer, + elements_acquired_u32, + elements_remaining_u32), + status); + + // acquire the maximum possible elements- all remaining + // yet limit to a multiple of the frame size + // (don't want to acquire partial frames) + elements_to_request = elements_remaining_u32 - (elements_remaining_u32 % _frame_size_in_elements); + + // the next call to wait_on_fifo can have a 0 timeout since we + // know there is at least as much data as we will request available + timeout_in_ms = 0; + } + else + { + // fifo_optimization_option == MINIMIZE_LATENCY + // acquire either the minimum amount (frame size) or the amount remaining from the last call + // (coerced to a multiple of frames) + elements_to_request = std::max( + elements_requested, + (_remaining_acquirable_elements - (_remaining_acquirable_elements % _frame_size_in_elements))); + } + + nirio_status_chain(_riok_proxy_ptr->wait_on_fifo( + _fifo_channel, + elements_to_request, + static_cast(_datatype_info.scalar_type), + _datatype_info.width * 8, + timeout_in_ms, + _fifo_direction == OUTPUT_FIFO, + elements_buffer, + elements_acquired_u32, + elements_remaining_u32), + status); + + if (nirio_status_not_fatal(status)) + { + _remaining_acquirable_elements = static_cast(elements_remaining_u32); + + if (elements_acquired_u32 > 0) + { + _total_elements_acquired += static_cast(elements_acquired_u32); + _remaining_in_claimed_block = static_cast(elements_acquired_u32); + _elements_buffer = static_cast(elements_buffer); + } + + return true; + } + + return false; +} + + template nirio_status nirio_fifo::start() { @@ -122,11 +217,24 @@ nirio_status nirio_fifo::start() //Do nothing. Already started. 
} else if (_state == MAPPED) { + _total_elements_acquired = 0; + _remaining_in_claimed_block = 0; + _remaining_acquirable_elements = 0; + status = _riok_proxy_ptr->start_fifo(_fifo_channel); + if (nirio_status_not_fatal(status)) { _state = STARTED; - _acquired_pending = 0; _expected_xfer_count = 0; + + if (_fifo_direction == OUTPUT_FIFO) + { + // pre-acquire a block of data + // (should be entire DMA buffer at this point) + + // requesting 0 elements, but it will acquire all since the policy is MAXIMIZE_THROUGHPUT + _acquire_block_from_rio_buffer(0, 1000, MAXIMIZE_THROUGHPUT, status); + } } } else { status = NiRio_Status_ResourceNotInitialized; @@ -143,7 +251,12 @@ nirio_status nirio_fifo::stop() boost::unique_lock lock(_mutex); if (_state == STARTED) { - if (_acquired_pending > 0) release(_acquired_pending); + + // release any remaining acquired elements + if (_total_elements_acquired > 0) release(_total_elements_acquired); + _total_elements_acquired = 0; + _remaining_in_claimed_block = 0; + _remaining_acquirable_elements = 0; status = _riok_proxy_ptr->stop_fifo(_fifo_channel); @@ -167,34 +280,44 @@ nirio_status nirio_fifo::acquire( boost::unique_lock lock(_mutex); if (_state == STARTED) { - uint32_t elements_acquired_u32 = 0; - uint32_t elements_remaining_u32 = 0; - void* elements_buffer = static_cast(elements); - status = _riok_proxy_ptr->wait_on_fifo( - _fifo_channel, - static_cast(elements_requested), - static_cast(_datatype_info.scalar_type), - _datatype_info.width * 8, - timeout, - _fifo_direction == OUTPUT_FIFO, - elements_buffer, - elements_acquired_u32, - elements_remaining_u32); - if (nirio_status_not_fatal(status)) { - elements = static_cast(elements_buffer); - elements_acquired = static_cast(elements_acquired_u32); - elements_remaining = static_cast(elements_remaining_u32); - _acquired_pending = elements_acquired; - - if (UHD_NIRIO_RX_FIFO_XFER_CHECK_EN && - _riok_proxy_ptr->get_rio_quirks().rx_fifo_xfer_check_en() && - get_direction() == INPUT_FIFO - ) { + if 
(_remaining_in_claimed_block == 0) + { + + // so acquire some now + if (!_acquire_block_from_rio_buffer( + elements_requested, + timeout, + _fifo_optimization_option, + status)) + { + elements_acquired = 0; + elements_remaining = _remaining_acquirable_elements; + return status; + } + + + if (get_direction() == INPUT_FIFO && + UHD_NIRIO_RX_FIFO_XFER_CHECK_EN && + _riok_proxy_ptr->get_rio_quirks().rx_fifo_xfer_check_en()) + { _expected_xfer_count += static_cast(elements_requested * sizeof(data_t)); status = _ensure_transfer_completed(timeout); } } + + if (nirio_status_not_fatal(status)) + { + // Assign the request the proper area of the DMA FIFO buffer + elements = _elements_buffer; + elements_acquired = std::min(_remaining_in_claimed_block, elements_requested); + _remaining_in_claimed_block -= elements_acquired; + elements_remaining = _remaining_in_claimed_block + _remaining_acquirable_elements; + + // Advance the write pointer forward in the acquired elements buffer + _elements_buffer += elements_acquired; + } + } else { status = NiRio_Status_ResourceNotInitialized; } @@ -214,7 +337,7 @@ nirio_status nirio_fifo::release(const size_t elements) status = _riok_proxy_ptr->grant_fifo( _fifo_channel, static_cast(elements)); - _acquired_pending = 0; + _total_elements_acquired -= elements; } else { status = NiRio_Status_ResourceNotInitialized; } @@ -226,7 +349,7 @@ template nirio_status nirio_fifo::read( data_t* buf, const uint32_t num_elements, - uint32_t timeout, + const uint32_t timeout, uint32_t& num_read, uint32_t& num_remaining) { @@ -257,7 +380,7 @@ template nirio_status nirio_fifo::write( const data_t* buf, const uint32_t num_elements, - uint32_t timeout, + const uint32_t timeout, uint32_t& num_remaining) { nirio_status status = NiRio_Status_Success; diff --git a/host/lib/transport/nirio/niriok_proxy_impl_v2.cpp b/host/lib/transport/nirio/niriok_proxy_impl_v2.cpp index f75de01e1..4e88b8a6e 100644 --- a/host/lib/transport/nirio/niriok_proxy_impl_v2.cpp +++ 
b/host/lib/transport/nirio/niriok_proxy_impl_v2.cpp @@ -759,14 +759,14 @@ namespace uhd { namespace niusrprio in.timeout = timeout; in.status = NiRio_Status_Success; - ioctl_status = + ioctl_status = nirio_driver_iface::rio_ioctl( - _device_handle, - IOCTL_TRANSPORT_FIFO_WAIT, - &in, - sizeof(in), - &out, - sizeof(out)); + _device_handle, + IOCTL_TRANSPORT_FIFO_WAIT, + &in, + sizeof(in), + &out, + sizeof(out)); if (nirio_status_fatal(ioctl_status)) return ioctl_status; data_pointer = reinterpret_cast(out.elements); diff --git a/host/lib/transport/nirio_zero_copy.cpp b/host/lib/transport/nirio_zero_copy.cpp index 8bec49a5f..9ed02a6dc 100644 --- a/host/lib/transport/nirio_zero_copy.cpp +++ b/host/lib/transport/nirio_zero_copy.cpp @@ -182,11 +182,13 @@ public: nirio_status_chain( _recv_fifo->initialize( (_xport_params.recv_frame_size*_xport_params.num_recv_frames)/sizeof(fifo_data_t), + _xport_params.recv_frame_size / sizeof(fifo_data_t), actual_depth, actual_size), status); nirio_status_chain( _send_fifo->initialize( (_xport_params.send_frame_size*_xport_params.num_send_frames)/sizeof(fifo_data_t), + _xport_params.send_frame_size / sizeof(fifo_data_t), actual_depth, actual_size), status); -- cgit v1.2.3