aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Braun <martin.braun@ettus.com>2019-02-21 15:58:04 -0800
committerMartin Braun <martin.braun@ettus.com>2019-02-22 16:12:50 -0800
commit616e01aeac3ae91cb913ed4a74fdd9d88d748d8e (patch)
treea3086bd22781f4519c0c9e3ebf1a2be09b68c5f4
parent056faab862e97ca9949c13790ab88ec50543000f (diff)
downloaduhd-616e01aeac3ae91cb913ed4a74fdd9d88d748d8e.tar.gz
uhd-616e01aeac3ae91cb913ed4a74fdd9d88d748d8e.tar.bz2
uhd-616e01aeac3ae91cb913ed4a74fdd9d88d748d8e.zip
mpmd: Allow reclaim failures on component updates
MPM has a flag that identifies component reloads as requiring to restart the RPC server. This change honours that flag, and doesn't cause a fatal failure when reclaims fail to ack for certain operations. For example, running uhd_image_loader on an N310 could fail after the FPGA was reloaded because the communication to the RPC server was temporarily interrupted. This is not always avoidable, since the RPC server does actually go down, and Ethernet connections might also be lost. So, we cut our losses and accept failures in that case.
-rw-r--r--host/lib/usrp/mpmd/mpmd_impl.hpp28
-rw-r--r--host/lib/usrp/mpmd/mpmd_mboard_impl.cpp40
-rw-r--r--host/lib/usrp/mpmd/mpmd_prop_tree.cpp13
3 files changed, 67 insertions, 14 deletions
diff --git a/host/lib/usrp/mpmd/mpmd_impl.hpp b/host/lib/usrp/mpmd/mpmd_impl.hpp
index cf0dad484..f74627afa 100644
--- a/host/lib/usrp/mpmd/mpmd_impl.hpp
+++ b/host/lib/usrp/mpmd/mpmd_impl.hpp
@@ -130,6 +130,23 @@ public:
uhd::device_addr_t get_rx_hints() const;
uhd::device_addr_t get_tx_hints() const;
+ /*! Setting this flag will enable a mode where a reclaim failure is
+ * acceptable.
+ *
+ * The only legitimate time to do this is when a procedure is called that
+ * can cause communication with the RPC server to be interrupted
+ * legitimately, but non-critically. For example, when updating the FPGA
+ * image, the RPC server gets rebooted, but the claim loop is running in a
+ * separate thread, and needs some kind of flag to be notified that
+ * something is up.
+ */
+ void allow_claim_failure(const bool allow) {
+ if (allow) {
+ _allow_claim_failure_latch = true;
+ }
+ _allow_claim_failure_flag = allow;
+ }
+
private:
/*! Reference to the RPC client that handles claiming
*/
@@ -171,6 +188,17 @@ private:
uhd::mpmd::xport::mpmd_xport_mgr::uptr _xport_mgr;
uhd::device_addr_t send_args;
uhd::device_addr_t recv_args;
+
+ /*! This flag is only used within the claim() function. Go look there if you
+ * really need to know what it does.
+ */
+ std::atomic<bool> _allow_claim_failure_flag{false};
+
+ /*! This flag is only used within the claim() function. Go look there if you
+ * really need to know what it does.
+ */
+ std::atomic<bool> _allow_claim_failure_latch{false};
+
};
diff --git a/host/lib/usrp/mpmd/mpmd_mboard_impl.cpp b/host/lib/usrp/mpmd/mpmd_mboard_impl.cpp
index 11404ffaf..da4f99794 100644
--- a/host/lib/usrp/mpmd/mpmd_mboard_impl.cpp
+++ b/host/lib/usrp/mpmd/mpmd_mboard_impl.cpp
@@ -1,5 +1,6 @@
//
// Copyright 2017 Ettus Research, a National Instruments Company
+// Copyright 2019 Ettus Research, a National Instruments Brand
//
// SPDX-License-Identifier: GPL-3.0-or-later
//
@@ -301,12 +302,7 @@ mpmd_mboard_impl::mpmd_mboard_impl(
mpmd_mboard_impl::~mpmd_mboard_impl()
{
- try {
- dump_logs();
- } catch (...) {
- UHD_LOG_WARNING("MPMD", "Could not flush log queue on exit!");
- }
- UHD_SAFE_CALL(if (not rpc->request_with_token<bool>("unclaim")) {
+ UHD_SAFE_CALL(dump_logs(); if (not rpc->request_with_token<bool>("unclaim")) {
UHD_LOG_WARNING("MPMD", "Failure to ack unclaim!");
});
}
@@ -398,10 +394,25 @@ uhd::device_addr_t mpmd_mboard_impl::get_tx_hints() const
bool mpmd_mboard_impl::claim()
{
try {
- return _claim_rpc->request_with_token<bool>("reclaim");
- } catch (...) {
- UHD_LOG_WARNING("MPMD", "Reclaim failed. Exiting claimer loop.");
- return false;
+ auto result = _claim_rpc->request_with_token<bool>("reclaim");
+ // When _allow_claim_failure_flag goes from true to false, we still have
+ // to wait for a successful reclaim before we can also set
+ // _allow_claim_failure_latch to false, because we have no way of
+ // synchronizing those events.
+ // In other words, we might be setting allow_claim_failure back to false
+ // too soon, but we have no way of knowing exactly when the right time
+ // is.
+ _allow_claim_failure_latch = _allow_claim_failure_flag.load();
+ return result;
+ } catch (const uhd::runtime_error& ex) {
+ // Note: Any RPC error will raise a uhd::runtime_error. Other errors are
+ // not handled here.
+ if (_allow_claim_failure_latch) {
+ UHD_LOG_DEBUG("MPMD", ex.what());
+ } else {
+ UHD_LOG_WARNING("MPMD", ex.what());
+ }
+ return _allow_claim_failure_latch;
}
}
@@ -420,8 +431,13 @@ uhd::task::sptr mpmd_mboard_impl::claim_device_and_make_task()
auto now = std::chrono::steady_clock::now();
if (not this->claim()) {
throw uhd::value_error("mpmd device reclaiming loop failed!");
- };
- this->dump_logs();
+ } else {
+ try {
+ this->dump_logs();
+ } catch(const uhd::runtime_error&) {
+ UHD_LOG_WARNING("MPMD", "Could not read back log queue!");
+ }
+ }
std::this_thread::sleep_until(
now + std::chrono::milliseconds(MPMD_RECLAIM_INTERVAL_MS));
});
diff --git a/host/lib/usrp/mpmd/mpmd_prop_tree.cpp b/host/lib/usrp/mpmd/mpmd_prop_tree.cpp
index e1b5d43cb..f00008971 100644
--- a/host/lib/usrp/mpmd/mpmd_prop_tree.cpp
+++ b/host/lib/usrp/mpmd/mpmd_prop_tree.cpp
@@ -1,5 +1,6 @@
//
// Copyright 2018 Ettus Research, a National Instruments Company
+// Copyright 2019 Ettus Research, a National Instruments Brand
//
// SPDX-License-Identifier: GPL-3.0-or-later
//
@@ -178,8 +179,16 @@ void mpmd_impl::init_property_tree(
for (const auto& comp_name : updateable_components) {
UHD_LOG_TRACE("MPMD", "Adding motherboard component: " << comp_name);
tree->create<uhd::usrp::component_files_t>(mb_path / "components" / comp_name)
- .set_coercer([mb](const uhd::usrp::component_files_t& comp_files) {
- return _update_component(comp_files, mb);
+ .set_coercer([mb, comp_name](const uhd::usrp::component_files_t& comp_files) {
+ auto comp_info = _get_component_info(comp_name, mb)[0];
+ if (comp_info.metadata.get("reset", "") == "True") {
+ UHD_LOG_DEBUG(
+ "MPMD", "Bracing for potential loss of RPC server connection.");
+ mb->allow_claim_failure(true);
+ }
+ auto result = _update_component(comp_files, mb);
+ mb->allow_claim_failure(false);
+ return result;
})
.set_publisher([mb, comp_name]() {
return _get_component_info(comp_name, mb);