From ba0e3c8dcc90ea3e4cf015dea1038cb51a89b159 Mon Sep 17 00:00:00 2001
From: Jason Abele <jason@ettus.com>
Date: Fri, 8 Jul 2011 12:56:29 -0700
Subject: Fix rounding in DBSRX2 tuning

---
 host/lib/usrp/dboard/db_dbsrx2.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'host')

diff --git a/host/lib/usrp/dboard/db_dbsrx2.cpp b/host/lib/usrp/dboard/db_dbsrx2.cpp
index aaced7a5d..f646a238f 100644
--- a/host/lib/usrp/dboard/db_dbsrx2.cpp
+++ b/host/lib/usrp/dboard/db_dbsrx2.cpp
@@ -230,7 +230,7 @@ void dbsrx2::set_lo_freq(double target_freq){
 
     N = (target_freq*R*ext_div)/(ref_freq); //actual spec range is (19, 251)
     intdiv = int(std::floor(N)); //  if (intdiv < 19  or intdiv > 251) continue;
-    fracdiv = boost::math::iround((N - intdiv)*double(1 << 20));
+    fracdiv = std::floor((N - intdiv)*double(1 << 20));
 
     //calculate the actual freq from the values above
     N = double(intdiv) + double(fracdiv)/double(1 << 20);
-- 
cgit v1.2.3


From 5239879e9f97bdbb6e3c531cee85824823ebff89 Mon Sep 17 00:00:00 2001
From: Josh Blum <josh@joshknows.com>
Date: Mon, 11 Jul 2011 13:02:21 -0700
Subject: usrp2: added a place for product code in eeprom map

---
 host/lib/usrp/mboard_eeprom.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'host')

diff --git a/host/lib/usrp/mboard_eeprom.cpp b/host/lib/usrp/mboard_eeprom.cpp
index 2ee4a9284..f65b0aac8 100644
--- a/host/lib/usrp/mboard_eeprom.cpp
+++ b/host/lib/usrp/mboard_eeprom.cpp
@@ -75,6 +75,7 @@ static const uhd::dict<std::string, boost::uint8_t> USRP_N100_OFFSETS = boost::a
     ("mac-addr", 0x02)
     ("ip-addr", 0x0C)
     //leave space here for other addresses (perhaps)
+    ("prod-lsb-msb", 0x14)
     ("gpsdo", 0x17)
     ("serial", 0x18)
     ("name", 0x18 + SERIAL_LEN)
@@ -92,6 +93,11 @@ static void load_n100(mboard_eeprom_t &mb_eeprom, i2c_iface &iface){
     boost::uint16_t rev = (boost::uint16_t(rev_lsb_msb.at(0)) << 0) | (boost::uint16_t(rev_lsb_msb.at(1)) << 8);
     mb_eeprom["rev"] = boost::lexical_cast<std::string>(rev);
 
+    //extract the product code
+    byte_vector_t prod_lsb_msb = iface.read_eeprom(N100_EEPROM_ADDR, USRP_N100_OFFSETS["prod-lsb-msb"], 2);
+    boost::uint16_t prod = (boost::uint16_t(prod_lsb_msb.at(0)) << 0) | (boost::uint16_t(prod_lsb_msb.at(1)) << 8);
+    mb_eeprom["product"] = (prod == 0 or prod == 0xffff)? "" : boost::lexical_cast<std::string>(prod);
+
     //extract the addresses
     mb_eeprom["mac-addr"] = mac_addr_t::from_bytes(iface.read_eeprom(
         N100_EEPROM_ADDR, USRP_N100_OFFSETS["mac-addr"], 6
@@ -140,6 +146,16 @@ static void store_n100(const mboard_eeprom_t &mb_eeprom, i2c_iface &iface){
         iface.write_eeprom(N100_EEPROM_ADDR, USRP_N100_OFFSETS["rev-lsb-msb"], rev_lsb_msb);
     }
 
+    //parse the product code
+    if (mb_eeprom.has_key("product")){
+        boost::uint16_t prod = boost::lexical_cast<boost::uint16_t>(mb_eeprom["product"]);
+        byte_vector_t prod_lsb_msb = boost::assign::list_of
+            (boost::uint8_t(prod >> 0))
+            (boost::uint8_t(prod >> 8))
+        ;
+        iface.write_eeprom(N100_EEPROM_ADDR, USRP_N100_OFFSETS["prod-lsb-msb"], prod_lsb_msb);
+    }
+
     //store the addresses
     if (mb_eeprom.has_key("mac-addr")) iface.write_eeprom(
         N100_EEPROM_ADDR, USRP_N100_OFFSETS["mac-addr"],
-- 
cgit v1.2.3


From 3fea33db9a5d951b9db7c7653eaae0e6291de0bc Mon Sep 17 00:00:00 2001
From: Josh Blum <josh@joshknows.com>
Date: Thu, 14 Jul 2011 22:41:56 -0700
Subject: uhd: pulled misc changes from other branches into master

---
 host/CMakeLists.txt                    |  2 +-
 host/examples/benchmark_rate.cpp       |  3 ++-
 host/lib/convert/convert_with_sse2.cpp | 40 +++++++++++++++++-----------------
 3 files changed, 23 insertions(+), 22 deletions(-)

(limited to 'host')

diff --git a/host/CMakeLists.txt b/host/CMakeLists.txt
index 034257551..b5f8e57c2 100644
--- a/host/CMakeLists.txt
+++ b/host/CMakeLists.txt
@@ -129,7 +129,7 @@ IF(MSVC)
     ENDIF(BOOST_ALL_DYN_LINK)
 ENDIF(MSVC)
 
-SET(Boost_ADDITIONAL_VERSIONS "1.42.0" "1.42" "1.43.0" "1.43" "1.44.0" "1.44" "1.45.0" "1.45" "1.46.0" "1.46")
+SET(Boost_ADDITIONAL_VERSIONS "1.42.0" "1.42" "1.43.0" "1.43" "1.44.0" "1.44" "1.45.0" "1.45" "1.46.0" "1.46" "1.47.0" "1.47")
 FIND_PACKAGE(Boost 1.36 COMPONENTS ${BOOST_REQUIRED_COMPONENTS})
 
 INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS})
diff --git a/host/examples/benchmark_rate.cpp b/host/examples/benchmark_rate.cpp
index 688cd797a..774b240d4 100644
--- a/host/examples/benchmark_rate.cpp
+++ b/host/examples/benchmark_rate.cpp
@@ -157,6 +157,7 @@ void benchmark_tx_rate_async_helper(uhd::usrp::multi_usrp::sptr usrp){
  * Main code + dispatcher
  **********************************************************************/
 int UHD_SAFE_MAIN(int argc, char *argv[]){
+    uhd::set_thread_priority_safe();
 
     //variables to be set by po
     std::string args;
@@ -177,7 +178,7 @@ int UHD_SAFE_MAIN(int argc, char *argv[]){
     po::notify(vm);
 
     //print the help message
-    if (vm.count("help")){
+    if (vm.count("help") or (vm.count("rx_rate") + vm.count("tx_rate")) == 0){
         std::cout << boost::format("UHD Benchmark Rate %s") % desc << std::endl;
         std::cout <<
         "    Specify --rx_rate for a receive-only test.\n"
diff --git a/host/lib/convert/convert_with_sse2.cpp b/host/lib/convert/convert_with_sse2.cpp
index 0cbb2c444..0649baab4 100644
--- a/host/lib/convert/convert_with_sse2.cpp
+++ b/host/lib/convert/convert_with_sse2.cpp
@@ -28,10 +28,10 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){
     const __m128 scalar = _mm_set_ps1(float(scale_factor));
 
     #define convert_fc32_1_to_item32_1_nswap_guts(_al_)                 \
-    for (; i+4 < nsamps; i+=4){                                  \
+    for (; i+4 < nsamps; i+=4){                                         \
         /* load from input */                                           \
-        __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \
-        __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \
+        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
+        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
                                                                         \
         /* convert and scale */ \
         __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \
@@ -53,9 +53,9 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){
     case 0x8:
         output[i] = fc32_to_item32(input[i], float(scale_factor)); i++;
     case 0x0:
-        convert_fc32_1_to_item32_1_nswap_guts()
+        convert_fc32_1_to_item32_1_nswap_guts(_)
         break;
-    default: convert_fc32_1_to_item32_1_nswap_guts(u)
+    default: convert_fc32_1_to_item32_1_nswap_guts(u_)
     }
 
     //convert remainder
@@ -71,10 +71,10 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){
     const __m128 scalar = _mm_set_ps1(float(scale_factor));
 
     #define convert_fc32_1_to_item32_1_bswap_guts(_al_)                 \
-    for (; i+4 < nsamps; i+=4){                                  \
+    for (; i+4 < nsamps; i+=4){                                         \
         /* load from input */                                           \
-        __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \
-        __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \
+        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
+        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
                                                                         \
         /* convert and scale */ \
         __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \
@@ -95,9 +95,9 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){
     case 0x8:
         output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor))); i++;
     case 0x0:
-        convert_fc32_1_to_item32_1_bswap_guts()
+        convert_fc32_1_to_item32_1_bswap_guts(_)
         break;
-    default: convert_fc32_1_to_item32_1_bswap_guts(u)
+    default: convert_fc32_1_to_item32_1_bswap_guts(u_)
     }
 
     //convert remainder
@@ -114,7 +114,7 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){
     const __m128i zeroi = _mm_setzero_si128();
 
     #define convert_item32_1_to_fc32_1_nswap_guts(_al_)                 \
-    for (; i+4 < nsamps; i+=4){                                  \
+    for (; i+4 < nsamps; i+=4){                                         \
         /* load from input */                                           \
         __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
                                                                         \
@@ -129,8 +129,8 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){
         __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);     \
                                                                         \
         /* store to output */                                           \
-        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \
-        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \
+        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \
+        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \
     }                                                                   \
 
     size_t i = 0;
@@ -140,9 +140,9 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){
     case 0x8:
         output[i] = item32_to_fc32(input[i], float(scale_factor)); i++;
     case 0x0:
-        convert_item32_1_to_fc32_1_nswap_guts()
+        convert_item32_1_to_fc32_1_nswap_guts(_)
         break;
-    default: convert_item32_1_to_fc32_1_nswap_guts(u)
+    default: convert_item32_1_to_fc32_1_nswap_guts(u_)
     }
 
     //convert remainder
@@ -159,7 +159,7 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){
     const __m128i zeroi = _mm_setzero_si128();
 
     #define convert_item32_1_to_fc32_1_bswap_guts(_al_)                 \
-    for (; i+4 < nsamps; i+=4){                                  \
+    for (; i+4 < nsamps; i+=4){                                         \
         /* load from input */                                           \
         __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
                                                                         \
@@ -173,8 +173,8 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){
         __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);     \
                                                                         \
         /* store to output */                                           \
-        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \
-        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \
+        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \
+        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \
     }                                                                   \
 
     size_t i = 0;
@@ -184,9 +184,9 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){
     case 0x8:
         output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); i++;
     case 0x0:
-        convert_item32_1_to_fc32_1_bswap_guts()
+        convert_item32_1_to_fc32_1_bswap_guts(_)
         break;
-    default: convert_item32_1_to_fc32_1_bswap_guts(u)
+    default: convert_item32_1_to_fc32_1_bswap_guts(u_)
     }
 
     //convert remainder
-- 
cgit v1.2.3


From 7c84c240726114525b2501fb8e1bd8b112690983 Mon Sep 17 00:00:00 2001
From: Jason Abele <jason@ettus.com>
Date: Fri, 15 Jul 2011 14:21:22 -0700
Subject: Updates to WBX dboard driver for version 3 support

    Use step attenuator for TX gain control
---
 host/lib/usrp/dboard/db_wbx_common.cpp | 40 +++++++++++++++++++++-------------
 host/lib/usrp/dboard/db_wbx_simple.cpp | 12 +++++++---
 2 files changed, 34 insertions(+), 18 deletions(-)

(limited to 'host')

diff --git a/host/lib/usrp/dboard/db_wbx_common.cpp b/host/lib/usrp/dboard/db_wbx_common.cpp
index 1a016e89c..c21ba80dc 100644
--- a/host/lib/usrp/dboard/db_wbx_common.cpp
+++ b/host/lib/usrp/dboard/db_wbx_common.cpp
@@ -44,7 +44,7 @@
 #define TX_ATTN_MASK    (TX_ATTN_16|TX_ATTN_8|TX_ATTN_4|TX_ATTN_2|TX_ATTN_1)      // valid bits of TX Attenuator Control
 
 // Mixer functions
-#define TX_MIXER_ENB    (TXMOD_EN|ADF4350_PDBRF)
+#define TX_MIXER_ENB    (TXMOD_EN|ADF4350_PDBRF)    // for v3, TXMOD_EN tied to ADF4350_PDBRF rather than separate
 #define TX_MIXER_DIS    0
 
 #define RX_MIXER_ENB    (RXBB_PDB|ADF4350_PDBRF)
@@ -102,18 +102,19 @@ wbx_base::wbx_base(ctor_args_t args) : xcvr_dboard_base(args){
 
     //v3 has different io bits for attenuator control
     int v3_iobits = is_v3() ? TX_ATTN_MASK : ADF4350_CE;
+    int v3_tx_mod = is_v3() ? ADF4350_PDBRF : TXMOD_EN|ADF4350_PDBRF;
 
     //set the gpio directions and atr controls
-    this->get_iface()->set_pin_ctrl(dboard_iface::UNIT_TX, TXMOD_EN|ADF4350_PDBRF);
+    this->get_iface()->set_pin_ctrl(dboard_iface::UNIT_TX, v3_tx_mod);
     this->get_iface()->set_pin_ctrl(dboard_iface::UNIT_RX, RXBB_PDB|ADF4350_PDBRF);
-    this->get_iface()->set_gpio_ddr(dboard_iface::UNIT_TX, TX_PUP_5V|TX_PUP_3V|TXMOD_EN|ADF4350_PDBRF|v3_iobits);
+    this->get_iface()->set_gpio_ddr(dboard_iface::UNIT_TX, TX_PUP_5V|TX_PUP_3V|v3_tx_mod|v3_iobits);
     this->get_iface()->set_gpio_ddr(dboard_iface::UNIT_RX, RX_PUP_5V|RX_PUP_3V|ADF4350_CE|RXBB_PDB|ADF4350_PDBRF|RX_ATTN_MASK);
 
     //setup ATR for the mixer enables (always enabled to prevent phase slip between bursts)
-    this->get_iface()->set_atr_reg(dboard_iface::UNIT_TX, dboard_iface::ATR_REG_IDLE,        TX_MIXER_ENB, TX_MIXER_DIS | TX_MIXER_ENB);
-    this->get_iface()->set_atr_reg(dboard_iface::UNIT_TX, dboard_iface::ATR_REG_RX_ONLY,     TX_MIXER_ENB, TX_MIXER_DIS | TX_MIXER_ENB);
-    this->get_iface()->set_atr_reg(dboard_iface::UNIT_TX, dboard_iface::ATR_REG_TX_ONLY,     TX_MIXER_ENB, TX_MIXER_DIS | TX_MIXER_ENB);
-    this->get_iface()->set_atr_reg(dboard_iface::UNIT_TX, dboard_iface::ATR_REG_FULL_DUPLEX, TX_MIXER_ENB, TX_MIXER_DIS | TX_MIXER_ENB);
+    this->get_iface()->set_atr_reg(dboard_iface::UNIT_TX, dboard_iface::ATR_REG_IDLE,        v3_tx_mod, TX_MIXER_DIS | v3_tx_mod);
+    this->get_iface()->set_atr_reg(dboard_iface::UNIT_TX, dboard_iface::ATR_REG_RX_ONLY,     v3_tx_mod, TX_MIXER_DIS | v3_tx_mod);
+    this->get_iface()->set_atr_reg(dboard_iface::UNIT_TX, dboard_iface::ATR_REG_TX_ONLY,     v3_tx_mod, TX_MIXER_DIS | v3_tx_mod);
+    this->get_iface()->set_atr_reg(dboard_iface::UNIT_TX, dboard_iface::ATR_REG_FULL_DUPLEX, v3_tx_mod, TX_MIXER_DIS | v3_tx_mod);
 
     this->get_iface()->set_atr_reg(dboard_iface::UNIT_RX, dboard_iface::ATR_REG_IDLE,        RX_MIXER_ENB, RX_MIXER_DIS | RX_MIXER_ENB);
     this->get_iface()->set_atr_reg(dboard_iface::UNIT_RX, dboard_iface::ATR_REG_TX_ONLY,     RX_MIXER_ENB, RX_MIXER_DIS | RX_MIXER_ENB);
@@ -191,7 +192,7 @@ static int tx_pga0_gain_to_iobits(double &gain){
     double attn = wbx_v3_tx_gain_ranges["PGA0"].stop() - gain;
 
     //calculate the attenuation
-    int attn_code = boost::math::iround(attn*2);
+    int attn_code = boost::math::iround(attn);
     int iobits = (
             (attn_code & 16 ? 0 : TX_ATTN_16) |
             (attn_code &  8 ? 0 : TX_ATTN_8) |
@@ -236,11 +237,11 @@ void wbx_base::set_tx_gain(double gain, const std::string &name){
     if (is_v3()) {
         assert_has(wbx_v3_tx_gain_ranges.keys(), name, "wbx tx gain name");
         if(name == "PGA0"){
-            double dac_volts = tx_pga0_gain_to_iobits(gain);
+            boost::uint16_t io_bits = tx_pga0_gain_to_iobits(gain);
             _tx_gains[name] = gain;
 
-            //write the new voltage to the aux dac
-            this->get_iface()->write_aux_dac(dboard_iface::UNIT_TX, dboard_iface::AUX_DAC_A, dac_volts);
+            //write the new gain to tx gpio outputs
+            this->get_iface()->set_gpio_out(dboard_iface::UNIT_TX, io_bits, TX_ATTN_MASK);
         }
         else UHD_THROW_INVALID_CODE_PATH();
     }
@@ -447,7 +448,7 @@ bool wbx_base::get_locked(dboard_iface::unit_t unit){
 }
 
 bool wbx_base::is_v3(void){
-    return get_rx_id() == 0x057;
+    return get_rx_id().to_uint16() == 0x057;
 }
 
 /***********************************************************************
@@ -570,12 +571,21 @@ void wbx_base::tx_get(const wax::obj &key_, wax::obj &val){
         return;
 
     case SUBDEV_PROP_GAIN_RANGE:
-        assert_has(wbx_tx_gain_ranges.keys(), key.name, "wbx tx gain name");
-        val = wbx_tx_gain_ranges[key.name];
+        if (is_v3()) {
+            assert_has(wbx_v3_tx_gain_ranges.keys(), key.name, "wbx tx gain name");
+            val = wbx_v3_tx_gain_ranges[key.name];
+        }
+        else {
+            assert_has(wbx_tx_gain_ranges.keys(), key.name, "wbx tx gain name");
+            val = wbx_tx_gain_ranges[key.name];
+        }
         return;
 
     case SUBDEV_PROP_GAIN_NAMES:
-        val = prop_names_t(wbx_tx_gain_ranges.keys());
+        if (is_v3())
+            val = prop_names_t(wbx_v3_tx_gain_ranges.keys());
+        else
+            val = prop_names_t(wbx_tx_gain_ranges.keys());
         return;
 
     case SUBDEV_PROP_FREQ:
diff --git a/host/lib/usrp/dboard/db_wbx_simple.cpp b/host/lib/usrp/dboard/db_wbx_simple.cpp
index 602ce389d..aa4937b19 100644
--- a/host/lib/usrp/dboard/db_wbx_simple.cpp
+++ b/host/lib/usrp/dboard/db_wbx_simple.cpp
@@ -16,7 +16,7 @@
 //
 
 // Antenna constants
-#define ANTSW_IO        ((1 << 5)|(1 << 15))    // on UNIT_TX, 0 = TX, 1 = RX, on UNIT_RX 0 = main ant, 1 = RX2
+#define ANTSW_IO        ((1 << 15))             // on UNIT_TX, 0 = TX, 1 = RX, on UNIT_RX 0 = main ant, 1 = RX2
 #define ANT_TX          0                       //the tx line is transmitting
 #define ANT_RX          ANTSW_IO                //the tx line is receiving
 #define ANT_TXRX        0                       //the rx line is on txrx
@@ -150,7 +150,10 @@ void wbx_simple::rx_get(const wax::obj &key_, wax::obj &val){
     //handle the get request conditioned on the key
     switch(key.as<subdev_prop_t>()){
     case SUBDEV_PROP_NAME:
-        val = std::string("WBX RX + Simple GDB");
+        if (is_v3())
+            val = std::string("WBX v3 RX + Simple GDB");
+        else
+            val = std::string("WBX RX + Simple GDB");
         return;
 
     case SUBDEV_PROP_FREQ:
@@ -204,7 +207,10 @@ void wbx_simple::tx_get(const wax::obj &key_, wax::obj &val){
     //handle the get request conditioned on the key
     switch(key.as<subdev_prop_t>()){
     case SUBDEV_PROP_NAME:
-        val = std::string("WBX TX + Simple GDB");
+        if (is_v3())
+            val = std::string("WBX v3 TX + Simple GDB");
+        else
+            val = std::string("WBX TX + Simple GDB");
         return;
 
     case SUBDEV_PROP_FREQ:
-- 
cgit v1.2.3


From 87d67d7777ed21121896b7733723ca3109e18c8c Mon Sep 17 00:00:00 2001
From: Josh Blum <josh@joshknows.com>
Date: Fri, 15 Jul 2011 13:36:56 -0700
Subject: uhd: created SSE2 conversion routines for fc64

---
 host/lib/convert/CMakeLists.txt             |  10 +-
 host/lib/convert/convert_fc32_with_sse2.cpp | 196 +++++++++++++++++++++++++
 host/lib/convert/convert_fc64_with_sse2.cpp | 212 ++++++++++++++++++++++++++++
 host/lib/convert/convert_with_sse2.cpp      | 196 -------------------------
 4 files changed, 414 insertions(+), 200 deletions(-)
 create mode 100644 host/lib/convert/convert_fc32_with_sse2.cpp
 create mode 100644 host/lib/convert/convert_fc64_with_sse2.cpp
 delete mode 100644 host/lib/convert/convert_with_sse2.cpp

(limited to 'host')

diff --git a/host/lib/convert/CMakeLists.txt b/host/lib/convert/CMakeLists.txt
index 5f05b0cb8..e6e8ec088 100644
--- a/host/lib/convert/CMakeLists.txt
+++ b/host/lib/convert/CMakeLists.txt
@@ -36,13 +36,15 @@ CHECK_INCLUDE_FILE_CXX(emmintrin.h HAVE_EMMINTRIN_H)
 UNSET(CMAKE_REQUIRED_FLAGS)
 
 IF(HAVE_EMMINTRIN_H)
+    SET(convert_with_sse2_sources
+        ${CMAKE_CURRENT_SOURCE_DIR}/convert_fc32_with_sse2.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/convert_fc64_with_sse2.cpp
+    )
     SET_SOURCE_FILES_PROPERTIES(
-        ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_sse2.cpp
+        ${convert_with_sse2_sources}
         PROPERTIES COMPILE_FLAGS "${EMMINTRIN_FLAGS}"
     )
-    LIBUHD_APPEND_SOURCES(
-        ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_sse2.cpp
-    )
+    LIBUHD_APPEND_SOURCES(${convert_with_sse2_sources})
 ENDIF(HAVE_EMMINTRIN_H)
 
 ########################################################################
diff --git a/host/lib/convert/convert_fc32_with_sse2.cpp b/host/lib/convert/convert_fc32_with_sse2.cpp
new file mode 100644
index 000000000..676e1561c
--- /dev/null
+++ b/host/lib/convert/convert_fc32_with_sse2.cpp
@@ -0,0 +1,196 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "convert_common.hpp"
+#include <uhd/utils/byteswap.hpp>
+#include <emmintrin.h>
+
+using namespace uhd::convert;
+
+DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){
+    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    const __m128 scalar = _mm_set_ps1(float(scale_factor)); //scale factor replicated into all four float lanes
+
+    #define convert_fc32_1_to_item32_1_nswap_guts(_al_)                 \
+    for (; i+4 < nsamps; i+=4){                                         \
+        /* load 4 samples; _al_ selects _mm_load_ps or _mm_loadu_ps */  \
+        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
+        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
+                                                                        \
+        /* scale, then convert to 32-bit ints */                        \
+        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \
+        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));    \
+                                                                        \
+        /* pack to 16 bits, then swap each adjacent 16-bit pair */      \
+        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \
+        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
+        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
+                                                                        \
+        /* store 4 packed items (unaligned store) */                    \
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \
+    }                                                                   \
+
+    size_t i = 0;
+
+    //dispatch on input alignment (address mod 16): offset 8 converts one sample first and then uses aligned loads, any other non-zero offset uses unaligned loads; assumes sizeof(fc32_t) is 8 so the peel reaches a 16-byte boundary - TODO confirm; NOTE(review): the peel is not guarded by nsamps > 0
+    switch (size_t(input) & 0xf){
+    case 0x8:
+        output[i] = fc32_to_item32(input[i], float(scale_factor)); i++; //no break: deliberate fallthrough into the aligned loop
+    case 0x0:
+        convert_fc32_1_to_item32_1_nswap_guts(_)
+        break;
+    default: convert_fc32_1_to_item32_1_nswap_guts(u_)
+    }
+
+    //convert whatever the 4-wide loop left over, one sample at a time (the loop stops once i+4 >= nsamps, so up to 4 samples end up here)
+    for (; i < nsamps; i++){
+        output[i] = fc32_to_item32(input[i], float(scale_factor));
+    }
+}
+
+DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){
+    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    const __m128 scalar = _mm_set_ps1(float(scale_factor)); //scale factor replicated into all four float lanes
+
+    #define convert_fc32_1_to_item32_1_bswap_guts(_al_)                 \
+    for (; i+4 < nsamps; i+=4){                                         \
+        /* load 4 samples; _al_ selects _mm_load_ps or _mm_loadu_ps */  \
+        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
+        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
+                                                                        \
+        /* scale, then convert to 32-bit ints */                        \
+        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \
+        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));    \
+                                                                        \
+        /* pack to 16 bits, then swap the bytes of each 16-bit word */  \
+        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \
+        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
+                                                                        \
+        /* store 4 packed items (unaligned store) */                    \
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \
+    }                                                                   \
+
+    size_t i = 0;
+
+    //dispatch on input alignment (address mod 16): offset 8 converts one sample first and then uses aligned loads, any other non-zero offset uses unaligned loads; assumes sizeof(fc32_t) is 8 so the peel reaches a 16-byte boundary - TODO confirm; NOTE(review): the peel is not guarded by nsamps > 0
+    switch (size_t(input) & 0xf){
+    case 0x8:
+        output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor))); i++; //no break: deliberate fallthrough into the aligned loop
+    case 0x0:
+        convert_fc32_1_to_item32_1_bswap_guts(_)
+        break;
+    default: convert_fc32_1_to_item32_1_bswap_guts(u_)
+    }
+
+    //convert whatever the 4-wide loop left over, one sample at a time (the loop stops once i+4 >= nsamps, so up to 4 samples end up here)
+    for (; i < nsamps; i++){
+        output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor)));
+    }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
+
+    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); //the extra 1/(1 << 16) undoes the unpack below, which leaves each value in the upper 16 bits
+    const __m128i zeroi = _mm_setzero_si128(); //supplies the zero low halves for the unpack
+
+    #define convert_item32_1_to_fc32_1_nswap_guts(_al_)                 \
+    for (; i+4 < nsamps; i+=4){                                         \
+        /* load 4 items (always an unaligned load) */                   \
+        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
+                                                                        \
+        /* swap adjacent 16-bit words, then widen each to 32 bits */    \
+        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
+        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
+        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \
+        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);               \
+                                                                        \
+        /* convert to float, then scale */                              \
+        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);     \
+        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);     \
+                                                                        \
+        /* store 4 samples; _al_ selects _mm_store_ps or _mm_storeu_ps */ \
+        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \
+        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \
+    }                                                                   \
+
+    size_t i = 0;
+
+    //dispatch on output alignment (address mod 16): offset 8 converts one sample first and then uses aligned stores, any other non-zero offset uses unaligned stores; assumes sizeof(fc32_t) is 8 so the peel reaches a 16-byte boundary - TODO confirm; NOTE(review): the peel is not guarded by nsamps > 0
+    switch (size_t(output) & 0xf){
+    case 0x8:
+        output[i] = item32_to_fc32(input[i], float(scale_factor)); i++; //no break: deliberate fallthrough into the aligned loop
+    case 0x0:
+        convert_item32_1_to_fc32_1_nswap_guts(_)
+        break;
+    default: convert_item32_1_to_fc32_1_nswap_guts(u_)
+    }
+
+    //convert whatever the 4-wide loop left over, one sample at a time (the loop stops once i+4 >= nsamps, so up to 4 samples end up here)
+    for (; i < nsamps; i++){
+        output[i] = item32_to_fc32(input[i], float(scale_factor));
+    }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
+
+    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); //the extra 1/(1 << 16) undoes the unpack below, which leaves each value in the upper 16 bits
+    const __m128i zeroi = _mm_setzero_si128(); //supplies the zero low halves for the unpack
+
+    #define convert_item32_1_to_fc32_1_bswap_guts(_al_)                 \
+    for (; i+4 < nsamps; i+=4){                                         \
+        /* load 4 items (always an unaligned load) */                   \
+        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
+                                                                        \
+        /* swap the bytes of each 16-bit word, then widen to 32 bits */ \
+        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
+        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \
+        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);               \
+                                                                        \
+        /* convert to float, then scale */                              \
+        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);     \
+        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);     \
+                                                                        \
+        /* store 4 samples; _al_ selects _mm_store_ps or _mm_storeu_ps */ \
+        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \
+        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \
+    }                                                                   \
+
+    size_t i = 0;
+
+    //dispatch on output alignment (address mod 16): offset 8 converts one sample first and then uses aligned stores, any other non-zero offset uses unaligned stores; assumes sizeof(fc32_t) is 8 so the peel reaches a 16-byte boundary - TODO confirm; NOTE(review): the peel is not guarded by nsamps > 0
+    switch (size_t(output) & 0xf){
+    case 0x8:
+        output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); i++; //no break: deliberate fallthrough into the aligned loop
+    case 0x0:
+        convert_item32_1_to_fc32_1_bswap_guts(_)
+        break;
+    default: convert_item32_1_to_fc32_1_bswap_guts(u_)
+    }
+
+    //convert whatever the 4-wide loop left over, one sample at a time (the loop stops once i+4 >= nsamps, so up to 4 samples end up here)
+    for (; i < nsamps; i++){
+        output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor));
+    }
+}
diff --git a/host/lib/convert/convert_fc64_with_sse2.cpp b/host/lib/convert/convert_fc64_with_sse2.cpp
new file mode 100644
index 000000000..4d28396a4
--- /dev/null
+++ b/host/lib/convert/convert_fc64_with_sse2.cpp
@@ -0,0 +1,212 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "convert_common.hpp"
+#include <uhd/utils/byteswap.hpp>
+#include <emmintrin.h>
+
+using namespace uhd::convert;
+
+DECLARE_CONVERTER(convert_fc64_1_to_item32_1_nswap, PRIORITY_CUSTOM){ //SSE2: scale fc64 samples and pack each into an item32 as two saturated 16-bit words (no byteswap)
+    const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]);
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    const __m128d scalar = _mm_set1_pd(scale_factor); //scale_factor replicated into both lanes
+
+    #define convert_fc64_1_to_item32_1_nswap_guts(_al_)                 \
+    for (; i+4 < nsamps; i+=4){                                         \
+        /* load 4 samples, 2 doubles per register; _al_ selects _mm_load_pd or _mm_loadu_pd */ \
+        __m128d tmp0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+0)); \
+        __m128d tmp1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+1)); \
+        __m128d tmp2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+2)); \
+        __m128d tmp3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+3)); \
+                                                                        \
+        /* scale, convert to int32 with truncation; each result fills only the low 64 bits, so unpacklo_epi64 joins two of them */ \
+        __m128i tmpi0 = _mm_cvttpd_epi32(_mm_mul_pd(tmp0, scalar));     \
+        __m128i tmpi1 = _mm_cvttpd_epi32(_mm_mul_pd(tmp1, scalar));     \
+        __m128i tmpilo = _mm_unpacklo_epi64(tmpi0, tmpi1);              \
+        __m128i tmpi2 = _mm_cvttpd_epi32(_mm_mul_pd(tmp2, scalar));     \
+        __m128i tmpi3 = _mm_cvttpd_epi32(_mm_mul_pd(tmp3, scalar));     \
+        __m128i tmpihi = _mm_unpacklo_epi64(tmpi2, tmpi3);              \
+                                                                        \
+        /* pack to 16 bit with signed saturation, then swap the two 16-bit words inside every 32-bit item */ \
+        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \
+        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
+        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
+                                                                        \
+        /* unaligned 128-bit store of the 4 packed items */             \
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \
+    }                                                                   \
+
+    size_t i = 0;
+
+    //use the aligned loads only when the input pointer is 16-byte aligned
+    if ((size_t(input) & 0xf) == 0){
+        convert_fc64_1_to_item32_1_nswap_guts(_)
+    }
+    else{
+        convert_fc64_1_to_item32_1_nswap_guts(u_)
+    }
+
+    //convert the remainder one sample at a time (this includes a final group of exactly 4, since the vector loop tests i+4 < nsamps)
+    for (; i < nsamps; i++){
+        output[i] = fc64_to_item32(input[i], scale_factor);
+    }
+}
+
+DECLARE_CONVERTER(convert_fc64_1_to_item32_1_bswap, PRIORITY_CUSTOM){ //SSE2: scale fc64 samples and pack each into a byteswapped item32 holding two saturated 16-bit words
+    const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]);
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    const __m128d scalar = _mm_set1_pd(scale_factor); //scale_factor replicated into both lanes
+
+    #define convert_fc64_1_to_item32_1_bswap_guts(_al_)                 \
+    for (; i+4 < nsamps; i+=4){                                         \
+        /* load 4 samples, 2 doubles per register; _al_ selects _mm_load_pd or _mm_loadu_pd */ \
+        __m128d tmp0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+0)); \
+        __m128d tmp1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+1)); \
+        __m128d tmp2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+2)); \
+        __m128d tmp3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+3)); \
+                                                                        \
+        /* scale, convert to int32 with truncation; each result fills only the low 64 bits, so unpacklo_epi64 joins two of them */ \
+        __m128i tmpi0 = _mm_cvttpd_epi32(_mm_mul_pd(tmp0, scalar));     \
+        __m128i tmpi1 = _mm_cvttpd_epi32(_mm_mul_pd(tmp1, scalar));     \
+        __m128i tmpilo = _mm_unpacklo_epi64(tmpi0, tmpi1);              \
+        __m128i tmpi2 = _mm_cvttpd_epi32(_mm_mul_pd(tmp2, scalar));     \
+        __m128i tmpi3 = _mm_cvttpd_epi32(_mm_mul_pd(tmp3, scalar));     \
+        __m128i tmpihi = _mm_unpacklo_epi64(tmpi2, tmpi3);              \
+                                                                        \
+        /* pack to 16 bit with signed saturation, then byteswap every 16-bit word via (x >> 8) | (x << 8); no word swap is done here */ \
+        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \
+        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
+                                                                        \
+        /* unaligned 128-bit store of the 4 packed items */             \
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \
+    }                                                                   \
+
+    size_t i = 0;
+
+    //use the aligned loads only when the input pointer is 16-byte aligned
+    if ((size_t(input) & 0xf) == 0){
+        convert_fc64_1_to_item32_1_bswap_guts(_)
+    }
+    else{
+        convert_fc64_1_to_item32_1_bswap_guts(u_)
+    }
+
+    //convert the remainder one sample at a time (this includes a final group of exactly 4, since the vector loop tests i+4 < nsamps)
+    for (; i < nsamps; i++){
+        output[i] = uhd::byteswap(fc64_to_item32(input[i], scale_factor));
+    }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_fc64_1_nswap, PRIORITY_CUSTOM){ //SSE2: expand the two 16-bit words of each item32 into a scaled pair of doubles (no byteswap)
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    fc64_t *output = reinterpret_cast<fc64_t *>(outputs[0]);
+
+    const __m128d scalar = _mm_set1_pd(scale_factor/(1 << 16)); //the extra 1/(1 << 16) undoes the shift into the upper 16 bits done by the unpack below
+    const __m128i zeroi = _mm_setzero_si128();
+
+    #define convert_item32_1_to_fc64_1_nswap_guts(_al_)                 \
+    for (; i+4 < nsamps; i+=4){                                         \
+        /* unaligned 128-bit load of 4 items */                         \
+        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
+                                                                        \
+        /* swap the two 16-bit words of every item, then interleave with zeros to widen each word to a 32-bit lane */ \
+        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
+        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
+        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits, so the sign bit is preserved */ \
+        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);               \
+                                                                        \
+        /* cvtepi32_pd converts only the low two lanes, so unpackhi_epi64 moves the upper pair down before the second conversion */ \
+        __m128d tmp0 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar);     \
+        tmpilo = _mm_unpackhi_epi64(tmpilo, zeroi);                     \
+        __m128d tmp1 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar);     \
+        __m128d tmp2 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar);     \
+        tmpihi = _mm_unpackhi_epi64(tmpihi, zeroi);                     \
+        __m128d tmp3 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar);     \
+                                                                        \
+        /* store 2 doubles to each of output[i+0..3]; _al_ selects _mm_store_pd or _mm_storeu_pd */ \
+        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+0), tmp0); \
+        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+1), tmp1); \
+        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+2), tmp2); \
+        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+3), tmp3); \
+    }                                                                   \
+
+    size_t i = 0;
+
+    //use the aligned stores only when the output pointer is 16-byte aligned
+    if ((size_t(output) & 0xf) == 0){
+        convert_item32_1_to_fc64_1_nswap_guts(_)
+    }
+    else{
+        convert_item32_1_to_fc64_1_nswap_guts(u_)
+    }
+
+    //convert the remainder one sample at a time (this includes a final group of exactly 4, since the vector loop tests i+4 < nsamps)
+    for (; i < nsamps; i++){
+        output[i] = item32_to_fc64(input[i], scale_factor);
+    }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_fc64_1_bswap, PRIORITY_CUSTOM){ //SSE2: expand the two 16-bit words of each byteswapped item32 into a scaled pair of doubles
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    fc64_t *output = reinterpret_cast<fc64_t *>(outputs[0]);
+
+    const __m128d scalar = _mm_set1_pd(scale_factor/(1 << 16)); //the extra 1/(1 << 16) undoes the shift into the upper 16 bits done by the unpack below
+    const __m128i zeroi = _mm_setzero_si128();
+
+    #define convert_item32_1_to_fc64_1_bswap_guts(_al_)                 \
+    for (; i+4 < nsamps; i+=4){                                         \
+        /* unaligned 128-bit load of 4 items */                         \
+        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
+                                                                        \
+        /* byteswap every 16-bit word via (x >> 8) | (x << 8), then interleave with zeros to widen each word to a 32-bit lane */ \
+        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
+        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits, so the sign bit is preserved */ \
+        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);               \
+                                                                        \
+        /* cvtepi32_pd converts only the low two lanes, so unpackhi_epi64 moves the upper pair down before the second conversion */ \
+        __m128d tmp0 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar);     \
+        tmpilo = _mm_unpackhi_epi64(tmpilo, zeroi);                     \
+        __m128d tmp1 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar);     \
+        __m128d tmp2 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar);     \
+        tmpihi = _mm_unpackhi_epi64(tmpihi, zeroi);                     \
+        __m128d tmp3 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar);     \
+                                                                        \
+        /* store 2 doubles to each of output[i+0..3]; _al_ selects _mm_store_pd or _mm_storeu_pd */ \
+        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+0), tmp0); \
+        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+1), tmp1); \
+        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+2), tmp2); \
+        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+3), tmp3); \
+    }                                                                   \
+
+    size_t i = 0;
+
+    //use the aligned stores only when the output pointer is 16-byte aligned
+    if ((size_t(output) & 0xf) == 0){
+        convert_item32_1_to_fc64_1_bswap_guts(_)
+    }
+    else{
+        convert_item32_1_to_fc64_1_bswap_guts(u_)
+    }
+
+    //convert the remainder one sample at a time (this includes a final group of exactly 4, since the vector loop tests i+4 < nsamps)
+    for (; i < nsamps; i++){
+        output[i] = item32_to_fc64(uhd::byteswap(input[i]), scale_factor);
+    }
+}
diff --git a/host/lib/convert/convert_with_sse2.cpp b/host/lib/convert/convert_with_sse2.cpp
deleted file mode 100644
index 0649baab4..000000000
--- a/host/lib/convert/convert_with_sse2.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-//
-// Copyright 2011-2011 Ettus Research LLC
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program.  If not, see <http://www.gnu.org/licenses/>.
-//
-
-#include "convert_common.hpp"
-#include <uhd/utils/byteswap.hpp>
-#include <emmintrin.h>
-
-using namespace uhd::convert;
-
-DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){
-    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
-    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
-
-    const __m128 scalar = _mm_set_ps1(float(scale_factor));
-
-    #define convert_fc32_1_to_item32_1_nswap_guts(_al_)                 \
-    for (; i+4 < nsamps; i+=4){                                         \
-        /* load from input */                                           \
-        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
-        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
-                                                                        \
-        /* convert and scale */ \
-        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \
-        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));    \
-                                                                        \
-        /* pack + swap 16-bit pairs */                                  \
-        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \
-        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
-        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
-                                                                        \
-        /* store to output */                                           \
-        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \
-    }                                                                   \
-
-    size_t i = 0;
-
-    //dispatch according to alignment
-    switch (size_t(input) & 0xf){
-    case 0x8:
-        output[i] = fc32_to_item32(input[i], float(scale_factor)); i++;
-    case 0x0:
-        convert_fc32_1_to_item32_1_nswap_guts(_)
-        break;
-    default: convert_fc32_1_to_item32_1_nswap_guts(u_)
-    }
-
-    //convert remainder
-    for (; i < nsamps; i++){
-        output[i] = fc32_to_item32(input[i], float(scale_factor));
-    }
-}
-
-DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){
-    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
-    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
-
-    const __m128 scalar = _mm_set_ps1(float(scale_factor));
-
-    #define convert_fc32_1_to_item32_1_bswap_guts(_al_)                 \
-    for (; i+4 < nsamps; i+=4){                                         \
-        /* load from input */                                           \
-        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
-        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
-                                                                        \
-        /* convert and scale */ \
-        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \
-        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));    \
-                                                                        \
-        /* pack + byteswap -> byteswap 16 bit words */                  \
-        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \
-        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
-                                                                        \
-        /* store to output */                                           \
-        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \
-    }                                                                   \
-
-    size_t i = 0;
-
-    //dispatch according to alignment
-    switch (size_t(input) & 0xf){
-    case 0x8:
-        output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor))); i++;
-    case 0x0:
-        convert_fc32_1_to_item32_1_bswap_guts(_)
-        break;
-    default: convert_fc32_1_to_item32_1_bswap_guts(u_)
-    }
-
-    //convert remainder
-    for (; i < nsamps; i++){
-        output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor)));
-    }
-}
-
-DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){
-    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
-    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
-
-    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16));
-    const __m128i zeroi = _mm_setzero_si128();
-
-    #define convert_item32_1_to_fc32_1_nswap_guts(_al_)                 \
-    for (; i+4 < nsamps; i+=4){                                         \
-        /* load from input */                                           \
-        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
-                                                                        \
-        /* unpack + swap 16-bit pairs */                                \
-        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
-        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
-        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \
-        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);               \
-                                                                        \
-        /* convert and scale */                                         \
-        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);     \
-        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);     \
-                                                                        \
-        /* store to output */                                           \
-        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \
-        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \
-    }                                                                   \
-
-    size_t i = 0;
-
-    //dispatch according to alignment
-    switch (size_t(output) & 0xf){
-    case 0x8:
-        output[i] = item32_to_fc32(input[i], float(scale_factor)); i++;
-    case 0x0:
-        convert_item32_1_to_fc32_1_nswap_guts(_)
-        break;
-    default: convert_item32_1_to_fc32_1_nswap_guts(u_)
-    }
-
-    //convert remainder
-    for (; i < nsamps; i++){
-        output[i] = item32_to_fc32(input[i], float(scale_factor));
-    }
-}
-
-DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){
-    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
-    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
-
-    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16));
-    const __m128i zeroi = _mm_setzero_si128();
-
-    #define convert_item32_1_to_fc32_1_bswap_guts(_al_)                 \
-    for (; i+4 < nsamps; i+=4){                                         \
-        /* load from input */                                           \
-        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
-                                                                        \
-        /* byteswap + unpack -> byteswap 16 bit words */                \
-        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
-        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \
-        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);               \
-                                                                        \
-        /* convert and scale */                                         \
-        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);     \
-        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);     \
-                                                                        \
-        /* store to output */                                           \
-        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \
-        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \
-    }                                                                   \
-
-    size_t i = 0;
-
-    //dispatch according to alignment
-    switch (size_t(output) & 0xf){
-    case 0x8:
-        output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); i++;
-    case 0x0:
-        convert_item32_1_to_fc32_1_bswap_guts(_)
-        break;
-    default: convert_item32_1_to_fc32_1_bswap_guts(u_)
-    }
-
-    //convert remainder
-    for (; i < nsamps; i++){
-        output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor));
-    }
-}
-- 
cgit v1.2.3


From 32bd5b3f2fe0a2d7924972ae28177c08753219fc Mon Sep 17 00:00:00 2001
From: Josh Blum <josh@joshknows.com>
Date: Sun, 17 Jul 2011 18:40:03 -0700
Subject: uhd: some header changes from next

---
 host/include/uhd/exception.hpp | 6 +++---
 host/include/uhd/utils/msg.hpp | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'host')

diff --git a/host/include/uhd/exception.hpp b/host/include/uhd/exception.hpp
index 10cd8f501..c05861788 100644
--- a/host/include/uhd/exception.hpp
+++ b/host/include/uhd/exception.hpp
@@ -15,8 +15,8 @@
 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 //
 
-#ifndef INCLUDED_UHD_UTILS_EXCEPTION_HPP
-#define INCLUDED_UHD_UTILS_EXCEPTION_HPP
+#ifndef INCLUDED_UHD_EXCEPTION_HPP
+#define INCLUDED_UHD_EXCEPTION_HPP
 
 #include <uhd/config.hpp>
 #include <boost/current_function.hpp>
@@ -163,4 +163,4 @@ namespace uhd{
 
 } //namespace uhd
 
-#endif /* INCLUDED_UHD_UTILS_EXCEPTION_HPP */
+#endif /* INCLUDED_UHD_EXCEPTION_HPP */
diff --git a/host/include/uhd/utils/msg.hpp b/host/include/uhd/utils/msg.hpp
index 71d2cb35e..b0f00e13d 100644
--- a/host/include/uhd/utils/msg.hpp
+++ b/host/include/uhd/utils/msg.hpp
@@ -30,6 +30,9 @@
 #define UHD_MSG(type) \
     uhd::msg::_msg(uhd::msg::type)()
 
+//! Debug helper: streams "<file>:<line>" of the call site to the status message channel
+#define UHD_HERE() \
+    UHD_MSG(status) << __FILE__ << ":" << __LINE__ << std::endl
 
 namespace uhd{ namespace msg{
 
-- 
cgit v1.2.3


From 88f3da8fff01c505d4d2d5524f34c12d2a09030f Mon Sep 17 00:00:00 2001
From: Jason Abele <jason@ettus.com>
Date: Fri, 15 Jul 2011 16:26:19 -0700
Subject: Added notes on latency and through put tuning for UDP

---
 host/docs/transport.rst | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'host')

diff --git a/host/docs/transport.rst b/host/docs/transport.rst
index e7c2f1885..f28d1efcb 100644
--- a/host/docs/transport.rst
+++ b/host/docs/transport.rst
@@ -38,6 +38,14 @@ increase or decrease the maximum number of samples per packet.
 The frame sizes default to an MTU of 1472 bytes per IP/UDP packet,
 and may be increased if permitted by your network hardware.
 
+**Note3:** For lower latency at low sample rates, use smaller buffers
+
+**Note4:** For overall lower latency, look for Interrupt Coalescing settings
+for your OS and ethernet chipset.  It seems the Intel ethernet chipsets offer
+fine-grained control in Linux.  Also, consult:
+
+* http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.prftungd/doc/prftungd/interrupt_coal.htm
+
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 Flow control parameters
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -77,6 +85,16 @@ To change the maximum values, run the following commands:
 
 Set the values permanently by editing */etc/sysctl.conf*
 
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Windows specific notes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+On Windows, it is important to change the default UDP behavior such that
+1500 byte packets still travel through the fast path of the sockets stack.
+
+FastSendDatagramThreshold registry key to change documented here:
+
+* http://www.microsoft.com/windows/windowsmedia/howto/articles/optimize_web.aspx#appendix_e
+
 ------------------------------------------------------------------------
 USB transport (libusb)
 ------------------------------------------------------------------------
-- 
cgit v1.2.3


From 60edbd7f59c873ce69ddb5b337e640f479a28321 Mon Sep 17 00:00:00 2001
From: Josh Blum <josh@joshknows.com>
Date: Sun, 17 Jul 2011 20:12:56 -0700
Subject: udp: squashed the wsa work and added documentation work

---
 host/docs/transport.rst                  |  42 +++--
 host/lib/transport/CMakeLists.txt        |  10 +-
 host/lib/transport/udp_wsa_zero_copy.cpp | 281 +++++++++++++++++++++++++++++++
 3 files changed, 320 insertions(+), 13 deletions(-)
 create mode 100644 host/lib/transport/udp_wsa_zero_copy.cpp

(limited to 'host')

diff --git a/host/docs/transport.rst b/host/docs/transport.rst
index f28d1efcb..b601cd8ff 100644
--- a/host/docs/transport.rst
+++ b/host/docs/transport.rst
@@ -19,7 +19,10 @@ The transport parameters are defined below for the various transports in the UHD
 ------------------------------------------------------------------------
 UDP transport (sockets)
 ------------------------------------------------------------------------
-The UDP transport is implemented with standard user-space/Berkeley sockets.
+The UDP transport is implemented with user-space sockets:
+
+* **UNIX:** standard Berkeley sockets API using send()/recv()
+* **Windows:** Windows Sockets API (WSA) using overlapped IO
 
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 Transport parameters
@@ -31,21 +34,18 @@ The following parameters can be used to alter the transport's default behavior:
 * **send_frame_size:** The size of a single send buffer in bytes
 * **num_send_frames:** The number of send buffers to allocate
 
-**Note1:** num_recv_frames and num_send_frames do not affect performance.
+**Note1:**
+num_recv_frames does not affect performance (all platforms).
+
+**Note2:**
+num_send_frames does not affect performance (UNIX only).
 
-**Note2:** recv_frame_size and send_frame_size can be used to
+**Note3:**
+recv_frame_size and send_frame_size can be used to
 increase or decrease the maximum number of samples per packet.
 The frame sizes default to an MTU of 1472 bytes per IP/UDP packet,
 and may be increased if permitted by your network hardware.
 
-**Note3:** For lower latency at low sample rates, use smaller buffers
-
-**Note4:** For overall lower latency, look for Interrupt Coalescing settings
-for your OS and ethernet chipset.  It seems the Intel ethernet chipsets offer
-fine-grained control in Linux.  Also, consult:
-
-* http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.prftungd/doc/prftungd/interrupt_coal.htm
-
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 Flow control parameters
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -72,6 +72,25 @@ The following parameters can be used to alter socket's buffer sizes:
 
 **Note:** Large send buffers tend to decrease transmit performance.
 
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Latency Optimization
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Latency is a measurement of the time it takes a sample to travel between the host and device.
+Most computer hardware and software is optimized for bandwidth, which may negatively affect latency.
+If your application has strict latency requirements, please consider the following notes:
+
+**Note1:**
+The time taken by the device to populate a packet is inversely proportional to the sample rate.
+Therefore, to improve receive latency, configure the transport for a smaller frame size.
+
+**Note2:**
+For overall latency improvements,
+look for "Interrupt Coalescing" settings for your OS and ethernet chipset.
+It seems the Intel ethernet chipsets offer fine-grained control in Linux.
+Also, consult:
+
+* http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.prftungd/doc/prftungd/interrupt_coal.htm
+
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 Linux specific notes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -90,7 +109,6 @@ Windows specific notes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 On Windows, it is important to change the default UDP behavior such that
 1500 byte packets still travel through the fast path of the sockets stack.
-
 FastSendDatagramThreshold registry key to change documented here:
 
 * http://www.microsoft.com/windows/windowsmedia/howto/articles/optimize_web.aspx#appendix_e
diff --git a/host/lib/transport/CMakeLists.txt b/host/lib/transport/CMakeLists.txt
index b1821956c..866ade75f 100644
--- a/host/lib/transport/CMakeLists.txt
+++ b/host/lib/transport/CMakeLists.txt
@@ -79,6 +79,15 @@ SET_SOURCE_FILES_PROPERTIES(
     PROPERTIES COMPILE_DEFINITIONS "${IF_ADDRS_DEFS}"
 )
 
+########################################################################
+# Setup UDP
+########################################################################
+IF(WIN32)
+    LIBUHD_APPEND_SOURCES(${CMAKE_CURRENT_SOURCE_DIR}/udp_wsa_zero_copy.cpp)
+ELSE()
+    LIBUHD_APPEND_SOURCES(${CMAKE_CURRENT_SOURCE_DIR}/udp_zero_copy.cpp)
+ENDIF()
+
 #On windows, the boost asio implementation uses the winsock2 library.
 #Note: we exclude the .lib extension for cygwin and mingw platforms.
 IF(WIN32)
@@ -97,6 +106,5 @@ LIBUHD_APPEND_SOURCES(
     ${CMAKE_CURRENT_SOURCE_DIR}/buffer_pool.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/if_addrs.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/udp_simple.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/udp_zero_copy.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/usb_zero_copy_wrapper.cpp
 )
diff --git a/host/lib/transport/udp_wsa_zero_copy.cpp b/host/lib/transport/udp_wsa_zero_copy.cpp
new file mode 100644
index 000000000..ccfed38ea
--- /dev/null
+++ b/host/lib/transport/udp_wsa_zero_copy.cpp
@@ -0,0 +1,281 @@
+//
+// Copyright 2010-2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "udp_common.hpp"
+#include <uhd/transport/udp_zero_copy.hpp>
+#include <uhd/transport/udp_simple.hpp> //mtu
+#include <uhd/transport/bounded_buffer.hpp>
+#include <uhd/transport/buffer_pool.hpp>
+#include <uhd/utils/msg.hpp>
+#include <uhd/utils/log.hpp>
+#include <boost/format.hpp>
+#include <vector>
+
+using namespace uhd;
+using namespace uhd::transport;
+namespace asio = boost::asio;
+
+//Default frame count for the send and the recv buffer pools, used when the device hints do not override it
+static const size_t DEFAULT_NUM_FRAMES = 32;
+
+/***********************************************************************
+ * Winsock lifetime guard: WSAStartup on construction, WSACleanup on destruction
+ **********************************************************************/
+struct uhd_wsa_control{
+    uhd_wsa_control(void){
+        WSADATA wsaData;
+        UHD_ASSERT_THROW(WSAStartup(MAKEWORD(2, 2), &wsaData) == 0); //request winsock 2.2, throw if it cannot be started
+    }
+
+    ~uhd_wsa_control(void){
+        WSACleanup(); //only reached when the constructor (and thus WSAStartup) succeeded
+    }
+};
+
+/***********************************************************************
+ * Reusable managed receiver buffer:
+ *  - Initialize with frame memory and the queue that collects free buffers.
+ *  - Call get_new with a length in bytes to hand it out; release re-queues it.
+ **********************************************************************/
+class udp_zero_copy_asio_mrb : public managed_recv_buffer{
+public:
+    udp_zero_copy_asio_mrb(void *mem, bounded_buffer<udp_zero_copy_asio_mrb *> &pending):
+        _mem(mem), _len(0), _pending(pending){/* NOP */}
+
+    void release(void){
+        if (_len == 0) return; //not handed out (or already released): nothing to re-queue
+        _len = 0; //reset before publishing: once queued, another thread may pop it and call get_new
+        _pending.push_with_haste(this);
+    }
+
+    sptr get_new(size_t len){
+        _len = len; //a non-zero length marks the buffer as handed out
+        return make_managed_buffer(this);
+    }
+
+    template <class T> T cast(void) const{return static_cast<T>(_mem);} //typed pointer to the frame memory
+
+private:
+    const void *get_buff(void) const{return _mem;}
+    size_t get_size(void) const{return _len;}
+
+    void *_mem;
+    size_t _len;
+    bounded_buffer<udp_zero_copy_asio_mrb *> &_pending;
+};
+
+/***********************************************************************
+ * Reusable managed send buffer:
+ *  - committing the buffer starts the asynchronous (overlapped) socket send
+ *  - getting a new buffer performs the blocking wait for completion
+ **********************************************************************/
+class udp_zero_copy_asio_msb : public managed_send_buffer{
+public:
+    udp_zero_copy_asio_msb(void *mem, SOCKET sock_fd, const size_t frame_size):
+        _sock_fd(sock_fd), _frame_size(frame_size), _committed(false)
+    {
+        _wsa_buff.buf = reinterpret_cast<char *>(mem);
+        ZeroMemory(&_overlapped, sizeof(_overlapped));
+        _overlapped.hEvent = WSACreateEvent();
+        UHD_ASSERT_THROW(_overlapped.hEvent != WSA_INVALID_EVENT);
+        this->commit(0); //makes buffer available via get_new
+    }
+
+    ~udp_zero_copy_asio_msb(void){
+        WSACloseEvent(_overlapped.hEvent);
+    }
+
+    UHD_INLINE void commit(size_t len){
+        if (_committed) return;
+        _committed = true;
+        _wsa_buff.len = len;
+        const bool failed = (len != 0) && (WSASend(_sock_fd, &_wsa_buff, 1, NULL, 0, &_overlapped, NULL) == SOCKET_ERROR) && (WSAGetLastError() != WSA_IO_PENDING);
+        if (len == 0 || failed) WSASetEvent(_overlapped.hEvent); //nothing in flight: signal by hand so get_new does not stall on this frame
+    }
+
+    UHD_INLINE sptr get_new(const double timeout, size_t &index){
+        const DWORD result = WSAWaitForMultipleEvents(
+            1, &_overlapped.hEvent, true, DWORD(timeout*1000), true
+        );
+        if (result != WSA_WAIT_EVENT_0) return managed_send_buffer::sptr(); //timeout, wait failure, or alertable wakeup: buffer not ready
+        index++; //advances the caller's buffer
+
+        WSAResetEvent(_overlapped.hEvent);
+        _committed = false;
+        _wsa_buff.len = _frame_size;
+        return make_managed_buffer(this);
+    }
+
+private:
+    void *get_buff(void) const{return _wsa_buff.buf;}
+    size_t get_size(void) const{return _wsa_buff.len;}
+
+    SOCKET _sock_fd; //native handle type: an int would truncate the UINT_PTR-sized SOCKET on 64-bit builds
+    const size_t _frame_size;
+    bool _committed;
+    WSAOVERLAPPED _overlapped;
+    WSABUF _wsa_buff;
+};
+
+/***********************************************************************
+ * Zero Copy UDP implementation with WSA:
+ *
+ *   This is not a true zero copy implementation as each
+ *   send and recv requires a copy operation to/from userspace.
+ *
+ *   For receive, use plain recv() calls on a non-blocking socket.
+ *   This has better performance than the overlapped IO.
+ *   For send, use overlapped IO to submit async sends.
+ **********************************************************************/
+class udp_zero_copy_wsa_impl : public udp_zero_copy{
+public:
+    typedef boost::shared_ptr<udp_zero_copy_wsa_impl> sptr;
+
+    udp_zero_copy_wsa_impl(
+        const std::string &addr,
+        const std::string &port,
+        const device_addr_t &hints
+    ):
+        _recv_frame_size(size_t(hints.cast<double>("recv_frame_size", udp_simple::mtu))),
+        _num_recv_frames(size_t(hints.cast<double>("num_recv_frames", DEFAULT_NUM_FRAMES))),
+        _send_frame_size(size_t(hints.cast<double>("send_frame_size", udp_simple::mtu))),
+        _num_send_frames(size_t(hints.cast<double>("num_send_frames", DEFAULT_NUM_FRAMES))),
+        _recv_buffer_pool(buffer_pool::make(_num_recv_frames, _recv_frame_size)),
+        _send_buffer_pool(buffer_pool::make(_num_send_frames, _send_frame_size)),
+        _pending_recv_buffs(_num_recv_frames),
+        _next_send_buff_index(0)
+    {
+        UHD_MSG(status) << boost::format("Creating WSA UDP transport for %s:%s") % addr % port << std::endl;
+        static uhd_wsa_control uhd_wsa; //makes wsa start happen via lazy initialization
+
+        UHD_ASSERT_THROW(_num_send_frames <= WSA_MAXIMUM_WAIT_EVENTS);
+
+        //resolve the address
+        asio::io_service io_service;
+        asio::ip::udp::resolver resolver(io_service);
+        asio::ip::udp::resolver::query query(asio::ip::udp::v4(), addr, port);
+        asio::ip::udp::endpoint receiver_endpoint = *resolver.resolve(query);
+
+        //create the socket
+        _sock_fd = WSASocket(AF_INET, SOCK_DGRAM, IPPROTO_UDP, NULL, 0, WSA_FLAG_OVERLAPPED);
+        if (_sock_fd == INVALID_SOCKET){
+            const DWORD error = WSAGetLastError();
+            throw uhd::os_error(str(boost::format("WSASocket() failed with error %d") % error));
+        }
+
+        //set the socket non-blocking for recv
+        u_long mode = 1;
+        ioctlsocket(_sock_fd, FIONBIO, &mode);
+
+        //resize the socket buffers
+        const int recv_buff_size = int(hints.cast<double>("recv_buff_size", 0.0));
+        const int send_buff_size = int(hints.cast<double>("send_buff_size", 0.0));
+        if (recv_buff_size > 0) setsockopt(_sock_fd, SOL_SOCKET, SO_RCVBUF, (const char *)&recv_buff_size, sizeof(recv_buff_size));
+        if (send_buff_size > 0) setsockopt(_sock_fd, SOL_SOCKET, SO_SNDBUF, (const char *)&send_buff_size, sizeof(send_buff_size));
+
+        //connect the socket so we can send/recv
+        const asio::ip::udp::endpoint::data_type &servaddr = *receiver_endpoint.data();
+        if (WSAConnect(_sock_fd, (const struct sockaddr *)&servaddr, sizeof(servaddr), NULL, NULL, NULL, NULL) != 0){
+            const DWORD error = WSAGetLastError();
+            closesocket(_sock_fd);
+            throw uhd::os_error(str(boost::format("WSAConnect() failed with error %d") % error));
+        }
+
+        //allocate re-usable managed receive buffers
+        for (size_t i = 0; i < get_num_recv_frames(); i++){
+            _mrb_pool.push_back(boost::shared_ptr<udp_zero_copy_asio_mrb>(
+                new udp_zero_copy_asio_mrb(_recv_buffer_pool->at(i), _pending_recv_buffs)
+            ));
+            _pending_recv_buffs.push_with_haste(_mrb_pool.back().get());
+        }
+
+        //allocate re-usable managed send buffers
+        for (size_t i = 0; i < get_num_send_frames(); i++){
+            _msb_pool.push_back(boost::shared_ptr<udp_zero_copy_asio_msb>(
+                new udp_zero_copy_asio_msb(_send_buffer_pool->at(i), _sock_fd, get_send_frame_size())
+            ));
+        }
+    }
+
+    ~udp_zero_copy_wsa_impl(void){
+        closesocket(_sock_fd);
+    }
+
+    /*******************************************************************
+     * Receive implementation:
+     *
+     * Perform a non-blocking receive for performance,
+     * and then fall back to waiting on the socket with a timeout.
+     * Return the managed receive buffer with the new length,
+     * or a null sptr (buffer re-queued) on timeout or receive error.
+     * When the caller releases the managed buffer, it goes back into the queue.
+     ******************************************************************/
+    managed_recv_buffer::sptr get_recv_buff(double timeout){
+        udp_zero_copy_asio_mrb *mrb = NULL;
+        if (_pending_recv_buffs.pop_with_timed_wait(mrb, timeout)){
+
+            //fast path: the socket is non-blocking, so this returns immediately
+            ssize_t ret = ::recv(_sock_fd, mrb->cast<char *>(), _recv_frame_size, 0);
+            //slow path: nothing was received, wait for the socket and try once more
+            if (ret <= 0 && wait_for_recv_ready(_sock_fd, timeout)){
+                ret = ::recv(_sock_fd, mrb->cast<char *>(), _recv_frame_size, 0);
+            }
+            if (ret > 0) return mrb->get_new(ret); //never hand out a SOCKET_ERROR (-1) or zero length
+            _pending_recv_buffs.push_with_haste(mrb); //timeout or error: return the managed buffer to the queue
+        }
+        return managed_recv_buffer::sptr();
+    }
+
+    size_t get_num_recv_frames(void) const {return _num_recv_frames;}
+    size_t get_recv_frame_size(void) const {return _recv_frame_size;}
+
+    /*******************************************************************
+     * Send implementation:
+     * Block on the managed buffer's get call and advance the index.
+     ******************************************************************/
+    managed_send_buffer::sptr get_send_buff(double timeout){
+        if (_next_send_buff_index == _num_send_frames) _next_send_buff_index = 0;
+        return _msb_pool[_next_send_buff_index]->get_new(timeout, _next_send_buff_index);
+    }
+
+    size_t get_num_send_frames(void) const {return _num_send_frames;}
+    size_t get_send_frame_size(void) const {return _send_frame_size;}
+
+private:
+    //memory management -> buffers and fifos
+    const size_t _recv_frame_size, _num_recv_frames;
+    const size_t _send_frame_size, _num_send_frames;
+    buffer_pool::sptr _recv_buffer_pool, _send_buffer_pool;
+    std::vector<boost::shared_ptr<udp_zero_copy_asio_msb> > _msb_pool;
+    std::vector<boost::shared_ptr<udp_zero_copy_asio_mrb> > _mrb_pool;
+    bounded_buffer<udp_zero_copy_asio_mrb *> _pending_recv_buffs;
+    size_t _next_send_buff_index;
+
+    //socket guts
+    SOCKET                  _sock_fd;
+};
+
+/***********************************************************************
+ * Factory: builds the WSA flavor of the UDP zero copy transport
+ **********************************************************************/
+udp_zero_copy::sptr udp_zero_copy::make(
+    const std::string &addr, const std::string &port,
+    const device_addr_t &hints
+){
+    udp_zero_copy::sptr transport(new udp_zero_copy_wsa_impl(addr, port, hints));
+    return transport;
+}
-- 
cgit v1.2.3