11 files changed, 925 insertions, 2 deletions
diff --git a/host/include/uhd/CMakeLists.txt b/host/include/uhd/CMakeLists.txt
index ad528c9fb..fee1270e9 100644
--- a/host/include/uhd/CMakeLists.txt
+++ b/host/include/uhd/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright 2010 Ettus Research LLC
+# Copyright 2010-2011 Ettus Research LLC
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -23,6 +23,7 @@ ADD_SUBDIRECTORY(utils)
 
 INSTALL(FILES
     config.hpp
+    convert.hpp
     device.hpp
     device.ipp
     version.hpp
diff --git a/host/include/uhd/convert.hpp b/host/include/uhd/convert.hpp
new file mode 100644
index 000000000..488cba98e
--- /dev/null
+++ b/host/include/uhd/convert.hpp
@@ -0,0 +1,96 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#ifndef INCLUDED_UHD_CONVERT_HPP
+#define INCLUDED_UHD_CONVERT_HPP
+
+#include <uhd/config.hpp>
+#include <uhd/types/io_type.hpp>
+#include <uhd/types/otw_type.hpp>
+#include <boost/function.hpp>
+#include <string>
+#include <vector>
+
+namespace uhd{ namespace convert{
+
+    typedef std::vector<void *> output_type;
+    typedef std::vector<const void *> input_type;
+    typedef boost::function<void(input_type&, output_type&, size_t)> function_type;
+
+    /*!
+     * Describe the priority of a converter function.
+     * A higher priority function takes precedence.
+     * The general case function are the lowest.
+     * Next comes the liborc implementations.
+     * Custom intrinsics implementations are highest.
+     */
+    enum priority_type{
+        PRIORITY_GENERAL = 0,
+        PRIORITY_LIBORC = 1,
+        PRIORITY_CUSTOM = 2,
+        PRIORITY_EMPTY = -1,
+    };
+
+    /*!
+     * Register a converter function that converts cpu type to/from otw type.
+     * \param markup representing the signature
+     * \param fcn a pointer to the converter
+     * \param prio the function priority
+     */
+    UHD_API void register_converter(
+        const std::string &markup,
+        function_type fcn,
+        priority_type prio
+    );
+
+    /*!
+     * Convert IO samples to OWT samples:
+     *
+     * \param io_type the type of the input samples
+     * \param otw_type the type of the output samples
+     * \param input_buffs input buffers to read samples
+     * \param output_buffs output buffers to write samples
+     * \param nsamps_per_io_buff samples per IO buffer
+     */
+    UHD_API void io_type_to_otw_type(
+        const io_type_t &io_type,
+        const otw_type_t &otw_type,
+        input_type &input_buffs,
+        output_type &output_buffs,
+        size_t nsamps_per_io_buff
+    );
+
+    /*!
+     * Convert OTW samples to IO samples:
+     *
+     * \param io_type the type of the output samples
+     * \param otw_type the type of the input samples
+     * \param input_buffs input buffers to read samples
+     * \param output_buffs output buffers to write samples
+     * \param nsamps_per_io_buff samples per IO buffer
+     */
+    UHD_API void otw_type_to_io_type(
+        const io_type_t &io_type,
+        const otw_type_t &otw_type,
+        input_type &input_buffs,
+        output_type &output_buffs,
+        size_t nsamps_per_io_buff
+    );
+
+}} //namespace
+
+#endif /* INCLUDED_UHD_CONVERT_HPP */
diff --git a/host/lib/CMakeLists.txt b/host/lib/CMakeLists.txt
index 498841561..9ab121df5 100644
--- a/host/lib/CMakeLists.txt
+++ b/host/lib/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright 2010 Ettus Research LLC
+# Copyright 2010-2011 Ettus Research LLC
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -89,6 +89,7 @@ ENDMACRO(INCLUDE_SUBDIRECTORY)
 # Include subdirectories (different than add)
 ########################################################################
 INCLUDE_SUBDIRECTORY(ic_reg_maps)
+INCLUDE_SUBDIRECTORY(convert)
 INCLUDE_SUBDIRECTORY(transport)
 INCLUDE_SUBDIRECTORY(usrp)
 INCLUDE_SUBDIRECTORY(utils)
diff --git a/host/lib/convert/CMakeLists.txt b/host/lib/convert/CMakeLists.txt
new file mode 100644
index 000000000..9324a94b0
--- /dev/null
+++ b/host/lib/convert/CMakeLists.txt
@@ -0,0 +1,66 @@
+#
+# Copyright 2011 Ettus Research LLC
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+########################################################################
+# This file included, use CMake directory variables
+########################################################################
+INCLUDE(CheckIncludeFileCXX)
+MESSAGE(STATUS "")
+
+########################################################################
+# Check for SIMD headers
+########################################################################
+CHECK_INCLUDE_FILE_CXX(emmintrin.h HAVE_EMMINTRIN_H)
+IF(HAVE_EMMINTRIN_H)
+    LIBUHD_APPEND_SOURCES(
+        ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_sse2.cpp
+    )
+ENDIF(HAVE_EMMINTRIN_H)
+
+CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_ARM_NEON_H)
+IF(HAVE_ARM_NEON_H)
+    LIBUHD_APPEND_SOURCES(
+        ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_neon.cpp
+    )
+ENDIF(HAVE_ARM_NEON_H)
+
+########################################################################
+# Convert types generation
+########################################################################
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+
+LIBUHD_PYTHON_GEN_SOURCE(
+    ${CMAKE_CURRENT_SOURCE_DIR}/gen_convert_impl.py
+    ${CMAKE_CURRENT_BINARY_DIR}/convert_impl.hpp
+)
+
+INCLUDE(AddFileDependencies)
+ADD_FILE_DEPENDENCIES(
+    ${CMAKE_CURRENT_SOURCE_DIR}/convert.cpp
+    ${CMAKE_CURRENT_BINARY_DIR}/convert_impl.hpp
+)
+
+LIBUHD_PYTHON_GEN_SOURCE(
+    ${CMAKE_CURRENT_SOURCE_DIR}/gen_convert_general.py
+    ${CMAKE_CURRENT_BINARY_DIR}/convert_general.cpp
+)
+
+LIBUHD_APPEND_SOURCES(
+    ${CMAKE_CURRENT_SOURCE_DIR}/convert.cpp
+    ${CMAKE_CURRENT_BINARY_DIR}/convert_general.cpp
+)
diff --git a/host/lib/convert/convert.cpp b/host/lib/convert/convert.cpp
new file mode 100644
index 000000000..f635a1040
--- /dev/null
+++ b/host/lib/convert/convert.cpp
@@ -0,0 +1,117 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include <uhd/convert.hpp>
+#include <uhd/utils/static.hpp>
+#include <uhd/utils/exception.hpp>
+#include <iostream>
+
+using namespace uhd;
+
+#include "convert_impl.hpp"
+
+static const bool debug = false;
+
+/***********************************************************************
+ * Define types for the function tables
+ **********************************************************************/
+struct fcn_table_entry_type{
+    convert::priority_type prio;
+    convert::function_type fcn;
+    fcn_table_entry_type(void)
+    : prio(convert::PRIORITY_EMPTY), fcn(NULL){
+        /* NOP */
+    }
+};
+typedef std::vector<fcn_table_entry_type> fcn_table_type;
+
+/***********************************************************************
+ * Setup the table registry
+ **********************************************************************/
+UHD_SINGLETON_FCN(fcn_table_type, get_cpu_to_otw_table);
+UHD_SINGLETON_FCN(fcn_table_type, get_otw_to_cpu_table);
+
+fcn_table_type &get_table(dir_type dir){
+    switch(dir){
+    case DIR_OTW_TO_CPU: return get_otw_to_cpu_table();
+    case DIR_CPU_TO_OTW: return get_cpu_to_otw_table();
+    }
+    UHD_THROW_INVALID_CODE_PATH();
+}
+
+/***********************************************************************
+ * The registry functions
+ **********************************************************************/
+void uhd::convert::register_converter(
+    const std::string &markup,
+    function_type fcn,
+    priority_type prio
+){
+    //extract the predicate and direction from the markup
+    dir_type dir;
+    pred_type pred = make_pred(markup, dir);
+
+    //get a reference to the function table
+    fcn_table_type &table = get_table(dir);
+
+    //resize the table so that its at least pred+1
+    if (table.size() <= pred) table.resize(pred+1);
+
+    //register the function if higher priority
+    if (table[pred].prio < prio){
+        table[pred].fcn = fcn;
+        table[pred].prio = prio;
+    }
+
+    //----------------------------------------------------------------//
+    if (debug) std::cout << "register_converter: " << markup << std::endl
+        << "    prio: " << prio << std::endl
+        << "    pred: " << pred << std::endl
+        << "    dir: " << dir << std::endl
+        << std::endl
+    ;
+    //----------------------------------------------------------------//
+}
+
+/***********************************************************************
+ * The converter functions
+ **********************************************************************/
+void uhd::convert::io_type_to_otw_type(
+    const io_type_t &io_type,
+    const otw_type_t &otw_type,
+    input_type &input_buffs,
+    output_type &output_buffs,
+    size_t nsamps_per_io_buff
+){
+    pred_type pred = make_pred(io_type, otw_type, input_buffs.size(), output_buffs.size());
+    fcn_table_type table = get_cpu_to_otw_table();
+    function_type fcn = table.at(pred).fcn;
+    fcn(input_buffs, output_buffs, nsamps_per_io_buff);
+}
+
+void uhd::convert::otw_type_to_io_type(
+    const io_type_t &io_type,
+    const otw_type_t &otw_type,
+    input_type &input_buffs,
+    output_type &output_buffs,
+    size_t nsamps_per_io_buff
+){
+    pred_type pred = make_pred(io_type, otw_type, input_buffs.size(), output_buffs.size());
+    fcn_table_type table = get_otw_to_cpu_table();
+    function_type fcn = table.at(pred).fcn;
+    fcn(input_buffs, output_buffs, nsamps_per_io_buff);
+}
diff --git a/host/lib/convert/convert_common.hpp b/host/lib/convert/convert_common.hpp
new file mode 100644
index 000000000..1a653a56f
--- /dev/null
+++ b/host/lib/convert/convert_common.hpp
@@ -0,0 +1,90 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#ifndef INCLUDED_LIBUHD_CONVERT_COMMON_HPP
+#define INCLUDED_LIBUHD_CONVERT_COMMON_HPP
+
+#include <uhd/convert.hpp>
+#include <uhd/utils/static.hpp>
+#include <boost/cstdint.hpp>
+#include <complex>
+
+#define DECLARE_CONVERTER(fcn, prio) \
+    static void fcn( \
+        uhd::convert::input_type &inputs, \
+        uhd::convert::output_type &outputs, \
+        size_t nsamps \
+    ); \
+    UHD_STATIC_BLOCK(register_##fcn##_##prio){ \
+        uhd::convert::register_converter(#fcn, fcn, prio); \
+    } \
+    static void fcn( \
+        uhd::convert::input_type &inputs, \
+        uhd::convert::output_type &outputs, \
+        size_t nsamps \
+    )
+
+/***********************************************************************
+ * Typedefs
+ **********************************************************************/
+typedef std::complex<float>          fc32_t;
+typedef std::complex<boost::int16_t> sc16_t;
+typedef boost::uint32_t              item32_t;
+
+/***********************************************************************
+ * Convert complex short buffer to items32
+ **********************************************************************/
+static UHD_INLINE item32_t sc16_to_item32(sc16_t num){
+    boost::uint16_t real = num.real();
+    boost::uint16_t imag = num.imag();
+    return (item32_t(real) << 16) | (item32_t(imag) << 0);
+}
+
+/***********************************************************************
+ * Convert items32 buffer to complex short
+ **********************************************************************/
+static UHD_INLINE sc16_t item32_to_sc16(item32_t item){
+    return sc16_t(
+        boost::int16_t(item >> 16),
+        boost::int16_t(item >> 0)
+    );
+}
+
+/***********************************************************************
+ * Convert complex float buffer to items32 (no swap)
+ **********************************************************************/
+static const float shorts_per_float = float(32767);
+
+static UHD_INLINE item32_t fc32_to_item32(fc32_t num){
+    boost::uint16_t real = boost::int16_t(num.real()*shorts_per_float);
+    boost::uint16_t imag = boost::int16_t(num.imag()*shorts_per_float);
+    return (item32_t(real) << 16) | (item32_t(imag) << 0);
+}
+
+/***********************************************************************
+ * Convert items32 buffer to complex float
+ **********************************************************************/
+static const float floats_per_short = float(1.0/shorts_per_float);
+
+static UHD_INLINE fc32_t item32_to_fc32(item32_t item){
+    return fc32_t(
+        float(boost::int16_t(item >> 16)*floats_per_short),
+        float(boost::int16_t(item >> 0)*floats_per_short)
+    );
+}
+
+#endif /* INCLUDED_LIBUHD_CONVERT_COMMON_HPP */
diff --git a/host/lib/convert/convert_general.cpp b/host/lib/convert/convert_general.cpp
new file mode 100644
index 000000000..5e52acea2
--- /dev/null
+++ b/host/lib/convert/convert_general.cpp
@@ -0,0 +1,63 @@
+//
+// Copyright 2010 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "convert_common.hpp"
+#include <uhd/utils/byteswap.hpp>
+
+using namespace uhd::convert;
+
+/***********************************************************************
+ * Convert complex short buffer to items32
+ **********************************************************************/
+DECLARE_CONVERTER(convert_sc16_1_to_item32_1_nswap, PRIORITY_GENERAL){
+    const sc16_t *input = reinterpret_cast<const sc16_t *>(inputs[0]);
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    for (size_t i = 0; i < nsamps; i++){
+        output[i] = sc16_to_item32(input[i]);
+    }
+}
+
+DECLARE_CONVERTER(convert_sc16_1_to_item32_1_bswap, PRIORITY_GENERAL){
+    const sc16_t *input = reinterpret_cast<const sc16_t *>(inputs[0]);
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    for (size_t i = 0; i < nsamps; i++){
+        output[i] = uhd::byteswap(sc16_to_item32(input[i]));
+    }
+}
+
+/***********************************************************************
+ * Convert items32 buffer to complex short
+ **********************************************************************/
+DECLARE_CONVERTER(convert_item32_1_to_sc16_1_nswap, PRIORITY_GENERAL){
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    sc16_t *output = reinterpret_cast<sc16_t *>(outputs[0]);
+
+    for (size_t i = 0; i < nsamps; i++){
+        output[i] = item32_to_sc16(input[i]);
+    }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_sc16_1_bswap, PRIORITY_GENERAL){
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    sc16_t *output = reinterpret_cast<sc16_t *>(outputs[0]);
+
+    for (size_t i = 0; i < nsamps; i++){
+        output[i] = item32_to_sc16(uhd::byteswap(input[i]));
+    }
+}
diff --git a/host/lib/convert/convert_with_neon.cpp b/host/lib/convert/convert_with_neon.cpp
new file mode 100644
index 000000000..1ed841125
--- /dev/null
+++ b/host/lib/convert/convert_with_neon.cpp
@@ -0,0 +1,62 @@
+//
+// Copyright 2010-2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "convert_common.hpp"
+#include <uhd/utils/byteswap.hpp>
+#include <arm_neon.h>
+
+using namespace uhd::convert;
+
+DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){
+    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    size_t i;
+
+    float32x4_t Q0 = vdupq_n_f32(shorts_per_float);
+    for (i=0; i < (nsamps & ~0x03); i+=2) {
+        float32x4_t Q1 = vld1q_f32(reinterpret_cast<const float *>(&input[i]));
+        float32x4_t Q2 = vmulq_f32(Q1, Q0);
+        int32x4_t Q3 = vcvtq_s32_f32(Q2);
+        int16x4_t D8 = vmovn_s32(Q3);
+        int16x4_t D9 = vrev32_s16(D8);
+        vst1_s16((reinterpret_cast<int16_t *>(&output[i])), D9);
+    }
+
+    for (; i < nsamps; i++)
+        output[i] = fc32_to_item32(input[i]);
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
+
+    size_t i;
+
+    float32x4_t Q1 = vdupq_n_f32(floats_per_short);
+    for (i=0; i < (nsamps & ~0x03); i+=2) {
+        int16x4_t D0 = vld1_s16(reinterpret_cast<const int16_t *>(&input[i]));
+        int16x4_t D1 = vrev32_s16(D0);
+        int32x4_t Q2 = vmovl_s16(D1);
+        float32x4_t Q3 = vcvtq_f32_s32(Q2);
+        float32x4_t Q4 = vmulq_f32(Q3, Q1);
+        vst1q_f32((reinterpret_cast<float *>(&output[i])), Q4);
+    }
+
+    for (; i < nsamps; i++)
+        output[i] = item32_to_fc32(input[i]);
+}
diff --git a/host/lib/convert/convert_with_sse2.cpp b/host/lib/convert/convert_with_sse2.cpp
new file mode 100644
index 000000000..8d5a8a6a5
--- /dev/null
+++ b/host/lib/convert/convert_with_sse2.cpp
@@ -0,0 +1,148 @@
+//
+// Copyright 2010-2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "convert_common.hpp"
+#include <uhd/utils/byteswap.hpp>
+#include <emmintrin.h>
+
+using namespace uhd::convert;
+
+DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){
+    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    __m128 scalar = _mm_set_ps1(shorts_per_float);
+
+    //convert blocks of samples with intrinsics
+    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
+        //load from input
+        __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0));
+        __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2));
+
+        //convert and scale
+        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
+        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
+
+        //pack + swap 16-bit pairs
+        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
+        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
+        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
+
+        //store to output
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);
+    }
+
+    //convert remainder
+    for (; i < nsamps; i++){
+        output[i] = fc32_to_item32(input[i]);
+    }
+}
+
+DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){
+    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    __m128 scalar = _mm_set_ps1(shorts_per_float);
+
+    //convert blocks of samples with intrinsics
+    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
+        //load from input
+        __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0));
+        __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2));
+
+        //convert and scale
+        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
+        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
+
+        //pack + byteswap -> byteswap 16 bit words
+        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
+        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
+
+        //store to output
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);
+    }
+
+    //convert remainder
+    for (; i < nsamps; i++){
+        output[i] = uhd::byteswap(fc32_to_item32(input[i]));
+    }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
+
+    __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16));
+    __m128i zeroi = _mm_setzero_si128();
+
+    //convert blocks of samples with intrinsics
+    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
+        //load from input
+        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
+
+        //unpack + swap 16-bit pairs
+        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
+        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
+        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
+        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
+
+        //convert and scale
+        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);
+        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);
+
+        //store to output
+        _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo);
+        _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi);
+    }
+
+    //convert remainder
+    for (; i < nsamps; i++){
+        output[i] = item32_to_fc32(input[i]);
+    }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
+
+    __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16));
+    __m128i zeroi = _mm_setzero_si128();
+
+    //convert blocks of samples with intrinsics
+    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
+        //load from input
+        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
+
+        //byteswap + unpack -> byteswap 16 bit words
+        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
+        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
+        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
+
+        //convert and scale
+        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);
+        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);
+
+        //store to output
+        _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo);
+        _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi);
+    }
+
+    //convert remainder
+    for (; i < nsamps; i++){
+        output[i] = item32_to_fc32(uhd::byteswap(input[i]));
+    }
+}
diff --git a/host/lib/convert/gen_convert_general.py b/host/lib/convert/gen_convert_general.py
new file mode 100644
index 000000000..47c4cd7d0
--- /dev/null
+++ b/host/lib/convert/gen_convert_general.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+#
+# Copyright 2011 Ettus Research LLC
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+TMPL_HEADER = """
+#import time
+/***********************************************************************
+ * This file was generated by $file on $time.strftime("%c")
+ **********************************************************************/
+
+\#include "convert_common.hpp"
+\#include <uhd/utils/byteswap.hpp>
+
+using namespace uhd::convert;
+"""
+
+TMPL_CONV_TO_FROM_ITEM32_1 = """
+DECLARE_CONVERTER(convert_$(cpu_type)_1_to_item32_1_$(swap), PRIORITY_GENERAL){
+    const $(cpu_type)_t *input = reinterpret_cast<const $(cpu_type)_t *>(inputs[0]);
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    for (size_t i = 0; i < nsamps; i++){
+        output[i] = $(swap_fcn)($(cpu_type)_to_item32(input[i]));
+    }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_$(cpu_type)_1_$(swap), PRIORITY_GENERAL){
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    $(cpu_type)_t *output = reinterpret_cast<$(cpu_type)_t *>(outputs[0]);
+
+    for (size_t i = 0; i < nsamps; i++){
+        output[i] = item32_to_$(cpu_type)($(swap_fcn)(input[i]));
+    }
+}
+"""
+TMPL_CONV_TO_FROM_ITEM32_X = """
+DECLARE_CONVERTER(convert_$(cpu_type)_$(width)_to_item32_1_$(swap), PRIORITY_GENERAL){
+    #for $w in range($width)
+    const $(cpu_type)_t *input$(w) = reinterpret_cast<const $(cpu_type)_t *>(inputs[$(w)]);
+    #end for
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    for (size_t i = 0, j = 0; i < nsamps; i++){
+        #for $w in range($width)
+        output[j++] = $(swap_fcn)($(cpu_type)_to_item32(input$(w)[i]));
+        #end for
+    }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_$(cpu_type)_$(width)_$(swap), PRIORITY_GENERAL){
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    #for $w in range($width)
+    $(cpu_type)_t *output$(w) = reinterpret_cast<$(cpu_type)_t *>(outputs[$(w)]);
+    #end for
+
+    for (size_t i = 0, j = 0; i < nsamps; i++){
+        #for $w in range($width)
+        output$(w)[i] = item32_to_$(cpu_type)($(swap_fcn)(input[j++]));
+        #end for
+    }
+}
+"""
+
+def parse_tmpl(_tmpl_text, **kwargs):
+    from Cheetah.Template import Template
+    return str(Template(_tmpl_text, kwargs))
+
+if __name__ == '__main__':
+    import sys, os
+    file = os.path.basename(__file__)
+    output = parse_tmpl(TMPL_HEADER, file=file)
+    for width in 1, 2, 3, 4:
+        for swap, swap_fcn in (('nswap', ''), ('bswap', 'uhd::byteswap')):
+            for cpu_type in 'fc32', 'sc16':
+                output += parse_tmpl(
+                    TMPL_CONV_TO_FROM_ITEM32_1 if width == 1 else TMPL_CONV_TO_FROM_ITEM32_X,
+                    width=width, swap=swap, swap_fcn=swap_fcn, cpu_type=cpu_type
+                )
+    open(sys.argv[1], 'w').write(output)
diff --git a/host/lib/convert/gen_convert_impl.py b/host/lib/convert/gen_convert_impl.py
new file mode 100644
index 000000000..71095ab97
--- /dev/null
+++ b/host/lib/convert/gen_convert_impl.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+#
+# Copyright 2010-2011 Ettus Research LLC
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+TMPL_TEXT = """
+#import time
+/***********************************************************************
+ * This file was generated by $file on $time.strftime("%c")
+ **********************************************************************/
+typedef size_t pred_type;
+
+\#include <uhd/utils/algorithm.hpp>
+\#include <boost/lexical_cast.hpp>
+\#include <boost/detail/endian.hpp>
+\#include <stdexcept>
+
+enum dir_type{
+    DIR_OTW_TO_CPU = 0,
+    DIR_CPU_TO_OTW = 1
+};
+
+pred_type make_pred(const std::string &markup, dir_type &dir){
+    pred_type pred = 0;
+
+    try{
+        std::vector<std::string> tokens = std::split_string(markup, "_");
+        //token 0 is <convert>
+        std::string inp_type = tokens.at(1);
+        std::string num_inps = tokens.at(2);
+        //token 3 is <to>
+        std::string out_type = tokens.at(4);
+        std::string num_outs = tokens.at(5);
+        std::string swap_type = tokens.at(6);
+
+        std::string cpu_type, otw_type;
+        if (inp_type.find("item") == std::string::npos){
+            cpu_type = inp_type;
+            otw_type = out_type;
+            dir = DIR_CPU_TO_OTW;
+        }
+        else{
+            cpu_type = out_type;
+            otw_type = inp_type;
+            dir = DIR_OTW_TO_CPU;
+        }
+
+        if      (cpu_type == "fc32") pred |= $ph.fc32_p;
+        else if (cpu_type == "sc16") pred |= $ph.sc16_p;
+        else throw std::runtime_error("unhandled io type " + cpu_type);
+
+        if (otw_type == "item32") pred |= $ph.item32_p;
+        else throw std::runtime_error("unhandled otw type " + otw_type);
+
+        int num_inputs = boost::lexical_cast<int>(num_inps);
+        int num_outputs = boost::lexical_cast<int>(num_outs);
+
+        switch(num_inputs*num_outputs){ //FIXME treated as one value
+        case 1: pred |= $ph.chan1_p; break;
+        case 2: pred |= $ph.chan2_p; break;
+        case 3: pred |= $ph.chan3_p; break;
+        case 4: pred |= $ph.chan4_p; break;
+        default: throw std::runtime_error("unhandled number of channels");
+        }
+
+        if      (swap_type == "bswap") pred |= $ph.bswap_p;
+        else if (swap_type == "nswap") pred |= $ph.nswap_p;
+        else throw std::runtime_error("unhandled swap type");
+
+    }
+    catch(...){
+        throw std::runtime_error("convert::make_pred: could not parse markup: " + markup);
+    }
+
+    return pred;
+}
+
+UHD_INLINE pred_type make_pred(
+    const io_type_t &io_type,
+    const otw_type_t &otw_type,
+    size_t num_inputs,
+    size_t num_outputs
+){
+    pred_type pred = 0;
+
+    switch(otw_type.byteorder){
+    \#ifdef BOOST_BIG_ENDIAN
+    case otw_type_t::BO_BIG_ENDIAN:    pred |= $ph.nswap_p; break;
+    case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.bswap_p; break;
+    \#else
+    case otw_type_t::BO_BIG_ENDIAN:    pred |= $ph.bswap_p; break;
+    case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.nswap_p; break;
+    \#endif
+    case otw_type_t::BO_NATIVE:        pred |= $ph.nswap_p; break;
+    default: throw std::runtime_error("unhandled otw byteorder type");
+    }
+
+    switch(otw_type.get_sample_size()){
+    case sizeof(boost::uint32_t): pred |= $ph.item32_p; break;
+    default: throw std::runtime_error("unhandled otw sample size");
+    }
+
+    switch(io_type.tid){
+    case io_type_t::COMPLEX_FLOAT32: pred |= $ph.fc32_p; break;
+    case io_type_t::COMPLEX_INT16:   pred |= $ph.sc16_p; break;
+    default: throw std::runtime_error("unhandled io type id");
+    }
+
+    switch(num_inputs*num_outputs){ //FIXME treated as one value
+    case 1: pred |= $ph.chan1_p; break;
+    case 2: pred |= $ph.chan2_p; break;
+    case 3: pred |= $ph.chan3_p; break;
+    case 4: pred |= $ph.chan4_p; break;
+    default: throw std::runtime_error("unhandled number of channels");
+    }
+
+    return pred;
+}
+"""
+
+def parse_tmpl(_tmpl_text, **kwargs):
+    from Cheetah.Template import Template
+    return str(Template(_tmpl_text, kwargs))
+
+class ph:
+    bswap_p  = 0b00001
+    nswap_p  = 0b00000
+    item32_p = 0b00000
+    sc16_p   = 0b00010
+    fc32_p   = 0b00000
+    chan1_p  = 0b00000
+    chan2_p  = 0b00100
+    chan3_p  = 0b01000
+    chan4_p  = 0b01100
+
+    nbits = 4 #see above
+
+    @staticmethod
+    def has(pred, mask, flag): return (pred & mask) == flag
+
+    @staticmethod
+    def get_swap_type(pred):
+        mask = 0b1
+        if ph.has(pred, mask, ph.bswap_p): return 'bswap'
+        if ph.has(pred, mask, ph.nswap_p): return 'nswap'
+        raise NotImplementedError
+
+    @staticmethod
+    def get_dev_type(pred):
+        mask = 0b0
+        if ph.has(pred, mask, ph.item32_p): return 'item32'
+        raise NotImplementedError
+
+    @staticmethod
+    def get_host_type(pred):
+        mask = 0b10
+        if ph.has(pred, mask, ph.sc16_p): return 'sc16'
+        if ph.has(pred, mask, ph.fc32_p): return 'fc32'
+        raise NotImplementedError
+
+    @staticmethod
+    def get_num_chans(pred):
+        mask = 0b1100
+        if ph.has(pred, mask, ph.chan1_p): return 1
+        if ph.has(pred, mask, ph.chan2_p): return 2
+        if ph.has(pred, mask, ph.chan3_p): return 3
+        if ph.has(pred, mask, ph.chan4_p): return 4
+        raise NotImplementedError
+
+if __name__ == '__main__':
+    import sys, os
+    file = os.path.basename(__file__)
+    open(sys.argv[1], 'w').write(parse_tmpl(TMPL_TEXT, file=file, ph=ph))