5 files changed, 220 insertions, 30 deletions
diff --git a/host/lib/convert/CMakeLists.txt b/host/lib/convert/CMakeLists.txt
index 4cc421884..98907dc29 100644
--- a/host/lib/convert/CMakeLists.txt
+++ b/host/lib/convert/CMakeLists.txt
@@ -91,11 +91,7 @@ IF(CMAKE_COMPILER_IS_GNUCXX)
     UNSET(CMAKE_REQUIRED_FLAGS)
 ENDIF(CMAKE_COMPILER_IS_GNUCXX)
 
-IF(HAVE_ARM_NEON_H AND ENABLE_ORC)
-    #prefer orc support, its faster than the current intrinsic implementations
-    MESSAGE(STATUS "Enabled conversion support with ORC.")
-ELSEIF(HAVE_ARM_NEON_H)
-    MESSAGE(STATUS "Enabled conversion support with NEON intrinsics.")
+IF(HAVE_ARM_NEON_H)
     SET_SOURCE_FILES_PROPERTIES(
         ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_neon.cpp
         PROPERTIES COMPILE_FLAGS "${NEON_FLAGS}"
@@ -117,5 +113,6 @@ LIBUHD_PYTHON_GEN_SOURCE(
 )
 
 LIBUHD_APPEND_SOURCES(
+    ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_tables.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/convert_impl.cpp
 )
diff --git a/host/lib/convert/convert_common.hpp b/host/lib/convert/convert_common.hpp
index 34fb848c3..cc287114a 100644
--- a/host/lib/convert/convert_common.hpp
+++ b/host/lib/convert/convert_common.hpp
@@ -23,28 +23,43 @@
 #include <boost/cstdint.hpp>
 #include <complex>
 
-#define _DECLARE_CONVERTER(fcn, in_form, num_in, out_form, num_out, prio) \
-    static void fcn( \
-        const uhd::convert::input_type &inputs, \
-        const uhd::convert::output_type &outputs, \
-        const size_t nsamps, const double scale_factor \
-    ); \
-    UHD_STATIC_BLOCK(__register_##fcn##_##prio){ \
+#define _DECLARE_CONVERTER(name, in_form, num_in, out_form, num_out, prio) \
+    struct name : public uhd::convert::converter{ /* converter class generated for this declaration */ \
+        static sptr make(void){return sptr(new name());} /* factory that gets registered below */ \
+        double scale_factor; /* last value given to set_scalar; readable from the conversion body */ \
+        void set_scalar(const double s){scale_factor = s;} \
+        void operator()(const input_type&, const output_type&, const size_t); \
+    }; \
+    UHD_STATIC_BLOCK(__register_##name##_##prio){ /* registers the factory under the given formats and priority */ \
         uhd::convert::id_type id; \
         id.input_format = #in_form; \
         id.num_inputs = num_in; \
         id.output_format = #out_form; \
         id.num_outputs = num_out; \
-        uhd::convert::register_converter(id, fcn, prio); \
+        uhd::convert::register_converter(id, &name::make, prio); \
     } \
-    static void fcn( \
-        const uhd::convert::input_type &inputs, \
-        const uhd::convert::output_type &outputs, \
-        const size_t nsamps, const double scale_factor \
+    void name::operator()( /* the braces written after the macro invocation become this body */ \
+        const input_type &inputs, const output_type &outputs, const size_t nsamps \
     )
 
 #define DECLARE_CONVERTER(in_form, num_in, out_form, num_out, prio) \
-    _DECLARE_CONVERTER(__convert_##in_form##_##num_in##_##out_form##_##num_out, in_form, num_in, out_form, num_out, prio)
+    _DECLARE_CONVERTER(__convert_##in_form##_##num_in##_##out_form##_##num_out##_##prio, in_form, num_in, out_form, num_out, prio) //prio is pasted into the generated name; presumably so same-format converters of different priority do not collide
+
+/***********************************************************************
+ * Setup priorities: the prio values handed to register_converter (presumably the largest value registered for an id wins - confirm in convert_impl.cpp)
+ **********************************************************************/
+static const int PRIORITY_GENERAL = 0; //used by the generated general-purpose converters
+static const int PRIORITY_EMPTY = -1; //smaller than every other priority defined here
+
+#ifdef __ARM_NEON__ //ordering when building for ARM NEON: liborc > table > simd
+static const int PRIORITY_LIBORC = 3;
+static const int PRIORITY_SIMD = 1; //neon conversions could be implemented better, orc wins
+static const int PRIORITY_TABLE = 2; //tables require large cache, so they are slower on arm
+#else //ordering everywhere else: table > simd > liborc
+static const int PRIORITY_LIBORC = 1;
+static const int PRIORITY_SIMD = 2;
+static const int PRIORITY_TABLE = 3;
+#endif
 
 /***********************************************************************
  * Typedefs
diff --git a/host/lib/convert/convert_impl.cpp b/host/lib/convert/convert_impl.cpp
index f05be6ada..12ad54486 100644
--- a/host/lib/convert/convert_impl.cpp
+++ b/host/lib/convert/convert_impl.cpp
@@ -69,8 +69,8 @@ UHD_SINGLETON_FCN(fcn_table_type, get_table);
  **********************************************************************/
 void uhd::convert::register_converter(
     const id_type &id,
-    function_type fcn,
-    priority_type prio
+    const function_type &fcn,
+    const priority_type prio
 ){
     //get a reference to the function table
     fcn_table_type &table = get_table();
diff --git a/host/lib/convert/convert_with_tables.cpp b/host/lib/convert/convert_with_tables.cpp
new file mode 100644
index 000000000..c45415d5d
--- /dev/null
+++ b/host/lib/convert/convert_with_tables.cpp
@@ -0,0 +1,188 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "convert_common.hpp"
+#include <uhd/utils/byteswap.hpp>
+#include <vector>
+
+using namespace uhd::convert;
+
+static const size_t sc16_table_len = size_t(1 << 16); //one table entry for every possible 16-bit index
+
+typedef boost::uint16_t (*tohost16_type)(boost::uint16_t); //signature of the 16-bit conversion applied to each table index
+
+/***********************************************************************
+ * sc16 lookup table converter (item32 input, complex float/double output)
+ *  - the real and imaginary 16-bit halves are looked up one at a time
+ **********************************************************************/
+template <typename type, tohost16_type tohost, size_t re_shift, size_t im_shift>
+class convert_sc16_item32_1_to_fcxx_1 : public converter{
+public:
+    convert_sc16_item32_1_to_fcxx_1(void): _table(sc16_table_len){}
+
+    void set_scalar(const double scalar){ //refill the whole table for a new scale factor
+        for (size_t index = 0; index != sc16_table_len; ++index){
+            const boost::int16_t host = boost::int16_t(tohost(boost::uint16_t(index & 0xffff)));
+            _table[index] = type(host*scalar);
+        }
+    }
+
+    void operator()(const input_type &inputs, const output_type &outputs, const size_t nsamps){
+        typedef std::complex<type> out_type;
+        const item32_t *src = reinterpret_cast<const item32_t *>(inputs[0]);
+        out_type *dst = reinterpret_cast<out_type *>(outputs[0]);
+
+        for (size_t n = 0; n != nsamps; ++n){
+            const item32_t word = src[n];
+            const type re = _table[boost::uint16_t(word >> re_shift)]; //low 16 bits after the shift select the entry
+            const type im = _table[boost::uint16_t(word >> im_shift)];
+            dst[n] = out_type(re, im);
+        }
+    }
+
+private:
+    std::vector<type> _table; //sc16_table_len scaled values, indexed by the raw 16-bit word
+};
+
+/***********************************************************************
+ * sc8 lookup table converter (item32 input, complex float/double output)
+ *  - one 16-bit lookup yields the real and imaginary parts together
+ **********************************************************************/
+template <typename type, tohost16_type tohost, size_t lo_shift, size_t hi_shift>
+class convert_sc8_item32_1_to_fcxx_1 : public converter{
+public:
+    convert_sc8_item32_1_to_fcxx_1(void): _table(sc16_table_len){}
+
+    void set_scalar(const double scalar){ //refill the whole table for a new scale factor
+        for (size_t i = 0; i < sc16_table_len; i++){
+            const boost::uint16_t val = tohost(boost::uint16_t(i & 0xffff));
+            const type real = type(boost::int8_t(val >> 8)*scalar); //upper byte of the converted word
+            const type imag = type(boost::int8_t(val >> 0)*scalar); //lower byte of the converted word
+            _table[i] = std::complex<type>(real, imag);
+        }
+    }
+
+    void operator()(const input_type &inputs, const output_type &outputs, const size_t nsamps){
+        const item32_t *input = reinterpret_cast<const item32_t *>(size_t(inputs[0]) & ~0x3); //round the address down to a multiple of 4
+        std::complex<type> *output = reinterpret_cast<std::complex<type> *>(outputs[0]);
+
+        size_t num_samps = nsamps; //samples still to be produced
+
+        if (num_samps != 0 && (size_t(inputs[0]) & 0x3) != 0){ //misaligned start: take the hi half; skipped for empty requests so num_samps cannot wrap below zero
+            const item32_t item0 = *input++;
+            *output++ = _table[boost::uint16_t(item0 >> hi_shift)];
+            num_samps--;
+        }
+
+        const size_t num_pairs = num_samps/2; //each remaining item32 holds two samples
+        for (size_t i = 0, j = 0; i < num_pairs; i++, j+=2){
+            const item32_t item_i = (input[i]);
+            output[j] = _table[boost::uint16_t(item_i >> lo_shift)];
+            output[j + 1] = _table[boost::uint16_t(item_i >> hi_shift)];
+        }
+
+        if (num_samps != num_pairs*2){ //odd count: the last sample is the lo half of one more item32
+            const item32_t item_n = input[num_pairs];
+            output[num_samps-1] = _table[boost::uint16_t(item_n >> lo_shift)];
+        }
+    }
+
+private:
+    std::vector<std::complex<type> > _table; //sc16_table_len scaled complex values, indexed by a raw 16-bit half
+};
+
+/***********************************************************************
+ * Factory functions and registration
+ **********************************************************************/
+
+#ifdef BOOST_BIG_ENDIAN
+#  define SHIFT_PAIR0 16, 0 //expands to the two shift template arguments: (re_shift, im_shift) for sc16, (lo_shift, hi_shift) for sc8
+#  define SHIFT_PAIR1 0, 16 //the same two shifts in the opposite order
+#else
+#  define SHIFT_PAIR0 0, 16
+#  define SHIFT_PAIR1 16, 0
+#endif
+
+static converter::sptr make_convert_sc16_item32_be_1_to_fc32_1(void){ //factory registered for sc16_item32_be -> fc32
+    return converter::sptr(new convert_sc16_item32_1_to_fcxx_1<float, uhd::ntohx, SHIFT_PAIR0>());
+}
+
+static converter::sptr make_convert_sc16_item32_be_1_to_fc64_1(void){ //factory registered for sc16_item32_be -> fc64
+    return converter::sptr(new convert_sc16_item32_1_to_fcxx_1<double, uhd::ntohx, SHIFT_PAIR0>());
+}
+
+static converter::sptr make_convert_sc16_item32_le_1_to_fc32_1(void){ //factory registered for sc16_item32_le -> fc32
+    return converter::sptr(new convert_sc16_item32_1_to_fcxx_1<float, uhd::wtohx, SHIFT_PAIR1>());
+}
+
+static converter::sptr make_convert_sc16_item32_le_1_to_fc64_1(void){ //factory registered for sc16_item32_le -> fc64
+    return converter::sptr(new convert_sc16_item32_1_to_fcxx_1<double, uhd::wtohx, SHIFT_PAIR1>());
+}
+
+static converter::sptr make_convert_sc8_item32_be_1_to_fc32_1(void){ //factory registered for sc8_item32_be -> fc32
+    return converter::sptr(new convert_sc8_item32_1_to_fcxx_1<float, uhd::ntohx, SHIFT_PAIR1>());
+}
+
+static converter::sptr make_convert_sc8_item32_be_1_to_fc64_1(void){ //factory registered for sc8_item32_be -> fc64
+    return converter::sptr(new convert_sc8_item32_1_to_fcxx_1<double, uhd::ntohx, SHIFT_PAIR1>());
+}
+
+static converter::sptr make_convert_sc8_item32_le_1_to_fc32_1(void){ //factory registered for sc8_item32_le -> fc32
+    return converter::sptr(new convert_sc8_item32_1_to_fcxx_1<float, uhd::wtohx, SHIFT_PAIR0>());
+}
+
+static converter::sptr make_convert_sc8_item32_le_1_to_fc64_1(void){ //factory registered for sc8_item32_le -> fc64
+    return converter::sptr(new convert_sc8_item32_1_to_fcxx_1<double, uhd::wtohx, SHIFT_PAIR0>());
+}
+
+UHD_STATIC_BLOCK(register_convert_sc16_item32_1_to_fcxx_1){
+    //signature shared by every factory function registered below
+    typedef converter::sptr (*factory_type)(void);
+
+    //one row per table converter: input format, output format, factory
+    struct entry_type{
+        const char *input_format;
+        const char *output_format;
+        factory_type factory;
+    };
+
+    //rows are listed in the order in which the converters get registered
+    static const entry_type entries[] = {
+        {"sc16_item32_be", "fc32", &make_convert_sc16_item32_be_1_to_fc32_1},
+        {"sc16_item32_be", "fc64", &make_convert_sc16_item32_be_1_to_fc64_1},
+        {"sc16_item32_le", "fc32", &make_convert_sc16_item32_le_1_to_fc32_1},
+        {"sc16_item32_le", "fc64", &make_convert_sc16_item32_le_1_to_fc64_1},
+        {"sc8_item32_be", "fc32", &make_convert_sc8_item32_be_1_to_fc32_1},
+        {"sc8_item32_be", "fc64", &make_convert_sc8_item32_be_1_to_fc64_1},
+        {"sc8_item32_le", "fc32", &make_convert_sc8_item32_le_1_to_fc32_1},
+        {"sc8_item32_le", "fc64", &make_convert_sc8_item32_le_1_to_fc64_1},
+    };
+    const size_t num_entries = sizeof(entries)/sizeof(entries[0]);
+
+    //every row describes a converter with exactly one input and one output
+    uhd::convert::id_type id;
+    id.num_inputs = 1;
+    id.num_outputs = 1;
+
+    for (size_t row = 0; row < num_entries; row++){
+        const entry_type &entry = entries[row];
+
+        id.output_format = entry.output_format;
+        id.input_format = entry.input_format;
+        uhd::convert::register_converter(id, entry.factory, PRIORITY_TABLE);
+    }
+}
diff --git a/host/lib/convert/gen_convert_general.py b/host/lib/convert/gen_convert_general.py
index 52b4212b4..a1bc7aaaf 100644
--- a/host/lib/convert/gen_convert_general.py
+++ b/host/lib/convert/gen_convert_general.py
@@ -33,8 +33,6 @@ DECLARE_CONVERTER(item32, 1, sc16_item32_$(end), 1, PRIORITY_GENERAL){
     const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
     item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
 
-    if (scale_factor == 0){} //avoids unused warning
-
     for (size_t i = 0; i < nsamps; i++){
         output[i] = $(to_wire)(input[i]);
     }
@@ -44,8 +42,6 @@ DECLARE_CONVERTER(sc16_item32_$(end), 1, item32, 1, PRIORITY_GENERAL){
     const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
     item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
 
-    if (scale_factor == 0){} //avoids unused warning
-
     for (size_t i = 0; i < nsamps; i++){
         output[i] = $(to_host)(input[i]);
     }
@@ -103,8 +99,6 @@ DECLARE_CONVERTER($(cpu_type), $(width), sc16_item16_usrp1, 1, PRIORITY_GENERAL)
     #end for
     boost::uint16_t *output = reinterpret_cast<boost::uint16_t *>(outputs[0]);
 
-    if (scale_factor == 0){} //avoids unused warning
-
     for (size_t i = 0, j = 0; i < nsamps; i++){
         #for $w in range($width)
         output[j++] = $(to_wire)(boost::int16_t(input$(w)[i].real()$(do_scale)));
@@ -119,8 +113,6 @@ DECLARE_CONVERTER(sc16_item16_usrp1, 1, $(cpu_type), $(width), PRIORITY_GENERAL)
     $(cpu_type)_t *output$(w) = reinterpret_cast<$(cpu_type)_t *>(outputs[$(w)]);
     #end for
 
-    if (scale_factor == 0){} //avoids unused warning
-
     for (size_t i = 0, j = 0; i < nsamps; i++){
         #for $w in range($width)
         output$(w)[i] = $(cpu_type)_t(
@@ -138,8 +130,6 @@ DECLARE_CONVERTER(sc8_item16_usrp1, 1, $(cpu_type), $(width), PRIORITY_GENERAL){
     $(cpu_type)_t *output$(w) = reinterpret_cast<$(cpu_type)_t *>(outputs[$(w)]);
     #end for
 
-    if (scale_factor == 0){} //avoids unused warning
-
     for (size_t i = 0, j = 0; i < nsamps; i++){
         #for $w in range($width)
         {