6 files changed, 141 insertions, 232 deletions
diff --git a/host/lib/convert/CMakeLists.txt b/host/lib/convert/CMakeLists.txt
index 5204c29ea..024c2260b 100644
--- a/host/lib/convert/CMakeLists.txt
+++ b/host/lib/convert/CMakeLists.txt
@@ -22,40 +22,6 @@ INCLUDE(CheckIncludeFileCXX)
 MESSAGE(STATUS "")
 
 ########################################################################
-# Look for Orc support
-########################################################################
-FIND_PACKAGE(ORC)
-
-IF(NOT ORCC_EXECUTABLE)
-    FIND_PROGRAM(ORCC_EXECUTABLE orcc)
-ENDIF()
-
-LIBUHD_REGISTER_COMPONENT("ORC" ENABLE_ORC ON "ENABLE_LIBUHD;ORC_FOUND;ORCC_EXECUTABLE" OFF)
-
-IF(ENABLE_ORC)
-    INCLUDE_DIRECTORIES(${ORC_INCLUDE_DIRS})
-    LINK_DIRECTORIES(${ORC_LIBRARY_DIRS})
-    ENABLE_LANGUAGE(C)
-
-    SET(orcc_src ${CMAKE_CURRENT_SOURCE_DIR}/convert_orc.orc)
-
-    GET_FILENAME_COMPONENT(orc_file_name_we ${orcc_src} NAME_WE)
-    SET(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c)
-    MESSAGE(STATUS "Orc found, enabling Orc support.")
-    ADD_CUSTOM_COMMAND(
-        COMMAND ${ORCC_EXECUTABLE} --implementation -o ${orcc_gen} ${orcc_src}
-        DEPENDS ${orcc_src} OUTPUT ${orcc_gen}
-    )
-    LIBUHD_APPEND_SOURCES(${orcc_gen})
-    LIBUHD_APPEND_SOURCES(
-        ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_orc.cpp
-    )
-    LIBUHD_APPEND_LIBS(${ORC_LIBRARIES})
-ELSE(ENABLE_ORC)
-    MESSAGE(STATUS "Orc not found, disabling orc support.")
-ENDIF(ENABLE_ORC)
-
-########################################################################
 # Check for SSE2 SIMD headers
 ########################################################################
 IF(CMAKE_COMPILER_IS_GNUCXX)
diff --git a/host/lib/convert/convert_common.hpp b/host/lib/convert/convert_common.hpp
index 6c2ea9fec..6e73e9436 100644
--- a/host/lib/convert/convert_common.hpp
+++ b/host/lib/convert/convert_common.hpp
@@ -65,11 +65,10 @@ static const int PRIORITY_GENERAL = 0;
 static const int PRIORITY_EMPTY = -1;
 
 #ifdef __ARM_NEON__
-static const int PRIORITY_LIBORC = 3;
-static const int PRIORITY_SIMD = 2; //neon conversions could be implemented better, orc wins
+static const int PRIORITY_SIMD = 2;
 static const int PRIORITY_TABLE = 1; //tables require large cache, so they are slower on arm
 #else
-static const int PRIORITY_LIBORC = 2;
+// PRIORITY_SIMD stays at 3: priority 2 belonged to the now-removed PRIORITY_LIBORC
 static const int PRIORITY_SIMD = 3;
 static const int PRIORITY_TABLE = 1;
 #endif
@@ -87,6 +86,7 @@ typedef float                        f32_t;
 typedef boost::int32_t               s32_t;
 typedef boost::int16_t               s16_t;
 typedef boost::int8_t                s8_t;
+typedef boost::uint8_t               u8_t;
 
 typedef boost::uint32_t              item32_t;
 
diff --git a/host/lib/convert/convert_impl.cpp b/host/lib/convert/convert_impl.cpp
index 329e94a4d..d90bb9c94 100644
--- a/host/lib/convert/convert_impl.cpp
+++ b/host/lib/convert/convert_impl.cpp
@@ -43,10 +43,10 @@ bool convert::operator==(const convert::id_type &lhs, const convert::id_type &rh
 std::string convert::id_type::to_pp_string(void) const{
     return str(boost::format(
         "conversion ID\n"
-        "  Input format: %s\n"
-        "  Num inputs: %d\n"
+        "  Input format:  %s\n"
+        "  Num inputs:    %d\n"
         "  Output format: %s\n"
-        "  Num outputs: %d\n"
+        "  Num outputs:   %d\n"
     )
         % this->input_format
         % this->num_inputs
@@ -55,6 +55,15 @@ std::string convert::id_type::to_pp_string(void) const{
     );
 }
 
+std::string convert::id_type::to_string(void) const{
+    return str(boost::format("%s (%d) -> %s (%d)")
+        % this->input_format
+        % this->num_inputs
+        % this->output_format
+        % this->num_outputs
+    );
+}
+
 /***********************************************************************
  * Setup the table registry
  **********************************************************************/
@@ -92,7 +101,15 @@ convert::function_type convert::get_converter(
     //find a matching priority
     priority_type best_prio = -1;
     BOOST_FOREACH(priority_type prio_i, get_table()[id].keys()){
-        if (prio_i == prio) return get_table()[id][prio];
+        if (prio_i == prio) {
+            //----------------------------------------------------------------//
+            UHD_LOGV(always) << "get_converter: For converter ID: " << id.to_pp_string() << std::endl
+                << "Using prio: " << prio << std::endl
+                << std::endl
+            ;
+            //----------------------------------------------------------------//
+            return get_table()[id][prio];
+        }
         best_prio = std::max(best_prio, prio_i);
     }
 
@@ -100,6 +117,13 @@ convert::function_type convert::get_converter(
     if (prio != -1) throw uhd::key_error(
         "Cannot find a conversion routine [with prio] for " + id.to_pp_string());
 
+    //----------------------------------------------------------------//
+    UHD_LOGV(always) << "get_converter: For converter ID: " << id.to_pp_string() << std::endl
+        << "Using prio: " << best_prio << std::endl
+        << std::endl
+    ;
+    //----------------------------------------------------------------//
+
     //otherwise, return best prio
     return get_table()[id][best_prio];
 }
@@ -148,6 +172,7 @@ UHD_STATIC_BLOCK(convert_register_item_sizes){
     convert::register_bytes_per_item("s32", sizeof(boost::int32_t));
     convert::register_bytes_per_item("s16", sizeof(boost::int16_t));
     convert::register_bytes_per_item("s8", sizeof(boost::int8_t));
+    convert::register_bytes_per_item("u8", sizeof(boost::uint8_t));
 
     //register VITA types
     convert::register_bytes_per_item("item32", sizeof(boost::int32_t));
diff --git a/host/lib/convert/convert_orc.orc b/host/lib/convert/convert_orc.orc
deleted file mode 100644
index ffb298f26..000000000
--- a/host/lib/convert/convert_orc.orc
+++ /dev/null
@@ -1,79 +0,0 @@
-.function _convert_fc32_1_to_item32_1_nswap_orc
-.source 8 src
-.dest 4 dst
-.floatparam 4 scalar
-.temp 8 scaled
-.temp 8 converted
-.temp 4 short
-x2 mulf scaled, src, scalar
-x2 convfl converted, scaled
-x2 convlw short, converted
-swapl short, short
-x2 swapw dst, short
-
-.function _convert_fc32_1_to_item32_1_bswap_orc
-.source 8 src
-.dest 4 dst
-.floatparam 4 scalar
-.temp 8 scaled
-.temp 8 converted
-.temp 4 short
-x2 mulf scaled, src, scalar
-x2 convfl converted, scaled
-x2 convlw short, converted
-x2 swapw dst, short
-
-.function _convert_item32_1_to_fc32_1_nswap_orc
-.source 4 src
-.dest 8 dst
-.floatparam 4 scalar
-.temp 4 tmp1
-.temp 8 tmp2
-x2 swapw tmp1, src
-swapl tmp1, tmp1
-x2 convswl tmp2, tmp1
-x2 convlf tmp2, tmp2
-x2 mulf dst, tmp2, scalar
-
-.function _convert_item32_1_to_fc32_1_bswap_orc
-.source 4 src
-.dest 8 dst
-.floatparam 4 scalar
-.temp 4 tmp1
-.temp 8 tmp2
-x2 swapw tmp1, src
-x2 convswl tmp2, tmp1
-x2 convlf tmp2, tmp2
-x2 mulf dst, tmp2, scalar
-
-.function _convert_sc16_1_to_item32_1_nswap_orc
-.source 4 src
-.dest 4 dst
-.temp 4 tmp
-.floatparam 4 scalar
-swapl tmp, src
-x2 swapw dst, tmp
-
-.function _convert_item32_1_to_sc16_1_nswap_orc
-.source 4 src
-.dest 4 dst
-.floatparam 4 scalar
-.temp 4 tmp
-x2 swapw tmp, src
-swapl dst, tmp
-
-.function _convert_swap_byte_pairs_orc
-.source 4 src
-.dest 4 dst
-swapl dst, src
-
-.function _convert_fc32_1_to_sc8_1_nswap_orc
-.source 8 src
-.dest 2 dst
-.temp 8 tmp
-.temp 4 tmp2
-.floatparam 4 scalar
-x2 mulf tmp, src, scalar
-x2 convfl tmp, tmp
-x2 convlw tmp2, tmp
-x2 convwb dst, tmp2
diff --git a/host/lib/convert/convert_with_orc.cpp b/host/lib/convert/convert_with_orc.cpp
deleted file mode 100644
index 19755fa44..000000000
--- a/host/lib/convert/convert_with_orc.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-//
-// Copyright 2011-2013 Ettus Research LLC
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program.  If not, see <http://www.gnu.org/licenses/>.
-//
-
-#include "convert_common.hpp"
-#include <uhd/utils/byteswap.hpp>
-
-using namespace uhd::convert;
-
-extern "C" {
-extern void _convert_fc32_1_to_item32_1_nswap_orc(void *, const void *, float, int);
-extern void _convert_fc32_1_to_item32_1_bswap_orc(void *, const void *, float, int);
-extern void _convert_item32_1_to_fc32_1_nswap_orc(void *, const void *, float, int);
-extern void _convert_item32_1_to_fc32_1_bswap_orc(void *, const void *, float, int);
-extern void _convert_sc16_1_to_item32_1_nswap_orc(void *, const void *, float, int);
-extern void _convert_item32_1_to_sc16_1_nswap_orc(void *, const void *, float, int);
-extern void _convert_fc32_1_to_sc8_1_nswap_orc(void *, const void *, float, int);
-extern void _convert_swap_byte_pairs_orc(void *, const void *, int);
-}
-
-DECLARE_CONVERTER(fc32, 1, sc16_item32_le, 1, PRIORITY_LIBORC){
-    _convert_fc32_1_to_item32_1_nswap_orc(outputs[0], inputs[0], scale_factor, nsamps);
-}
-
-DECLARE_CONVERTER(fc32, 1, sc16_item32_be, 1, PRIORITY_LIBORC){
-    _convert_fc32_1_to_item32_1_bswap_orc(outputs[0], inputs[0], scale_factor, nsamps);
-}
-
-DECLARE_CONVERTER(sc16_item32_le, 1, fc32, 1, PRIORITY_LIBORC){
-    _convert_item32_1_to_fc32_1_nswap_orc(outputs[0], inputs[0], scale_factor, nsamps);
-}
-
-DECLARE_CONVERTER(sc16_item32_be, 1, fc32, 1, PRIORITY_LIBORC){
-    _convert_item32_1_to_fc32_1_bswap_orc(outputs[0], inputs[0], scale_factor, nsamps);
-}
-
-DECLARE_CONVERTER(sc16, 1, sc16_item32_le, 1, PRIORITY_LIBORC){
-    _convert_sc16_1_to_item32_1_nswap_orc(outputs[0], inputs[0], scale_factor, nsamps);
-}
-
-DECLARE_CONVERTER(sc16_item32_le, 1, sc16, 1, PRIORITY_LIBORC){
-    _convert_item32_1_to_sc16_1_nswap_orc(outputs[0], inputs[0], scale_factor, nsamps);
-}
-
-DECLARE_CONVERTER(fc32, 1, sc8_item32_be, 1, PRIORITY_LIBORC){
-    _convert_fc32_1_to_sc8_1_nswap_orc(outputs[0], inputs[0], scale_factor, nsamps);
-}
-
-DECLARE_CONVERTER(fc32, 1, sc8_item32_le, 1, PRIORITY_LIBORC){
-    _convert_fc32_1_to_sc8_1_nswap_orc(outputs[0], inputs[0], scale_factor, nsamps);
-    _convert_swap_byte_pairs_orc(outputs[0], outputs[0], (nsamps + 1)/2);
-}
diff --git a/host/lib/convert/gen_convert_general.py b/host/lib/convert/gen_convert_general.py
index b0790755a..4f9eeb747 100644
--- a/host/lib/convert/gen_convert_general.py
+++ b/host/lib/convert/gen_convert_general.py
@@ -17,92 +17,147 @@
 #
 
 TMPL_HEADER = """
-#import time
+<%
+    import time
+%>
 /***********************************************************************
- * This file was generated by $file on $time.strftime("%c")
+ * This file was generated by ${file} on ${time.strftime("%c")}
  **********************************************************************/
 
-\#include "convert_common.hpp"
-\#include <uhd/utils/byteswap.hpp>
+#include "convert_common.hpp"
+#include <uhd/utils/byteswap.hpp>
 
 using namespace uhd::convert;
+
+
+// item32 -> item32: Just a memcpy. No scaling possible.
+DECLARE_CONVERTER(item32, 1, item32, 1, PRIORITY_GENERAL) {
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    memcpy(output, input, nsamps * sizeof(item32_t));
+}
 """
 
 TMPL_CONV_GEN2_ITEM32 = """
-DECLARE_CONVERTER(item32, 1, sc16_item32_$(end), 1, PRIORITY_GENERAL){
+DECLARE_CONVERTER(item32, 1, sc16_item32_{end}, 1, PRIORITY_GENERAL) {{
     const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
     item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
 
-    for (size_t i = 0; i < nsamps; i++){
-        output[i] = $(to_wire)(input[i]);
-    }
-}
+    for (size_t i = 0; i < nsamps; i++) {{
+        output[i] = {to_wire}(input[i]);
+    }}
+}}
 
-DECLARE_CONVERTER(sc16_item32_$(end), 1, item32, 1, PRIORITY_GENERAL){
+DECLARE_CONVERTER(sc16_item32_{end}, 1, item32, 1, PRIORITY_GENERAL) {{
     const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
     item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
 
-    for (size_t i = 0; i < nsamps; i++){
-        output[i] = $(to_host)(input[i]);
-    }
-}
+    for (size_t i = 0; i < nsamps; i++) {{
+        output[i] = {to_host}(input[i]);
+    }}
+}}
+"""
+
+TMPL_CONV_U8 = """
+DECLARE_CONVERTER(u8, 1, u8_item32_{end}, 1, PRIORITY_GENERAL) {{
+    const boost::uint32_t *input = reinterpret_cast<const boost::uint32_t *>(inputs[0]);
+    boost::uint32_t *output = reinterpret_cast<boost::uint32_t *>(outputs[0]);
+
+    // 1) Copy all the 4-byte tuples
+    size_t n_words = nsamps / 4;
+    for (size_t i = 0; i < n_words; i++) {{
+        output[i] = {to_wire}(input[i]);
+    }}
+    // 2) If nsamps was not a multiple of 4, copy the rest by hand
+    size_t bytes_left = nsamps % 4;
+    if (bytes_left) {{
+        const u8_t *last_input_word  = reinterpret_cast<const u8_t *>(&input[n_words]);
+        u8_t *last_output_word = reinterpret_cast<u8_t *>(&output[n_words]);
+        for (size_t k = 0; k < bytes_left; k++) {{
+            last_output_word[k] = last_input_word[k];
+        }}
+        output[n_words] = {to_wire}(output[n_words]);
+    }}
+}}
+
+DECLARE_CONVERTER(u8_item32_{end}, 1, u8, 1, PRIORITY_GENERAL) {{
+    const boost::uint32_t *input = reinterpret_cast<const boost::uint32_t *>(inputs[0]);
+    boost::uint32_t *output = reinterpret_cast<boost::uint32_t *>(outputs[0]);
+
+    // 1) Copy all the 4-byte tuples
+    size_t n_words = nsamps / 4;
+    for (size_t i = 0; i < n_words; i++) {{
+        output[i] = {to_host}(input[i]);
+    }}
+    // 2) If nsamps was not a multiple of 4, copy the rest by hand
+    size_t bytes_left = nsamps % 4;
+    if (bytes_left) {{
+        boost::uint32_t last_input_word = {to_host}(input[n_words]);
+        const u8_t *last_input_word_ptr = reinterpret_cast<const u8_t *>(&last_input_word);
+        u8_t *last_output_word = reinterpret_cast<u8_t *>(&output[n_words]);
+        for (size_t k = 0; k < bytes_left; k++) {{
+            last_output_word[k] = last_input_word_ptr[k];
+        }}
+    }}
+}}
 """
 
 TMPL_CONV_USRP1_COMPLEX = """
-DECLARE_CONVERTER($(cpu_type), $(width), sc16_item16_usrp1, 1, PRIORITY_GENERAL){
-    #for $w in range($width)
-    const $(cpu_type)_t *input$(w) = reinterpret_cast<const $(cpu_type)_t *>(inputs[$(w)]);
-    #end for
+DECLARE_CONVERTER(${cpu_type}, ${width}, sc16_item16_usrp1, 1, PRIORITY_GENERAL){
+    % for w in range(width):
+    const ${cpu_type}_t *input${w} = reinterpret_cast<const ${cpu_type}_t *>(inputs[${w}]);
+    % endfor
     boost::uint16_t *output = reinterpret_cast<boost::uint16_t *>(outputs[0]);
 
     for (size_t i = 0, j = 0; i < nsamps; i++){
-        #for $w in range($width)
-        output[j++] = $(to_wire)(boost::uint16_t(boost::int16_t(input$(w)[i].real()$(do_scale))));
-        output[j++] = $(to_wire)(boost::uint16_t(boost::int16_t(input$(w)[i].imag()$(do_scale))));
-        #end for
+        % for w in range(width):
+        output[j++] = ${to_wire}(boost::uint16_t(boost::int16_t(input${w}[i].real()${do_scale})));
+        output[j++] = ${to_wire}(boost::uint16_t(boost::int16_t(input${w}[i].imag()${do_scale})));
+        % endfor
     }
 }
 
-DECLARE_CONVERTER(sc16_item16_usrp1, 1, $(cpu_type), $(width), PRIORITY_GENERAL){
+DECLARE_CONVERTER(sc16_item16_usrp1, 1, ${cpu_type}, ${width}, PRIORITY_GENERAL){
     const boost::uint16_t *input = reinterpret_cast<const boost::uint16_t *>(inputs[0]);
-    #for $w in range($width)
-    $(cpu_type)_t *output$(w) = reinterpret_cast<$(cpu_type)_t *>(outputs[$(w)]);
-    #end for
+    % for w in range(width):
+    ${cpu_type}_t *output${w} = reinterpret_cast<${cpu_type}_t *>(outputs[${w}]);
+    % endfor
 
     for (size_t i = 0, j = 0; i < nsamps; i++){
-        #for $w in range($width)
-        output$(w)[i] = $(cpu_type)_t(
-            boost::int16_t($(to_host)(input[j+0]))$(do_scale),
-            boost::int16_t($(to_host)(input[j+1]))$(do_scale)
+        % for w in range(width):
+        output${w}[i] = ${cpu_type}_t(
+            boost::int16_t(${to_host}(input[j+0]))${do_scale},
+            boost::int16_t(${to_host}(input[j+1]))${do_scale}
         );
         j += 2;
-        #end for
+        % endfor
     }
 }
 
-DECLARE_CONVERTER(sc8_item16_usrp1, 1, $(cpu_type), $(width), PRIORITY_GENERAL){
+DECLARE_CONVERTER(sc8_item16_usrp1, 1, ${cpu_type}, ${width}, PRIORITY_GENERAL){
     const boost::uint16_t *input = reinterpret_cast<const boost::uint16_t *>(inputs[0]);
-    #for $w in range($width)
-    $(cpu_type)_t *output$(w) = reinterpret_cast<$(cpu_type)_t *>(outputs[$(w)]);
-    #end for
+    % for w in range(width):
+    ${cpu_type}_t *output${w} = reinterpret_cast<${cpu_type}_t *>(outputs[${w}]);
+    % endfor
 
     for (size_t i = 0, j = 0; i < nsamps; i++){
-        #for $w in range($width)
+        % for w in range(width):
         {
-        const boost::uint16_t num = $(to_host)(input[j++]);
-        output$(w)[i] = $(cpu_type)_t(
-            boost::int8_t(num)$(do_scale),
-            boost::int8_t(num >> 8)$(do_scale)
+        const boost::uint16_t num = ${to_host}(input[j++]);
+        output${w}[i] = ${cpu_type}_t(
+            boost::int8_t(num)${do_scale},
+            boost::int8_t(num >> 8)${do_scale}
         );
         }
-        #end for
+        % endfor
     }
 }
 """
 
 def parse_tmpl(_tmpl_text, **kwargs):
-    from Cheetah.Template import Template
-    return str(Template(_tmpl_text, kwargs))
+    from mako.template import Template
+    return Template(_tmpl_text).render(**kwargs)
 
 if __name__ == '__main__':
     import sys, os
@@ -114,12 +169,19 @@ if __name__ == '__main__':
         ('be', 'uhd::ntohx', 'uhd::htonx'),
         ('le', 'uhd::wtohx', 'uhd::htowx'),
     ):
-        output += parse_tmpl(
-                TMPL_CONV_GEN2_ITEM32,
+        output += TMPL_CONV_GEN2_ITEM32.format(
                 end=end, to_host=to_host, to_wire=to_wire
-            )
+        )
+    #generate raw (u8) converters:
+    for end, to_host, to_wire in (
+        ('be', 'uhd::ntohx', 'uhd::htonx'),
+        ('le', 'uhd::wtohx', 'uhd::htowx'),
+    ):
+        output += TMPL_CONV_U8.format(
+                end=end, to_host=to_host, to_wire=to_wire
+        )
 
-    #generate complex converters for usrp1 format
+    #generate complex converters for usrp1 format (requires Mako)
     for width in 1, 2, 4:
         for cpu_type, do_scale in (
             ('fc64', '*scale_factor'),