summaryrefslogtreecommitdiffstats
path: root/host/lib
diff options
context:
space:
mode:
authorJosh Blum <josh@joshknows.com>2011-01-04 17:11:52 -0800
committerJosh Blum <josh@joshknows.com>2011-01-04 17:11:52 -0800
commit91790751b614b86393dd7963f1a4476d0e60ed4a (patch)
tree65792ea10b39ecb46d6cb9685ed46ab38e88933f /host/lib
parentea20cc274cdd7ea15df2347e9d1a3539bd819bed (diff)
downloaduhd-91790751b614b86393dd7963f1a4476d0e60ed4a.tar.gz
uhd-91790751b614b86393dd7963f1a4476d0e60ed4a.tar.bz2
uhd-91790751b614b86393dd7963f1a4476d0e60ed4a.zip
uhd: added new convert directory with type conversion registry (needs testing)
Diffstat (limited to 'host/lib')
-rw-r--r--host/lib/CMakeLists.txt3
-rw-r--r--host/lib/convert/CMakeLists.txt66
-rw-r--r--host/lib/convert/convert.cpp117
-rw-r--r--host/lib/convert/convert_common.hpp90
-rw-r--r--host/lib/convert/convert_general.cpp63
-rw-r--r--host/lib/convert/convert_with_neon.cpp62
-rw-r--r--host/lib/convert/convert_with_sse2.cpp148
-rw-r--r--host/lib/convert/gen_convert_general.py93
-rw-r--r--host/lib/convert/gen_convert_impl.py186
9 files changed, 827 insertions, 1 deletions
diff --git a/host/lib/CMakeLists.txt b/host/lib/CMakeLists.txt
index 498841561..9ab121df5 100644
--- a/host/lib/CMakeLists.txt
+++ b/host/lib/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# Copyright 2010 Ettus Research LLC
+# Copyright 2010-2011 Ettus Research LLC
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -89,6 +89,7 @@ ENDMACRO(INCLUDE_SUBDIRECTORY)
# Include subdirectories (different than add)
########################################################################
INCLUDE_SUBDIRECTORY(ic_reg_maps)
+INCLUDE_SUBDIRECTORY(convert)
INCLUDE_SUBDIRECTORY(transport)
INCLUDE_SUBDIRECTORY(usrp)
INCLUDE_SUBDIRECTORY(utils)
diff --git a/host/lib/convert/CMakeLists.txt b/host/lib/convert/CMakeLists.txt
new file mode 100644
index 000000000..9324a94b0
--- /dev/null
+++ b/host/lib/convert/CMakeLists.txt
@@ -0,0 +1,66 @@
+#
+# Copyright 2011 Ettus Research LLC
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+########################################################################
+# This file included, use CMake directory variables
+########################################################################
+INCLUDE(CheckIncludeFileCXX)
+MESSAGE(STATUS "")
+
+########################################################################
+# Check for SIMD headers
+########################################################################
+CHECK_INCLUDE_FILE_CXX(emmintrin.h HAVE_EMMINTRIN_H)
+IF(HAVE_EMMINTRIN_H)
+ LIBUHD_APPEND_SOURCES(
+ ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_sse2.cpp
+ )
+ENDIF(HAVE_EMMINTRIN_H)
+
+CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_ARM_NEON_H)
+IF(HAVE_ARM_NEON_H)
+ LIBUHD_APPEND_SOURCES(
+ ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_neon.cpp
+ )
+ENDIF(HAVE_ARM_NEON_H)
+
+########################################################################
+# Convert types generation
+########################################################################
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+
+LIBUHD_PYTHON_GEN_SOURCE(
+ ${CMAKE_CURRENT_SOURCE_DIR}/gen_convert_impl.py
+ ${CMAKE_CURRENT_BINARY_DIR}/convert_impl.hpp
+)
+
+INCLUDE(AddFileDependencies)
+ADD_FILE_DEPENDENCIES(
+ ${CMAKE_CURRENT_SOURCE_DIR}/convert.cpp
+ ${CMAKE_CURRENT_BINARY_DIR}/convert_impl.hpp
+)
+
+LIBUHD_PYTHON_GEN_SOURCE(
+ ${CMAKE_CURRENT_SOURCE_DIR}/gen_convert_general.py
+ ${CMAKE_CURRENT_BINARY_DIR}/convert_general.cpp
+)
+
+LIBUHD_APPEND_SOURCES(
+ ${CMAKE_CURRENT_SOURCE_DIR}/convert.cpp
+ ${CMAKE_CURRENT_BINARY_DIR}/convert_general.cpp
+)
diff --git a/host/lib/convert/convert.cpp b/host/lib/convert/convert.cpp
new file mode 100644
index 000000000..f635a1040
--- /dev/null
+++ b/host/lib/convert/convert.cpp
@@ -0,0 +1,117 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include <uhd/convert.hpp>
+#include <uhd/utils/static.hpp>
+#include <uhd/utils/exception.hpp>
+#include <iostream>
+
+using namespace uhd;
+
+#include "convert_impl.hpp"
+
+static const bool debug = false;
+
+/***********************************************************************
+ * Define types for the function tables
+ **********************************************************************/
+struct fcn_table_entry_type{
+ convert::priority_type prio;
+ convert::function_type fcn;
+ fcn_table_entry_type(void)
+ : prio(convert::PRIORITY_EMPTY), fcn(NULL){
+ /* NOP */
+ }
+};
+typedef std::vector<fcn_table_entry_type> fcn_table_type;
+
+/***********************************************************************
+ * Setup the table registry
+ **********************************************************************/
+UHD_SINGLETON_FCN(fcn_table_type, get_cpu_to_otw_table);
+UHD_SINGLETON_FCN(fcn_table_type, get_otw_to_cpu_table);
+
+fcn_table_type &get_table(dir_type dir){
+ switch(dir){
+ case DIR_OTW_TO_CPU: return get_otw_to_cpu_table();
+ case DIR_CPU_TO_OTW: return get_cpu_to_otw_table();
+ }
+ UHD_THROW_INVALID_CODE_PATH();
+}
+
+/***********************************************************************
+ * The registry functions
+ **********************************************************************/
+void uhd::convert::register_converter(
+ const std::string &markup,
+ function_type fcn,
+ priority_type prio
+){
+ //extract the predicate and direction from the markup
+ dir_type dir;
+ pred_type pred = make_pred(markup, dir);
+
+ //get a reference to the function table
+ fcn_table_type &table = get_table(dir);
+
+ //resize the table so that its at least pred+1
+ if (table.size() <= pred) table.resize(pred+1);
+
+ //register the function if higher priority
+ if (table[pred].prio < prio){
+ table[pred].fcn = fcn;
+ table[pred].prio = prio;
+ }
+
+ //----------------------------------------------------------------//
+ if (debug) std::cout << "register_converter: " << markup << std::endl
+ << " prio: " << prio << std::endl
+ << " pred: " << pred << std::endl
+ << " dir: " << dir << std::endl
+ << std::endl
+ ;
+ //----------------------------------------------------------------//
+}
+
+/***********************************************************************
+ * The converter functions
+ **********************************************************************/
+void uhd::convert::io_type_to_otw_type(
+ const io_type_t &io_type,
+ const otw_type_t &otw_type,
+ input_type &input_buffs,
+ output_type &output_buffs,
+ size_t nsamps_per_io_buff
+){
+ pred_type pred = make_pred(io_type, otw_type, input_buffs.size(), output_buffs.size());
+ fcn_table_type table = get_cpu_to_otw_table();
+ function_type fcn = table.at(pred).fcn;
+ fcn(input_buffs, output_buffs, nsamps_per_io_buff);
+}
+
+void uhd::convert::otw_type_to_io_type(
+ const io_type_t &io_type,
+ const otw_type_t &otw_type,
+ input_type &input_buffs,
+ output_type &output_buffs,
+ size_t nsamps_per_io_buff
+){
+ pred_type pred = make_pred(io_type, otw_type, input_buffs.size(), output_buffs.size());
+ fcn_table_type table = get_otw_to_cpu_table();
+ function_type fcn = table.at(pred).fcn;
+ fcn(input_buffs, output_buffs, nsamps_per_io_buff);
+}
diff --git a/host/lib/convert/convert_common.hpp b/host/lib/convert/convert_common.hpp
new file mode 100644
index 000000000..1a653a56f
--- /dev/null
+++ b/host/lib/convert/convert_common.hpp
@@ -0,0 +1,90 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+
+#ifndef INCLUDED_LIBUHD_CONVERT_COMMON_HPP
+#define INCLUDED_LIBUHD_CONVERT_COMMON_HPP
+
+#include <uhd/convert.hpp>
+#include <uhd/utils/static.hpp>
+#include <boost/cstdint.hpp>
+#include <complex>
+
+#define DECLARE_CONVERTER(fcn, prio) \
+ static void fcn( \
+ uhd::convert::input_type &inputs, \
+ uhd::convert::output_type &outputs, \
+ size_t nsamps \
+ ); \
+ UHD_STATIC_BLOCK(register_##fcn##_##prio){ \
+ uhd::convert::register_converter(#fcn, fcn, prio); \
+ } \
+ static void fcn( \
+ uhd::convert::input_type &inputs, \
+ uhd::convert::output_type &outputs, \
+ size_t nsamps \
+ )
+
+/***********************************************************************
+ * Typedefs
+ **********************************************************************/
+typedef std::complex<float> fc32_t;
+typedef std::complex<boost::int16_t> sc16_t;
+typedef boost::uint32_t item32_t;
+
+/***********************************************************************
+ * Convert complex short buffer to items32
+ **********************************************************************/
+static UHD_INLINE item32_t sc16_to_item32(sc16_t num){
+ boost::uint16_t real = num.real();
+ boost::uint16_t imag = num.imag();
+ return (item32_t(real) << 16) | (item32_t(imag) << 0);
+}
+
+/***********************************************************************
+ * Convert items32 buffer to complex short
+ **********************************************************************/
+static UHD_INLINE sc16_t item32_to_sc16(item32_t item){
+ return sc16_t(
+ boost::int16_t(item >> 16),
+ boost::int16_t(item >> 0)
+ );
+}
+
+/***********************************************************************
+ * Convert complex float buffer to items32 (no swap)
+ **********************************************************************/
+static const float shorts_per_float = float(32767);
+
+static UHD_INLINE item32_t fc32_to_item32(fc32_t num){
+ boost::uint16_t real = boost::int16_t(num.real()*shorts_per_float);
+ boost::uint16_t imag = boost::int16_t(num.imag()*shorts_per_float);
+ return (item32_t(real) << 16) | (item32_t(imag) << 0);
+}
+
+/***********************************************************************
+ * Convert items32 buffer to complex float
+ **********************************************************************/
+static const float floats_per_short = float(1.0/shorts_per_float);
+
+static UHD_INLINE fc32_t item32_to_fc32(item32_t item){
+ return fc32_t(
+ float(boost::int16_t(item >> 16)*floats_per_short),
+ float(boost::int16_t(item >> 0)*floats_per_short)
+ );
+}
+
+#endif /* INCLUDED_LIBUHD_CONVERT_COMMON_HPP */
diff --git a/host/lib/convert/convert_general.cpp b/host/lib/convert/convert_general.cpp
new file mode 100644
index 000000000..5e52acea2
--- /dev/null
+++ b/host/lib/convert/convert_general.cpp
@@ -0,0 +1,63 @@
+//
+// Copyright 2010 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "convert_common.hpp"
+#include <uhd/utils/byteswap.hpp>
+
+using namespace uhd::convert;
+
+/***********************************************************************
+ * Convert complex short buffer to items32
+ **********************************************************************/
+DECLARE_CONVERTER(convert_sc16_1_to_item32_1_nswap, PRIORITY_GENERAL){
+ const sc16_t *input = reinterpret_cast<const sc16_t *>(inputs[0]);
+ item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+ for (size_t i = 0; i < nsamps; i++){
+ output[i] = sc16_to_item32(input[i]);
+ }
+}
+
+DECLARE_CONVERTER(convert_sc16_1_to_item32_1_bswap, PRIORITY_GENERAL){
+ const sc16_t *input = reinterpret_cast<const sc16_t *>(inputs[0]);
+ item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+ for (size_t i = 0; i < nsamps; i++){
+ output[i] = uhd::byteswap(sc16_to_item32(input[i]));
+ }
+}
+
+/***********************************************************************
+ * Convert items32 buffer to complex short
+ **********************************************************************/
+DECLARE_CONVERTER(convert_item32_1_to_sc16_1_nswap, PRIORITY_GENERAL){
+ const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+ sc16_t *output = reinterpret_cast<sc16_t *>(outputs[0]);
+
+ for (size_t i = 0; i < nsamps; i++){
+ output[i] = item32_to_sc16(input[i]);
+ }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_sc16_1_bswap, PRIORITY_GENERAL){
+ const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+ sc16_t *output = reinterpret_cast<sc16_t *>(outputs[0]);
+
+ for (size_t i = 0; i < nsamps; i++){
+ output[i] = item32_to_sc16(uhd::byteswap(input[i]));
+ }
+}
diff --git a/host/lib/convert/convert_with_neon.cpp b/host/lib/convert/convert_with_neon.cpp
new file mode 100644
index 000000000..1ed841125
--- /dev/null
+++ b/host/lib/convert/convert_with_neon.cpp
@@ -0,0 +1,62 @@
+//
+// Copyright 2010-2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "convert_common.hpp"
+#include <uhd/utils/byteswap.hpp>
+#include <arm_neon.h>
+
+using namespace uhd::convert;
+
+DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){
+ const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
+ item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+ size_t i;
+
+ float32x4_t Q0 = vdupq_n_f32(shorts_per_float);
+ for (i=0; i < (nsamps & ~0x03); i+=2) {
+ float32x4_t Q1 = vld1q_f32(reinterpret_cast<const float *>(&input[i]));
+ float32x4_t Q2 = vmulq_f32(Q1, Q0);
+ int32x4_t Q3 = vcvtq_s32_f32(Q2);
+ int16x4_t D8 = vmovn_s32(Q3);
+ int16x4_t D9 = vrev32_s16(D8);
+ vst1_s16((reinterpret_cast<int16_t *>(&output[i])), D9);
+ }
+
+ for (; i < nsamps; i++)
+ output[i] = fc32_to_item32(input[i]);
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){
+ const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+ fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
+
+ size_t i;
+
+ float32x4_t Q1 = vdupq_n_f32(floats_per_short);
+ for (i=0; i < (nsamps & ~0x03); i+=2) {
+ int16x4_t D0 = vld1_s16(reinterpret_cast<const int16_t *>(&input[i]));
+ int16x4_t D1 = vrev32_s16(D0);
+ int32x4_t Q2 = vmovl_s16(D1);
+ float32x4_t Q3 = vcvtq_f32_s32(Q2);
+ float32x4_t Q4 = vmulq_f32(Q3, Q1);
+ vst1q_f32((reinterpret_cast<float *>(&output[i])), Q4);
+ }
+
+ for (; i < nsamps; i++)
+ output[i] = item32_to_fc32(input[i]);
+}
diff --git a/host/lib/convert/convert_with_sse2.cpp b/host/lib/convert/convert_with_sse2.cpp
new file mode 100644
index 000000000..8d5a8a6a5
--- /dev/null
+++ b/host/lib/convert/convert_with_sse2.cpp
@@ -0,0 +1,148 @@
+//
+// Copyright 2010-2011 Ettus Research LLC
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "convert_common.hpp"
+#include <uhd/utils/byteswap.hpp>
+#include <emmintrin.h>
+
+using namespace uhd::convert;
+
+DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){
+ const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
+ item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+ __m128 scalar = _mm_set_ps1(shorts_per_float);
+
+ //convert blocks of samples with intrinsics
+ size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
+ //load from input
+ __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0));
+ __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2));
+
+ //convert and scale
+ __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
+ __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
+
+ //pack + swap 16-bit pairs
+ __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
+ tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
+ tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
+
+ //store to output
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);
+ }
+
+ //convert remainder
+ for (; i < nsamps; i++){
+ output[i] = fc32_to_item32(input[i]);
+ }
+}
+
+DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){
+ const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
+ item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+ __m128 scalar = _mm_set_ps1(shorts_per_float);
+
+ //convert blocks of samples with intrinsics
+ size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
+ //load from input
+ __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0));
+ __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2));
+
+ //convert and scale
+ __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
+ __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
+
+ //pack + byteswap -> byteswap 16 bit words
+ __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
+ tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
+
+ //store to output
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);
+ }
+
+ //convert remainder
+ for (; i < nsamps; i++){
+ output[i] = uhd::byteswap(fc32_to_item32(input[i]));
+ }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){
+ const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+ fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
+
+ __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16));
+ __m128i zeroi = _mm_setzero_si128();
+
+ //convert blocks of samples with intrinsics
+ size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
+ //load from input
+ __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
+
+ //unpack + swap 16-bit pairs
+ tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
+ tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
+ __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
+ __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
+
+ //convert and scale
+ __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);
+ __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);
+
+ //store to output
+ _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo);
+ _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi);
+ }
+
+ //convert remainder
+ for (; i < nsamps; i++){
+ output[i] = item32_to_fc32(input[i]);
+ }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){
+ const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+ fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
+
+ __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16));
+ __m128i zeroi = _mm_setzero_si128();
+
+ //convert blocks of samples with intrinsics
+ size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
+ //load from input
+ __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
+
+ //byteswap + unpack -> byteswap 16 bit words
+ tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
+ __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
+ __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
+
+ //convert and scale
+ __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);
+ __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);
+
+ //store to output
+ _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo);
+ _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi);
+ }
+
+ //convert remainder
+ for (; i < nsamps; i++){
+ output[i] = item32_to_fc32(uhd::byteswap(input[i]));
+ }
+}
diff --git a/host/lib/convert/gen_convert_general.py b/host/lib/convert/gen_convert_general.py
new file mode 100644
index 000000000..47c4cd7d0
--- /dev/null
+++ b/host/lib/convert/gen_convert_general.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+#
+# Copyright 2011 Ettus Research LLC
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+TMPL_HEADER = """
+#import time
+/***********************************************************************
+ * This file was generated by $file on $time.strftime("%c")
+ **********************************************************************/
+
+\#include "convert_common.hpp"
+\#include <uhd/utils/byteswap.hpp>
+
+using namespace uhd::convert;
+"""
+
+TMPL_CONV_TO_FROM_ITEM32_1 = """
+DECLARE_CONVERTER(convert_$(cpu_type)_1_to_item32_1_$(swap), PRIORITY_GENERAL){
+ const $(cpu_type)_t *input = reinterpret_cast<const $(cpu_type)_t *>(inputs[0]);
+ item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+ for (size_t i = 0; i < nsamps; i++){
+ output[i] = $(swap_fcn)($(cpu_type)_to_item32(input[i]));
+ }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_$(cpu_type)_1_$(swap), PRIORITY_GENERAL){
+ const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+ $(cpu_type)_t *output = reinterpret_cast<$(cpu_type)_t *>(outputs[0]);
+
+ for (size_t i = 0; i < nsamps; i++){
+ output[i] = item32_to_$(cpu_type)($(swap_fcn)(input[i]));
+ }
+}
+"""
+TMPL_CONV_TO_FROM_ITEM32_X = """
+DECLARE_CONVERTER(convert_$(cpu_type)_$(width)_to_item32_1_$(swap), PRIORITY_GENERAL){
+ #for $w in range($width)
+ const $(cpu_type)_t *input$(w) = reinterpret_cast<const $(cpu_type)_t *>(inputs[$(w)]);
+ #end for
+ item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+ for (size_t i = 0, j = 0; i < nsamps; i++){
+ #for $w in range($width)
+ output[j++] = $(swap_fcn)($(cpu_type)_to_item32(input$(w)[i]));
+ #end for
+ }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_$(cpu_type)_$(width)_$(swap), PRIORITY_GENERAL){
+ const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+ #for $w in range($width)
+ $(cpu_type)_t *output$(w) = reinterpret_cast<$(cpu_type)_t *>(outputs[$(w)]);
+ #end for
+
+ for (size_t i = 0, j = 0; i < nsamps; i++){
+ #for $w in range($width)
+ output$(w)[i] = item32_to_$(cpu_type)($(swap_fcn)(input[j++]));
+ #end for
+ }
+}
+"""
+
+def parse_tmpl(_tmpl_text, **kwargs):
+ from Cheetah.Template import Template
+ return str(Template(_tmpl_text, kwargs))
+
+if __name__ == '__main__':
+ import sys, os
+ file = os.path.basename(__file__)
+ output = parse_tmpl(TMPL_HEADER, file=file)
+ for width in 1, 2, 3, 4:
+ for swap, swap_fcn in (('nswap', ''), ('bswap', 'uhd::byteswap')):
+ for cpu_type in 'fc32', 'sc16':
+ output += parse_tmpl(
+ TMPL_CONV_TO_FROM_ITEM32_1 if width == 1 else TMPL_CONV_TO_FROM_ITEM32_X,
+ width=width, swap=swap, swap_fcn=swap_fcn, cpu_type=cpu_type
+ )
+ open(sys.argv[1], 'w').write(output)
diff --git a/host/lib/convert/gen_convert_impl.py b/host/lib/convert/gen_convert_impl.py
new file mode 100644
index 000000000..71095ab97
--- /dev/null
+++ b/host/lib/convert/gen_convert_impl.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+#
+# Copyright 2010-2011 Ettus Research LLC
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+TMPL_TEXT = """
+#import time
+/***********************************************************************
+ * This file was generated by $file on $time.strftime("%c")
+ **********************************************************************/
+typedef size_t pred_type;
+
+\#include <uhd/utils/algorithm.hpp>
+\#include <boost/lexical_cast.hpp>
+\#include <boost/detail/endian.hpp>
+\#include <stdexcept>
+
+enum dir_type{
+ DIR_OTW_TO_CPU = 0,
+ DIR_CPU_TO_OTW = 1
+};
+
+pred_type make_pred(const std::string &markup, dir_type &dir){
+ pred_type pred = 0;
+
+ try{
+ std::vector<std::string> tokens = std::split_string(markup, "_");
+ //token 0 is <convert>
+ std::string inp_type = tokens.at(1);
+ std::string num_inps = tokens.at(2);
+ //token 3 is <to>
+ std::string out_type = tokens.at(4);
+ std::string num_outs = tokens.at(5);
+ std::string swap_type = tokens.at(6);
+
+ std::string cpu_type, otw_type;
+ if (inp_type.find("item") == std::string::npos){
+ cpu_type = inp_type;
+ otw_type = out_type;
+ dir = DIR_CPU_TO_OTW;
+ }
+ else{
+ cpu_type = out_type;
+ otw_type = inp_type;
+ dir = DIR_OTW_TO_CPU;
+ }
+
+ if (cpu_type == "fc32") pred |= $ph.fc32_p;
+ else if (cpu_type == "sc16") pred |= $ph.sc16_p;
+ else throw std::runtime_error("unhandled io type " + cpu_type);
+
+ if (otw_type == "item32") pred |= $ph.item32_p;
+ else throw std::runtime_error("unhandled otw type " + otw_type);
+
+ int num_inputs = boost::lexical_cast<int>(num_inps);
+ int num_outputs = boost::lexical_cast<int>(num_outs);
+
+ switch(num_inputs*num_outputs){ //FIXME treated as one value
+ case 1: pred |= $ph.chan1_p; break;
+ case 2: pred |= $ph.chan2_p; break;
+ case 3: pred |= $ph.chan3_p; break;
+ case 4: pred |= $ph.chan4_p; break;
+ default: throw std::runtime_error("unhandled number of channels");
+ }
+
+ if (swap_type == "bswap") pred |= $ph.bswap_p;
+ else if (swap_type == "nswap") pred |= $ph.nswap_p;
+ else throw std::runtime_error("unhandled swap type");
+
+ }
+ catch(...){
+ throw std::runtime_error("convert::make_pred: could not parse markup: " + markup);
+ }
+
+ return pred;
+}
+
+UHD_INLINE pred_type make_pred(
+ const io_type_t &io_type,
+ const otw_type_t &otw_type,
+ size_t num_inputs,
+ size_t num_outputs
+){
+ pred_type pred = 0;
+
+ switch(otw_type.byteorder){
+ \#ifdef BOOST_BIG_ENDIAN
+ case otw_type_t::BO_BIG_ENDIAN: pred |= $ph.nswap_p; break;
+ case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.bswap_p; break;
+ \#else
+ case otw_type_t::BO_BIG_ENDIAN: pred |= $ph.bswap_p; break;
+ case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.nswap_p; break;
+ \#endif
+ case otw_type_t::BO_NATIVE: pred |= $ph.nswap_p; break;
+ default: throw std::runtime_error("unhandled otw byteorder type");
+ }
+
+ switch(otw_type.get_sample_size()){
+ case sizeof(boost::uint32_t): pred |= $ph.item32_p; break;
+ default: throw std::runtime_error("unhandled otw sample size");
+ }
+
+ switch(io_type.tid){
+ case io_type_t::COMPLEX_FLOAT32: pred |= $ph.fc32_p; break;
+ case io_type_t::COMPLEX_INT16: pred |= $ph.sc16_p; break;
+ default: throw std::runtime_error("unhandled io type id");
+ }
+
+ switch(num_inputs*num_outputs){ //FIXME treated as one value
+ case 1: pred |= $ph.chan1_p; break;
+ case 2: pred |= $ph.chan2_p; break;
+ case 3: pred |= $ph.chan3_p; break;
+ case 4: pred |= $ph.chan4_p; break;
+ default: throw std::runtime_error("unhandled number of channels");
+ }
+
+ return pred;
+}
+"""
+
+def parse_tmpl(_tmpl_text, **kwargs):
+ from Cheetah.Template import Template
+ return str(Template(_tmpl_text, kwargs))
+
+class ph:
+ bswap_p = 0b00001
+ nswap_p = 0b00000
+ item32_p = 0b00000
+ sc16_p = 0b00010
+ fc32_p = 0b00000
+ chan1_p = 0b00000
+ chan2_p = 0b00100
+ chan3_p = 0b01000
+ chan4_p = 0b01100
+
+ nbits = 4 #see above
+
+ @staticmethod
+ def has(pred, mask, flag): return (pred & mask) == flag
+
+ @staticmethod
+ def get_swap_type(pred):
+ mask = 0b1
+ if ph.has(pred, mask, ph.bswap_p): return 'bswap'
+ if ph.has(pred, mask, ph.nswap_p): return 'nswap'
+ raise NotImplementedError
+
+ @staticmethod
+ def get_dev_type(pred):
+ mask = 0b0
+ if ph.has(pred, mask, ph.item32_p): return 'item32'
+ raise NotImplementedError
+
+ @staticmethod
+ def get_host_type(pred):
+ mask = 0b10
+ if ph.has(pred, mask, ph.sc16_p): return 'sc16'
+ if ph.has(pred, mask, ph.fc32_p): return 'fc32'
+ raise NotImplementedError
+
+ @staticmethod
+ def get_num_chans(pred):
+ mask = 0b1100
+ if ph.has(pred, mask, ph.chan1_p): return 1
+ if ph.has(pred, mask, ph.chan2_p): return 2
+ if ph.has(pred, mask, ph.chan3_p): return 3
+ if ph.has(pred, mask, ph.chan4_p): return 4
+ raise NotImplementedError
+
+if __name__ == '__main__':
+ import sys, os
+ file = os.path.basename(__file__)
+ open(sys.argv[1], 'w').write(parse_tmpl(TMPL_TEXT, file=file, ph=ph))