diff options
-rw-r--r-- | host/include/uhd/CMakeLists.txt | 3 | ||||
-rw-r--r-- | host/include/uhd/convert.hpp | 96 | ||||
-rw-r--r-- | host/lib/CMakeLists.txt | 3 | ||||
-rw-r--r-- | host/lib/convert/CMakeLists.txt | 66 | ||||
-rw-r--r-- | host/lib/convert/convert.cpp | 117 | ||||
-rw-r--r-- | host/lib/convert/convert_common.hpp | 90 | ||||
-rw-r--r-- | host/lib/convert/convert_general.cpp | 63 | ||||
-rw-r--r-- | host/lib/convert/convert_with_neon.cpp | 62 | ||||
-rw-r--r-- | host/lib/convert/convert_with_sse2.cpp | 148 | ||||
-rw-r--r-- | host/lib/convert/gen_convert_general.py | 93 | ||||
-rw-r--r-- | host/lib/convert/gen_convert_impl.py | 186 | ||||
-rw-r--r-- | host/lib/transport/vrt_packet_handler.hpp | 12 | ||||
-rw-r--r-- | host/test/CMakeLists.txt | 2 | ||||
-rw-r--r-- | host/test/convert_test.cpp (renamed from host/test/convert_types_test.cpp) | 85 |
14 files changed, 970 insertions, 56 deletions
diff --git a/host/include/uhd/CMakeLists.txt b/host/include/uhd/CMakeLists.txt index ad528c9fb..fee1270e9 100644 --- a/host/include/uhd/CMakeLists.txt +++ b/host/include/uhd/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright 2010 Ettus Research LLC +# Copyright 2010-2011 Ettus Research LLC # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -23,6 +23,7 @@ ADD_SUBDIRECTORY(utils) INSTALL(FILES config.hpp + convert.hpp device.hpp device.ipp version.hpp diff --git a/host/include/uhd/convert.hpp b/host/include/uhd/convert.hpp new file mode 100644 index 000000000..488cba98e --- /dev/null +++ b/host/include/uhd/convert.hpp @@ -0,0 +1,96 @@ +// +// Copyright 2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// + +#ifndef INCLUDED_UHD_CONVERT_HPP +#define INCLUDED_UHD_CONVERT_HPP + +#include <uhd/config.hpp> +#include <uhd/types/io_type.hpp> +#include <uhd/types/otw_type.hpp> +#include <boost/function.hpp> +#include <string> +#include <vector> + +namespace uhd{ namespace convert{ + + typedef std::vector<void *> output_type; + typedef std::vector<const void *> input_type; + typedef boost::function<void(input_type&, output_type&, size_t)> function_type; + + /*! + * Describe the priority of a converter function. + * A higher priority function takes precedence. + * The general case function are the lowest. + * Next comes the liborc implementations. + * Custom intrinsics implementations are highest. + */ + enum priority_type{ + PRIORITY_GENERAL = 0, + PRIORITY_LIBORC = 1, + PRIORITY_CUSTOM = 2, + PRIORITY_EMPTY = -1, + }; + + /*! + * Register a converter function that converts cpu type to/from otw type. + * \param markup representing the signature + * \param fcn a pointer to the converter + * \param prio the function priority + */ + UHD_API void register_converter( + const std::string &markup, + function_type fcn, + priority_type prio + ); + + /*! + * Convert IO samples to OWT samples: + * + * \param io_type the type of the input samples + * \param otw_type the type of the output samples + * \param input_buffs input buffers to read samples + * \param output_buffs output buffers to write samples + * \param nsamps_per_io_buff samples per IO buffer + */ + UHD_API void io_type_to_otw_type( + const io_type_t &io_type, + const otw_type_t &otw_type, + input_type &input_buffs, + output_type &output_buffs, + size_t nsamps_per_io_buff + ); + + /*! + * Convert OTW samples to IO samples: + * + * \param io_type the type of the output samples + * \param otw_type the type of the input samples + * \param input_buffs input buffers to read samples + * \param output_buffs output buffers to write samples + * \param nsamps_per_io_buff samples per IO buffer + */ + UHD_API void otw_type_to_io_type( + const io_type_t &io_type, + const otw_type_t &otw_type, + input_type &input_buffs, + output_type &output_buffs, + size_t nsamps_per_io_buff + ); + +}} //namespace + +#endif /* INCLUDED_UHD_CONVERT_HPP */ diff --git a/host/lib/CMakeLists.txt b/host/lib/CMakeLists.txt index 498841561..9ab121df5 100644 --- a/host/lib/CMakeLists.txt +++ b/host/lib/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright 2010 Ettus Research LLC +# Copyright 2010-2011 Ettus Research LLC # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -89,6 +89,7 @@ ENDMACRO(INCLUDE_SUBDIRECTORY) # Include subdirectories (different than add) ######################################################################## INCLUDE_SUBDIRECTORY(ic_reg_maps) +INCLUDE_SUBDIRECTORY(convert) INCLUDE_SUBDIRECTORY(transport) INCLUDE_SUBDIRECTORY(usrp) INCLUDE_SUBDIRECTORY(utils) diff --git a/host/lib/convert/CMakeLists.txt b/host/lib/convert/CMakeLists.txt new file mode 100644 index 000000000..9324a94b0 --- /dev/null +++ b/host/lib/convert/CMakeLists.txt @@ -0,0 +1,66 @@ +# +# Copyright 2011 Ettus Research LLC +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +######################################################################## +# This file included, use CMake directory variables +######################################################################## +INCLUDE(CheckIncludeFileCXX) +MESSAGE(STATUS "") + +######################################################################## +# Check for SIMD headers +######################################################################## +CHECK_INCLUDE_FILE_CXX(emmintrin.h HAVE_EMMINTRIN_H) +IF(HAVE_EMMINTRIN_H) + LIBUHD_APPEND_SOURCES( + ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_sse2.cpp + ) +ENDIF(HAVE_EMMINTRIN_H) + +CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_ARM_NEON_H) +IF(HAVE_ARM_NEON_H) + LIBUHD_APPEND_SOURCES( + ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_neon.cpp + ) +ENDIF(HAVE_ARM_NEON_H) + +######################################################################## +# Convert types generation +######################################################################## +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) + +LIBUHD_PYTHON_GEN_SOURCE( + ${CMAKE_CURRENT_SOURCE_DIR}/gen_convert_impl.py + ${CMAKE_CURRENT_BINARY_DIR}/convert_impl.hpp +) + +INCLUDE(AddFileDependencies) +ADD_FILE_DEPENDENCIES( + ${CMAKE_CURRENT_SOURCE_DIR}/convert.cpp + ${CMAKE_CURRENT_BINARY_DIR}/convert_impl.hpp +) + +LIBUHD_PYTHON_GEN_SOURCE( + ${CMAKE_CURRENT_SOURCE_DIR}/gen_convert_general.py + ${CMAKE_CURRENT_BINARY_DIR}/convert_general.cpp +) + +LIBUHD_APPEND_SOURCES( + ${CMAKE_CURRENT_SOURCE_DIR}/convert.cpp + ${CMAKE_CURRENT_BINARY_DIR}/convert_general.cpp +) diff --git a/host/lib/convert/convert.cpp b/host/lib/convert/convert.cpp new file mode 100644 index 000000000..f635a1040 --- /dev/null +++ b/host/lib/convert/convert.cpp @@ -0,0 +1,117 @@ +// +// Copyright 2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// + +#include <uhd/convert.hpp> +#include <uhd/utils/static.hpp> +#include <uhd/utils/exception.hpp> +#include <iostream> + +using namespace uhd; + +#include "convert_impl.hpp" + +static const bool debug = false; + +/*********************************************************************** + * Define types for the function tables + **********************************************************************/ +struct fcn_table_entry_type{ + convert::priority_type prio; + convert::function_type fcn; + fcn_table_entry_type(void) + : prio(convert::PRIORITY_EMPTY), fcn(NULL){ + /* NOP */ + } +}; +typedef std::vector<fcn_table_entry_type> fcn_table_type; + +/*********************************************************************** + * Setup the table registry + **********************************************************************/ +UHD_SINGLETON_FCN(fcn_table_type, get_cpu_to_otw_table); +UHD_SINGLETON_FCN(fcn_table_type, get_otw_to_cpu_table); + +fcn_table_type &get_table(dir_type dir){ + switch(dir){ + case DIR_OTW_TO_CPU: return get_otw_to_cpu_table(); + case DIR_CPU_TO_OTW: return get_cpu_to_otw_table(); + } + UHD_THROW_INVALID_CODE_PATH(); +} + +/*********************************************************************** + * The registry functions + **********************************************************************/ +void uhd::convert::register_converter( + const std::string &markup, + function_type fcn, + priority_type prio +){ + //extract the predicate and direction from the markup + dir_type dir; + pred_type pred = make_pred(markup, dir); + + //get a reference to the function table + fcn_table_type &table = get_table(dir); + + //resize the table so that its at least pred+1 + if (table.size() <= pred) table.resize(pred+1); + + //register the function if higher priority + if (table[pred].prio < prio){ + table[pred].fcn = fcn; + table[pred].prio = prio; + } + + //----------------------------------------------------------------// + if (debug) std::cout << "register_converter: " << markup << std::endl + << " prio: " << prio << std::endl + << " pred: " << pred << std::endl + << " dir: " << dir << std::endl + << std::endl + ; + //----------------------------------------------------------------// +} + +/*********************************************************************** + * The converter functions + **********************************************************************/ +void uhd::convert::io_type_to_otw_type( + const io_type_t &io_type, + const otw_type_t &otw_type, + input_type &input_buffs, + output_type &output_buffs, + size_t nsamps_per_io_buff +){ + pred_type pred = make_pred(io_type, otw_type, input_buffs.size(), output_buffs.size()); + fcn_table_type table = get_cpu_to_otw_table(); + function_type fcn = table.at(pred).fcn; + fcn(input_buffs, output_buffs, nsamps_per_io_buff); +} + +void uhd::convert::otw_type_to_io_type( + const io_type_t &io_type, + const otw_type_t &otw_type, + input_type &input_buffs, + output_type &output_buffs, + size_t nsamps_per_io_buff +){ + pred_type pred = make_pred(io_type, otw_type, input_buffs.size(), output_buffs.size()); + fcn_table_type table = get_otw_to_cpu_table(); + function_type fcn = table.at(pred).fcn; + fcn(input_buffs, output_buffs, nsamps_per_io_buff); +} diff --git a/host/lib/convert/convert_common.hpp b/host/lib/convert/convert_common.hpp new file mode 100644 index 000000000..1a653a56f --- /dev/null +++ b/host/lib/convert/convert_common.hpp @@ -0,0 +1,90 @@ +// +// Copyright 2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// + +#ifndef INCLUDED_LIBUHD_CONVERT_COMMON_HPP +#define INCLUDED_LIBUHD_CONVERT_COMMON_HPP + +#include <uhd/convert.hpp> +#include <uhd/utils/static.hpp> +#include <boost/cstdint.hpp> +#include <complex> + +#define DECLARE_CONVERTER(fcn, prio) \ + static void fcn( \ + uhd::convert::input_type &inputs, \ + uhd::convert::output_type &outputs, \ + size_t nsamps \ + ); \ + UHD_STATIC_BLOCK(register_##fcn##_##prio){ \ + uhd::convert::register_converter(#fcn, fcn, prio); \ + } \ + static void fcn( \ + uhd::convert::input_type &inputs, \ + uhd::convert::output_type &outputs, \ + size_t nsamps \ + ) + +/*********************************************************************** + * Typedefs + **********************************************************************/ +typedef std::complex<float> fc32_t; +typedef std::complex<boost::int16_t> sc16_t; +typedef boost::uint32_t item32_t; + +/*********************************************************************** + * Convert complex short buffer to items32 + **********************************************************************/ +static UHD_INLINE item32_t sc16_to_item32(sc16_t num){ + boost::uint16_t real = num.real(); + boost::uint16_t imag = num.imag(); + return (item32_t(real) << 16) | (item32_t(imag) << 0); +} + +/*********************************************************************** + * Convert items32 buffer to complex short + **********************************************************************/ +static UHD_INLINE sc16_t item32_to_sc16(item32_t item){ + return sc16_t( + boost::int16_t(item >> 16), + boost::int16_t(item >> 0) + ); +} + +/*********************************************************************** + * Convert complex float buffer to items32 (no swap) + **********************************************************************/ +static const float shorts_per_float = float(32767); + +static UHD_INLINE item32_t fc32_to_item32(fc32_t num){ + boost::uint16_t real = boost::int16_t(num.real()*shorts_per_float); + boost::uint16_t imag = boost::int16_t(num.imag()*shorts_per_float); + return (item32_t(real) << 16) | (item32_t(imag) << 0); +} + +/*********************************************************************** + * Convert items32 buffer to complex float + **********************************************************************/ +static const float floats_per_short = float(1.0/shorts_per_float); + +static UHD_INLINE fc32_t item32_to_fc32(item32_t item){ + return fc32_t( + float(boost::int16_t(item >> 16)*floats_per_short), + float(boost::int16_t(item >> 0)*floats_per_short) + ); +} + +#endif /* INCLUDED_LIBUHD_CONVERT_COMMON_HPP */ diff --git a/host/lib/convert/convert_general.cpp b/host/lib/convert/convert_general.cpp new file mode 100644 index 000000000..5e52acea2 --- /dev/null +++ b/host/lib/convert/convert_general.cpp @@ -0,0 +1,63 @@ +// +// Copyright 2010 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// + +#include "convert_common.hpp" +#include <uhd/utils/byteswap.hpp> + +using namespace uhd::convert; + +/*********************************************************************** + * Convert complex short buffer to items32 + **********************************************************************/ +DECLARE_CONVERTER(convert_sc16_1_to_item32_1_nswap, PRIORITY_GENERAL){ + const sc16_t *input = reinterpret_cast<const sc16_t *>(inputs[0]); + item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + + for (size_t i = 0; i < nsamps; i++){ + output[i] = sc16_to_item32(input[i]); + } +} + +DECLARE_CONVERTER(convert_sc16_1_to_item32_1_bswap, PRIORITY_GENERAL){ + const sc16_t *input = reinterpret_cast<const sc16_t *>(inputs[0]); + item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + + for (size_t i = 0; i < nsamps; i++){ + output[i] = uhd::byteswap(sc16_to_item32(input[i])); + } +} + +/*********************************************************************** + * Convert items32 buffer to complex short + **********************************************************************/ +DECLARE_CONVERTER(convert_item32_1_to_sc16_1_nswap, PRIORITY_GENERAL){ + const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); + sc16_t *output = reinterpret_cast<sc16_t *>(outputs[0]); + + for (size_t i = 0; i < nsamps; i++){ + output[i] = item32_to_sc16(input[i]); + } +} + +DECLARE_CONVERTER(convert_item32_1_to_sc16_1_bswap, PRIORITY_GENERAL){ + const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); + sc16_t *output = reinterpret_cast<sc16_t *>(outputs[0]); + + for (size_t i = 0; i < nsamps; i++){ + output[i] = item32_to_sc16(uhd::byteswap(input[i])); + } +} diff --git a/host/lib/convert/convert_with_neon.cpp b/host/lib/convert/convert_with_neon.cpp new file mode 100644 index 000000000..1ed841125 --- /dev/null +++ b/host/lib/convert/convert_with_neon.cpp @@ -0,0 +1,62 @@ +// +// Copyright 2010-2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// + +#include "convert_common.hpp" +#include <uhd/utils/byteswap.hpp> +#include <arm_neon.h> + +using namespace uhd::convert; + +DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){ + const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); + item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + + size_t i; + + float32x4_t Q0 = vdupq_n_f32(shorts_per_float); + for (i=0; i < (nsamps & ~0x03); i+=2) { + float32x4_t Q1 = vld1q_f32(reinterpret_cast<const float *>(&input[i])); + float32x4_t Q2 = vmulq_f32(Q1, Q0); + int32x4_t Q3 = vcvtq_s32_f32(Q2); + int16x4_t D8 = vmovn_s32(Q3); + int16x4_t D9 = vrev32_s16(D8); + vst1_s16((reinterpret_cast<int16_t *>(&output[i])), D9); + } + + for (; i < nsamps; i++) + output[i] = fc32_to_item32(input[i]); +} + +DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){ + const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); + fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); + + size_t i; + + float32x4_t Q1 = vdupq_n_f32(floats_per_short); + for (i=0; i < (nsamps & ~0x03); i+=2) { + int16x4_t D0 = vld1_s16(reinterpret_cast<const int16_t *>(&input[i])); + int16x4_t D1 = vrev32_s16(D0); + int32x4_t Q2 = vmovl_s16(D1); + float32x4_t Q3 = vcvtq_f32_s32(Q2); + float32x4_t Q4 = vmulq_f32(Q3, Q1); + vst1q_f32((reinterpret_cast<float *>(&output[i])), Q4); + } + + for (; i < nsamps; i++) + output[i] = item32_to_fc32(input[i]); +} diff --git a/host/lib/convert/convert_with_sse2.cpp b/host/lib/convert/convert_with_sse2.cpp new file mode 100644 index 000000000..8d5a8a6a5 --- /dev/null +++ b/host/lib/convert/convert_with_sse2.cpp @@ -0,0 +1,148 @@ +// +// Copyright 2010-2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// + +#include "convert_common.hpp" +#include <uhd/utils/byteswap.hpp> +#include <emmintrin.h> + +using namespace uhd::convert; + +DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){ + const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); + item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + + __m128 scalar = _mm_set_ps1(shorts_per_float); + + //convert blocks of samples with intrinsics + size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ + //load from input + __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); + __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); + + //convert and scale + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); + + //pack + swap 16-bit pairs + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); + + //store to output + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = fc32_to_item32(input[i]); + } +} + +DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){ + const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); + item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + + __m128 scalar = _mm_set_ps1(shorts_per_float); + + //convert blocks of samples with intrinsics + size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ + //load from input + __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); + __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); + + //convert and scale + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); + + //pack + byteswap -> byteswap 16 bit words + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); + + //store to output + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = uhd::byteswap(fc32_to_item32(input[i])); + } +} + +DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){ + const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); + fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); + + __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16)); + __m128i zeroi = _mm_setzero_si128(); + + //convert blocks of samples with intrinsics + size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ + //load from input + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); + + //unpack + swap 16-bit pairs + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); + + //convert and scale + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); + + //store to output + _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); + _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = item32_to_fc32(input[i]); + } +} + +DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){ + const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); + fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); + + __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16)); + __m128i zeroi = _mm_setzero_si128(); + + //convert blocks of samples with intrinsics + size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ + //load from input + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); + + //byteswap + unpack -> byteswap 16 bit words + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); + + //convert and scale + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); + + //store to output + _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); + _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = item32_to_fc32(uhd::byteswap(input[i])); + } +} diff --git a/host/lib/convert/gen_convert_general.py b/host/lib/convert/gen_convert_general.py new file mode 100644 index 000000000..47c4cd7d0 --- /dev/null +++ b/host/lib/convert/gen_convert_general.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +# +# Copyright 2011 Ettus Research LLC +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +TMPL_HEADER = """ +#import time +/*********************************************************************** + * This file was generated by $file on $time.strftime("%c") + **********************************************************************/ + +\#include "convert_common.hpp" +\#include <uhd/utils/byteswap.hpp> + +using namespace uhd::convert; +""" + +TMPL_CONV_TO_FROM_ITEM32_1 = """ +DECLARE_CONVERTER(convert_$(cpu_type)_1_to_item32_1_$(swap), PRIORITY_GENERAL){ + const $(cpu_type)_t *input = reinterpret_cast<const $(cpu_type)_t *>(inputs[0]); + item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + + for (size_t i = 0; i < nsamps; i++){ + output[i] = $(swap_fcn)($(cpu_type)_to_item32(input[i])); + } +} + +DECLARE_CONVERTER(convert_item32_1_to_$(cpu_type)_1_$(swap), PRIORITY_GENERAL){ + const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); + $(cpu_type)_t *output = reinterpret_cast<$(cpu_type)_t *>(outputs[0]); + + for (size_t i = 0; i < nsamps; i++){ + output[i] = item32_to_$(cpu_type)($(swap_fcn)(input[i])); + } +} +""" +TMPL_CONV_TO_FROM_ITEM32_X = """ +DECLARE_CONVERTER(convert_$(cpu_type)_$(width)_to_item32_1_$(swap), PRIORITY_GENERAL){ + #for $w in range($width) + const $(cpu_type)_t *input$(w) = reinterpret_cast<const $(cpu_type)_t *>(inputs[$(w)]); + #end for + item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + + for (size_t i = 0, j = 0; i < nsamps; i++){ + #for $w in range($width) + output[j++] = $(swap_fcn)($(cpu_type)_to_item32(input$(w)[i])); + #end for + } +} + +DECLARE_CONVERTER(convert_item32_1_to_$(cpu_type)_$(width)_$(swap), PRIORITY_GENERAL){ + const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); + #for $w in range($width) + $(cpu_type)_t *output$(w) = reinterpret_cast<$(cpu_type)_t *>(outputs[$(w)]); + #end for + + for (size_t i = 0, j = 0; i < nsamps; i++){ + #for $w in range($width) + output$(w)[i] = item32_to_$(cpu_type)($(swap_fcn)(input[j++])); + #end for + } +} +""" + +def parse_tmpl(_tmpl_text, **kwargs): + from Cheetah.Template import Template + return str(Template(_tmpl_text, kwargs)) + +if __name__ == '__main__': + import sys, os + file = os.path.basename(__file__) + output = parse_tmpl(TMPL_HEADER, file=file) + for width in 1, 2, 3, 4: + for swap, swap_fcn in (('nswap', ''), ('bswap', 'uhd::byteswap')): + for cpu_type in 'fc32', 'sc16': + output += parse_tmpl( + TMPL_CONV_TO_FROM_ITEM32_1 if width == 1 else TMPL_CONV_TO_FROM_ITEM32_X, + width=width, swap=swap, swap_fcn=swap_fcn, cpu_type=cpu_type + ) + open(sys.argv[1], 'w').write(output) diff --git a/host/lib/convert/gen_convert_impl.py b/host/lib/convert/gen_convert_impl.py new file mode 100644 index 000000000..71095ab97 --- /dev/null +++ b/host/lib/convert/gen_convert_impl.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python +# +# Copyright 2010-2011 Ettus Research LLC +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +TMPL_TEXT = """ +#import time +/*********************************************************************** + * This file was generated by $file on $time.strftime("%c") + **********************************************************************/ +typedef size_t pred_type; + +\#include <uhd/utils/algorithm.hpp> +\#include <boost/lexical_cast.hpp> +\#include <boost/detail/endian.hpp> +\#include <stdexcept> + +enum dir_type{ + DIR_OTW_TO_CPU = 0, + DIR_CPU_TO_OTW = 1 +}; + +pred_type make_pred(const std::string &markup, dir_type &dir){ + pred_type pred = 0; + + try{ + std::vector<std::string> tokens = std::split_string(markup, "_"); + //token 0 is <convert> + std::string inp_type = tokens.at(1); + std::string num_inps = tokens.at(2); + //token 3 is <to> + std::string out_type = tokens.at(4); + std::string num_outs = tokens.at(5); + std::string swap_type = tokens.at(6); + + std::string cpu_type, otw_type; + if (inp_type.find("item") == std::string::npos){ + cpu_type = inp_type; + otw_type = out_type; + dir = DIR_CPU_TO_OTW; + } + else{ + cpu_type = out_type; + otw_type = inp_type; + dir = DIR_OTW_TO_CPU; + } + + if (cpu_type == "fc32") pred |= $ph.fc32_p; + else if (cpu_type == "sc16") pred |= $ph.sc16_p; + else throw std::runtime_error("unhandled io type " + cpu_type); + + if (otw_type == "item32") pred |= $ph.item32_p; + else throw std::runtime_error("unhandled otw type " + otw_type); + + int num_inputs = boost::lexical_cast<int>(num_inps); + int num_outputs = boost::lexical_cast<int>(num_outs); + + switch(num_inputs*num_outputs){ //FIXME treated as one value + case 1: pred |= $ph.chan1_p; break; + case 2: pred |= $ph.chan2_p; break; + case 3: pred |= $ph.chan3_p; break; + case 4: pred |= $ph.chan4_p; break; + default: throw std::runtime_error("unhandled number of channels"); + } + + if (swap_type == "bswap") pred |= $ph.bswap_p; + else if (swap_type == "nswap") pred |= $ph.nswap_p; + else throw std::runtime_error("unhandled swap type"); + + } + catch(...){ + throw std::runtime_error("convert::make_pred: could not parse markup: " + markup); + } + + return pred; +} + +UHD_INLINE pred_type make_pred( + const io_type_t &io_type, + const otw_type_t &otw_type, + size_t num_inputs, + size_t num_outputs +){ + pred_type pred = 0; + + switch(otw_type.byteorder){ + \#ifdef BOOST_BIG_ENDIAN + case otw_type_t::BO_BIG_ENDIAN: pred |= $ph.nswap_p; break; + case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.bswap_p; break; + \#else + case otw_type_t::BO_BIG_ENDIAN: pred |= $ph.bswap_p; break; + case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.nswap_p; break; + \#endif + case otw_type_t::BO_NATIVE: pred |= $ph.nswap_p; break; + default: throw std::runtime_error("unhandled otw byteorder type"); + } + + switch(otw_type.get_sample_size()){ + case sizeof(boost::uint32_t): pred |= $ph.item32_p; break; + default: throw std::runtime_error("unhandled otw sample size"); + } + + switch(io_type.tid){ + case io_type_t::COMPLEX_FLOAT32: pred |= $ph.fc32_p; break; + case io_type_t::COMPLEX_INT16: pred |= $ph.sc16_p; break; + default: throw std::runtime_error("unhandled io type id"); + } + + switch(num_inputs*num_outputs){ //FIXME treated as one value + case 1: pred |= $ph.chan1_p; break; + case 2: pred |= $ph.chan2_p; break; + case 3: pred |= $ph.chan3_p; break; + case 4: pred |= $ph.chan4_p; break; + default: throw std::runtime_error("unhandled number of channels"); + } + + return pred; +} +""" + +def parse_tmpl(_tmpl_text, **kwargs): + from Cheetah.Template import Template + return str(Template(_tmpl_text, kwargs)) + +class ph: + bswap_p = 0b00001 + nswap_p = 0b00000 + item32_p = 0b00000 + sc16_p = 0b00010 + fc32_p = 0b00000 + chan1_p = 0b00000 + chan2_p = 0b00100 + chan3_p = 0b01000 + chan4_p = 0b01100 + + nbits = 4 #see above + + @staticmethod + def has(pred, mask, flag): return (pred & mask) == flag + + @staticmethod + def get_swap_type(pred): + mask = 0b1 + if ph.has(pred, mask, ph.bswap_p): return 'bswap' + if ph.has(pred, mask, ph.nswap_p): return 'nswap' + raise NotImplementedError + + @staticmethod + def get_dev_type(pred): + mask = 0b0 + if ph.has(pred, mask, ph.item32_p): return 'item32' + raise NotImplementedError + + @staticmethod + def get_host_type(pred): + mask = 0b10 + if ph.has(pred, mask, ph.sc16_p): return 'sc16' + if ph.has(pred, mask, ph.fc32_p): return 'fc32' + raise NotImplementedError + + @staticmethod + def get_num_chans(pred): + mask = 0b1100 + if ph.has(pred, mask, ph.chan1_p): return 1 + if ph.has(pred, mask, ph.chan2_p): return 2 + if ph.has(pred, mask, ph.chan3_p): return 3 + if ph.has(pred, mask, ph.chan4_p): return 4 + raise NotImplementedError + +if __name__ == '__main__': + import sys, os + file = os.path.basename(__file__) + open(sys.argv[1], 'w').write(parse_tmpl(TMPL_TEXT, file=file, ph=ph)) diff --git a/host/lib/transport/vrt_packet_handler.hpp b/host/lib/transport/vrt_packet_handler.hpp index 7f8d84308..e4c5539d1 100644 --- a/host/lib/transport/vrt_packet_handler.hpp +++ b/host/lib/transport/vrt_packet_handler.hpp @@ -26,7 +26,7 @@ #include <uhd/types/otw_type.hpp> #include <uhd/types/metadata.hpp> #include <uhd/transport/vrt_if_packet.hpp> -#include <uhd/transport/convert_types.hpp> +#include <uhd/convert.hpp> #include <uhd/transport/zero_copy.hpp> #include <boost/function.hpp> #include <stdexcept> @@ -199,8 +199,9 @@ template <typename T> UHD_INLINE T get_context_code( } //copy-convert the samples from the recv buffer - uhd::transport::convert_otw_type_to_io_type( - state.copy_buffs[i], otw_type, io_buffs, io_type, nsamps_to_copy_per_io_buff + uhd::convert::input_type otw_buffs(1, state.copy_buffs[i]); + uhd::convert::otw_type_to_io_type( + io_type, otw_type, otw_buffs, io_buffs, nsamps_to_copy_per_io_buff ); //update the rx copy buffer to reflect the bytes copied @@ -338,8 +339,9 @@ template <typename T> UHD_INLINE T get_context_code( otw_mem += if_packet_info.num_header_words32; //copy-convert the samples into the send buffer - uhd::transport::convert_io_type_to_otw_type( - io_buffs, io_type, otw_mem, otw_type, num_samps + uhd::convert::output_type otw_buffs(1, otw_mem); + uhd::convert::io_type_to_otw_type( + io_type, otw_type, io_buffs, otw_buffs, num_samps ); //commit the samples to the zero-copy interface diff --git a/host/test/CMakeLists.txt b/host/test/CMakeLists.txt index bdbde4b2c..581799d98 100644 --- a/host/test/CMakeLists.txt +++ b/host/test/CMakeLists.txt @@ -22,7 +22,7 @@ SET(test_sources addr_test.cpp buffer_test.cpp byteswap_test.cpp - convert_types_test.cpp + convert_test.cpp dict_test.cpp error_test.cpp gain_group_test.cpp diff --git a/host/test/convert_types_test.cpp b/host/test/convert_test.cpp index 378e184de..de0245c1d 100644 --- a/host/test/convert_types_test.cpp +++ b/host/test/convert_test.cpp @@ -15,14 +15,14 @@ // along with this program. If not, see <http://www.gnu.org/licenses/>. // -#include <uhd/transport/convert_types.hpp> +#include <uhd/convert.hpp> #include <boost/test/unit_test.hpp> #include <boost/foreach.hpp> #include <boost/cstdint.hpp> -#include <boost/asio/buffer.hpp> #include <complex> #include <vector> #include <cstdlib> +#include <iostream> using namespace uhd; @@ -30,14 +30,6 @@ using namespace uhd; typedef std::complex<boost::int16_t> sc16_t; typedef std::complex<float> fc32_t; -//extract pointer to POD since using &vector.front() throws in MSVC -template <typename T> void * pod2ptr(T &pod){ - return boost::asio::buffer_cast<void *>(boost::asio::buffer(pod)); -} -template <typename T> const void * pod2ptr(const T &pod){ - return boost::asio::buffer_cast<const void *>(boost::asio::buffer(pod)); -} - #define MY_CHECK_CLOSE(a, b, f) if ((std::abs(a) > (f) and std::abs(b) > (f))) \ BOOST_CHECK_CLOSE_FRACTION(a, b, f) @@ -54,20 +46,19 @@ template <typename Range> static void loopback( Range &output ){ //item32 is largest device type - std::vector<boost::uint32_t> dev(nsamps); + std::vector<boost::uint32_t> interm(nsamps); - //convert to dev type - transport::convert_io_type_to_otw_type( - pod2ptr(input), io_type, - pod2ptr(dev), otw_type, - nsamps + convert::input_type input0(1, &input[0]), input1(1, &interm[0]); + convert::output_type output0(1, &interm[0]), output1(1, &output[0]); + + //convert to intermediate type + convert::io_type_to_otw_type( + io_type, otw_type, input0, output0, nsamps ); //convert back to host type - transport::convert_otw_type_to_io_type( - pod2ptr(dev), otw_type, - pod2ptr(output), io_type, - nsamps + convert::otw_type_to_io_type( + io_type, otw_type, input1, output1, nsamps ); } @@ -98,7 +89,7 @@ BOOST_AUTO_TEST_CASE(test_convert_types_be_sc16){ otw_type.width = 16; //try various lengths to test edge cases - for (size_t nsamps = 0; nsamps < 16; nsamps++){ + for (size_t nsamps = 1; nsamps < 16; nsamps++){ test_convert_types_sc16(nsamps, io_type, otw_type); } } @@ -110,7 +101,7 @@ BOOST_AUTO_TEST_CASE(test_convert_types_le_sc16){ otw_type.width = 16; //try various lengths to test edge cases - for (size_t nsamps = 0; nsamps < 16; nsamps++){ + for (size_t nsamps = 1; nsamps < 16; nsamps++){ test_convert_types_sc16(nsamps, io_type, otw_type); } } @@ -145,7 +136,7 @@ BOOST_AUTO_TEST_CASE(test_convert_types_be_fc32){ otw_type.width = 16; //try various lengths to test edge cases - for (size_t nsamps = 0; nsamps < 16; nsamps++){ + for (size_t nsamps = 1; nsamps < 16; nsamps++){ test_convert_types_fc32(nsamps, io_type, otw_type); } } @@ -157,7 +148,7 @@ BOOST_AUTO_TEST_CASE(test_convert_types_le_fc32){ otw_type.width = 16; //try various lengths to test edge cases - for (size_t nsamps = 0; nsamps < 16; nsamps++){ + for (size_t nsamps = 1; nsamps < 16; nsamps++){ test_convert_types_fc32(nsamps, io_type, otw_type); } } @@ -179,21 +170,20 @@ BOOST_AUTO_TEST_CASE(test_convert_types_fc32_to_sc16){ (std::rand()/float(RAND_MAX/2)) - 1, (std::rand()/float(RAND_MAX/2)) - 1 ); + std::vector<boost::uint32_t> interm(nsamps); + std::vector<sc16_t> output(nsamps); - //convert float to dev - std::vector<boost::uint32_t> tmp(nsamps); - transport::convert_io_type_to_otw_type( - pod2ptr(input), io_type_in, - pod2ptr(tmp), otw_type, - nsamps + convert::input_type input0(1, &input[0]), input1(1, &interm[0]); + convert::output_type output0(1, &interm[0]), output1(1, &output[0]); + + //convert float to intermediate + convert::io_type_to_otw_type( + io_type_in, otw_type, input0, output0, nsamps ); - //convert dev to short - std::vector<sc16_t> output(nsamps); - transport::convert_otw_type_to_io_type( - pod2ptr(tmp), otw_type, - pod2ptr(output), io_type_out, - nsamps + //convert intermediate to short + convert::otw_type_to_io_type( + io_type_out, otw_type, input1, output1, nsamps ); //test that the inputs and outputs match @@ -220,21 +210,20 @@ BOOST_AUTO_TEST_CASE(test_convert_types_sc16_to_fc32){ std::rand()-(RAND_MAX/2), std::rand()-(RAND_MAX/2) ); + std::vector<boost::uint32_t> interm(nsamps); + std::vector<fc32_t> output(nsamps); - //convert short to dev - std::vector<boost::uint32_t> tmp(nsamps); - transport::convert_io_type_to_otw_type( - pod2ptr(input), io_type_in, - pod2ptr(tmp), otw_type, - nsamps + convert::input_type input0(1, &input[0]), input1(1, &interm[0]); + convert::output_type output0(1, &interm[0]), output1(1, &output[0]); + + //convert short to intermediate + convert::io_type_to_otw_type( + io_type_in, otw_type, input0, output0, nsamps ); - //convert dev to float - std::vector<fc32_t> output(nsamps); - transport::convert_otw_type_to_io_type( - pod2ptr(tmp), otw_type, - pod2ptr(output), io_type_out, - nsamps + //convert intermediate to float + convert::otw_type_to_io_type( + io_type_out, otw_type, input1, output1, nsamps ); //test that the inputs and outputs match |