diff options
Diffstat (limited to 'host/lib')
| -rw-r--r-- | host/lib/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | host/lib/convert/CMakeLists.txt | 66 | ||||
| -rw-r--r-- | host/lib/convert/convert.cpp | 117 | ||||
| -rw-r--r-- | host/lib/convert/convert_common.hpp | 90 | ||||
| -rw-r--r-- | host/lib/convert/convert_general.cpp | 63 | ||||
| -rw-r--r-- | host/lib/convert/convert_with_neon.cpp | 62 | ||||
| -rw-r--r-- | host/lib/convert/convert_with_sse2.cpp | 148 | ||||
| -rw-r--r-- | host/lib/convert/gen_convert_general.py | 93 | ||||
| -rw-r--r-- | host/lib/convert/gen_convert_impl.py | 186 | ||||
| -rw-r--r-- | host/lib/transport/vrt_packet_handler.hpp | 12 | 
10 files changed, 834 insertions, 6 deletions
| diff --git a/host/lib/CMakeLists.txt b/host/lib/CMakeLists.txt index 498841561..9ab121df5 100644 --- a/host/lib/CMakeLists.txt +++ b/host/lib/CMakeLists.txt @@ -1,5 +1,5 @@  # -# Copyright 2010 Ettus Research LLC +# Copyright 2010-2011 Ettus Research LLC  #  # This program is free software: you can redistribute it and/or modify  # it under the terms of the GNU General Public License as published by @@ -89,6 +89,7 @@ ENDMACRO(INCLUDE_SUBDIRECTORY)  # Include subdirectories (different than add)  ########################################################################  INCLUDE_SUBDIRECTORY(ic_reg_maps) +INCLUDE_SUBDIRECTORY(convert)  INCLUDE_SUBDIRECTORY(transport)  INCLUDE_SUBDIRECTORY(usrp)  INCLUDE_SUBDIRECTORY(utils) diff --git a/host/lib/convert/CMakeLists.txt b/host/lib/convert/CMakeLists.txt new file mode 100644 index 000000000..9324a94b0 --- /dev/null +++ b/host/lib/convert/CMakeLists.txt @@ -0,0 +1,66 @@ +# +# Copyright 2011 Ettus Research LLC +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. +# + +######################################################################## +# This file included, use CMake directory variables +######################################################################## +INCLUDE(CheckIncludeFileCXX) +MESSAGE(STATUS "") + +######################################################################## +# Check for SIMD headers +######################################################################## +CHECK_INCLUDE_FILE_CXX(emmintrin.h HAVE_EMMINTRIN_H) +IF(HAVE_EMMINTRIN_H) +    LIBUHD_APPEND_SOURCES( +        ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_sse2.cpp +    ) +ENDIF(HAVE_EMMINTRIN_H) + +CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_ARM_NEON_H) +IF(HAVE_ARM_NEON_H) +    LIBUHD_APPEND_SOURCES( +        ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_neon.cpp +    ) +ENDIF(HAVE_ARM_NEON_H) + +######################################################################## +# Convert types generation +######################################################################## +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) + +LIBUHD_PYTHON_GEN_SOURCE( +    ${CMAKE_CURRENT_SOURCE_DIR}/gen_convert_impl.py +    ${CMAKE_CURRENT_BINARY_DIR}/convert_impl.hpp +) + +INCLUDE(AddFileDependencies) +ADD_FILE_DEPENDENCIES( +    ${CMAKE_CURRENT_SOURCE_DIR}/convert.cpp +    ${CMAKE_CURRENT_BINARY_DIR}/convert_impl.hpp +) + +LIBUHD_PYTHON_GEN_SOURCE( +    ${CMAKE_CURRENT_SOURCE_DIR}/gen_convert_general.py +    ${CMAKE_CURRENT_BINARY_DIR}/convert_general.cpp +) + +LIBUHD_APPEND_SOURCES( +    ${CMAKE_CURRENT_SOURCE_DIR}/convert.cpp +    ${CMAKE_CURRENT_BINARY_DIR}/convert_general.cpp +) diff --git a/host/lib/convert/convert.cpp b/host/lib/convert/convert.cpp new file mode 100644 index 000000000..f635a1040 --- /dev/null +++ b/host/lib/convert/convert.cpp @@ -0,0 +1,117 @@ +// +// Copyright 2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. +// + +#include <uhd/convert.hpp> +#include <uhd/utils/static.hpp> +#include <uhd/utils/exception.hpp> +#include <iostream> + +using namespace uhd; + +#include "convert_impl.hpp" + +static const bool debug = false; + +/*********************************************************************** + * Define types for the function tables + **********************************************************************/ +struct fcn_table_entry_type{ +    convert::priority_type prio; +    convert::function_type fcn; +    fcn_table_entry_type(void) +    : prio(convert::PRIORITY_EMPTY), fcn(NULL){ +        /* NOP */ +    } +}; +typedef std::vector<fcn_table_entry_type> fcn_table_type; + +/*********************************************************************** + * Setup the table registry + **********************************************************************/ +UHD_SINGLETON_FCN(fcn_table_type, get_cpu_to_otw_table); +UHD_SINGLETON_FCN(fcn_table_type, get_otw_to_cpu_table); + +fcn_table_type &get_table(dir_type dir){ +    switch(dir){ +    case DIR_OTW_TO_CPU: return get_otw_to_cpu_table(); +    case DIR_CPU_TO_OTW: return get_cpu_to_otw_table(); +    } +    UHD_THROW_INVALID_CODE_PATH(); +} + +/*********************************************************************** + * The registry functions + **********************************************************************/ +void uhd::convert::register_converter( +    const std::string &markup, +    function_type fcn, +    priority_type prio +){ +    //extract the predicate and direction from the markup +    dir_type dir; +    pred_type pred = make_pred(markup, dir); + +    //get a reference to the function table +    fcn_table_type &table = get_table(dir); + +    //resize the table so that its at least pred+1 +    if (table.size() <= pred) table.resize(pred+1); + +    //register the function if higher priority +    if (table[pred].prio < prio){ +        table[pred].fcn = fcn; +        table[pred].prio = prio; +    } + +    //----------------------------------------------------------------// +    if (debug) std::cout << "register_converter: " << markup << std::endl +        << "    prio: " << prio << std::endl +        << "    pred: " << pred << std::endl +        << "    dir: " << dir << std::endl +        << std::endl +    ; +    //----------------------------------------------------------------// +} + +/*********************************************************************** + * The converter functions + **********************************************************************/ +void uhd::convert::io_type_to_otw_type( +    const io_type_t &io_type, +    const otw_type_t &otw_type, +    input_type &input_buffs, +    output_type &output_buffs, +    size_t nsamps_per_io_buff +){ +    pred_type pred = make_pred(io_type, otw_type, input_buffs.size(), output_buffs.size()); +    fcn_table_type table = get_cpu_to_otw_table(); +    function_type fcn = table.at(pred).fcn; +    fcn(input_buffs, output_buffs, nsamps_per_io_buff); +} + +void uhd::convert::otw_type_to_io_type( +    const io_type_t &io_type, +    const otw_type_t &otw_type, +    input_type &input_buffs, +    output_type &output_buffs, +    size_t nsamps_per_io_buff +){ +    pred_type pred = make_pred(io_type, otw_type, input_buffs.size(), output_buffs.size()); +    fcn_table_type table = get_otw_to_cpu_table(); +    function_type fcn = table.at(pred).fcn; +    fcn(input_buffs, output_buffs, nsamps_per_io_buff); +} diff --git a/host/lib/convert/convert_common.hpp b/host/lib/convert/convert_common.hpp new file mode 100644 index 000000000..1a653a56f --- /dev/null +++ b/host/lib/convert/convert_common.hpp @@ -0,0 +1,90 @@ +// +// Copyright 2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. +// + +#ifndef INCLUDED_LIBUHD_CONVERT_COMMON_HPP +#define INCLUDED_LIBUHD_CONVERT_COMMON_HPP + +#include <uhd/convert.hpp> +#include <uhd/utils/static.hpp> +#include <boost/cstdint.hpp> +#include <complex> + +#define DECLARE_CONVERTER(fcn, prio) \ +    static void fcn( \ +        uhd::convert::input_type &inputs, \ +        uhd::convert::output_type &outputs, \ +        size_t nsamps \ +    ); \ +    UHD_STATIC_BLOCK(register_##fcn##_##prio){ \ +        uhd::convert::register_converter(#fcn, fcn, prio); \ +    } \ +    static void fcn( \ +        uhd::convert::input_type &inputs, \ +        uhd::convert::output_type &outputs, \ +        size_t nsamps \ +    ) + +/*********************************************************************** + * Typedefs + **********************************************************************/ +typedef std::complex<float>          fc32_t; +typedef std::complex<boost::int16_t> sc16_t; +typedef boost::uint32_t              item32_t; + +/*********************************************************************** + * Convert complex short buffer to items32 + **********************************************************************/ +static UHD_INLINE item32_t sc16_to_item32(sc16_t num){ +    boost::uint16_t real = num.real(); +    boost::uint16_t imag = num.imag(); +    return (item32_t(real) << 16) | (item32_t(imag) << 0); +} + +/*********************************************************************** + * Convert items32 buffer to complex short + **********************************************************************/ +static UHD_INLINE sc16_t item32_to_sc16(item32_t item){ +    return sc16_t( +        boost::int16_t(item >> 16), +        boost::int16_t(item >> 0) +    ); +} + +/*********************************************************************** + * Convert complex float buffer to items32 (no swap) + **********************************************************************/ +static const float shorts_per_float = float(32767); + +static UHD_INLINE item32_t fc32_to_item32(fc32_t num){ +    boost::uint16_t real = boost::int16_t(num.real()*shorts_per_float); +    boost::uint16_t imag = boost::int16_t(num.imag()*shorts_per_float); +    return (item32_t(real) << 16) | (item32_t(imag) << 0); +} + +/*********************************************************************** + * Convert items32 buffer to complex float + **********************************************************************/ +static const float floats_per_short = float(1.0/shorts_per_float); + +static UHD_INLINE fc32_t item32_to_fc32(item32_t item){ +    return fc32_t( +        float(boost::int16_t(item >> 16)*floats_per_short), +        float(boost::int16_t(item >> 0)*floats_per_short) +    ); +} + +#endif /* INCLUDED_LIBUHD_CONVERT_COMMON_HPP */ diff --git a/host/lib/convert/convert_general.cpp b/host/lib/convert/convert_general.cpp new file mode 100644 index 000000000..5e52acea2 --- /dev/null +++ b/host/lib/convert/convert_general.cpp @@ -0,0 +1,63 @@ +// +// Copyright 2010 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. +// + +#include "convert_common.hpp" +#include <uhd/utils/byteswap.hpp> + +using namespace uhd::convert; + +/*********************************************************************** + * Convert complex short buffer to items32 + **********************************************************************/ +DECLARE_CONVERTER(convert_sc16_1_to_item32_1_nswap, PRIORITY_GENERAL){ +    const sc16_t *input = reinterpret_cast<const sc16_t *>(inputs[0]); +    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + +    for (size_t i = 0; i < nsamps; i++){ +        output[i] = sc16_to_item32(input[i]); +    } +} + +DECLARE_CONVERTER(convert_sc16_1_to_item32_1_bswap, PRIORITY_GENERAL){ +    const sc16_t *input = reinterpret_cast<const sc16_t *>(inputs[0]); +    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + +    for (size_t i = 0; i < nsamps; i++){ +        output[i] = uhd::byteswap(sc16_to_item32(input[i])); +    } +} + +/*********************************************************************** + * Convert items32 buffer to complex short + **********************************************************************/ +DECLARE_CONVERTER(convert_item32_1_to_sc16_1_nswap, PRIORITY_GENERAL){ +    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); +    sc16_t *output = reinterpret_cast<sc16_t *>(outputs[0]); + +    for (size_t i = 0; i < nsamps; i++){ +        output[i] = item32_to_sc16(input[i]); +    } +} + +DECLARE_CONVERTER(convert_item32_1_to_sc16_1_bswap, PRIORITY_GENERAL){ +    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); +    sc16_t *output = reinterpret_cast<sc16_t *>(outputs[0]); + +    for (size_t i = 0; i < nsamps; i++){ +        output[i] = item32_to_sc16(uhd::byteswap(input[i])); +    } +} diff --git a/host/lib/convert/convert_with_neon.cpp b/host/lib/convert/convert_with_neon.cpp new file mode 100644 index 000000000..1ed841125 --- /dev/null +++ b/host/lib/convert/convert_with_neon.cpp @@ -0,0 +1,62 @@ +// +// Copyright 2010-2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. +// + +#include "convert_common.hpp" +#include <uhd/utils/byteswap.hpp> +#include <arm_neon.h> + +using namespace uhd::convert; + +DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){ +    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); +    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + +    size_t i; + +    float32x4_t Q0 = vdupq_n_f32(shorts_per_float); +    for (i=0; i < (nsamps & ~0x03); i+=2) { +        float32x4_t Q1 = vld1q_f32(reinterpret_cast<const float *>(&input[i])); +        float32x4_t Q2 = vmulq_f32(Q1, Q0); +        int32x4_t Q3 = vcvtq_s32_f32(Q2); +        int16x4_t D8 = vmovn_s32(Q3); +        int16x4_t D9 = vrev32_s16(D8); +        vst1_s16((reinterpret_cast<int16_t *>(&output[i])), D9); +    } + +    for (; i < nsamps; i++) +        output[i] = fc32_to_item32(input[i]); +} + +DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){ +    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); +    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); + +    size_t i; + +    float32x4_t Q1 = vdupq_n_f32(floats_per_short); +    for (i=0; i < (nsamps & ~0x03); i+=2) { +        int16x4_t D0 = vld1_s16(reinterpret_cast<const int16_t *>(&input[i])); +        int16x4_t D1 = vrev32_s16(D0); +        int32x4_t Q2 = vmovl_s16(D1); +        float32x4_t Q3 = vcvtq_f32_s32(Q2); +        float32x4_t Q4 = vmulq_f32(Q3, Q1); +        vst1q_f32((reinterpret_cast<float *>(&output[i])), Q4); +    } + +    for (; i < nsamps; i++) +        output[i] = item32_to_fc32(input[i]); +} diff --git a/host/lib/convert/convert_with_sse2.cpp b/host/lib/convert/convert_with_sse2.cpp new file mode 100644 index 000000000..8d5a8a6a5 --- /dev/null +++ b/host/lib/convert/convert_with_sse2.cpp @@ -0,0 +1,148 @@ +// +// Copyright 2010-2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. +// + +#include "convert_common.hpp" +#include <uhd/utils/byteswap.hpp> +#include <emmintrin.h> + +using namespace uhd::convert; + +DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){ +    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); +    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + +    __m128 scalar = _mm_set_ps1(shorts_per_float); + +    //convert blocks of samples with intrinsics +    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ +        //load from input +        __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); +        __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); + +        //convert and scale +        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); +        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); + +        //pack + swap 16-bit pairs +        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); +        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); +        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); + +        //store to output +        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); +    } + +    //convert remainder +    for (; i < nsamps; i++){ +        output[i] = fc32_to_item32(input[i]); +    } +} + +DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){ +    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); +    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + +    __m128 scalar = _mm_set_ps1(shorts_per_float); + +    //convert blocks of samples with intrinsics +    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ +        //load from input +        __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); +        __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); + +        //convert and scale +        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); +        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); + +        //pack + byteswap -> byteswap 16 bit words +        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); +        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); + +        //store to output +        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); +    } + +    //convert remainder +    for (; i < nsamps; i++){ +        output[i] = uhd::byteswap(fc32_to_item32(input[i])); +    } +} + +DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){ +    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); +    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); + +    __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16)); +    __m128i zeroi = _mm_setzero_si128(); + +    //convert blocks of samples with intrinsics +    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ +        //load from input +        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); + +        //unpack + swap 16-bit pairs +        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); +        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); +        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits +        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); + +        //convert and scale +        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); +        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); + +        //store to output +        _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); +        _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); +    } + +    //convert remainder +    for (; i < nsamps; i++){ +        output[i] = item32_to_fc32(input[i]); +    } +} + +DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){ +    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); +    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); + +    __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16)); +    __m128i zeroi = _mm_setzero_si128(); + +    //convert blocks of samples with intrinsics +    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ +        //load from input +        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); + +        //byteswap + unpack -> byteswap 16 bit words +        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); +        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits +        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); + +        //convert and scale +        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); +        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); + +        //store to output +        _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); +        _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); +    } + +    //convert remainder +    for (; i < nsamps; i++){ +        output[i] = item32_to_fc32(uhd::byteswap(input[i])); +    } +} diff --git a/host/lib/convert/gen_convert_general.py b/host/lib/convert/gen_convert_general.py new file mode 100644 index 000000000..47c4cd7d0 --- /dev/null +++ b/host/lib/convert/gen_convert_general.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +# +# Copyright 2011 Ettus Research LLC +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. +# + +TMPL_HEADER = """ +#import time +/*********************************************************************** + * This file was generated by $file on $time.strftime("%c") + **********************************************************************/ + +\#include "convert_common.hpp" +\#include <uhd/utils/byteswap.hpp> + +using namespace uhd::convert; +""" + +TMPL_CONV_TO_FROM_ITEM32_1 = """ +DECLARE_CONVERTER(convert_$(cpu_type)_1_to_item32_1_$(swap), PRIORITY_GENERAL){ +    const $(cpu_type)_t *input = reinterpret_cast<const $(cpu_type)_t *>(inputs[0]); +    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + +    for (size_t i = 0; i < nsamps; i++){ +        output[i] = $(swap_fcn)($(cpu_type)_to_item32(input[i])); +    } +} + +DECLARE_CONVERTER(convert_item32_1_to_$(cpu_type)_1_$(swap), PRIORITY_GENERAL){ +    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); +    $(cpu_type)_t *output = reinterpret_cast<$(cpu_type)_t *>(outputs[0]); + +    for (size_t i = 0; i < nsamps; i++){ +        output[i] = item32_to_$(cpu_type)($(swap_fcn)(input[i])); +    } +} +""" +TMPL_CONV_TO_FROM_ITEM32_X = """ +DECLARE_CONVERTER(convert_$(cpu_type)_$(width)_to_item32_1_$(swap), PRIORITY_GENERAL){ +    #for $w in range($width) +    const $(cpu_type)_t *input$(w) = reinterpret_cast<const $(cpu_type)_t *>(inputs[$(w)]); +    #end for +    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + +    for (size_t i = 0, j = 0; i < nsamps; i++){ +        #for $w in range($width) +        output[j++] = $(swap_fcn)($(cpu_type)_to_item32(input$(w)[i])); +        #end for +    } +} + +DECLARE_CONVERTER(convert_item32_1_to_$(cpu_type)_$(width)_$(swap), PRIORITY_GENERAL){ +    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); +    #for $w in range($width) +    $(cpu_type)_t *output$(w) = reinterpret_cast<$(cpu_type)_t *>(outputs[$(w)]); +    #end for + +    for (size_t i = 0, j = 0; i < nsamps; i++){ +        #for $w in range($width) +        output$(w)[i] = item32_to_$(cpu_type)($(swap_fcn)(input[j++])); +        #end for +    } +} +""" + +def parse_tmpl(_tmpl_text, **kwargs): +    from Cheetah.Template import Template +    return str(Template(_tmpl_text, kwargs)) + +if __name__ == '__main__': +    import sys, os +    file = os.path.basename(__file__) +    output = parse_tmpl(TMPL_HEADER, file=file) +    for width in 1, 2, 3, 4: +        for swap, swap_fcn in (('nswap', ''), ('bswap', 'uhd::byteswap')): +            for cpu_type in 'fc32', 'sc16': +                output += parse_tmpl( +                    TMPL_CONV_TO_FROM_ITEM32_1 if width == 1 else TMPL_CONV_TO_FROM_ITEM32_X, +                    width=width, swap=swap, swap_fcn=swap_fcn, cpu_type=cpu_type +                ) +    open(sys.argv[1], 'w').write(output) diff --git a/host/lib/convert/gen_convert_impl.py b/host/lib/convert/gen_convert_impl.py new file mode 100644 index 000000000..71095ab97 --- /dev/null +++ b/host/lib/convert/gen_convert_impl.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python +# +# Copyright 2010-2011 Ettus Research LLC +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. +# + +TMPL_TEXT = """ +#import time +/*********************************************************************** + * This file was generated by $file on $time.strftime("%c") + **********************************************************************/ +typedef size_t pred_type; + +\#include <uhd/utils/algorithm.hpp> +\#include <boost/lexical_cast.hpp> +\#include <boost/detail/endian.hpp> +\#include <stdexcept> + +enum dir_type{ +    DIR_OTW_TO_CPU = 0, +    DIR_CPU_TO_OTW = 1 +}; + +pred_type make_pred(const std::string &markup, dir_type &dir){ +    pred_type pred = 0; + +    try{ +        std::vector<std::string> tokens = std::split_string(markup, "_"); +        //token 0 is <convert> +        std::string inp_type = tokens.at(1); +        std::string num_inps = tokens.at(2); +        //token 3 is <to> +        std::string out_type = tokens.at(4); +        std::string num_outs = tokens.at(5); +        std::string swap_type = tokens.at(6); + +        std::string cpu_type, otw_type; +        if (inp_type.find("item") == std::string::npos){ +            cpu_type = inp_type; +            otw_type = out_type; +            dir = DIR_CPU_TO_OTW; +        } +        else{ +            cpu_type = out_type; +            otw_type = inp_type; +            dir = DIR_OTW_TO_CPU; +        } + +        if      (cpu_type == "fc32") pred |= $ph.fc32_p; +        else if (cpu_type == "sc16") pred |= $ph.sc16_p; +        else throw std::runtime_error("unhandled io type " + cpu_type); + +        if (otw_type == "item32") pred |= $ph.item32_p; +        else throw std::runtime_error("unhandled otw type " + otw_type); + +        int num_inputs = boost::lexical_cast<int>(num_inps); +        int num_outputs = boost::lexical_cast<int>(num_outs); + +        switch(num_inputs*num_outputs){ //FIXME treated as one value +        case 1: pred |= $ph.chan1_p; break; +        case 2: pred |= $ph.chan2_p; break; +        case 3: pred |= $ph.chan3_p; break; +        case 4: pred |= $ph.chan4_p; break; +        default: throw std::runtime_error("unhandled number of channels"); +        } + +        if      (swap_type == "bswap") pred |= $ph.bswap_p; +        else if (swap_type == "nswap") pred |= $ph.nswap_p; +        else throw std::runtime_error("unhandled swap type"); + +    } +    catch(...){ +        throw std::runtime_error("convert::make_pred: could not parse markup: " + markup); +    } + +    return pred; +} + +UHD_INLINE pred_type make_pred( +    const io_type_t &io_type, +    const otw_type_t &otw_type, +    size_t num_inputs, +    size_t num_outputs +){ +    pred_type pred = 0; + +    switch(otw_type.byteorder){ +    \#ifdef BOOST_BIG_ENDIAN +    case otw_type_t::BO_BIG_ENDIAN:    pred |= $ph.nswap_p; break; +    case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.bswap_p; break; +    \#else +    case otw_type_t::BO_BIG_ENDIAN:    pred |= $ph.bswap_p; break; +    case otw_type_t::BO_LITTLE_ENDIAN: pred |= $ph.nswap_p; break; +    \#endif +    case otw_type_t::BO_NATIVE:        pred |= $ph.nswap_p; break; +    default: throw std::runtime_error("unhandled otw byteorder type"); +    } + +    switch(otw_type.get_sample_size()){ +    case sizeof(boost::uint32_t): pred |= $ph.item32_p; break; +    default: throw std::runtime_error("unhandled otw sample size"); +    } + +    switch(io_type.tid){ +    case io_type_t::COMPLEX_FLOAT32: pred |= $ph.fc32_p; break; +    case io_type_t::COMPLEX_INT16:   pred |= $ph.sc16_p; break; +    default: throw std::runtime_error("unhandled io type id"); +    } + +    switch(num_inputs*num_outputs){ //FIXME treated as one value +    case 1: pred |= $ph.chan1_p; break; +    case 2: pred |= $ph.chan2_p; break; +    case 3: pred |= $ph.chan3_p; break; +    case 4: pred |= $ph.chan4_p; break; +    default: throw std::runtime_error("unhandled number of channels"); +    } + +    return pred; +} +""" + +def parse_tmpl(_tmpl_text, **kwargs): +    from Cheetah.Template import Template +    return str(Template(_tmpl_text, kwargs)) + +class ph: +    bswap_p  = 0b00001 +    nswap_p  = 0b00000 +    item32_p = 0b00000 +    sc16_p   = 0b00010 +    fc32_p   = 0b00000 +    chan1_p  = 0b00000 +    chan2_p  = 0b00100 +    chan3_p  = 0b01000 +    chan4_p  = 0b01100 + +    nbits = 4 #see above + +    @staticmethod +    def has(pred, mask, flag): return (pred & mask) == flag + +    @staticmethod +    def get_swap_type(pred): +        mask = 0b1 +        if ph.has(pred, mask, ph.bswap_p): return 'bswap' +        if ph.has(pred, mask, ph.nswap_p): return 'nswap' +        raise NotImplementedError + +    @staticmethod +    def get_dev_type(pred): +        mask = 0b0 +        if ph.has(pred, mask, ph.item32_p): return 'item32' +        raise NotImplementedError + +    @staticmethod +    def get_host_type(pred): +        mask = 0b10 +        if ph.has(pred, mask, ph.sc16_p): return 'sc16' +        if ph.has(pred, mask, ph.fc32_p): return 'fc32' +        raise NotImplementedError + +    @staticmethod +    def get_num_chans(pred): +        mask = 0b1100 +        if ph.has(pred, mask, ph.chan1_p): return 1 +        if ph.has(pred, mask, ph.chan2_p): return 2 +        if ph.has(pred, mask, ph.chan3_p): return 3 +        if ph.has(pred, mask, ph.chan4_p): return 4 +        raise NotImplementedError + +if __name__ == '__main__': +    import sys, os +    file = os.path.basename(__file__) +    open(sys.argv[1], 'w').write(parse_tmpl(TMPL_TEXT, file=file, ph=ph)) diff --git a/host/lib/transport/vrt_packet_handler.hpp b/host/lib/transport/vrt_packet_handler.hpp index 7f8d84308..e4c5539d1 100644 --- a/host/lib/transport/vrt_packet_handler.hpp +++ b/host/lib/transport/vrt_packet_handler.hpp @@ -26,7 +26,7 @@  #include <uhd/types/otw_type.hpp>  #include <uhd/types/metadata.hpp>  #include <uhd/transport/vrt_if_packet.hpp> -#include <uhd/transport/convert_types.hpp> +#include <uhd/convert.hpp>  #include <uhd/transport/zero_copy.hpp>  #include <boost/function.hpp>  #include <stdexcept> @@ -199,8 +199,9 @@ template <typename T> UHD_INLINE T get_context_code(              }              //copy-convert the samples from the recv buffer -            uhd::transport::convert_otw_type_to_io_type( -                state.copy_buffs[i], otw_type, io_buffs, io_type, nsamps_to_copy_per_io_buff +            uhd::convert::input_type otw_buffs(1, state.copy_buffs[i]); +            uhd::convert::otw_type_to_io_type( +                io_type, otw_type, otw_buffs, io_buffs, nsamps_to_copy_per_io_buff              );              //update the rx copy buffer to reflect the bytes copied @@ -338,8 +339,9 @@ template <typename T> UHD_INLINE T get_context_code(              otw_mem += if_packet_info.num_header_words32;              //copy-convert the samples into the send buffer -            uhd::transport::convert_io_type_to_otw_type( -                io_buffs, io_type, otw_mem, otw_type, num_samps +            uhd::convert::output_type otw_buffs(1, otw_mem); +            uhd::convert::io_type_to_otw_type( +                io_type, otw_type, io_buffs, otw_buffs, num_samps              );              //commit the samples to the zero-copy interface | 
