diff options
Diffstat (limited to 'host/lib/convert')
-rw-r--r-- | host/lib/convert/CMakeLists.txt | 132 | ||||
-rw-r--r-- | host/lib/convert/convert_common.hpp | 107 | ||||
-rw-r--r-- | host/lib/convert/convert_fc32_with_sse2.cpp | 196 | ||||
-rw-r--r-- | host/lib/convert/convert_fc64_with_sse2.cpp | 212 | ||||
-rw-r--r-- | host/lib/convert/convert_impl.cpp | 109 | ||||
-rw-r--r-- | host/lib/convert/convert_orc.orc | 63 | ||||
-rw-r--r-- | host/lib/convert/convert_with_neon.cpp | 61 | ||||
-rw-r--r-- | host/lib/convert/convert_with_orc.cpp | 54 | ||||
-rw-r--r-- | host/lib/convert/gen_convert_general.py | 93 | ||||
-rw-r--r-- | host/lib/convert/gen_convert_pred.py | 185 |
10 files changed, 1212 insertions, 0 deletions
diff --git a/host/lib/convert/CMakeLists.txt b/host/lib/convert/CMakeLists.txt
new file mode 100644
index 000000000..b260cb247
--- /dev/null
+++ b/host/lib/convert/CMakeLists.txt
@@ -0,0 +1,132 @@
#
# Copyright 2011 Ettus Research LLC
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

########################################################################
# This file included, use CMake directory variables
########################################################################
INCLUDE(CheckIncludeFileCXX)
MESSAGE(STATUS "")

########################################################################
# Look for Orc support
#
# Orc is enabled only when pkg-config finds orc-0.4 (> 0.4.11) and the
# orcc compiler executable is available.
########################################################################
FIND_PACKAGE(PkgConfig)
IF(PKG_CONFIG_FOUND)
    PKG_CHECK_MODULES(ORC "orc-0.4 > 0.4.11")
ENDIF()

FIND_PROGRAM(ORCC_EXECUTABLE orcc)

LIBUHD_REGISTER_COMPONENT("ORC" ENABLE_ORC ON "ENABLE_LIBUHD;ORC_FOUND;ORCC_EXECUTABLE" OFF)

IF(ENABLE_ORC)
    INCLUDE_DIRECTORIES(${ORC_INCLUDE_DIRS})
    LINK_DIRECTORIES(${ORC_LIBRARY_DIRS})
    ENABLE_LANGUAGE(C) #orcc emits a C source file

    #the generated C file is named after the .orc source (without extension)
    SET(orcc_src ${CMAKE_CURRENT_SOURCE_DIR}/convert_orc.orc)
    GET_FILENAME_COMPONENT(orc_file_name_we ${orcc_src} NAME_WE)
    SET(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c)

    MESSAGE(STATUS "Orc found, enabling Orc support.")

    #run orcc to turn the .orc kernels into a C implementation
    ADD_CUSTOM_COMMAND(
        COMMAND ${ORCC_EXECUTABLE} --implementation -o ${orcc_gen} ${orcc_src}
        DEPENDS ${orcc_src} OUTPUT ${orcc_gen}
    )

    LIBUHD_APPEND_SOURCES(${orcc_gen})
    LIBUHD_APPEND_SOURCES(
        ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_orc.cpp
    )
    LIBUHD_APPEND_LIBS(${ORC_LIBRARIES})
ELSE()
    MESSAGE(STATUS "Orc not found, disabling orc support.")
ENDIF()

########################################################################
# Check for SSE2 SIMD headers
#
# The SSE2 flag is applied only to the probe and to the SSE2 sources.
########################################################################
IF(CMAKE_COMPILER_IS_GNUCXX)
    SET(EMMINTRIN_FLAGS -msse2)
ELSEIF(MSVC)
    SET(EMMINTRIN_FLAGS /arch:SSE2)
ENDIF()

SET(CMAKE_REQUIRED_FLAGS ${EMMINTRIN_FLAGS})
CHECK_INCLUDE_FILE_CXX(emmintrin.h HAVE_EMMINTRIN_H)
UNSET(CMAKE_REQUIRED_FLAGS)

IF(HAVE_EMMINTRIN_H)
    SET(convert_with_sse2_sources
        ${CMAKE_CURRENT_SOURCE_DIR}/convert_fc32_with_sse2.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/convert_fc64_with_sse2.cpp
    )
    SET_SOURCE_FILES_PROPERTIES(
        ${convert_with_sse2_sources}
        PROPERTIES COMPILE_FLAGS "${EMMINTRIN_FLAGS}"
    )
    LIBUHD_APPEND_SOURCES(${convert_with_sse2_sources})
ENDIF()

########################################################################
# Check for NEON SIMD headers
#
# The header probe only runs for the GNU C++ compiler.
########################################################################
IF(CMAKE_COMPILER_IS_GNUCXX)
    SET(NEON_FLAGS "-mfloat-abi=softfp -mfpu=neon")
    SET(CMAKE_REQUIRED_FLAGS ${NEON_FLAGS})
    CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_ARM_NEON_H)
    UNSET(CMAKE_REQUIRED_FLAGS)
ENDIF()

IF(HAVE_ARM_NEON_H AND ENABLE_ORC)
    #prefer orc support, its faster than the current intrinsic implementations
    MESSAGE(STATUS "Enabled conversion support with ORC.")
ELSEIF(HAVE_ARM_NEON_H)
    MESSAGE(STATUS "Enabled conversion support with NEON intrinsics.")
    SET_SOURCE_FILES_PROPERTIES(
        ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_neon.cpp
        PROPERTIES COMPILE_FLAGS "${NEON_FLAGS}"
    )
    LIBUHD_APPEND_SOURCES(
        ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_neon.cpp
    )
ENDIF()

########################################################################
# Convert types generation
#
# convert_pred.hpp and convert_general.cpp are produced by the python
# generator scripts; convert_impl.cpp depends on the generated header.
########################################################################
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})

LIBUHD_PYTHON_GEN_SOURCE(
    ${CMAKE_CURRENT_SOURCE_DIR}/gen_convert_pred.py
    ${CMAKE_CURRENT_BINARY_DIR}/convert_pred.hpp
)

INCLUDE(AddFileDependencies)
ADD_FILE_DEPENDENCIES(
    ${CMAKE_CURRENT_SOURCE_DIR}/convert_impl.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/convert_pred.hpp
)

LIBUHD_PYTHON_GEN_SOURCE(
    ${CMAKE_CURRENT_SOURCE_DIR}/gen_convert_general.py
    ${CMAKE_CURRENT_BINARY_DIR}/convert_general.cpp
)

LIBUHD_APPEND_SOURCES(
    ${CMAKE_CURRENT_SOURCE_DIR}/convert_impl.cpp
)
diff --git a/host/lib/convert/convert_common.hpp b/host/lib/convert/convert_common.hpp
new file mode 100644
index 000000000..7f513b124
--- /dev/null
+++ b/host/lib/convert/convert_common.hpp
@@ -0,0 +1,107 @@
//
// Copyright 2011 Ettus Research LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
+// + +#ifndef INCLUDED_LIBUHD_CONVERT_COMMON_HPP +#define INCLUDED_LIBUHD_CONVERT_COMMON_HPP + +#include <uhd/convert.hpp> +#include <uhd/utils/static.hpp> +#include <boost/cstdint.hpp> +#include <complex> + +#define DECLARE_CONVERTER(fcn, prio) \ + static void fcn( \ + const uhd::convert::input_type &inputs, \ + const uhd::convert::output_type &outputs, \ + size_t nsamps, double scale_factor \ + ); \ + UHD_STATIC_BLOCK(register_##fcn##_##prio){ \ + uhd::convert::register_converter(#fcn, fcn, prio); \ + } \ + static void fcn( \ + const uhd::convert::input_type &inputs, \ + const uhd::convert::output_type &outputs, \ + size_t nsamps, double scale_factor \ + ) + +/*********************************************************************** + * Typedefs + **********************************************************************/ +typedef std::complex<double> fc64_t; +typedef std::complex<float> fc32_t; +typedef std::complex<boost::int16_t> sc16_t; +typedef std::complex<boost::int8_t> sc8_t; +typedef boost::uint32_t item32_t; + +/*********************************************************************** + * Convert complex short buffer to items32 + **********************************************************************/ +static UHD_INLINE item32_t sc16_to_item32(sc16_t num, double){ + boost::uint16_t real = num.real(); + boost::uint16_t imag = num.imag(); + return (item32_t(real) << 16) | (item32_t(imag) << 0); +} + +/*********************************************************************** + * Convert items32 buffer to complex short + **********************************************************************/ +static UHD_INLINE sc16_t item32_to_sc16(item32_t item, double){ + return sc16_t( + boost::int16_t(item >> 16), + boost::int16_t(item >> 0) + ); +} + +/*********************************************************************** + * Convert complex float buffer to items32 (no swap) + **********************************************************************/ +static UHD_INLINE item32_t 
fc32_to_item32(fc32_t num, float scale_factor){ + boost::uint16_t real = boost::int16_t(num.real()*scale_factor); + boost::uint16_t imag = boost::int16_t(num.imag()*scale_factor); + return (item32_t(real) << 16) | (item32_t(imag) << 0); +} + +/*********************************************************************** + * Convert items32 buffer to complex float + **********************************************************************/ +static UHD_INLINE fc32_t item32_to_fc32(item32_t item, float scale_factor){ + return fc32_t( + float(boost::int16_t(item >> 16)*scale_factor), + float(boost::int16_t(item >> 0)*scale_factor) + ); +} + +/*********************************************************************** + * Convert complex double buffer to items32 (no swap) + **********************************************************************/ +static UHD_INLINE item32_t fc64_to_item32(fc64_t num, double scale_factor){ + boost::uint16_t real = boost::int16_t(num.real()*scale_factor); + boost::uint16_t imag = boost::int16_t(num.imag()*scale_factor); + return (item32_t(real) << 16) | (item32_t(imag) << 0); +} + +/*********************************************************************** + * Convert items32 buffer to complex double + **********************************************************************/ +static UHD_INLINE fc64_t item32_to_fc64(item32_t item, double scale_factor){ + return fc64_t( + float(boost::int16_t(item >> 16)*scale_factor), + float(boost::int16_t(item >> 0)*scale_factor) + ); +} + +#endif /* INCLUDED_LIBUHD_CONVERT_COMMON_HPP */ diff --git a/host/lib/convert/convert_fc32_with_sse2.cpp b/host/lib/convert/convert_fc32_with_sse2.cpp new file mode 100644 index 000000000..676e1561c --- /dev/null +++ b/host/lib/convert/convert_fc32_with_sse2.cpp @@ -0,0 +1,196 @@ +// +// Copyright 2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free 
Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// + +#include "convert_common.hpp" +#include <uhd/utils/byteswap.hpp> +#include <emmintrin.h> + +using namespace uhd::convert; + +DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){ + const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); + item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + + const __m128 scalar = _mm_set_ps1(float(scale_factor)); + + #define convert_fc32_1_to_item32_1_nswap_guts(_al_) \ + for (; i+4 < nsamps; i+=4){ \ + /* load from input */ \ + __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \ + __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \ + \ + /* convert and scale */ \ + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \ + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \ + \ + /* pack + swap 16-bit pairs */ \ + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + switch (size_t(input) & 0xf){ + case 0x8: + output[i] = fc32_to_item32(input[i], float(scale_factor)); i++; + case 0x0: + convert_fc32_1_to_item32_1_nswap_guts(_) + break; + default: convert_fc32_1_to_item32_1_nswap_guts(u_) + } + + //convert remainder + for (; i < nsamps; 
i++){ + output[i] = fc32_to_item32(input[i], float(scale_factor)); + } +} + +DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){ + const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); + item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + + const __m128 scalar = _mm_set_ps1(float(scale_factor)); + + #define convert_fc32_1_to_item32_1_bswap_guts(_al_) \ + for (; i+4 < nsamps; i+=4){ \ + /* load from input */ \ + __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \ + __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \ + \ + /* convert and scale */ \ + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \ + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \ + \ + /* pack + byteswap -> byteswap 16 bit words */ \ + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + switch (size_t(input) & 0xf){ + case 0x8: + output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor))); i++; + case 0x0: + convert_fc32_1_to_item32_1_bswap_guts(_) + break; + default: convert_fc32_1_to_item32_1_bswap_guts(u_) + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor))); + } +} + +DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){ + const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); + fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); + + const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); + const __m128i zeroi = _mm_setzero_si128(); + + #define convert_item32_1_to_fc32_1_nswap_guts(_al_) \ + for (; i+4 < nsamps; i+=4){ \ + /* load from input */ \ + __m128i tmpi = 
_mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ + \ + /* unpack + swap 16-bit pairs */ \ + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ + \ + /* convert and scale */ \ + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \ + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \ + \ + /* store to output */ \ + _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \ + _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + switch (size_t(output) & 0xf){ + case 0x8: + output[i] = item32_to_fc32(input[i], float(scale_factor)); i++; + case 0x0: + convert_item32_1_to_fc32_1_nswap_guts(_) + break; + default: convert_item32_1_to_fc32_1_nswap_guts(u_) + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = item32_to_fc32(input[i], float(scale_factor)); + } +} + +DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){ + const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); + fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); + + const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); + const __m128i zeroi = _mm_setzero_si128(); + + #define convert_item32_1_to_fc32_1_bswap_guts(_al_) \ + for (; i+4 < nsamps; i+=4){ \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ + \ + /* byteswap + unpack -> byteswap 16 bit words */ \ + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ + \ + /* convert and scale */ \ + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); 
\ + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \ + \ + /* store to output */ \ + _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \ + _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + switch (size_t(output) & 0xf){ + case 0x8: + output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); i++; + case 0x0: + convert_item32_1_to_fc32_1_bswap_guts(_) + break; + default: convert_item32_1_to_fc32_1_bswap_guts(u_) + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); + } +} diff --git a/host/lib/convert/convert_fc64_with_sse2.cpp b/host/lib/convert/convert_fc64_with_sse2.cpp new file mode 100644 index 000000000..4d28396a4 --- /dev/null +++ b/host/lib/convert/convert_fc64_with_sse2.cpp @@ -0,0 +1,212 @@ +// +// Copyright 2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
+// + +#include "convert_common.hpp" +#include <uhd/utils/byteswap.hpp> +#include <emmintrin.h> + +using namespace uhd::convert; + +DECLARE_CONVERTER(convert_fc64_1_to_item32_1_nswap, PRIORITY_CUSTOM){ + const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]); + item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + + const __m128d scalar = _mm_set1_pd(scale_factor); + + #define convert_fc64_1_to_item32_1_nswap_guts(_al_) \ + for (; i+4 < nsamps; i+=4){ \ + /* load from input */ \ + __m128d tmp0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+0)); \ + __m128d tmp1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+1)); \ + __m128d tmp2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+2)); \ + __m128d tmp3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+3)); \ + \ + /* convert and scale */ \ + __m128i tmpi0 = _mm_cvttpd_epi32(_mm_mul_pd(tmp0, scalar)); \ + __m128i tmpi1 = _mm_cvttpd_epi32(_mm_mul_pd(tmp1, scalar)); \ + __m128i tmpilo = _mm_unpacklo_epi64(tmpi0, tmpi1); \ + __m128i tmpi2 = _mm_cvttpd_epi32(_mm_mul_pd(tmp2, scalar)); \ + __m128i tmpi3 = _mm_cvttpd_epi32(_mm_mul_pd(tmp3, scalar)); \ + __m128i tmpihi = _mm_unpacklo_epi64(tmpi2, tmpi3); \ + \ + /* pack + swap 16-bit pairs */ \ + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + if ((size_t(input) & 0xf) == 0){ + convert_fc64_1_to_item32_1_nswap_guts(_) + } + else{ + convert_fc64_1_to_item32_1_nswap_guts(u_) + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = fc64_to_item32(input[i], scale_factor); + } +} + +DECLARE_CONVERTER(convert_fc64_1_to_item32_1_bswap, PRIORITY_CUSTOM){ + const fc64_t *input = reinterpret_cast<const 
fc64_t *>(inputs[0]); + item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + + const __m128d scalar = _mm_set1_pd(scale_factor); + + #define convert_fc64_1_to_item32_1_bswap_guts(_al_) \ + for (; i+4 < nsamps; i+=4){ \ + /* load from input */ \ + __m128d tmp0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+0)); \ + __m128d tmp1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+1)); \ + __m128d tmp2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+2)); \ + __m128d tmp3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+3)); \ + \ + /* convert and scale */ \ + __m128i tmpi0 = _mm_cvttpd_epi32(_mm_mul_pd(tmp0, scalar)); \ + __m128i tmpi1 = _mm_cvttpd_epi32(_mm_mul_pd(tmp1, scalar)); \ + __m128i tmpilo = _mm_unpacklo_epi64(tmpi0, tmpi1); \ + __m128i tmpi2 = _mm_cvttpd_epi32(_mm_mul_pd(tmp2, scalar)); \ + __m128i tmpi3 = _mm_cvttpd_epi32(_mm_mul_pd(tmp3, scalar)); \ + __m128i tmpihi = _mm_unpacklo_epi64(tmpi2, tmpi3); \ + \ + /* pack + byteswap -> byteswap 16 bit words */ \ + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + if ((size_t(input) & 0xf) == 0){ + convert_fc64_1_to_item32_1_bswap_guts(_) + } + else{ + convert_fc64_1_to_item32_1_bswap_guts(u_) + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = uhd::byteswap(fc64_to_item32(input[i], scale_factor)); + } +} + +DECLARE_CONVERTER(convert_item32_1_to_fc64_1_nswap, PRIORITY_CUSTOM){ + const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); + fc64_t *output = reinterpret_cast<fc64_t *>(outputs[0]); + + const __m128d scalar = _mm_set1_pd(scale_factor/(1 << 16)); + const __m128i zeroi = _mm_setzero_si128(); + + #define convert_item32_1_to_fc64_1_nswap_guts(_al_) \ + for (; i+4 < 
nsamps; i+=4){ \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ + \ + /* unpack + swap 16-bit pairs */ \ + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ + \ + /* convert and scale */ \ + __m128d tmp0 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar); \ + tmpilo = _mm_unpackhi_epi64(tmpilo, zeroi); \ + __m128d tmp1 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar); \ + __m128d tmp2 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar); \ + tmpihi = _mm_unpackhi_epi64(tmpihi, zeroi); \ + __m128d tmp3 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar); \ + \ + /* store to output */ \ + _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+0), tmp0); \ + _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+1), tmp1); \ + _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+2), tmp2); \ + _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+3), tmp3); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + if ((size_t(output) & 0xf) == 0){ + convert_item32_1_to_fc64_1_nswap_guts(_) + } + else{ + convert_item32_1_to_fc64_1_nswap_guts(u_) + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = item32_to_fc64(input[i], scale_factor); + } +} + +DECLARE_CONVERTER(convert_item32_1_to_fc64_1_bswap, PRIORITY_CUSTOM){ + const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); + fc64_t *output = reinterpret_cast<fc64_t *>(outputs[0]); + + const __m128d scalar = _mm_set1_pd(scale_factor/(1 << 16)); + const __m128i zeroi = _mm_setzero_si128(); + + #define convert_item32_1_to_fc64_1_bswap_guts(_al_) \ + for (; i+4 < nsamps; i+=4){ \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ + \ + /* byteswap + unpack -> 
byteswap 16 bit words */ \ + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ + \ + /* convert and scale */ \ + __m128d tmp0 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar); \ + tmpilo = _mm_unpackhi_epi64(tmpilo, zeroi); \ + __m128d tmp1 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar); \ + __m128d tmp2 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar); \ + tmpihi = _mm_unpackhi_epi64(tmpihi, zeroi); \ + __m128d tmp3 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar); \ + \ + /* store to output */ \ + _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+0), tmp0); \ + _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+1), tmp1); \ + _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+2), tmp2); \ + _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+3), tmp3); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + if ((size_t(output) & 0xf) == 0){ + convert_item32_1_to_fc64_1_bswap_guts(_) + } + else{ + convert_item32_1_to_fc64_1_bswap_guts(u_) + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = item32_to_fc64(uhd::byteswap(input[i]), scale_factor); + } +} diff --git a/host/lib/convert/convert_impl.cpp b/host/lib/convert/convert_impl.cpp new file mode 100644 index 000000000..9b2cdcdc9 --- /dev/null +++ b/host/lib/convert/convert_impl.cpp @@ -0,0 +1,109 @@ +// +// Copyright 2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// + +#include <uhd/convert.hpp> +#include <uhd/utils/log.hpp> +#include <uhd/utils/static.hpp> +#include <uhd/exception.hpp> + +using namespace uhd; + +#include "convert_pred.hpp" + +/*********************************************************************** + * Define types for the function tables + **********************************************************************/ +struct fcn_table_entry_type{ + convert::priority_type prio; + convert::function_type fcn; + fcn_table_entry_type(void) + : prio(convert::PRIORITY_EMPTY), fcn(NULL){ + /* NOP */ + } +}; +typedef std::vector<fcn_table_entry_type> fcn_table_type; + +/*********************************************************************** + * Setup the table registry + **********************************************************************/ +UHD_SINGLETON_FCN(fcn_table_type, get_cpu_to_otw_table); +UHD_SINGLETON_FCN(fcn_table_type, get_otw_to_cpu_table); + +fcn_table_type &get_table(dir_type dir){ + switch(dir){ + case DIR_OTW_TO_CPU: return get_otw_to_cpu_table(); + case DIR_CPU_TO_OTW: return get_cpu_to_otw_table(); + } + UHD_THROW_INVALID_CODE_PATH(); +} + +/*********************************************************************** + * The registry functions + **********************************************************************/ +void uhd::convert::register_converter( + const std::string &markup, + function_type fcn, + priority_type prio +){ + //extract the predicate and direction from the markup + dir_type dir; + pred_type pred = make_pred(markup, dir); + + //get a reference to the function table + fcn_table_type &table = get_table(dir); + + //resize the table so that its at least pred+1 + if (table.size() <= pred) table.resize(pred+1); + + //register the function if higher priority + if (table[pred].prio < prio){ + 
table[pred].fcn = fcn; + table[pred].prio = prio; + } + + //----------------------------------------------------------------// + UHD_LOGV(always) << "register_converter: " << markup << std::endl + << " prio: " << prio << std::endl + << " pred: " << pred << std::endl + << " dir: " << dir << std::endl + << std::endl + ; + //----------------------------------------------------------------// +} + +/*********************************************************************** + * The converter functions + **********************************************************************/ +const convert::function_type &convert::get_converter_cpu_to_otw( + const io_type_t &io_type, + const otw_type_t &otw_type, + size_t num_input_buffs, + size_t num_output_buffs +){ + pred_type pred = make_pred(io_type, otw_type, num_input_buffs, num_output_buffs); + return get_cpu_to_otw_table().at(pred).fcn; +} + +const convert::function_type &convert::get_converter_otw_to_cpu( + const io_type_t &io_type, + const otw_type_t &otw_type, + size_t num_input_buffs, + size_t num_output_buffs +){ + pred_type pred = make_pred(io_type, otw_type, num_input_buffs, num_output_buffs); + return get_otw_to_cpu_table().at(pred).fcn; +} diff --git a/host/lib/convert/convert_orc.orc b/host/lib/convert/convert_orc.orc new file mode 100644 index 000000000..5450bf4db --- /dev/null +++ b/host/lib/convert/convert_orc.orc @@ -0,0 +1,63 @@ +.function _convert_fc32_1_to_item32_1_nswap_orc +.source 8 src +.dest 4 dst +.floatparam 4 scalar +.temp 8 scaled +.temp 8 converted +.temp 4 short +x2 mulf scaled, src, scalar +x2 convfl converted, scaled +x2 convlw short, converted +swapl short, short +x2 swapw dst, short + +.function _convert_fc32_1_to_item32_1_bswap_orc +.source 8 src +.dest 4 dst +.floatparam 4 scalar +.temp 8 scaled +.temp 8 converted +.temp 4 short +x2 mulf scaled, src, scalar +x2 convfl converted, scaled +x2 convlw short, converted +x2 swapw dst, short + +.function _convert_item32_1_to_fc32_1_nswap_orc +.source 4 src 
+.dest 8 dst +.floatparam 4 scalar +.temp 4 tmp1 +.temp 8 tmp2 +x2 swapw tmp1, src +swapl tmp1, tmp1 +x2 convswl tmp2, tmp1 +x2 convlf tmp2, tmp2 +x2 mulf dst, tmp2, scalar + +.function _convert_item32_1_to_fc32_1_bswap_orc +.source 4 src +.dest 8 dst +.floatparam 4 scalar +.temp 4 tmp1 +.temp 8 tmp2 +x2 swapw tmp1, src +x2 convswl tmp2, tmp1 +x2 convlf tmp2, tmp2 +x2 mulf dst, tmp2, scalar + +.function _convert_sc16_1_to_item32_1_nswap_orc +.source 4 src +.dest 4 dst +.temp 4 tmp +.floatparam 4 scalar +swapl tmp, src +x2 swapw dst, tmp + +.function _convert_item32_1_to_sc16_1_nswap_orc +.source 4 src +.dest 4 dst +.floatparam 4 scalar +.temp 4 tmp +x2 swapw tmp, src +swapl dst, tmp diff --git a/host/lib/convert/convert_with_neon.cpp b/host/lib/convert/convert_with_neon.cpp new file mode 100644 index 000000000..e5f08cad9 --- /dev/null +++ b/host/lib/convert/convert_with_neon.cpp @@ -0,0 +1,61 @@ +// +// Copyright 2011-2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
+// + +#include "convert_common.hpp" +#include <arm_neon.h> + +using namespace uhd::convert; + +DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){ + const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); + item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); + + size_t i; + + float32x4_t Q0 = vdupq_n_f32(float(scale_factor)); + for (i=0; i < (nsamps & ~0x03); i+=2) { + float32x4_t Q1 = vld1q_f32(reinterpret_cast<const float *>(&input[i])); + float32x4_t Q2 = vmulq_f32(Q1, Q0); + int32x4_t Q3 = vcvtq_s32_f32(Q2); + int16x4_t D8 = vmovn_s32(Q3); + int16x4_t D9 = vrev32_s16(D8); + vst1_s16((reinterpret_cast<int16_t *>(&output[i])), D9); + } + + for (; i < nsamps; i++) + output[i] = fc32_to_item32(input[i], float(scale_factor)); +} + +DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){ + const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); + fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); + + size_t i; + + float32x4_t Q1 = vdupq_n_f32(float(scale_factor)); + for (i=0; i < (nsamps & ~0x03); i+=2) { + int16x4_t D0 = vld1_s16(reinterpret_cast<const int16_t *>(&input[i])); + int16x4_t D1 = vrev32_s16(D0); + int32x4_t Q2 = vmovl_s16(D1); + float32x4_t Q3 = vcvtq_f32_s32(Q2); + float32x4_t Q4 = vmulq_f32(Q3, Q1); + vst1q_f32((reinterpret_cast<float *>(&output[i])), Q4); + } + + for (; i < nsamps; i++) + output[i] = item32_to_fc32(input[i], float(scale_factor)); +} diff --git a/host/lib/convert/convert_with_orc.cpp b/host/lib/convert/convert_with_orc.cpp new file mode 100644 index 000000000..844c2595c --- /dev/null +++ b/host/lib/convert/convert_with_orc.cpp @@ -0,0 +1,54 @@ +// +// Copyright 2011 Ettus Research LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include "convert_common.hpp"
+#include <uhd/utils/byteswap.hpp>
+
+using namespace uhd::convert;
+
+extern "C" { // C-linkage kernels, called below as (destination, source, scale, sample count); presumably generated from convert_orc.orc - confirm
+extern void _convert_fc32_1_to_item32_1_nswap_orc(void *, const void *, float, int);
+extern void _convert_fc32_1_to_item32_1_bswap_orc(void *, const void *, float, int);
+extern void _convert_item32_1_to_fc32_1_nswap_orc(void *, const void *, float, int);
+extern void _convert_item32_1_to_fc32_1_bswap_orc(void *, const void *, float, int);
+extern void _convert_sc16_1_to_item32_1_nswap_orc(void *, const void *, float, int);
+extern void _convert_item32_1_to_sc16_1_nswap_orc(void *, const void *, float, int);
+}
+
+DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_LIBORC){ // forwards buffer 0 of each side to the Orc kernel
+    _convert_fc32_1_to_item32_1_nswap_orc(outputs[0], inputs[0], scale_factor, nsamps); // NOTE(review): prototype takes float/int; arguments convert implicitly - confirm nsamps fits in int
+}
+
+DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_LIBORC){ // byteswapping variant of the converter above
+    _convert_fc32_1_to_item32_1_bswap_orc(outputs[0], inputs[0], scale_factor, nsamps);
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_LIBORC){ // item32 -> fc32, no byteswap
+    _convert_item32_1_to_fc32_1_nswap_orc(outputs[0], inputs[0], scale_factor, nsamps);
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_LIBORC){ // item32 -> fc32, with byteswap
+    _convert_item32_1_to_fc32_1_bswap_orc(outputs[0], inputs[0], scale_factor, nsamps);
+}
+
+DECLARE_CONVERTER(convert_sc16_1_to_item32_1_nswap, PRIORITY_LIBORC){ // sc16 -> item32, no byteswap
+    _convert_sc16_1_to_item32_1_nswap_orc(outputs[0], inputs[0], scale_factor, nsamps); // scale_factor is forwarded only because the shared prototype requires it
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_sc16_1_nswap, PRIORITY_LIBORC){ // item32 -> sc16, no byteswap
+    _convert_item32_1_to_sc16_1_nswap_orc(outputs[0], inputs[0], scale_factor, nsamps); // scale_factor is forwarded only because the shared prototype requires it
+}
diff --git a/host/lib/convert/gen_convert_general.py b/host/lib/convert/gen_convert_general.py
new file mode 100644
index 000000000..8c3138bda
--- /dev/null
+++ b/host/lib/convert/gen_convert_general.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+#
+# Copyright 2011 Ettus Research LLC
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+TMPL_HEADER = """
+#import time
+/***********************************************************************
+ * This file was generated by $file on $time.strftime("%c")
+ **********************************************************************/
+
+\#include "convert_common.hpp"
+\#include <uhd/utils/byteswap.hpp>
+
+using namespace uhd::convert;
+"""  # end of TMPL_HEADER: generated-file banner, includes and using-directive
+
+TMPL_CONV_TO_FROM_ITEM32_1 = """
+DECLARE_CONVERTER(convert_$(cpu_type)_1_to_item32_1_$(swap), PRIORITY_GENERAL){
+    const $(cpu_type)_t *input = reinterpret_cast<const $(cpu_type)_t *>(inputs[0]);
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    for (size_t i = 0; i < nsamps; i++){
+        output[i] = $(swap_fcn)($(cpu_type)_to_item32(input[i], float(scale_factor)));
+    }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_$(cpu_type)_1_$(swap), PRIORITY_GENERAL){
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    $(cpu_type)_t *output = reinterpret_cast<$(cpu_type)_t *>(outputs[0]);
+
+    for (size_t i = 0; i < nsamps; i++){
+        output[i] = item32_to_$(cpu_type)($(swap_fcn)(input[i]), float(scale_factor));
+    }
+}
+"""  # end of the single-buffer template: one converter per direction
+TMPL_CONV_TO_FROM_ITEM32_X = """
+DECLARE_CONVERTER(convert_$(cpu_type)_$(width)_to_item32_1_$(swap), PRIORITY_GENERAL){
+    #for $w in range($width)
+    const $(cpu_type)_t *input$(w) = reinterpret_cast<const $(cpu_type)_t *>(inputs[$(w)]);
+    #end for
+    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+
+    for (size_t i = 0, j = 0; i < nsamps; i++){
+        #for $w in range($width)
+        output[j++] = $(swap_fcn)($(cpu_type)_to_item32(input$(w)[i], float(scale_factor)));
+        #end for
+    }
+}
+
+DECLARE_CONVERTER(convert_item32_1_to_$(cpu_type)_$(width)_$(swap), PRIORITY_GENERAL){
+    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
+    #for $w in range($width)
+    $(cpu_type)_t *output$(w) = reinterpret_cast<$(cpu_type)_t *>(outputs[$(w)]);
+    #end for
+
+    for (size_t i = 0, j = 0; i < nsamps; i++){
+        #for $w in range($width)
+        output$(w)[i] = item32_to_$(cpu_type)($(swap_fcn)(input[j++]), float(scale_factor));
+        #end for
+    }
+}
+"""  # end of the multi-buffer template: $width cpu buffers interleaved with one item32 buffer
+
+def parse_tmpl(_tmpl_text, **kwargs):  # render a Cheetah template string; kwargs supply the $placeholders
+    from Cheetah.Template import Template  # imported here, so Cheetah is only required when rendering
+    return str(Template(_tmpl_text, kwargs))
+
+if __name__ == '__main__':
+    import sys, os
+    file = os.path.basename(__file__)  # script name shown in the generated banner ($file)
+    output = parse_tmpl(TMPL_HEADER, file=file)
+    for width in 1, 2, 3, 4:  # number of cpu-side buffers
+        for swap, swap_fcn in (('nswap', ''), ('bswap', 'uhd::byteswap')):  # empty swap_fcn leaves only plain parentheses in the output
+            for cpu_type in 'fc64', 'fc32', 'sc16':
+                output += parse_tmpl(
+                    TMPL_CONV_TO_FROM_ITEM32_1 if width == 1 else TMPL_CONV_TO_FROM_ITEM32_X,
+                    width=width, swap=swap, swap_fcn=swap_fcn, cpu_type=cpu_type
+                )
+    open(sys.argv[1], 'w').write(output)  # destination path is the first command-line argument
diff --git a/host/lib/convert/gen_convert_pred.py b/host/lib/convert/gen_convert_pred.py
new file mode 100644
index 000000000..360fbcf44
--- /dev/null
+++ b/host/lib/convert/gen_convert_pred.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python
+#
+# Copyright 2011-2011 Ettus Research LLC
+#
+# This program is free software: you can redistribute it and/or modify
+# it
under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +TMPL_TEXT = """ +#import time +/*********************************************************************** + * This file was generated by $file on $time.strftime("%c") + **********************************************************************/ +\#include <uhd/exception.hpp> +\#include <boost/tokenizer.hpp> +\#include <boost/lexical_cast.hpp> +\#include <boost/detail/endian.hpp> +\#include <boost/cstdint.hpp> +\#include <string> +\#include <vector> + +typedef size_t pred_type; +typedef std::vector<pred_type> pred_vector_type; + +enum dir_type{ + DIR_OTW_TO_CPU = 0, + DIR_CPU_TO_OTW = 1 +}; + +struct pred_error : uhd::value_error{ + pred_error(const std::string &what): + uhd::value_error("convert::make_pred: " + what) + { + /* NOP */ + } +}; + +pred_type make_pred(const std::string &markup, dir_type &dir){ + pred_type pred = 0; + + try{ + boost::tokenizer<boost::char_separator<char> > tokenizer(markup, boost::char_separator<char>("_")); + std::vector<std::string> tokens(tokenizer.begin(), tokenizer.end()); + //token 0 is <convert> + std::string inp_type = tokens.at(1); + std::string num_inps = tokens.at(2); + //token 3 is <to> + std::string out_type = tokens.at(4); + std::string num_outs = tokens.at(5); + std::string swap_type = tokens.at(6); + + std::string cpu_type, otw_type; + if (inp_type.find("item") == std::string::npos){ + cpu_type = inp_type; + otw_type = out_type; + dir = DIR_CPU_TO_OTW; + } + else{ + 
cpu_type = out_type; + otw_type = inp_type; + dir = DIR_OTW_TO_CPU; + } + + if (cpu_type == "fc64") pred |= $ph.fc64_p; + else if (cpu_type == "fc32") pred |= $ph.fc32_p; + else if (cpu_type == "sc16") pred |= $ph.sc16_p; + else if (cpu_type == "sc8") pred |= $ph.sc8_p; + else throw pred_error("unhandled io type " + cpu_type); + + if (otw_type == "item32") pred |= $ph.item32_p; + else throw pred_error("unhandled otw type " + otw_type); + + int num_inputs = boost::lexical_cast<int>(num_inps); + int num_outputs = boost::lexical_cast<int>(num_outs); + + switch(num_inputs*num_outputs){ //FIXME treated as one value + case 1: pred |= $ph.chan1_p; break; + case 2: pred |= $ph.chan2_p; break; + case 3: pred |= $ph.chan3_p; break; + case 4: pred |= $ph.chan4_p; break; + default: throw pred_error("unhandled number of channels"); + } + + if (swap_type == "bswap") pred |= $ph.bswap_p; + else if (swap_type == "nswap") pred |= $ph.nswap_p; + else throw pred_error("unhandled swap type"); + + } + catch(...){ + throw pred_error("could not parse markup: " + markup); + } + + return pred; +} + +#define pred_table_wildcard pred_type(~0) +#define pred_table_max_size size_t(128) +#define pred_table_index(e) (pred_type(e) & 0x7f) + +static pred_vector_type get_pred_byte_order_table(void){ + pred_vector_type table(pred_table_max_size, pred_table_wildcard); + \#ifdef BOOST_BIG_ENDIAN + table[pred_table_index(otw_type_t::BO_BIG_ENDIAN)] = $ph.nswap_p; + table[pred_table_index(otw_type_t::BO_LITTLE_ENDIAN)] = $ph.bswap_p; + \#else + table[pred_table_index(otw_type_t::BO_BIG_ENDIAN)] = $ph.bswap_p; + table[pred_table_index(otw_type_t::BO_LITTLE_ENDIAN)] = $ph.nswap_p; + \#endif + table[pred_table_index(otw_type_t::BO_NATIVE)] = $ph.nswap_p; + return table; +} + +static pred_vector_type get_pred_io_type_table(void){ + pred_vector_type table(pred_table_max_size, pred_table_wildcard); + table[pred_table_index(io_type_t::COMPLEX_FLOAT64)] = $ph.fc64_p; + 
table[pred_table_index(io_type_t::COMPLEX_FLOAT32)] = $ph.fc32_p; + table[pred_table_index(io_type_t::COMPLEX_INT16)] = $ph.sc16_p; + return table; +} + +static pred_vector_type get_pred_num_io_table(void){ + pred_vector_type table(pred_table_max_size, pred_table_wildcard); + table[1] = $ph.chan1_p; + table[2] = $ph.chan2_p; + table[3] = $ph.chan3_p; + table[4] = $ph.chan4_p; + return table; +} + +UHD_INLINE pred_type make_pred( + const io_type_t &io_type, + const otw_type_t &otw_type, + size_t num_inputs, + size_t num_outputs +){ + pred_type pred = $ph.item32_p; //only item32 supported as of now + + static const pred_vector_type pred_byte_order_table(get_pred_byte_order_table()); + pred |= pred_byte_order_table[pred_table_index(otw_type.byteorder)]; + + static const pred_vector_type pred_io_type_table(get_pred_io_type_table()); + pred |= pred_io_type_table[pred_table_index(io_type.tid)]; + + static const pred_vector_type pred_num_io_table(get_pred_num_io_table()); + pred |= pred_num_io_table[pred_table_index(num_inputs*num_outputs)]; + + if (pred == pred_table_wildcard) throw pred_error( + "unhanded input combination for make_pred()" + ); + + return pred; +} +""" + +def parse_tmpl(_tmpl_text, **kwargs): + from Cheetah.Template import Template + return str(Template(_tmpl_text, kwargs)) + +class ph: + bswap_p = 0b00001 + nswap_p = 0b00000 + item32_p = 0b00000 + sc8_p = 0b00000 + sc16_p = 0b00010 + fc32_p = 0b00100 + fc64_p = 0b00110 + chan1_p = 0b00000 + chan2_p = 0b01000 + chan3_p = 0b10000 + chan4_p = 0b11000 + +if __name__ == '__main__': + import sys, os + file = os.path.basename(__file__) + open(sys.argv[1], 'w').write(parse_tmpl(TMPL_TEXT, file=file, ph=ph)) |