diff options
Diffstat (limited to 'host/lib')
-rw-r--r-- | host/lib/transport/convert_types_impl.hpp | 95 | ||||
-rw-r--r-- | host/lib/usrp/dboard/db_basic_and_lf.cpp | 32 | ||||
-rw-r--r-- | host/lib/usrp/dsp_utils.cpp | 30 | ||||
-rw-r--r-- | host/lib/usrp/usrp2/fw_common.h | 2 | ||||
-rw-r--r-- | host/lib/usrp/usrp2/usrp2_regs.hpp | 56 |
5 files changed, 129 insertions, 86 deletions
diff --git a/host/lib/transport/convert_types_impl.hpp b/host/lib/transport/convert_types_impl.hpp index 5958b08cb..fdc859883 100644 --- a/host/lib/transport/convert_types_impl.hpp +++ b/host/lib/transport/convert_types_impl.hpp @@ -28,6 +28,10 @@ #define USE_EMMINTRIN_H //use sse2 intrinsics #endif +#if defined(USE_EMMINTRIN_H) + #include <emmintrin.h> +#endif + /*********************************************************************** * Typedefs **********************************************************************/ @@ -72,7 +76,7 @@ static UHD_INLINE void item32_to_sc16_bswap( } /*********************************************************************** - * Convert complex float buffer to items32 + * Convert complex float buffer to items32 (no swap) **********************************************************************/ static const float shorts_per_float = float(32767); @@ -82,6 +86,41 @@ static UHD_INLINE item32_t fc32_to_item32(fc32_t num){ return (item32_t(real) << 16) | (item32_t(imag) << 0); } +//////////////////////////////////// +// none-swap +//////////////////////////////////// +#if defined(USE_EMMINTRIN_H) +static UHD_INLINE void fc32_to_item32_nswap( + const fc32_t *input, item32_t *output, size_t nsamps +){ + __m128 scalar = _mm_set_ps1(shorts_per_float); + + //convert blocks of samples with intrinsics + size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ + //load from input + __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); + __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); + + //convert and scale + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); + + //pack + swap 16-bit pairs + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); + + //store to output + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = fc32_to_item32(input[i]); + } +} + +#else static UHD_INLINE void fc32_to_item32_nswap( const fc32_t *input, item32_t *output, size_t nsamps ){ @@ -90,9 +129,12 @@ static UHD_INLINE void fc32_to_item32_nswap( } } -#if defined(USE_EMMINTRIN_H) -#include <emmintrin.h> +#endif +//////////////////////////////////// +// byte-swap +//////////////////////////////////// +#if defined(USE_EMMINTRIN_H) static UHD_INLINE void fc32_to_item32_bswap( const fc32_t *input, item32_t *output, size_t nsamps ){ @@ -108,7 +150,7 @@ static UHD_INLINE void fc32_to_item32_bswap( __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); - //pack + byteswap -> byteswap 32 bit words + //pack + byteswap -> byteswap 16 bit words __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); @@ -145,6 +187,43 @@ static UHD_INLINE fc32_t item32_to_fc32(item32_t item){ ); } +//////////////////////////////////// +// none-swap +//////////////////////////////////// +#if defined(USE_EMMINTRIN_H) +static UHD_INLINE void item32_to_fc32_nswap( + const item32_t *input, fc32_t *output, size_t nsamps +){ + __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16)); + __m128i zeroi = _mm_setzero_si128(); + + //convert blocks of samples with intrinsics + size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ + //load from input + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); + + //unpack + swap 16-bit pairs + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); + + //convert and scale + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); + + //store to output + _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); + _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); + } + + //convert remainder + for (; i < nsamps; i++){ + output[i] = item32_to_fc32(input[i]); + } +} + +#else static UHD_INLINE void item32_to_fc32_nswap( const item32_t *input, fc32_t *output, size_t nsamps ){ @@ -152,10 +231,12 @@ static UHD_INLINE void item32_to_fc32_nswap( output[i] = item32_to_fc32(input[i]); } } +#endif +//////////////////////////////////// +// byte-swap +//////////////////////////////////// #if defined(USE_EMMINTRIN_H) -#include <emmintrin.h> - static UHD_INLINE void item32_to_fc32_bswap( const item32_t *input, fc32_t *output, size_t nsamps ){ @@ -167,7 +248,7 @@ static UHD_INLINE void item32_to_fc32_bswap( //load from input __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); - //byteswap + unpack -> byteswap 32 bit words + //byteswap + unpack -> byteswap 16 bit words tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); diff --git a/host/lib/usrp/dboard/db_basic_and_lf.cpp b/host/lib/usrp/dboard/db_basic_and_lf.cpp index 0b6e4a75a..2a9bf2ca5 100644 --- a/host/lib/usrp/dboard/db_basic_and_lf.cpp +++ b/host/lib/usrp/dboard/db_basic_and_lf.cpp @@ -57,6 +57,12 @@ private: double _max_freq; }; +static const uhd::dict<std::string, subdev_conn_t> sd_name_to_conn = map_list_of + ("AB", SUBDEV_CONN_COMPLEX_IQ) + ("A", SUBDEV_CONN_REAL_I) + ("B", SUBDEV_CONN_REAL_Q) +; + /*********************************************************************** * Register the basic and LF dboards **********************************************************************/ @@ -77,10 +83,10 @@ static dboard_base::sptr make_lf_tx(dboard_base::ctor_args_t args){ } UHD_STATIC_BLOCK(reg_basic_and_lf_dboards){ - dboard_manager::register_dboard(0x0000, &make_basic_tx, "Basic TX"); - dboard_manager::register_dboard(0x0001, &make_basic_rx, "Basic RX", list_of("AB")("A")("B")); - dboard_manager::register_dboard(0x000e, &make_lf_tx, "LF TX"); - dboard_manager::register_dboard(0x000f, &make_lf_rx, "LF RX", list_of("AB")("A")("B")); + dboard_manager::register_dboard(0x0000, &make_basic_tx, "Basic TX", sd_name_to_conn.keys()); + dboard_manager::register_dboard(0x0001, &make_basic_rx, "Basic RX", sd_name_to_conn.keys()); + dboard_manager::register_dboard(0x000e, &make_lf_tx, "LF TX", sd_name_to_conn.keys()); + dboard_manager::register_dboard(0x000f, &make_lf_rx, "LF RX", sd_name_to_conn.keys()); } /*********************************************************************** @@ -138,14 +144,9 @@ void basic_rx::rx_get(const wax::obj &key_, wax::obj &val){ val = prop_names_t(1, ""); //vector of 1 empty string return; - case SUBDEV_PROP_CONNECTION:{ - static const uhd::dict<std::string, subdev_conn_t> name_to_conn = map_list_of - ("A", SUBDEV_CONN_REAL_I) - ("B", SUBDEV_CONN_REAL_Q) - ("AB", SUBDEV_CONN_COMPLEX_IQ) - ; - val = name_to_conn[get_subdev_name()]; - } return; + case SUBDEV_PROP_CONNECTION: + val = sd_name_to_conn[get_subdev_name()]; + return; case SUBDEV_PROP_USE_LO_OFFSET: val = false; @@ -197,7 +198,10 @@ void basic_tx::tx_get(const wax::obj &key_, wax::obj &val){ //handle the get request conditioned on the key switch(key.as<subdev_prop_t>()){ case SUBDEV_PROP_NAME: - val = get_tx_id().to_pp_string(); + val = std::string(str(boost::format("%s - %s") + % get_tx_id().to_pp_string() + % get_subdev_name() + )); return; case SUBDEV_PROP_OTHERS: @@ -233,7 +237,7 @@ void basic_tx::tx_get(const wax::obj &key_, wax::obj &val){ return; case SUBDEV_PROP_CONNECTION: - val = SUBDEV_CONN_COMPLEX_IQ; + val = sd_name_to_conn[get_subdev_name()]; return; case SUBDEV_PROP_USE_LO_OFFSET: diff --git a/host/lib/usrp/dsp_utils.cpp b/host/lib/usrp/dsp_utils.cpp index fe1313af1..10ae9a086 100644 --- a/host/lib/usrp/dsp_utils.cpp +++ b/host/lib/usrp/dsp_utils.cpp @@ -30,22 +30,36 @@ template <class T> T ceil_log2(T num){ return std::ceil(std::log(num)/std::log(T(2))); } +/*! + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-------------------------------+-------+-------+-------+-------+ + * | | DDC0Q | DDC0I | + * +-------------------------------+-------+-------+-------+-------+ + */ boost::uint32_t dsp_type1::calc_rx_mux_word(subdev_conn_t subdev_conn){ switch(subdev_conn){ - case SUBDEV_CONN_COMPLEX_IQ: return (0x1 << 2) | (0x0 << 0); //DDC0Q=ADC1, DDC0I=ADC0 - case SUBDEV_CONN_COMPLEX_QI: return (0x0 << 2) | (0x1 << 0); //DDC0Q=ADC0, DDC0I=ADC1 - case SUBDEV_CONN_REAL_I: return (0x3 << 2) | (0x0 << 0); //DDC0Q=ZERO, DDC0I=ADC0 - case SUBDEV_CONN_REAL_Q: return (0x1 << 2) | (0x3 << 0); //DDC0Q=ADC1, DDC0I=ZERO + case SUBDEV_CONN_COMPLEX_IQ: return (0x1 << 4) | (0x0 << 0); //DDC0Q=ADC0Q, DDC0I=ADC0I + case SUBDEV_CONN_COMPLEX_QI: return (0x0 << 4) | (0x1 << 0); //DDC0Q=ADC0I, DDC0I=ADC0Q + case SUBDEV_CONN_REAL_I: return (0xf << 4) | (0x0 << 0); //DDC0Q=ZERO, DDC0I=ADC0I + case SUBDEV_CONN_REAL_Q: return (0x1 << 4) | (0xf << 0); //DDC0Q=ADC0Q, DDC0I=ZERO default: UHD_THROW_INVALID_CODE_PATH(); } } +/*! + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-------------------------------+-------+-------+-------+-------+ + * | | DAC0Q | DAC0I | + * +-------------------------------+-------+-------+-------+-------+ + */ boost::uint32_t dsp_type1::calc_tx_mux_word(subdev_conn_t subdev_conn){ switch(subdev_conn){ - case SUBDEV_CONN_COMPLEX_IQ: return (0x1 << 4) | (0x0 << 0); //DAC1=DUC0Q, DAC0=DUC0I - case SUBDEV_CONN_COMPLEX_QI: return (0x0 << 4) | (0x1 << 0); //DAC1=DUC0I, DAC0=DUC0Q - case SUBDEV_CONN_REAL_I: return (0xf << 4) | (0x0 << 0); //DAC1=ZERO, DAC0=DUC0I - case SUBDEV_CONN_REAL_Q: return (0x0 << 4) | (0xf << 0); //DAC1=DUC0I, DAC0=ZERO + case SUBDEV_CONN_COMPLEX_IQ: return (0x1 << 4) | (0x0 << 0); //DAC0Q=DUC0Q, DAC0I=DUC0I + case SUBDEV_CONN_COMPLEX_QI: return (0x0 << 4) | (0x1 << 0); //DAC0Q=DUC0I, DAC0I=DUC0Q + case SUBDEV_CONN_REAL_I: return (0xf << 4) | (0x0 << 0); //DAC0Q=ZERO, DAC0I=DUC0I + case SUBDEV_CONN_REAL_Q: return (0x0 << 4) | (0xf << 0); //DAC0Q=DUC0I, DAC0I=ZERO default: UHD_THROW_INVALID_CODE_PATH(); } } diff --git a/host/lib/usrp/usrp2/fw_common.h b/host/lib/usrp/usrp2/fw_common.h index cc6c41ba7..e812e1221 100644 --- a/host/lib/usrp/usrp2/fw_common.h +++ b/host/lib/usrp/usrp2/fw_common.h @@ -33,7 +33,7 @@ extern "C" { #endif //fpga and firmware compatibility numbers -#define USRP2_FPGA_COMPAT_NUM 1 +#define USRP2_FPGA_COMPAT_NUM 2 #define USRP2_FW_COMPAT_NUM 6 //used to differentiate control packets over data port diff --git a/host/lib/usrp/usrp2/usrp2_regs.hpp b/host/lib/usrp/usrp2/usrp2_regs.hpp index 9d306090b..064ad4e95 100644 --- a/host/lib/usrp/usrp2/usrp2_regs.hpp +++ b/host/lib/usrp/usrp2/usrp2_regs.hpp @@ -124,41 +124,6 @@ #define U2_REG_DSP_TX_FREQ _SR_ADDR(SR_TX_DSP + 0) #define U2_REG_DSP_TX_SCALE_IQ _SR_ADDR(SR_TX_DSP + 1) // {scale_i,scale_q} #define U2_REG_DSP_TX_INTERP_RATE _SR_ADDR(SR_TX_DSP + 2) - - /*! - * \brief output mux configuration. - * - * <pre> - * 3 2 1 - * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - * +-------------------------------+-------+-------+-------+-------+ - * | | DAC1 | DAC0 | - * +-------------------------------+-------+-------+-------+-------+ - * - * There are N DUCs (1 now) with complex inputs and outputs. - * There are two DACs. - * - * Each 4-bit DACx field specifies the source for the DAC - * Each subfield is coded like this: - * - * 3 2 1 0 - * +-------+ - * | N | - * +-------+ - * - * N specifies which DUC output is connected to this DAC. - * - * N which interp output - * --- ------------------- - * 0 DUC 0 I - * 1 DUC 0 Q - * 2 DUC 1 I - * 3 DUC 1 Q - * F All Zeros - * - * The default value is 0x10 - * </pre> - */ #define U2_REG_DSP_TX_MUX _SR_ADDR(SR_TX_DSP + 4) ///////////////////////////////////////////////// @@ -170,27 +135,6 @@ #define U2_REG_DSP_RX_DCOFFSET_I _SR_ADDR(SR_RX_DSP + 3) // Bit 31 high sets fixed offset mode, using lower 14 bits, // otherwise it is automatic #define U2_REG_DSP_RX_DCOFFSET_Q _SR_ADDR(SR_RX_DSP + 4) // Bit 31 high sets fixed offset mode, using lower 14 bits - /*! - * \brief input mux configuration. - * - * This determines which ADC (or constant zero) is connected to - * each DDC input. There are N DDCs (1 now). Each has two inputs. - * - * <pre> - * Mux value: - * - * 3 2 1 - * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | |Q0 |I0 | - * +-------+-------+-------+-------+-------+-------+-------+-------+ - * - * Each 2-bit I field is either 00 (A/D A), 01 (A/D B) or 1X (const zero) - * Each 2-bit Q field is either 00 (A/D A), 01 (A/D B) or 1X (const zero) - * - * The default value is 0x4 - * </pre> - */ #define U2_REG_DSP_RX_MUX _SR_ADDR(SR_RX_DSP + 5) // called adc_mux in dsp_core_rx.v //////////////////////////////////////////////// |