diff options
| author | Josh Blum <josh@joshknows.com> | 2012-04-27 18:49:33 -0700 | 
|---|---|---|
| committer | Josh Blum <josh@joshknows.com> | 2012-05-09 23:54:03 -0700 | 
| commit | 4fa889c4f972120d56fdc1308ba5928fb692a305 (patch) | |
| tree | 02fb911508d17f6c870cbc83ccb21cccacd8c812 | |
| parent | 4c244c78a75646ad4040fb328fa64bb752852512 (diff) | |
| download | uhd-4fa889c4f972120d56fdc1308ba5928fb692a305.tar.gz uhd-4fa889c4f972120d56fdc1308ba5928fb692a305.tar.bz2 uhd-4fa889c4f972120d56fdc1308ba5928fb692a305.zip | |
convert: some platform-specific fixes
neon: add the missing header (uhd/utils/byteswap.hpp)
windows: the shuffle value passed to _mm_shuffle_epi32 needs to be deemed a "constant expression";
fixed by passing it as a template parameter.
| -rw-r--r-- | host/lib/convert/convert_with_neon.cpp | 1 | ||||
| -rw-r--r-- | host/lib/convert/sse2_fc32_to_sc8.cpp | 9 | ||||
| -rw-r--r-- | host/lib/convert/sse2_sc8_to_fc32.cpp | 9 | ||||
| -rw-r--r-- | host/lib/convert/sse2_sc8_to_fc64.cpp | 4 | 
4 files changed, 15 insertions, 8 deletions
| diff --git a/host/lib/convert/convert_with_neon.cpp b/host/lib/convert/convert_with_neon.cpp index ad184e1b6..e994d97a6 100644 --- a/host/lib/convert/convert_with_neon.cpp +++ b/host/lib/convert/convert_with_neon.cpp @@ -16,6 +16,7 @@  //  #include "convert_common.hpp" +#include <uhd/utils/byteswap.hpp>  #include <arm_neon.h>  using namespace uhd::convert; diff --git a/host/lib/convert/sse2_fc32_to_sc8.cpp b/host/lib/convert/sse2_fc32_to_sc8.cpp index 72bbc0da5..dd884640d 100644 --- a/host/lib/convert/sse2_fc32_to_sc8.cpp +++ b/host/lib/convert/sse2_fc32_to_sc8.cpp @@ -21,10 +21,11 @@  using namespace uhd::convert; +template <const int shuf>  UHD_INLINE __m128i pack_sc32_4x(      const __m128 &in0, const __m128 &in1,      const __m128 &in2, const __m128 &in3, -    const __m128 &scalar, const int shuf +    const __m128 &scalar  ){      __m128i tmpi0 = _mm_cvtps_epi32(_mm_mul_ps(in0, scalar));      tmpi0 = _mm_shuffle_epi32(tmpi0, shuf); @@ -46,6 +47,7 @@ DECLARE_CONVERTER(fc32, 1, sc8_item32_be, 1, PRIORITY_SIMD){      item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);      const __m128 scalar = _mm_set_ps1(float(scale_factor)); +    const int shuf = _MM_SHUFFLE(1, 0, 3, 2);      #define convert_fc32_1_to_sc8_item32_1_bswap_guts(_al_)             \      for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){                       \ @@ -56,7 +58,7 @@ DECLARE_CONVERTER(fc32, 1, sc8_item32_be, 1, PRIORITY_SIMD){          __m128 tmp3 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+6)); \                                                                          \          /* convert */                                                   \ -        const __m128i tmpi = pack_sc32_4x(tmp0, tmp1, tmp2, tmp3, scalar, _MM_SHUFFLE(1, 0, 3, 2)); \ +        const __m128i tmpi = pack_sc32_4x<shuf>(tmp0, tmp1, tmp2, tmp3, scalar); \                                                                          \          /* store to output */                               
            \          _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), tmpi);  \ @@ -81,6 +83,7 @@ DECLARE_CONVERTER(fc32, 1, sc8_item32_le, 1, PRIORITY_SIMD){      item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);      const __m128 scalar = _mm_set_ps1(float(scale_factor)); +    const int shuf = _MM_SHUFFLE(2, 3, 0, 1);      #define convert_fc32_1_to_sc8_item32_1_nswap_guts(_al_)             \      for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){                       \ @@ -91,7 +94,7 @@ DECLARE_CONVERTER(fc32, 1, sc8_item32_le, 1, PRIORITY_SIMD){          __m128 tmp3 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+6)); \                                                                          \          /* convert */                                                   \ -        const __m128i tmpi = pack_sc32_4x(tmp0, tmp1, tmp2, tmp3, scalar, _MM_SHUFFLE(2, 3, 0, 1)); \ +        const __m128i tmpi = pack_sc32_4x<shuf>(tmp0, tmp1, tmp2, tmp3, scalar); \                                                                          \          /* store to output */                                           \          _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), tmpi);  \ diff --git a/host/lib/convert/sse2_sc8_to_fc32.cpp b/host/lib/convert/sse2_sc8_to_fc32.cpp index 61ab7d26d..c0e561814 100644 --- a/host/lib/convert/sse2_sc8_to_fc32.cpp +++ b/host/lib/convert/sse2_sc8_to_fc32.cpp @@ -23,11 +23,12 @@ using namespace uhd::convert;  static const __m128i zeroi = _mm_setzero_si128(); +template <const int shuf>  UHD_INLINE void unpack_sc32_4x(      const __m128i &in,      __m128 &out0, __m128 &out1,      __m128 &out2, __m128 &out3, -    const __m128 &scalar, const int shuf +    const __m128 &scalar  ){      const __m128i tmplo = _mm_unpacklo_epi8(zeroi, in); /* value in upper 8 bits */      __m128i tmp0 = _mm_shuffle_epi32(_mm_unpacklo_epi16(zeroi, tmplo), shuf); /* value in upper 16 bits */ @@ -47,6 +48,7 @@ 
DECLARE_CONVERTER(sc8_item32_be, 1, fc32, 1, PRIORITY_SIMD){      fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);      const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 24)); +    const int shuf = _MM_SHUFFLE(1, 0, 3, 2);      size_t i = 0, j = 0;      fc32_t dummy; @@ -64,7 +66,7 @@ DECLARE_CONVERTER(sc8_item32_be, 1, fc32, 1, PRIORITY_SIMD){                                                                          \          /* unpack + swap 8-bit pairs */                                 \          __m128 tmp0, tmp1, tmp2, tmp3;                                  \ -        unpack_sc32_4x(tmpi, tmp0, tmp1, tmp2, tmp3, scalar, _MM_SHUFFLE(1, 0, 3, 2)); \ +        unpack_sc32_4x<shuf>(tmpi, tmp0, tmp1, tmp2, tmp3, scalar); \                                                                          \          /* store to output */                                           \          _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+j+0), tmp0); \ @@ -90,6 +92,7 @@ DECLARE_CONVERTER(sc8_item32_le, 1, fc32, 1, PRIORITY_SIMD){      fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);      const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 24)); +    const int shuf = _MM_SHUFFLE(2, 3, 0, 1);      size_t i = 0, j = 0;      fc32_t dummy; @@ -107,7 +110,7 @@ DECLARE_CONVERTER(sc8_item32_le, 1, fc32, 1, PRIORITY_SIMD){                                                                          \          /* unpack + swap 8-bit pairs */                                 \          __m128 tmp0, tmp1, tmp2, tmp3;                                  \ -        unpack_sc32_4x(tmpi, tmp0, tmp1, tmp2, tmp3, scalar, _MM_SHUFFLE(2, 3, 0, 1)); \ +        unpack_sc32_4x<shuf>(tmpi, tmp0, tmp1, tmp2, tmp3, scalar); \                                                                          \          /* store to output */                                           \          _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+j+0), tmp0); \ diff --git 
a/host/lib/convert/sse2_sc8_to_fc64.cpp b/host/lib/convert/sse2_sc8_to_fc64.cpp index aa2010d4e..ef9c0fdb4 100644 --- a/host/lib/convert/sse2_sc8_to_fc64.cpp +++ b/host/lib/convert/sse2_sc8_to_fc64.cpp @@ -29,9 +29,9 @@ UHD_INLINE void unpack_sc32_8x(      __m128d &out2, __m128d &out3,      __m128d &out4, __m128d &out5,      __m128d &out6, __m128d &out7, -    const __m128d &scalar, -    const int shuf = _MM_SHUFFLE(1, 0, 3, 2) +    const __m128d &scalar  ){ +    const int shuf = _MM_SHUFFLE(1, 0, 3, 2);      __m128i tmp;      const __m128i tmplo = _mm_unpacklo_epi8(zeroi, in); /* value in upper 8 bits */ | 
