diff options
-rw-r--r-- | host/lib/convert/convert_with_neon.cpp | 1 | ||||
-rw-r--r-- | host/lib/convert/sse2_fc32_to_sc8.cpp | 9 | ||||
-rw-r--r-- | host/lib/convert/sse2_sc8_to_fc32.cpp | 9 | ||||
-rw-r--r-- | host/lib/convert/sse2_sc8_to_fc64.cpp | 4 |
4 files changed, 15 insertions, 8 deletions
diff --git a/host/lib/convert/convert_with_neon.cpp b/host/lib/convert/convert_with_neon.cpp index ad184e1b6..e994d97a6 100644 --- a/host/lib/convert/convert_with_neon.cpp +++ b/host/lib/convert/convert_with_neon.cpp @@ -16,6 +16,7 @@ // #include "convert_common.hpp" +#include <uhd/utils/byteswap.hpp> #include <arm_neon.h> using namespace uhd::convert; diff --git a/host/lib/convert/sse2_fc32_to_sc8.cpp b/host/lib/convert/sse2_fc32_to_sc8.cpp index 72bbc0da5..dd884640d 100644 --- a/host/lib/convert/sse2_fc32_to_sc8.cpp +++ b/host/lib/convert/sse2_fc32_to_sc8.cpp @@ -21,10 +21,11 @@ using namespace uhd::convert; +template <const int shuf> UHD_INLINE __m128i pack_sc32_4x( const __m128 &in0, const __m128 &in1, const __m128 &in2, const __m128 &in3, - const __m128 &scalar, const int shuf + const __m128 &scalar ){ __m128i tmpi0 = _mm_cvtps_epi32(_mm_mul_ps(in0, scalar)); tmpi0 = _mm_shuffle_epi32(tmpi0, shuf); @@ -46,6 +47,7 @@ DECLARE_CONVERTER(fc32, 1, sc8_item32_be, 1, PRIORITY_SIMD){ item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); const __m128 scalar = _mm_set_ps1(float(scale_factor)); + const int shuf = _MM_SHUFFLE(1, 0, 3, 2); #define convert_fc32_1_to_sc8_item32_1_bswap_guts(_al_) \ for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){ \ @@ -56,7 +58,7 @@ DECLARE_CONVERTER(fc32, 1, sc8_item32_be, 1, PRIORITY_SIMD){ __m128 tmp3 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+6)); \ \ /* convert */ \ - const __m128i tmpi = pack_sc32_4x(tmp0, tmp1, tmp2, tmp3, scalar, _MM_SHUFFLE(1, 0, 3, 2)); \ + const __m128i tmpi = pack_sc32_4x<shuf>(tmp0, tmp1, tmp2, tmp3, scalar); \ \ /* store to output */ \ _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), tmpi); \ @@ -81,6 +83,7 @@ DECLARE_CONVERTER(fc32, 1, sc8_item32_le, 1, PRIORITY_SIMD){ item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); const __m128 scalar = _mm_set_ps1(float(scale_factor)); + const int shuf = _MM_SHUFFLE(2, 3, 0, 1); #define convert_fc32_1_to_sc8_item32_1_nswap_guts(_al_) \ for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){ \ @@ -91,7 +94,7 @@ DECLARE_CONVERTER(fc32, 1, sc8_item32_le, 1, PRIORITY_SIMD){ __m128 tmp3 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+6)); \ \ /* convert */ \ - const __m128i tmpi = pack_sc32_4x(tmp0, tmp1, tmp2, tmp3, scalar, _MM_SHUFFLE(2, 3, 0, 1)); \ + const __m128i tmpi = pack_sc32_4x<shuf>(tmp0, tmp1, tmp2, tmp3, scalar); \ \ /* store to output */ \ _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), tmpi); \ diff --git a/host/lib/convert/sse2_sc8_to_fc32.cpp b/host/lib/convert/sse2_sc8_to_fc32.cpp index 61ab7d26d..c0e561814 100644 --- a/host/lib/convert/sse2_sc8_to_fc32.cpp +++ b/host/lib/convert/sse2_sc8_to_fc32.cpp @@ -23,11 +23,12 @@ using namespace uhd::convert; static const __m128i zeroi = _mm_setzero_si128(); +template <const int shuf> UHD_INLINE void unpack_sc32_4x( const __m128i &in, __m128 &out0, __m128 &out1, __m128 &out2, __m128 &out3, - const __m128 &scalar, const int shuf + const __m128 &scalar ){ const __m128i tmplo = _mm_unpacklo_epi8(zeroi, in); /* value in upper 8 bits */ __m128i tmp0 = _mm_shuffle_epi32(_mm_unpacklo_epi16(zeroi, tmplo), shuf); /* value in upper 16 bits */ @@ -47,6 +48,7 @@ DECLARE_CONVERTER(sc8_item32_be, 1, fc32, 1, PRIORITY_SIMD){ fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 24)); + const int shuf = _MM_SHUFFLE(1, 0, 3, 2); size_t i = 0, j = 0; fc32_t dummy; @@ -64,7 +66,7 @@ DECLARE_CONVERTER(sc8_item32_be, 1, fc32, 1, PRIORITY_SIMD){ \ /* unpack + swap 8-bit pairs */ \ __m128 tmp0, tmp1, tmp2, tmp3; \ - unpack_sc32_4x(tmpi, tmp0, tmp1, tmp2, tmp3, scalar, _MM_SHUFFLE(1, 0, 3, 2)); \ + unpack_sc32_4x<shuf>(tmpi, tmp0, tmp1, tmp2, tmp3, scalar); \ \ /* store to output */ \ _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+j+0), tmp0); \ @@ -90,6 +92,7 @@ DECLARE_CONVERTER(sc8_item32_le, 1, fc32, 1, PRIORITY_SIMD){ fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 24)); + const int shuf = _MM_SHUFFLE(2, 3, 0, 1); size_t i = 0, j = 0; fc32_t dummy; @@ -107,7 +110,7 @@ DECLARE_CONVERTER(sc8_item32_le, 1, fc32, 1, PRIORITY_SIMD){ \ /* unpack + swap 8-bit pairs */ \ __m128 tmp0, tmp1, tmp2, tmp3; \ - unpack_sc32_4x(tmpi, tmp0, tmp1, tmp2, tmp3, scalar, _MM_SHUFFLE(2, 3, 0, 1)); \ + unpack_sc32_4x<shuf>(tmpi, tmp0, tmp1, tmp2, tmp3, scalar); \ \ /* store to output */ \ _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+j+0), tmp0); \ diff --git a/host/lib/convert/sse2_sc8_to_fc64.cpp b/host/lib/convert/sse2_sc8_to_fc64.cpp index aa2010d4e..ef9c0fdb4 100644 --- a/host/lib/convert/sse2_sc8_to_fc64.cpp +++ b/host/lib/convert/sse2_sc8_to_fc64.cpp @@ -29,9 +29,9 @@ UHD_INLINE void unpack_sc32_8x( __m128d &out2, __m128d &out3, __m128d &out4, __m128d &out5, __m128d &out6, __m128d &out7, - const __m128d &scalar, - const int shuf = _MM_SHUFFLE(1, 0, 3, 2) + const __m128d &scalar ){ + const int shuf = _MM_SHUFFLE(1, 0, 3, 2); __m128i tmp; const __m128i tmplo = _mm_unpacklo_epi8(zeroi, in); /* value in upper 8 bits */ |