about | summary | refs | log | tree | commit | diff | stats
path: root/host/lib/convert/sse2_sc16_to_sc16.cpp
diff options
context:
space:
mode:
author Martin Braun <martin.braun@ettus.com> 2019-07-18 15:36:11 -0700
committer Martin Braun <martin.braun@ettus.com> 2019-11-26 11:49:10 -0800
commit 9df26a9d89ef8fb50a667428066f3ef1732245c9 (patch)
tree aa8aa5adf1c40e0aecb3e45a527511af96e05ca1 /host/lib/convert/sse2_sc16_to_sc16.cpp
parent fed32af0806a730e0f4202003dc49cb736c832fb (diff)
download uhd-9df26a9d89ef8fb50a667428066f3ef1732245c9.tar.gz
uhd-9df26a9d89ef8fb50a667428066f3ef1732245c9.tar.bz2
uhd-9df26a9d89ef8fb50a667428066f3ef1732245c9.zip
convert: sse2: Apply clang-format
Diffstat (limited to 'host/lib/convert/sse2_sc16_to_sc16.cpp')
-rw-r--r-- host/lib/convert/sse2_sc16_to_sc16.cpp 240
1 files changed, 130 insertions, 110 deletions
diff --git a/host/lib/convert/sse2_sc16_to_sc16.cpp b/host/lib/convert/sse2_sc16_to_sc16.cpp
index 5c81f357b..e484bee31 100644
--- a/host/lib/convert/sse2_sc16_to_sc16.cpp
+++ b/host/lib/convert/sse2_sc16_to_sc16.cpp
@@ -25,20 +25,20 @@ using namespace uhd::convert;
// | C | D | A | B | Output
// -----------------
//
-#define CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(_ialign_,_oalign_) \
- for (; i+3 < nsamps; i+=4) { \
- __m128i m0; \
- \
- /* load from input */ \
- m0 = _mm_load ## _ialign_ ## si128((const __m128i *) (input+i));\
- \
- /* swap 16-bit pairs */ \
- m0 = _mm_shufflelo_epi16(m0, _MM_SHUFFLE(2, 3, 0, 1)); \
- m0 = _mm_shufflehi_epi16(m0, _MM_SHUFFLE(2, 3, 0, 1)); \
- \
- /* store to output */ \
- _mm_store ## _oalign_ ## si128((__m128i *) (output+i), m0); \
- } \
+#define CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(_ialign_, _oalign_) \
+ for (; i + 3 < nsamps; i += 4) { \
+ __m128i m0; \
+ \
+ /* load from input */ \
+ m0 = _mm_load##_ialign_##si128((const __m128i*)(input + i)); \
+ \
+ /* swap 16-bit pairs */ \
+ m0 = _mm_shufflelo_epi16(m0, _MM_SHUFFLE(2, 3, 0, 1)); \
+ m0 = _mm_shufflehi_epi16(m0, _MM_SHUFFLE(2, 3, 0, 1)); \
+ \
+ /* store to output */ \
+ _mm_store##_oalign_##si128((__m128i*)(output + i), m0); \
+ }
//
// SSE byte swap
@@ -54,138 +54,158 @@ using namespace uhd::convert;
// | B | A | D | C | Output
// -----------------
//
-#define CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(_ialign_,_oalign_) \
- for (; i+3 < nsamps; i+=4) { \
- __m128i m0, m1, m2; \
- \
- /* load from input */ \
- m0 = _mm_load ## _ialign_ ## si128((const __m128i *) (input+i));\
- \
- /* byteswap 16 bit words */ \
- m1 = _mm_srli_epi16(m0, 8); \
- m2 = _mm_slli_epi16(m0, 8); \
- m0 = _mm_or_si128(m1, m2); \
- \
- /* store to output */ \
- _mm_store ## _oalign_ ## si128((__m128i *) (output+i), m0); \
- } \
-
-DECLARE_CONVERTER(sc16, 1, sc16_item32_le, 1, PRIORITY_SIMD){
- const sc16_t *input = reinterpret_cast<const sc16_t *>(inputs[0]);
- item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+#define CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(_ialign_, _oalign_) \
+ for (; i + 3 < nsamps; i += 4) { \
+ __m128i m0, m1, m2; \
+ \
+ /* load from input */ \
+ m0 = _mm_load##_ialign_##si128((const __m128i*)(input + i)); \
+ \
+ /* byteswap 16 bit words */ \
+ m1 = _mm_srli_epi16(m0, 8); \
+ m2 = _mm_slli_epi16(m0, 8); \
+ m0 = _mm_or_si128(m1, m2); \
+ \
+ /* store to output */ \
+ _mm_store##_oalign_##si128((__m128i*)(output + i), m0); \
+ }
+
+DECLARE_CONVERTER(sc16, 1, sc16_item32_le, 1, PRIORITY_SIMD)
+{
+ const sc16_t* input = reinterpret_cast<const sc16_t*>(inputs[0]);
+ item32_t* output = reinterpret_cast<item32_t*>(outputs[0]);
size_t i = 0;
// need to dispatch according to alignment for fastest conversion
- switch (size_t(input) & 0xf){
- case 0x0:
- // the data is 16-byte aligned, so do the fast processing of the bulk of the samples
- CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(_,u_)
- break;
- case 0x8:
- if (nsamps < 2)
+ switch (size_t(input) & 0xf) {
+ case 0x0:
+ // the data is 16-byte aligned, so do the fast processing of the bulk of the
+ // samples
+ CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(_, u_)
+ break;
+ case 0x8:
+ if (nsamps < 2)
+ break;
+ // the first sample is 8-byte aligned - process it to align the remainder of
+ // the samples to 16-bytes
+ xx_to_item32_sc16<uhd::htowx>(input, output, 2, 1.0);
+ i += 2;
+ CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(_, u_)
+ // do faster processing of the bulk of the samples now that we are 16-byte
+ // aligned
break;
- // the first sample is 8-byte aligned - process it to align the remainder of the samples to 16-bytes
- xx_to_item32_sc16<uhd::htowx>(input, output, 2, 1.0);
- i += 2;
- CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(_,u_)
- // do faster processing of the bulk of the samples now that we are 16-byte aligned
- break;
- default:
- // we are not 8 or 16-byte aligned, so do fast processing with the unaligned load
- CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_,u_)
+ default:
+ // we are not 8 or 16-byte aligned, so do fast processing with the unaligned
+ // load
+ CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_, u_)
}
// convert any remaining samples
- xx_to_item32_sc16<uhd::htowx>(input+i, output+i, nsamps-i, 1.0);
+ xx_to_item32_sc16<uhd::htowx>(input + i, output + i, nsamps - i, 1.0);
}
-DECLARE_CONVERTER(sc16, 1, sc16_item32_be, 1, PRIORITY_SIMD){
- const sc16_t *input = reinterpret_cast<const sc16_t *>(inputs[0]);
- item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
+DECLARE_CONVERTER(sc16, 1, sc16_item32_be, 1, PRIORITY_SIMD)
+{
+ const sc16_t* input = reinterpret_cast<const sc16_t*>(inputs[0]);
+ item32_t* output = reinterpret_cast<item32_t*>(outputs[0]);
size_t i = 0;
// need to dispatch according to alignment for fastest conversion
- switch (size_t(input) & 0xf){
- case 0x0:
- // the data is 16-byte aligned, so do the fast processing of the bulk of the samples
- CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(_,u_)
- break;
- case 0x8:
- if (nsamps < 2)
+ switch (size_t(input) & 0xf) {
+ case 0x0:
+ // the data is 16-byte aligned, so do the fast processing of the bulk of the
+ // samples
+ CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(_, u_)
break;
- // the first value is 8-byte aligned - process it and prepare the bulk of the data for fast conversion
- xx_to_item32_sc16<uhd::htonx>(input, output, 2, 1.0);
- i += 2;
- // do faster processing of the remaining samples now that we are 16-byte aligned
- CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(_,u_)
- break;
- default:
- // we are not 8 or 16-byte aligned, so do fast processing with the unaligned load
- CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_,u_)
+ case 0x8:
+ if (nsamps < 2)
+ break;
+ // the first value is 8-byte aligned - process it and prepare the bulk of the
+ // data for fast conversion
+ xx_to_item32_sc16<uhd::htonx>(input, output, 2, 1.0);
+ i += 2;
+ // do faster processing of the remaining samples now that we are 16-byte
+ // aligned
+ CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(_, u_)
+ break;
+ default:
+ // we are not 8 or 16-byte aligned, so do fast processing with the unaligned
+ // load
+ CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_, u_)
}
// convert any remaining samples
- xx_to_item32_sc16<uhd::htonx>(input+i, output+i, nsamps-i, 1.0);
+ xx_to_item32_sc16<uhd::htonx>(input + i, output + i, nsamps - i, 1.0);
}
-DECLARE_CONVERTER(sc16_item32_le, 1, sc16, 1, PRIORITY_SIMD){
- const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
- sc16_t *output = reinterpret_cast<sc16_t *>(outputs[0]);
+DECLARE_CONVERTER(sc16_item32_le, 1, sc16, 1, PRIORITY_SIMD)
+{
+ const item32_t* input = reinterpret_cast<const item32_t*>(inputs[0]);
+ sc16_t* output = reinterpret_cast<sc16_t*>(outputs[0]);
size_t i = 0;
// need to dispatch according to alignment for fastest conversion
- switch (size_t(output) & 0xf){
- case 0x0:
- // the data is 16-byte aligned, so do the fast processing of the bulk of the samples
- CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_,_)
- break;
- case 0x8:
- if (nsamps < 2)
+ switch (size_t(output) & 0xf) {
+ case 0x0:
+ // the data is 16-byte aligned, so do the fast processing of the bulk of the
+ // samples
+ CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_, _)
+ break;
+ case 0x8:
+ if (nsamps < 2)
+ break;
+ // the first sample is 8-byte aligned - process it to align the remainder of
+ // the samples to 16-bytes
+ item32_sc16_to_xx<uhd::htowx>(input, output, 2, 1.0);
+ i += 2;
+ // do faster processing of the bulk of the samples now that we are 16-byte
+ // aligned
+ CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_, _)
break;
- // the first sample is 8-byte aligned - process it to align the remainder of the samples to 16-bytes
- item32_sc16_to_xx<uhd::htowx>(input, output, 2, 1.0);
- i += 2;
- // do faster processing of the bulk of the samples now that we are 16-byte aligned
- CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_,_)
- break;
- default:
- // we are not 8 or 16-byte aligned, so do fast processing with the unaligned load and store
- CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_,u_)
+ default:
+ // we are not 8 or 16-byte aligned, so do fast processing with the unaligned
+ // load and store
+ CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_, u_)
}
// convert any remaining samples
- item32_sc16_to_xx<uhd::htowx>(input+i, output+i, nsamps-i, 1.0);
+ item32_sc16_to_xx<uhd::htowx>(input + i, output + i, nsamps - i, 1.0);
}
-DECLARE_CONVERTER(sc16_item32_be, 1, sc16, 1, PRIORITY_SIMD){
- const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
- sc16_t *output = reinterpret_cast<sc16_t *>(outputs[0]);
+DECLARE_CONVERTER(sc16_item32_be, 1, sc16, 1, PRIORITY_SIMD)
+{
+ const item32_t* input = reinterpret_cast<const item32_t*>(inputs[0]);
+ sc16_t* output = reinterpret_cast<sc16_t*>(outputs[0]);
size_t i = 0;
// need to dispatch according to alignment for fastest conversion
- switch (size_t(output) & 0xf){
- case 0x0:
- // the data is 16-byte aligned, so do the fast processing of the bulk of the samples
- CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_,_)
- break;
- case 0x8:
- if (nsamps < 2)
+ switch (size_t(output) & 0xf) {
+ case 0x0:
+ // the data is 16-byte aligned, so do the fast processing of the bulk of the
+ // samples
+ CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_, _)
+ break;
+ case 0x8:
+ if (nsamps < 2)
+ break;
+ // the first sample is 8-byte aligned - process it to align the remainder of
+ // the samples to 16-bytes
+ item32_sc16_to_xx<uhd::htonx>(input, output, 2, 1.0);
+ i += 2;
+ // do faster processing of the bulk of the samples now that we are 16-byte
+ // aligned
+ CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_, _)
break;
- // the first sample is 8-byte aligned - process it to align the remainder of the samples to 16-bytes
- item32_sc16_to_xx<uhd::htonx>(input, output, 2, 1.0);
- i += 2;
- // do faster processing of the bulk of the samples now that we are 16-byte aligned
- CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_,_)
- break;
- default:
- // we are not 8 or 16-byte aligned, so do fast processing with the unaligned load and store
- CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_,u_)
+ default:
+ // we are not 8 or 16-byte aligned, so do fast processing with the unaligned
+ // load and store
+ CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_, u_)
}
// convert any remaining samples
- item32_sc16_to_xx<uhd::htonx>(input+i, output+i, nsamps-i, 1.0);
+ item32_sc16_to_xx<uhd::htonx>(input + i, output + i, nsamps - i, 1.0);
}