author     Martin Braun <martin.braun@ettus.com>   2019-07-18 15:36:11 -0700
committer  Martin Braun <martin.braun@ettus.com>   2019-11-26 11:49:10 -0800
commit     9df26a9d89ef8fb50a667428066f3ef1732245c9
tree       aa8aa5adf1c40e0aecb3e45a527511af96e05ca1  /host/lib/convert
parent     fed32af0806a730e0f4202003dc49cb736c832fb
convert: sse2: Apply clang-format
Diffstat (limited to 'host/lib/convert')
 host/lib/convert/sse2_fc32_to_sc16.cpp | 161
 host/lib/convert/sse2_fc32_to_sc8.cpp  | 127
 host/lib/convert/sse2_fc64_to_sc16.cpp | 138
 host/lib/convert/sse2_fc64_to_sc8.cpp  | 153
 host/lib/convert/sse2_sc16_to_fc32.cpp | 162
 host/lib/convert/sse2_sc16_to_fc64.cpp | 140
 host/lib/convert/sse2_sc16_to_sc16.cpp | 240
 host/lib/convert/sse2_sc8_to_fc32.cpp  | 132
 host/lib/convert/sse2_sc8_to_fc64.cpp  | 168
 9 files changed, 742 insertions(+), 679 deletions(-)
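The change itself is purely mechanical: clang-format moves the pointer declarator next to the type (const fc32_t *input becomes const fc32_t* input), places the opening brace of each DECLARE_CONVERTER body on its own line, re-indents the conversion macros with aligned continuation backslashes, and rewraps long comments; the SIMD logic is untouched. For orientation, the following is a minimal, self-contained sketch of the pattern these converters share (scale complex float samples, saturate-pack to int16 with SSE2, finish the tail in scalar code). It is not part of this commit: the function name and the simplified in-loop alignment check are illustrative assumptions, not UHD's converter API.

// Illustrative sketch only -- not part of this commit or of UHD's API.
// It shows the conversion pattern shared by the reformatted files:
// multiply float I/Q by a scale factor, convert to int32, saturate-pack
// to int16, and use SSE2 loads/stores for the bulk of the samples.
#include <emmintrin.h> // SSE2 intrinsics
#include <complex>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

static void convert_fc32_to_sc16(
    const std::complex<float>* in, int16_t* out, size_t nsamps, float scale)
{
    const __m128 scalar = _mm_set_ps1(scale);
    // The real converters dispatch on (address & 0xf) once, up front, and
    // convert a lone 8-byte-aligned head sample so the main loop can always
    // use aligned loads; this sketch simply picks the load flavor per pass.
    const bool aligned = (reinterpret_cast<std::uintptr_t>(in) & 0xf) == 0;

    size_t i = 0;
    for (; i + 3 < nsamps; i += 4) {
        const float* fp = reinterpret_cast<const float*>(in + i);
        __m128 lo = aligned ? _mm_load_ps(fp + 0) : _mm_loadu_ps(fp + 0);
        __m128 hi = aligned ? _mm_load_ps(fp + 4) : _mm_loadu_ps(fp + 4);
        // convert and scale
        __m128i ilo = _mm_cvtps_epi32(_mm_mul_ps(lo, scalar));
        __m128i ihi = _mm_cvtps_epi32(_mm_mul_ps(hi, scalar));
        // saturate-pack eight int32 values into eight int16 values
        __m128i packed = _mm_packs_epi32(ilo, ihi);
        _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2 * i), packed);
    }
    // scalar tail for the remaining 0-3 samples
    for (; i < nsamps; i++) {
        out[2 * i + 0] = static_cast<int16_t>(in[i].real() * scale);
        out[2 * i + 1] = static_cast<int16_t>(in[i].imag() * scale);
    }
}

int main()
{
    std::vector<std::complex<float>> in = {
        {0.5f, -0.5f}, {1.0f, -1.0f}, {0.25f, 0.0f}, {0.0f, 0.75f}, {-0.125f, 0.5f}};
    std::vector<int16_t> out(2 * in.size());
    convert_fc32_to_sc16(in.data(), out.data(), in.size(), 32767.0f);
    for (size_t n = 0; n < in.size(); n++)
        std::printf("(%d, %d)\n", out[2 * n], out[2 * n + 1]);
    return 0;
}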
diff --git a/host/lib/convert/sse2_fc32_to_sc16.cpp b/host/lib/convert/sse2_fc32_to_sc16.cpp index f562074c6..2d1f853b9 100644 --- a/host/lib/convert/sse2_fc32_to_sc16.cpp +++ b/host/lib/convert/sse2_fc32_to_sc16.cpp @@ -1,6 +1,7 @@ // // Copyright 2011-2012 Ettus Research LLC // Copyright 2018 Ettus Research, a National Instruments Company +// Copyright 2019 Ettus Research, a National Instruments Brand // // SPDX-License-Identifier: GPL-3.0-or-later // @@ -11,101 +12,111 @@ using namespace uhd::convert; -DECLARE_CONVERTER(fc32, 1, sc16_item32_le, 1, PRIORITY_SIMD){ - const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); - item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); +DECLARE_CONVERTER(fc32, 1, sc16_item32_le, 1, PRIORITY_SIMD) +{ + const fc32_t* input = reinterpret_cast<const fc32_t*>(inputs[0]); + item32_t* output = reinterpret_cast<item32_t*>(outputs[0]); const __m128 scalar = _mm_set_ps1(float(scale_factor)); - // this macro converts values faster by using SSE intrinsics to convert 4 values at a time - #define convert_fc32_1_to_item32_1_nswap_guts(_al_) \ - for (; i+3 < nsamps; i+=4){ \ - /* load from input */ \ - __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \ - __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \ - \ - /* convert and scale */ \ - __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \ - __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \ - \ - /* pack + swap 16-bit pairs */ \ - __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ - tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ - tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ - \ - /* store to output */ \ - _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \ - } \ +// this macro converts values faster by using SSE intrinsics to convert 4 values at a time +#define convert_fc32_1_to_item32_1_nswap_guts(_al_) \ + for (; i + 3 < nsamps; i += 4) { \ + /* load from input */ \ + __m128 tmplo = \ + _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 0)); \ + __m128 tmphi = \ + _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 2)); \ + \ + /* convert and scale */ \ + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \ + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \ + \ + /* pack + swap 16-bit pairs */ \ + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i*>(output + i), tmpi); \ + } size_t i = 0; // need to dispatch according to alignment for fastest conversion - switch (size_t(input) & 0xf){ - case 0x0: - // the data is 16-byte aligned, so do the fast processing of the bulk of the samples - convert_fc32_1_to_item32_1_nswap_guts(_) - break; - case 0x8: - // the first sample is 8-byte aligned - process it to align the remainder of the samples to 16-bytes - xx_to_item32_sc16<uhd::htowx>(input, output, 1, scale_factor); - i++; - // do faster processing of the bulk of the samples now that we are 16-byte aligned - convert_fc32_1_to_item32_1_nswap_guts(_) - break; - default: - // we are not 8 or 16-byte aligned, so do fast processing with the unaligned load - convert_fc32_1_to_item32_1_nswap_guts(u_) + switch (size_t(input) & 0xf) { + case 0x0: + // the data is 16-byte aligned, so do the fast processing of the bulk of the + // samples + 
convert_fc32_1_to_item32_1_nswap_guts(_) break; + case 0x8: + // the first sample is 8-byte aligned - process it to align the remainder of + // the samples to 16-bytes + xx_to_item32_sc16<uhd::htowx>(input, output, 1, scale_factor); + i++; + // do faster processing of the bulk of the samples now that we are 16-byte + // aligned + convert_fc32_1_to_item32_1_nswap_guts(_) break; + default: + // we are not 8 or 16-byte aligned, so do fast processing with the unaligned + // load + convert_fc32_1_to_item32_1_nswap_guts(u_) } // convert any remaining samples - xx_to_item32_sc16<uhd::htowx>(input+i, output+i, nsamps-i, scale_factor); + xx_to_item32_sc16<uhd::htowx>(input + i, output + i, nsamps - i, scale_factor); } -DECLARE_CONVERTER(fc32, 1, sc16_item32_be, 1, PRIORITY_SIMD){ - const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); - item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); +DECLARE_CONVERTER(fc32, 1, sc16_item32_be, 1, PRIORITY_SIMD) +{ + const fc32_t* input = reinterpret_cast<const fc32_t*>(inputs[0]); + item32_t* output = reinterpret_cast<item32_t*>(outputs[0]); const __m128 scalar = _mm_set_ps1(float(scale_factor)); - // this macro converts values faster by using SSE intrinsics to convert 4 values at a time - #define convert_fc32_1_to_item32_1_bswap_guts(_al_) \ - for (; i+3 < nsamps; i+=4){ \ - /* load from input */ \ - __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \ - __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \ - \ - /* convert and scale */ \ - __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \ - __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \ - \ - /* pack + byteswap -> byteswap 16 bit words */ \ - __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ - tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ - \ - /* store to output */ \ - _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \ - } \ +// this macro converts values faster by using SSE intrinsics to convert 4 values at a time +#define convert_fc32_1_to_item32_1_bswap_guts(_al_) \ + for (; i + 3 < nsamps; i += 4) { \ + /* load from input */ \ + __m128 tmplo = \ + _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 0)); \ + __m128 tmphi = \ + _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 2)); \ + \ + /* convert and scale */ \ + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \ + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \ + \ + /* pack + byteswap -> byteswap 16 bit words */ \ + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i*>(output + i), tmpi); \ + } size_t i = 0; // need to dispatch according to alignment for fastest conversion - switch (size_t(input) & 0xf){ - case 0x0: - // the data is 16-byte aligned, so do the fast processing of the bulk of the samples - convert_fc32_1_to_item32_1_bswap_guts(_) - break; - case 0x8: - // the first value is 8-byte aligned - process it and prepare the bulk of the data for fast conversion - xx_to_item32_sc16<uhd::htonx>(input, output, 1, scale_factor); - i++; - // do faster processing of the remaining samples now that we are 16-byte aligned - convert_fc32_1_to_item32_1_bswap_guts(_) - break; - default: - // we are not 8 or 16-byte aligned, so do fast processing with the unaligned load - convert_fc32_1_to_item32_1_bswap_guts(u_) 
+ switch (size_t(input) & 0xf) { + case 0x0: + // the data is 16-byte aligned, so do the fast processing of the bulk of the + // samples + convert_fc32_1_to_item32_1_bswap_guts(_) break; + case 0x8: + // the first value is 8-byte aligned - process it and prepare the bulk of the + // data for fast conversion + xx_to_item32_sc16<uhd::htonx>(input, output, 1, scale_factor); + i++; + // do faster processing of the remaining samples now that we are 16-byte + // aligned + convert_fc32_1_to_item32_1_bswap_guts(_) break; + default: + // we are not 8 or 16-byte aligned, so do fast processing with the unaligned + // load + convert_fc32_1_to_item32_1_bswap_guts(u_) } // convert any remaining samples - xx_to_item32_sc16<uhd::htonx>(input+i, output+i, nsamps-i, scale_factor); + xx_to_item32_sc16<uhd::htonx>(input + i, output + i, nsamps - i, scale_factor); } diff --git a/host/lib/convert/sse2_fc32_to_sc8.cpp b/host/lib/convert/sse2_fc32_to_sc8.cpp index b3f96ea39..66faa82cc 100644 --- a/host/lib/convert/sse2_fc32_to_sc8.cpp +++ b/host/lib/convert/sse2_fc32_to_sc8.cpp @@ -12,94 +12,95 @@ using namespace uhd::convert; template <const int shuf> -UHD_INLINE __m128i pack_sc32_4x( - const __m128 &in0, const __m128 &in1, - const __m128 &in2, const __m128 &in3, - const __m128 &scalar -){ - __m128i tmpi0 = _mm_cvtps_epi32(_mm_mul_ps(in0, scalar)); - tmpi0 = _mm_shuffle_epi32(tmpi0, shuf); - __m128i tmpi1 = _mm_cvtps_epi32(_mm_mul_ps(in1, scalar)); - tmpi1 = _mm_shuffle_epi32(tmpi1, shuf); +UHD_INLINE __m128i pack_sc32_4x(const __m128& in0, + const __m128& in1, + const __m128& in2, + const __m128& in3, + const __m128& scalar) +{ + __m128i tmpi0 = _mm_cvtps_epi32(_mm_mul_ps(in0, scalar)); + tmpi0 = _mm_shuffle_epi32(tmpi0, shuf); + __m128i tmpi1 = _mm_cvtps_epi32(_mm_mul_ps(in1, scalar)); + tmpi1 = _mm_shuffle_epi32(tmpi1, shuf); const __m128i lo = _mm_packs_epi32(tmpi0, tmpi1); - __m128i tmpi2 = _mm_cvtps_epi32(_mm_mul_ps(in2, scalar)); - tmpi2 = _mm_shuffle_epi32(tmpi2, shuf); - __m128i tmpi3 = _mm_cvtps_epi32(_mm_mul_ps(in3, scalar)); - tmpi3 = _mm_shuffle_epi32(tmpi3, shuf); + __m128i tmpi2 = _mm_cvtps_epi32(_mm_mul_ps(in2, scalar)); + tmpi2 = _mm_shuffle_epi32(tmpi2, shuf); + __m128i tmpi3 = _mm_cvtps_epi32(_mm_mul_ps(in3, scalar)); + tmpi3 = _mm_shuffle_epi32(tmpi3, shuf); const __m128i hi = _mm_packs_epi32(tmpi2, tmpi3); return _mm_packs_epi16(lo, hi); } -DECLARE_CONVERTER(fc32, 1, sc8_item32_be, 1, PRIORITY_SIMD){ - const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); - item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); +DECLARE_CONVERTER(fc32, 1, sc8_item32_be, 1, PRIORITY_SIMD) +{ + const fc32_t* input = reinterpret_cast<const fc32_t*>(inputs[0]); + item32_t* output = reinterpret_cast<item32_t*>(outputs[0]); const __m128 scalar = _mm_set_ps1(float(scale_factor)); - const int shuf = _MM_SHUFFLE(3, 2, 1, 0); - - #define convert_fc32_1_to_sc8_item32_1_bswap_guts(_al_) \ - for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){ \ - /* load from input */ \ - __m128 tmp0 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \ - __m128 tmp1 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \ - __m128 tmp2 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+4)); \ - __m128 tmp3 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+6)); \ - \ - /* convert */ \ - const __m128i tmpi = pack_sc32_4x<shuf>(tmp0, tmp1, tmp2, tmp3, scalar); \ - \ - /* store to output */ \ - _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), tmpi); \ - } \ + 
const int shuf = _MM_SHUFFLE(3, 2, 1, 0); + +#define convert_fc32_1_to_sc8_item32_1_bswap_guts(_al_) \ + for (size_t j = 0; i + 7 < nsamps; i += 8, j += 4) { \ + /* load from input */ \ + __m128 tmp0 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 0)); \ + __m128 tmp1 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 2)); \ + __m128 tmp2 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 4)); \ + __m128 tmp3 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 6)); \ + \ + /* convert */ \ + const __m128i tmpi = pack_sc32_4x<shuf>(tmp0, tmp1, tmp2, tmp3, scalar); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i*>(output + j), tmpi); \ + } size_t i = 0; - //dispatch according to alignment - if ((size_t(input) & 0xf) == 0){ + // dispatch according to alignment + if ((size_t(input) & 0xf) == 0) { convert_fc32_1_to_sc8_item32_1_bswap_guts(_) - } - else{ + } else { convert_fc32_1_to_sc8_item32_1_bswap_guts(u_) } - //convert remainder - xx_to_item32_sc8<uhd::htonx>(input+i, output+(i/2), nsamps-i, scale_factor); + // convert remainder + xx_to_item32_sc8<uhd::htonx>(input + i, output + (i / 2), nsamps - i, scale_factor); } -DECLARE_CONVERTER(fc32, 1, sc8_item32_le, 1, PRIORITY_SIMD){ - const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); - item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); +DECLARE_CONVERTER(fc32, 1, sc8_item32_le, 1, PRIORITY_SIMD) +{ + const fc32_t* input = reinterpret_cast<const fc32_t*>(inputs[0]); + item32_t* output = reinterpret_cast<item32_t*>(outputs[0]); const __m128 scalar = _mm_set_ps1(float(scale_factor)); - const int shuf = _MM_SHUFFLE(0, 1, 2, 3); - - #define convert_fc32_1_to_sc8_item32_1_nswap_guts(_al_) \ - for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){ \ - /* load from input */ \ - __m128 tmp0 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \ - __m128 tmp1 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \ - __m128 tmp2 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+4)); \ - __m128 tmp3 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+6)); \ - \ - /* convert */ \ - const __m128i tmpi = pack_sc32_4x<shuf>(tmp0, tmp1, tmp2, tmp3, scalar); \ - \ - /* store to output */ \ - _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), tmpi); \ - } \ + const int shuf = _MM_SHUFFLE(0, 1, 2, 3); + +#define convert_fc32_1_to_sc8_item32_1_nswap_guts(_al_) \ + for (size_t j = 0; i + 7 < nsamps; i += 8, j += 4) { \ + /* load from input */ \ + __m128 tmp0 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 0)); \ + __m128 tmp1 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 2)); \ + __m128 tmp2 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 4)); \ + __m128 tmp3 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 6)); \ + \ + /* convert */ \ + const __m128i tmpi = pack_sc32_4x<shuf>(tmp0, tmp1, tmp2, tmp3, scalar); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i*>(output + j), tmpi); \ + } size_t i = 0; - //dispatch according to alignment - if ((size_t(input) & 0xf) == 0){ + // dispatch according to alignment + if ((size_t(input) & 0xf) == 0) { convert_fc32_1_to_sc8_item32_1_nswap_guts(_) - } - else{ + } else { convert_fc32_1_to_sc8_item32_1_nswap_guts(u_) } - //convert remainder - xx_to_item32_sc8<uhd::htowx>(input+i, output+(i/2), nsamps-i, scale_factor); + // convert remainder + xx_to_item32_sc8<uhd::htowx>(input + i, 
output + (i / 2), nsamps - i, scale_factor); } diff --git a/host/lib/convert/sse2_fc64_to_sc16.cpp b/host/lib/convert/sse2_fc64_to_sc16.cpp index 2004c1fd7..7c2ce1f8e 100644 --- a/host/lib/convert/sse2_fc64_to_sc16.cpp +++ b/host/lib/convert/sse2_fc64_to_sc16.cpp @@ -11,91 +11,99 @@ using namespace uhd::convert; -DECLARE_CONVERTER(fc64, 1, sc16_item32_le, 1, PRIORITY_SIMD){ - const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]); - item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); +DECLARE_CONVERTER(fc64, 1, sc16_item32_le, 1, PRIORITY_SIMD) +{ + const fc64_t* input = reinterpret_cast<const fc64_t*>(inputs[0]); + item32_t* output = reinterpret_cast<item32_t*>(outputs[0]); const __m128d scalar = _mm_set1_pd(scale_factor); - #define convert_fc64_1_to_item32_1_nswap_guts(_al_) \ - for (; i+3 < nsamps; i+=4){ \ - /* load from input */ \ - __m128d tmp0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+0)); \ - __m128d tmp1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+1)); \ - __m128d tmp2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+2)); \ - __m128d tmp3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+3)); \ - \ - /* convert and scale */ \ - __m128i tmpi0 = _mm_cvttpd_epi32(_mm_mul_pd(tmp0, scalar)); \ - __m128i tmpi1 = _mm_cvttpd_epi32(_mm_mul_pd(tmp1, scalar)); \ - __m128i tmpilo = _mm_unpacklo_epi64(tmpi0, tmpi1); \ - __m128i tmpi2 = _mm_cvttpd_epi32(_mm_mul_pd(tmp2, scalar)); \ - __m128i tmpi3 = _mm_cvttpd_epi32(_mm_mul_pd(tmp3, scalar)); \ - __m128i tmpihi = _mm_unpacklo_epi64(tmpi2, tmpi3); \ - \ - /* pack + swap 16-bit pairs */ \ - __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ - tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ - tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ - \ - /* store to output */ \ - _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \ - } \ +#define convert_fc64_1_to_item32_1_nswap_guts(_al_) \ + for (; i + 3 < nsamps; i += 4) { \ + /* load from input */ \ + __m128d tmp0 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 0)); \ + __m128d tmp1 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 1)); \ + __m128d tmp2 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 2)); \ + __m128d tmp3 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 3)); \ + \ + /* convert and scale */ \ + __m128i tmpi0 = _mm_cvttpd_epi32(_mm_mul_pd(tmp0, scalar)); \ + __m128i tmpi1 = _mm_cvttpd_epi32(_mm_mul_pd(tmp1, scalar)); \ + __m128i tmpilo = _mm_unpacklo_epi64(tmpi0, tmpi1); \ + __m128i tmpi2 = _mm_cvttpd_epi32(_mm_mul_pd(tmp2, scalar)); \ + __m128i tmpi3 = _mm_cvttpd_epi32(_mm_mul_pd(tmp3, scalar)); \ + __m128i tmpihi = _mm_unpacklo_epi64(tmpi2, tmpi3); \ + \ + /* pack + swap 16-bit pairs */ \ + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i*>(output + i), tmpi); \ + } size_t i = 0; - //dispatch according to alignment - if ((size_t(input) & 0xf) == 0){ + // dispatch according to alignment + if ((size_t(input) & 0xf) == 0) { convert_fc64_1_to_item32_1_nswap_guts(_) - } - else{ + } else { convert_fc64_1_to_item32_1_nswap_guts(u_) } - //convert remainder - xx_to_item32_sc16<uhd::htowx>(input+i, output+i, nsamps-i, scale_factor); + // convert remainder + 
xx_to_item32_sc16<uhd::htowx>(input + i, output + i, nsamps - i, scale_factor); } -DECLARE_CONVERTER(fc64, 1, sc16_item32_be, 1, PRIORITY_SIMD){ - const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]); - item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); +DECLARE_CONVERTER(fc64, 1, sc16_item32_be, 1, PRIORITY_SIMD) +{ + const fc64_t* input = reinterpret_cast<const fc64_t*>(inputs[0]); + item32_t* output = reinterpret_cast<item32_t*>(outputs[0]); const __m128d scalar = _mm_set1_pd(scale_factor); - #define convert_fc64_1_to_item32_1_bswap_guts(_al_) \ - for (; i+3 < nsamps; i+=4){ \ - /* load from input */ \ - __m128d tmp0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+0)); \ - __m128d tmp1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+1)); \ - __m128d tmp2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+2)); \ - __m128d tmp3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+3)); \ - \ - /* convert and scale */ \ - __m128i tmpi0 = _mm_cvttpd_epi32(_mm_mul_pd(tmp0, scalar)); \ - __m128i tmpi1 = _mm_cvttpd_epi32(_mm_mul_pd(tmp1, scalar)); \ - __m128i tmpilo = _mm_unpacklo_epi64(tmpi0, tmpi1); \ - __m128i tmpi2 = _mm_cvttpd_epi32(_mm_mul_pd(tmp2, scalar)); \ - __m128i tmpi3 = _mm_cvttpd_epi32(_mm_mul_pd(tmp3, scalar)); \ - __m128i tmpihi = _mm_unpacklo_epi64(tmpi2, tmpi3); \ - \ - /* pack + byteswap -> byteswap 16 bit words */ \ - __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ - tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ - \ - /* store to output */ \ - _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \ - } \ +#define convert_fc64_1_to_item32_1_bswap_guts(_al_) \ + for (; i + 3 < nsamps; i += 4) { \ + /* load from input */ \ + __m128d tmp0 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 0)); \ + __m128d tmp1 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 1)); \ + __m128d tmp2 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 2)); \ + __m128d tmp3 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 3)); \ + \ + /* convert and scale */ \ + __m128i tmpi0 = _mm_cvttpd_epi32(_mm_mul_pd(tmp0, scalar)); \ + __m128i tmpi1 = _mm_cvttpd_epi32(_mm_mul_pd(tmp1, scalar)); \ + __m128i tmpilo = _mm_unpacklo_epi64(tmpi0, tmpi1); \ + __m128i tmpi2 = _mm_cvttpd_epi32(_mm_mul_pd(tmp2, scalar)); \ + __m128i tmpi3 = _mm_cvttpd_epi32(_mm_mul_pd(tmp3, scalar)); \ + __m128i tmpihi = _mm_unpacklo_epi64(tmpi2, tmpi3); \ + \ + /* pack + byteswap -> byteswap 16 bit words */ \ + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i*>(output + i), tmpi); \ + } size_t i = 0; - //dispatch according to alignment - if ((size_t(input) & 0xf) == 0){ + // dispatch according to alignment + if ((size_t(input) & 0xf) == 0) { convert_fc64_1_to_item32_1_bswap_guts(_) - } - else{ + } else { convert_fc64_1_to_item32_1_bswap_guts(u_) } - //convert remainder - xx_to_item32_sc16<uhd::htonx>(input+i, output+i, nsamps-i, scale_factor); + // convert remainder + xx_to_item32_sc16<uhd::htonx>(input + i, output + i, nsamps - i, scale_factor); } diff --git a/host/lib/convert/sse2_fc64_to_sc8.cpp b/host/lib/convert/sse2_fc64_to_sc8.cpp index 455ca95e3..95db4e927 100644 --- a/host/lib/convert/sse2_fc64_to_sc8.cpp +++ b/host/lib/convert/sse2_fc64_to_sc8.cpp @@ -12,108 +12,119 @@ using 
namespace uhd::convert; UHD_INLINE __m128i pack_sc8_item32_4x( - const __m128i &in0, const __m128i &in1, - const __m128i &in2, const __m128i &in3 -){ + const __m128i& in0, const __m128i& in1, const __m128i& in2, const __m128i& in3) +{ const __m128i lo = _mm_packs_epi32(in0, in1); const __m128i hi = _mm_packs_epi32(in2, in3); return _mm_packs_epi16(lo, hi); } UHD_INLINE __m128i pack_sc32_4x( - const __m128d &lo, const __m128d &hi, - const __m128d &scalar -){ + const __m128d& lo, const __m128d& hi, const __m128d& scalar) +{ const __m128i tmpi_lo = _mm_cvttpd_epi32(_mm_mul_pd(hi, scalar)); const __m128i tmpi_hi = _mm_cvttpd_epi32(_mm_mul_pd(lo, scalar)); return _mm_unpacklo_epi64(tmpi_lo, tmpi_hi); } -DECLARE_CONVERTER(fc64, 1, sc8_item32_be, 1, PRIORITY_SIMD){ - const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]); - item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); +DECLARE_CONVERTER(fc64, 1, sc8_item32_be, 1, PRIORITY_SIMD) +{ + const fc64_t* input = reinterpret_cast<const fc64_t*>(inputs[0]); + item32_t* output = reinterpret_cast<item32_t*>(outputs[0]); const __m128d scalar = _mm_set1_pd(scale_factor); - #define convert_fc64_1_to_sc8_item32_1_bswap_guts(_al_) \ - for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){ \ - /* load from input */ \ - __m128d tmp0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+0)); \ - __m128d tmp1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+1)); \ - __m128d tmp2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+2)); \ - __m128d tmp3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+3)); \ - __m128d tmp4 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+4)); \ - __m128d tmp5 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+5)); \ - __m128d tmp6 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+6)); \ - __m128d tmp7 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+7)); \ - \ - /* interleave */ \ - const __m128i tmpi = pack_sc8_item32_4x( \ - pack_sc32_4x(tmp1, tmp0, scalar), \ - pack_sc32_4x(tmp3, tmp2, scalar), \ - pack_sc32_4x(tmp5, tmp4, scalar), \ - pack_sc32_4x(tmp7, tmp6, scalar) \ - ); \ - \ - /* store to output */ \ - _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), tmpi); \ - } \ +#define convert_fc64_1_to_sc8_item32_1_bswap_guts(_al_) \ + for (size_t j = 0; i + 7 < nsamps; i += 8, j += 4) { \ + /* load from input */ \ + __m128d tmp0 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 0)); \ + __m128d tmp1 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 1)); \ + __m128d tmp2 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 2)); \ + __m128d tmp3 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 3)); \ + __m128d tmp4 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 4)); \ + __m128d tmp5 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 5)); \ + __m128d tmp6 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 6)); \ + __m128d tmp7 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 7)); \ + \ + /* interleave */ \ + const __m128i tmpi = pack_sc8_item32_4x(pack_sc32_4x(tmp1, tmp0, scalar), \ + pack_sc32_4x(tmp3, tmp2, scalar), \ + pack_sc32_4x(tmp5, tmp4, scalar), \ + pack_sc32_4x(tmp7, tmp6, scalar)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i*>(output + j), tmpi); \ + } size_t i = 0; - //dispatch according to 
alignment - if ((size_t(input) & 0xf) == 0){ + // dispatch according to alignment + if ((size_t(input) & 0xf) == 0) { convert_fc64_1_to_sc8_item32_1_bswap_guts(_) - } - else{ + } else { convert_fc64_1_to_sc8_item32_1_bswap_guts(u_) } - //convert remainder - xx_to_item32_sc8<uhd::htonx>(input+i, output+(i/2), nsamps-i, scale_factor); + // convert remainder + xx_to_item32_sc8<uhd::htonx>(input + i, output + (i / 2), nsamps - i, scale_factor); } -DECLARE_CONVERTER(fc64, 1, sc8_item32_le, 1, PRIORITY_SIMD){ - const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]); - item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); +DECLARE_CONVERTER(fc64, 1, sc8_item32_le, 1, PRIORITY_SIMD) +{ + const fc64_t* input = reinterpret_cast<const fc64_t*>(inputs[0]); + item32_t* output = reinterpret_cast<item32_t*>(outputs[0]); const __m128d scalar = _mm_set1_pd(scale_factor); - #define convert_fc64_1_to_sc8_item32_1_nswap_guts(_al_) \ - for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){ \ - /* load from input */ \ - __m128d tmp0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+0)); \ - __m128d tmp1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+1)); \ - __m128d tmp2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+2)); \ - __m128d tmp3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+3)); \ - __m128d tmp4 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+4)); \ - __m128d tmp5 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+5)); \ - __m128d tmp6 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+6)); \ - __m128d tmp7 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+i+7)); \ - \ - /* interleave */ \ - __m128i tmpi = pack_sc8_item32_4x( \ - pack_sc32_4x(tmp0, tmp1, scalar), \ - pack_sc32_4x(tmp2, tmp3, scalar), \ - pack_sc32_4x(tmp4, tmp5, scalar), \ - pack_sc32_4x(tmp6, tmp7, scalar) \ - ); \ - tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); /*byteswap*/\ - \ - /* store to output */ \ - _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), tmpi); \ - } \ +#define convert_fc64_1_to_sc8_item32_1_nswap_guts(_al_) \ + for (size_t j = 0; i + 7 < nsamps; i += 8, j += 4) { \ + /* load from input */ \ + __m128d tmp0 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 0)); \ + __m128d tmp1 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 1)); \ + __m128d tmp2 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 2)); \ + __m128d tmp3 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 3)); \ + __m128d tmp4 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 4)); \ + __m128d tmp5 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 5)); \ + __m128d tmp6 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 6)); \ + __m128d tmp7 = \ + _mm_load##_al_##pd(reinterpret_cast<const double*>(input + i + 7)); \ + \ + /* interleave */ \ + __m128i tmpi = pack_sc8_item32_4x(pack_sc32_4x(tmp0, tmp1, scalar), \ + pack_sc32_4x(tmp2, tmp3, scalar), \ + pack_sc32_4x(tmp4, tmp5, scalar), \ + pack_sc32_4x(tmp6, tmp7, scalar)); \ + tmpi = \ + _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); /*byteswap*/ \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i*>(output + j), tmpi); \ + } size_t i = 0; - //dispatch according to alignment - if ((size_t(input) & 0xf) == 0){ + // dispatch according to alignment + if 
((size_t(input) & 0xf) == 0) { convert_fc64_1_to_sc8_item32_1_nswap_guts(_) - } - else{ + } else { convert_fc64_1_to_sc8_item32_1_nswap_guts(u_) } - //convert remainder - xx_to_item32_sc8<uhd::htowx>(input+i, output+(i/2), nsamps-i, scale_factor); + // convert remainder + xx_to_item32_sc8<uhd::htowx>(input + i, output + (i / 2), nsamps - i, scale_factor); } diff --git a/host/lib/convert/sse2_sc16_to_fc32.cpp b/host/lib/convert/sse2_sc16_to_fc32.cpp index d75c4a2a7..a16ef30d4 100644 --- a/host/lib/convert/sse2_sc16_to_fc32.cpp +++ b/host/lib/convert/sse2_sc16_to_fc32.cpp @@ -11,105 +11,111 @@ using namespace uhd::convert; -DECLARE_CONVERTER(sc16_item32_le, 1, fc32, 1, PRIORITY_SIMD){ - const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); - fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); +DECLARE_CONVERTER(sc16_item32_le, 1, fc32, 1, PRIORITY_SIMD) +{ + const item32_t* input = reinterpret_cast<const item32_t*>(inputs[0]); + fc32_t* output = reinterpret_cast<fc32_t*>(outputs[0]); - const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); + const __m128 scalar = _mm_set_ps1(float(scale_factor) / (1 << 16)); const __m128i zeroi = _mm_setzero_si128(); - // this macro converts values faster by using SSE intrinsics to convert 4 values at a time - #define convert_item32_1_to_fc32_1_nswap_guts(_al_) \ - for (; i+3 < nsamps; i+=4){ \ - /* load from input */ \ - __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ - \ - /* unpack + swap 16-bit pairs */ \ - tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ - tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ +// this macro converts values faster by using SSE intrinsics to convert 4 values at a time +#define convert_item32_1_to_fc32_1_nswap_guts(_al_) \ + for (; i + 3 < nsamps; i += 4) { \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i*>(input + i)); \ + \ + /* unpack + swap 16-bit pairs */ \ + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ - __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ - \ - /* convert and scale */ \ - __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \ - __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \ - \ - /* store to output */ \ - _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \ - _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \ - } \ + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ + \ + /* convert and scale */ \ + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \ + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \ + \ + /* store to output */ \ + _mm_store##_al_##ps(reinterpret_cast<float*>(output + i + 0), tmplo); \ + _mm_store##_al_##ps(reinterpret_cast<float*>(output + i + 2), tmphi); \ + } size_t i = 0; // need to dispatch according to alignment for fastest conversion - switch (size_t(output) & 0xf){ - case 0x0: - // the data is 16-byte aligned, so do the fast processing of the bulk of the samples - convert_item32_1_to_fc32_1_nswap_guts(_) - break; - case 0x8: - // the first sample is 8-byte aligned - process it to align the remainder of the samples to 16-bytes - item32_sc16_to_xx<uhd::htowx>(input, output, 1, scale_factor); - i++; - // do faster processing of the bulk of the samples now that we are 16-byte aligned - 
convert_item32_1_to_fc32_1_nswap_guts(_) - break; - default: - // we are not 8 or 16-byte aligned, so do fast processing with the unaligned load and store - convert_item32_1_to_fc32_1_nswap_guts(u_) + switch (size_t(output) & 0xf) { + case 0x0: + // the data is 16-byte aligned, so do the fast processing of the bulk of the + // samples + convert_item32_1_to_fc32_1_nswap_guts(_) break; + case 0x8: + // the first sample is 8-byte aligned - process it to align the remainder of + // the samples to 16-bytes + item32_sc16_to_xx<uhd::htowx>(input, output, 1, scale_factor); + i++; + // do faster processing of the bulk of the samples now that we are 16-byte + // aligned + convert_item32_1_to_fc32_1_nswap_guts(_) break; + default: + // we are not 8 or 16-byte aligned, so do fast processing with the unaligned + // load and store + convert_item32_1_to_fc32_1_nswap_guts(u_) } // convert any remaining samples - item32_sc16_to_xx<uhd::htowx>(input+i, output+i, nsamps-i, scale_factor); + item32_sc16_to_xx<uhd::htowx>(input + i, output + i, nsamps - i, scale_factor); } -DECLARE_CONVERTER(sc16_item32_be, 1, fc32, 1, PRIORITY_SIMD){ - const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); - fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); +DECLARE_CONVERTER(sc16_item32_be, 1, fc32, 1, PRIORITY_SIMD) +{ + const item32_t* input = reinterpret_cast<const item32_t*>(inputs[0]); + fc32_t* output = reinterpret_cast<fc32_t*>(outputs[0]); - const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); + const __m128 scalar = _mm_set_ps1(float(scale_factor) / (1 << 16)); const __m128i zeroi = _mm_setzero_si128(); - // this macro converts values faster by using SSE intrinsics to convert 4 values at a time - #define convert_item32_1_to_fc32_1_bswap_guts(_al_) \ - for (; i+3 < nsamps; i+=4){ \ - /* load from input */ \ - __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ - \ - /* byteswap + unpack -> byteswap 16 bit words */ \ - tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ - __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ - __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ - \ - /* convert and scale */ \ - __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \ - __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \ - \ - /* store to output */ \ - _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \ - _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \ - } \ +// this macro converts values faster by using SSE intrinsics to convert 4 values at a time +#define convert_item32_1_to_fc32_1_bswap_guts(_al_) \ + for (; i + 3 < nsamps; i += 4) { \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i*>(input + i)); \ + \ + /* byteswap + unpack -> byteswap 16 bit words */ \ + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ + \ + /* convert and scale */ \ + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \ + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \ + \ + /* store to output */ \ + _mm_store##_al_##ps(reinterpret_cast<float*>(output + i + 0), tmplo); \ + _mm_store##_al_##ps(reinterpret_cast<float*>(output + i + 2), tmphi); \ + } size_t i = 0; // need to dispatch according to alignment for fastest conversion - switch (size_t(output) & 
0xf){ - case 0x0: - // the data is 16-byte aligned, so do the fast processing of the bulk of the samples - convert_item32_1_to_fc32_1_bswap_guts(_) - break; - case 0x8: - // the first sample is 8-byte aligned - process it to align the remainder of the samples to 16-bytes - item32_sc16_to_xx<uhd::htonx>(input, output, 1, scale_factor); - i++; - // do faster processing of the bulk of the samples now that we are 16-byte aligned - convert_item32_1_to_fc32_1_bswap_guts(_) - break; - default: - // we are not 8 or 16-byte aligned, so do fast processing with the unaligned load and store - convert_item32_1_to_fc32_1_bswap_guts(u_) + switch (size_t(output) & 0xf) { + case 0x0: + // the data is 16-byte aligned, so do the fast processing of the bulk of the + // samples + convert_item32_1_to_fc32_1_bswap_guts(_) break; + case 0x8: + // the first sample is 8-byte aligned - process it to align the remainder of + // the samples to 16-bytes + item32_sc16_to_xx<uhd::htonx>(input, output, 1, scale_factor); + i++; + // do faster processing of the bulk of the samples now that we are 16-byte + // aligned + convert_item32_1_to_fc32_1_bswap_guts(_) break; + default: + // we are not 8 or 16-byte aligned, so do fast processing with the unaligned + // load and store + convert_item32_1_to_fc32_1_bswap_guts(u_) } // convert any remaining samples - item32_sc16_to_xx<uhd::htonx>(input+i, output+i, nsamps-i, scale_factor); + item32_sc16_to_xx<uhd::htonx>(input + i, output + i, nsamps - i, scale_factor); } diff --git a/host/lib/convert/sse2_sc16_to_fc64.cpp b/host/lib/convert/sse2_sc16_to_fc64.cpp index 7f22fd07f..45821ac9f 100644 --- a/host/lib/convert/sse2_sc16_to_fc64.cpp +++ b/host/lib/convert/sse2_sc16_to_fc64.cpp @@ -11,95 +11,95 @@ using namespace uhd::convert; -DECLARE_CONVERTER(sc16_item32_le, 1, fc64, 1, PRIORITY_SIMD){ - const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); - fc64_t *output = reinterpret_cast<fc64_t *>(outputs[0]); +DECLARE_CONVERTER(sc16_item32_le, 1, fc64, 1, PRIORITY_SIMD) +{ + const item32_t* input = reinterpret_cast<const item32_t*>(inputs[0]); + fc64_t* output = reinterpret_cast<fc64_t*>(outputs[0]); - const __m128d scalar = _mm_set1_pd(scale_factor/(1 << 16)); - const __m128i zeroi = _mm_setzero_si128(); + const __m128d scalar = _mm_set1_pd(scale_factor / (1 << 16)); + const __m128i zeroi = _mm_setzero_si128(); - #define convert_item32_1_to_fc64_1_nswap_guts(_al_) \ - for (; i+3 < nsamps; i+=4){ \ - /* load from input */ \ - __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ - \ - /* unpack + swap 16-bit pairs */ \ - tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ - tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ +#define convert_item32_1_to_fc64_1_nswap_guts(_al_) \ + for (; i + 3 < nsamps; i += 4) { \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i*>(input + i)); \ + \ + /* unpack + swap 16-bit pairs */ \ + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ - __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ - \ - /* convert and scale */ \ - __m128d tmp0 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar); \ - tmpilo = _mm_unpackhi_epi64(tmpilo, zeroi); \ - __m128d tmp1 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar); \ - __m128d tmp2 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar); \ - tmpihi = _mm_unpackhi_epi64(tmpihi, zeroi); \ 
- __m128d tmp3 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar); \ - \ - /* store to output */ \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+0), tmp0); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+1), tmp1); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+2), tmp2); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+3), tmp3); \ - } \ + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ + \ + /* convert and scale */ \ + __m128d tmp0 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar); \ + tmpilo = _mm_unpackhi_epi64(tmpilo, zeroi); \ + __m128d tmp1 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar); \ + __m128d tmp2 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar); \ + tmpihi = _mm_unpackhi_epi64(tmpihi, zeroi); \ + __m128d tmp3 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar); \ + \ + /* store to output */ \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + i + 0), tmp0); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + i + 1), tmp1); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + i + 2), tmp2); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + i + 3), tmp3); \ + } size_t i = 0; - //dispatch according to alignment - if ((size_t(output) & 0xf) == 0){ + // dispatch according to alignment + if ((size_t(output) & 0xf) == 0) { convert_item32_1_to_fc64_1_nswap_guts(_) - } - else{ + } else { convert_item32_1_to_fc64_1_nswap_guts(u_) } - //convert remainder - item32_sc16_to_xx<uhd::htowx>(input+i, output+i, nsamps-i, scale_factor); + // convert remainder + item32_sc16_to_xx<uhd::htowx>(input + i, output + i, nsamps - i, scale_factor); } -DECLARE_CONVERTER(sc16_item32_be, 1, fc64, 1, PRIORITY_SIMD){ - const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); - fc64_t *output = reinterpret_cast<fc64_t *>(outputs[0]); +DECLARE_CONVERTER(sc16_item32_be, 1, fc64, 1, PRIORITY_SIMD) +{ + const item32_t* input = reinterpret_cast<const item32_t*>(inputs[0]); + fc64_t* output = reinterpret_cast<fc64_t*>(outputs[0]); - const __m128d scalar = _mm_set1_pd(scale_factor/(1 << 16)); - const __m128i zeroi = _mm_setzero_si128(); + const __m128d scalar = _mm_set1_pd(scale_factor / (1 << 16)); + const __m128i zeroi = _mm_setzero_si128(); - #define convert_item32_1_to_fc64_1_bswap_guts(_al_) \ - for (; i+3 < nsamps; i+=4){ \ - /* load from input */ \ - __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ - \ - /* byteswap + unpack -> byteswap 16 bit words */ \ - tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ - __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ - __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ - \ - /* convert and scale */ \ - __m128d tmp0 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar); \ - tmpilo = _mm_unpackhi_epi64(tmpilo, zeroi); \ - __m128d tmp1 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar); \ - __m128d tmp2 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar); \ - tmpihi = _mm_unpackhi_epi64(tmpihi, zeroi); \ - __m128d tmp3 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar); \ - \ - /* store to output */ \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+0), tmp0); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+1), tmp1); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+2), tmp2); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+i+3), tmp3); \ - } \ +#define convert_item32_1_to_fc64_1_bswap_guts(_al_) \ + for (; i + 3 < nsamps; i += 4) { \ + /* load from input */ \ + 
__m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i*>(input + i)); \ + \ + /* byteswap + unpack -> byteswap 16 bit words */ \ + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ + \ + /* convert and scale */ \ + __m128d tmp0 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar); \ + tmpilo = _mm_unpackhi_epi64(tmpilo, zeroi); \ + __m128d tmp1 = _mm_mul_pd(_mm_cvtepi32_pd(tmpilo), scalar); \ + __m128d tmp2 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar); \ + tmpihi = _mm_unpackhi_epi64(tmpihi, zeroi); \ + __m128d tmp3 = _mm_mul_pd(_mm_cvtepi32_pd(tmpihi), scalar); \ + \ + /* store to output */ \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + i + 0), tmp0); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + i + 1), tmp1); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + i + 2), tmp2); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + i + 3), tmp3); \ + } size_t i = 0; - //dispatch according to alignment - if ((size_t(output) & 0xf) == 0){ + // dispatch according to alignment + if ((size_t(output) & 0xf) == 0) { convert_item32_1_to_fc64_1_bswap_guts(_) - } - else{ + } else { convert_item32_1_to_fc64_1_bswap_guts(u_) } - //convert remainder - item32_sc16_to_xx<uhd::htonx>(input+i, output+i, nsamps-i, scale_factor); + // convert remainder + item32_sc16_to_xx<uhd::htonx>(input + i, output + i, nsamps - i, scale_factor); } diff --git a/host/lib/convert/sse2_sc16_to_sc16.cpp b/host/lib/convert/sse2_sc16_to_sc16.cpp index 5c81f357b..e484bee31 100644 --- a/host/lib/convert/sse2_sc16_to_sc16.cpp +++ b/host/lib/convert/sse2_sc16_to_sc16.cpp @@ -25,20 +25,20 @@ using namespace uhd::convert; // | C | D | A | B | Output // ----------------- // -#define CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(_ialign_,_oalign_) \ - for (; i+3 < nsamps; i+=4) { \ - __m128i m0; \ - \ - /* load from input */ \ - m0 = _mm_load ## _ialign_ ## si128((const __m128i *) (input+i));\ - \ - /* swap 16-bit pairs */ \ - m0 = _mm_shufflelo_epi16(m0, _MM_SHUFFLE(2, 3, 0, 1)); \ - m0 = _mm_shufflehi_epi16(m0, _MM_SHUFFLE(2, 3, 0, 1)); \ - \ - /* store to output */ \ - _mm_store ## _oalign_ ## si128((__m128i *) (output+i), m0); \ - } \ +#define CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(_ialign_, _oalign_) \ + for (; i + 3 < nsamps; i += 4) { \ + __m128i m0; \ + \ + /* load from input */ \ + m0 = _mm_load##_ialign_##si128((const __m128i*)(input + i)); \ + \ + /* swap 16-bit pairs */ \ + m0 = _mm_shufflelo_epi16(m0, _MM_SHUFFLE(2, 3, 0, 1)); \ + m0 = _mm_shufflehi_epi16(m0, _MM_SHUFFLE(2, 3, 0, 1)); \ + \ + /* store to output */ \ + _mm_store##_oalign_##si128((__m128i*)(output + i), m0); \ + } // // SSE byte swap @@ -54,138 +54,158 @@ using namespace uhd::convert; // | B | A | D | C | Output // ----------------- // -#define CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(_ialign_,_oalign_) \ - for (; i+3 < nsamps; i+=4) { \ - __m128i m0, m1, m2; \ - \ - /* load from input */ \ - m0 = _mm_load ## _ialign_ ## si128((const __m128i *) (input+i));\ - \ - /* byteswap 16 bit words */ \ - m1 = _mm_srli_epi16(m0, 8); \ - m2 = _mm_slli_epi16(m0, 8); \ - m0 = _mm_or_si128(m1, m2); \ - \ - /* store to output */ \ - _mm_store ## _oalign_ ## si128((__m128i *) (output+i), m0); \ - } \ - -DECLARE_CONVERTER(sc16, 1, sc16_item32_le, 1, PRIORITY_SIMD){ - const sc16_t *input = reinterpret_cast<const sc16_t *>(inputs[0]); - item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); 
+#define CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(_ialign_, _oalign_) \ + for (; i + 3 < nsamps; i += 4) { \ + __m128i m0, m1, m2; \ + \ + /* load from input */ \ + m0 = _mm_load##_ialign_##si128((const __m128i*)(input + i)); \ + \ + /* byteswap 16 bit words */ \ + m1 = _mm_srli_epi16(m0, 8); \ + m2 = _mm_slli_epi16(m0, 8); \ + m0 = _mm_or_si128(m1, m2); \ + \ + /* store to output */ \ + _mm_store##_oalign_##si128((__m128i*)(output + i), m0); \ + } + +DECLARE_CONVERTER(sc16, 1, sc16_item32_le, 1, PRIORITY_SIMD) +{ + const sc16_t* input = reinterpret_cast<const sc16_t*>(inputs[0]); + item32_t* output = reinterpret_cast<item32_t*>(outputs[0]); size_t i = 0; // need to dispatch according to alignment for fastest conversion - switch (size_t(input) & 0xf){ - case 0x0: - // the data is 16-byte aligned, so do the fast processing of the bulk of the samples - CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(_,u_) - break; - case 0x8: - if (nsamps < 2) + switch (size_t(input) & 0xf) { + case 0x0: + // the data is 16-byte aligned, so do the fast processing of the bulk of the + // samples + CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(_, u_) + break; + case 0x8: + if (nsamps < 2) + break; + // the first sample is 8-byte aligned - process it to align the remainder of + // the samples to 16-bytes + xx_to_item32_sc16<uhd::htowx>(input, output, 2, 1.0); + i += 2; + CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(_, u_) + // do faster processing of the bulk of the samples now that we are 16-byte + // aligned break; - // the first sample is 8-byte aligned - process it to align the remainder of the samples to 16-bytes - xx_to_item32_sc16<uhd::htowx>(input, output, 2, 1.0); - i += 2; - CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(_,u_) - // do faster processing of the bulk of the samples now that we are 16-byte aligned - break; - default: - // we are not 8 or 16-byte aligned, so do fast processing with the unaligned load - CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_,u_) + default: + // we are not 8 or 16-byte aligned, so do fast processing with the unaligned + // load + CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_, u_) } // convert any remaining samples - xx_to_item32_sc16<uhd::htowx>(input+i, output+i, nsamps-i, 1.0); + xx_to_item32_sc16<uhd::htowx>(input + i, output + i, nsamps - i, 1.0); } -DECLARE_CONVERTER(sc16, 1, sc16_item32_be, 1, PRIORITY_SIMD){ - const sc16_t *input = reinterpret_cast<const sc16_t *>(inputs[0]); - item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); +DECLARE_CONVERTER(sc16, 1, sc16_item32_be, 1, PRIORITY_SIMD) +{ + const sc16_t* input = reinterpret_cast<const sc16_t*>(inputs[0]); + item32_t* output = reinterpret_cast<item32_t*>(outputs[0]); size_t i = 0; // need to dispatch according to alignment for fastest conversion - switch (size_t(input) & 0xf){ - case 0x0: - // the data is 16-byte aligned, so do the fast processing of the bulk of the samples - CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(_,u_) - break; - case 0x8: - if (nsamps < 2) + switch (size_t(input) & 0xf) { + case 0x0: + // the data is 16-byte aligned, so do the fast processing of the bulk of the + // samples + CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(_, u_) break; - // the first value is 8-byte aligned - process it and prepare the bulk of the data for fast conversion - xx_to_item32_sc16<uhd::htonx>(input, output, 2, 1.0); - i += 2; - // do faster processing of the remaining samples now that we are 16-byte aligned - CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(_,u_) - break; - default: - // we are not 8 or 16-byte aligned, so do fast processing with the unaligned load - 
CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_,u_) + case 0x8: + if (nsamps < 2) + break; + // the first value is 8-byte aligned - process it and prepare the bulk of the + // data for fast conversion + xx_to_item32_sc16<uhd::htonx>(input, output, 2, 1.0); + i += 2; + // do faster processing of the remaining samples now that we are 16-byte + // aligned + CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(_, u_) + break; + default: + // we are not 8 or 16-byte aligned, so do fast processing with the unaligned + // load + CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_, u_) } // convert any remaining samples - xx_to_item32_sc16<uhd::htonx>(input+i, output+i, nsamps-i, 1.0); + xx_to_item32_sc16<uhd::htonx>(input + i, output + i, nsamps - i, 1.0); } -DECLARE_CONVERTER(sc16_item32_le, 1, sc16, 1, PRIORITY_SIMD){ - const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); - sc16_t *output = reinterpret_cast<sc16_t *>(outputs[0]); +DECLARE_CONVERTER(sc16_item32_le, 1, sc16, 1, PRIORITY_SIMD) +{ + const item32_t* input = reinterpret_cast<const item32_t*>(inputs[0]); + sc16_t* output = reinterpret_cast<sc16_t*>(outputs[0]); size_t i = 0; // need to dispatch according to alignment for fastest conversion - switch (size_t(output) & 0xf){ - case 0x0: - // the data is 16-byte aligned, so do the fast processing of the bulk of the samples - CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_,_) - break; - case 0x8: - if (nsamps < 2) + switch (size_t(output) & 0xf) { + case 0x0: + // the data is 16-byte aligned, so do the fast processing of the bulk of the + // samples + CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_, _) + break; + case 0x8: + if (nsamps < 2) + break; + // the first sample is 8-byte aligned - process it to align the remainder of + // the samples to 16-bytes + item32_sc16_to_xx<uhd::htowx>(input, output, 2, 1.0); + i += 2; + // do faster processing of the bulk of the samples now that we are 16-byte + // aligned + CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_, _) break; - // the first sample is 8-byte aligned - process it to align the remainder of the samples to 16-bytes - item32_sc16_to_xx<uhd::htowx>(input, output, 2, 1.0); - i += 2; - // do faster processing of the bulk of the samples now that we are 16-byte aligned - CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_,_) - break; - default: - // we are not 8 or 16-byte aligned, so do fast processing with the unaligned load and store - CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_,u_) + default: + // we are not 8 or 16-byte aligned, so do fast processing with the unaligned + // load and store + CONVERT_SC16_1_TO_SC16_1_NSWAP_GUTS(u_, u_) } // convert any remaining samples - item32_sc16_to_xx<uhd::htowx>(input+i, output+i, nsamps-i, 1.0); + item32_sc16_to_xx<uhd::htowx>(input + i, output + i, nsamps - i, 1.0); } -DECLARE_CONVERTER(sc16_item32_be, 1, sc16, 1, PRIORITY_SIMD){ - const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); - sc16_t *output = reinterpret_cast<sc16_t *>(outputs[0]); +DECLARE_CONVERTER(sc16_item32_be, 1, sc16, 1, PRIORITY_SIMD) +{ + const item32_t* input = reinterpret_cast<const item32_t*>(inputs[0]); + sc16_t* output = reinterpret_cast<sc16_t*>(outputs[0]); size_t i = 0; // need to dispatch according to alignment for fastest conversion - switch (size_t(output) & 0xf){ - case 0x0: - // the data is 16-byte aligned, so do the fast processing of the bulk of the samples - CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_,_) - break; - case 0x8: - if (nsamps < 2) + switch (size_t(output) & 0xf) { + case 0x0: + // the data is 16-byte aligned, so do the fast processing of the bulk of the 
+ // samples + CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_, _) + break; + case 0x8: + if (nsamps < 2) + break; + // the first sample is 8-byte aligned - process it to align the remainder of + // the samples to 16-bytes + item32_sc16_to_xx<uhd::htonx>(input, output, 2, 1.0); + i += 2; + // do faster processing of the bulk of the samples now that we are 16-byte + // aligned + CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_, _) break; - // the first sample is 8-byte aligned - process it to align the remainder of the samples to 16-bytes - item32_sc16_to_xx<uhd::htonx>(input, output, 2, 1.0); - i += 2; - // do faster processing of the bulk of the samples now that we are 16-byte aligned - CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_,_) - break; - default: - // we are not 8 or 16-byte aligned, so do fast processing with the unaligned load and store - CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_,u_) + default: + // we are not 8 or 16-byte aligned, so do fast processing with the unaligned + // load and store + CONVERT_SC16_1_TO_SC16_1_BSWAP_GUTS(u_, u_) } // convert any remaining samples - item32_sc16_to_xx<uhd::htonx>(input+i, output+i, nsamps-i, 1.0); + item32_sc16_to_xx<uhd::htonx>(input + i, output + i, nsamps - i, 1.0); } diff --git a/host/lib/convert/sse2_sc8_to_fc32.cpp b/host/lib/convert/sse2_sc8_to_fc32.cpp index 6d68850bf..aefda2b13 100644 --- a/host/lib/convert/sse2_sc8_to_fc32.cpp +++ b/host/lib/convert/sse2_sc8_to_fc32.cpp @@ -14,109 +14,111 @@ using namespace uhd::convert; static const __m128i zeroi = _mm_setzero_si128(); template <const int shuf> -UHD_INLINE void unpack_sc32_4x( - const __m128i &in, - __m128 &out0, __m128 &out1, - __m128 &out2, __m128 &out3, - const __m128 &scalar -){ +UHD_INLINE void unpack_sc32_4x(const __m128i& in, + __m128& out0, + __m128& out1, + __m128& out2, + __m128& out3, + const __m128& scalar) +{ const __m128i tmplo = _mm_unpacklo_epi8(zeroi, in); /* value in upper 8 bits */ - __m128i tmp0 = _mm_shuffle_epi32(_mm_unpacklo_epi16(zeroi, tmplo), shuf); /* value in upper 16 bits */ + __m128i tmp0 = _mm_shuffle_epi32( + _mm_unpacklo_epi16(zeroi, tmplo), shuf); /* value in upper 16 bits */ __m128i tmp1 = _mm_shuffle_epi32(_mm_unpackhi_epi16(zeroi, tmplo), shuf); - out0 = _mm_mul_ps(_mm_cvtepi32_ps(tmp0), scalar); - out1 = _mm_mul_ps(_mm_cvtepi32_ps(tmp1), scalar); + out0 = _mm_mul_ps(_mm_cvtepi32_ps(tmp0), scalar); + out1 = _mm_mul_ps(_mm_cvtepi32_ps(tmp1), scalar); const __m128i tmphi = _mm_unpackhi_epi8(zeroi, in); - __m128i tmp2 = _mm_shuffle_epi32(_mm_unpacklo_epi16(zeroi, tmphi), shuf); - __m128i tmp3 = _mm_shuffle_epi32(_mm_unpackhi_epi16(zeroi, tmphi), shuf); - out2 = _mm_mul_ps(_mm_cvtepi32_ps(tmp2), scalar); - out3 = _mm_mul_ps(_mm_cvtepi32_ps(tmp3), scalar); + __m128i tmp2 = _mm_shuffle_epi32(_mm_unpacklo_epi16(zeroi, tmphi), shuf); + __m128i tmp3 = _mm_shuffle_epi32(_mm_unpackhi_epi16(zeroi, tmphi), shuf); + out2 = _mm_mul_ps(_mm_cvtepi32_ps(tmp2), scalar); + out3 = _mm_mul_ps(_mm_cvtepi32_ps(tmp3), scalar); } -DECLARE_CONVERTER(sc8_item32_be, 1, fc32, 1, PRIORITY_SIMD){ - const item32_t *input = reinterpret_cast<const item32_t *>(size_t(inputs[0]) & ~0x3); - fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); +DECLARE_CONVERTER(sc8_item32_be, 1, fc32, 1, PRIORITY_SIMD) +{ + const item32_t* input = reinterpret_cast<const item32_t*>(size_t(inputs[0]) & ~0x3); + fc32_t* output = reinterpret_cast<fc32_t*>(outputs[0]); - const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 24)); - const int shuf = _MM_SHUFFLE(3, 2, 1, 0); + const __m128 scalar = 
_mm_set_ps1(float(scale_factor) / (1 << 24)); + const int shuf = _MM_SHUFFLE(3, 2, 1, 0); size_t i = 0, j = 0; fc32_t dummy; size_t num_samps = nsamps; - if ((size_t(inputs[0]) & 0x3) != 0){ + if ((size_t(inputs[0]) & 0x3) != 0) { item32_sc8_to_xx<uhd::ntohx>(input++, output++, 1, scale_factor); num_samps--; } - #define convert_sc8_item32_1_to_fc32_1_bswap_guts(_al_) \ - for (; j+7 < num_samps; j+=8, i+=4){ \ - /* load from input */ \ - __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ - \ - /* unpack + swap 8-bit pairs */ \ - __m128 tmp0, tmp1, tmp2, tmp3; \ - unpack_sc32_4x<shuf>(tmpi, tmp0, tmp1, tmp2, tmp3, scalar); \ - \ - /* store to output */ \ - _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+j+0), tmp0); \ - _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+j+2), tmp1); \ - _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+j+4), tmp2); \ - _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+j+6), tmp3); \ +#define convert_sc8_item32_1_to_fc32_1_bswap_guts(_al_) \ + for (; j + 7 < num_samps; j += 8, i += 4) { \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i*>(input + i)); \ + \ + /* unpack + swap 8-bit pairs */ \ + __m128 tmp0, tmp1, tmp2, tmp3; \ + unpack_sc32_4x<shuf>(tmpi, tmp0, tmp1, tmp2, tmp3, scalar); \ + \ + /* store to output */ \ + _mm_store##_al_##ps(reinterpret_cast<float*>(output + j + 0), tmp0); \ + _mm_store##_al_##ps(reinterpret_cast<float*>(output + j + 2), tmp1); \ + _mm_store##_al_##ps(reinterpret_cast<float*>(output + j + 4), tmp2); \ + _mm_store##_al_##ps(reinterpret_cast<float*>(output + j + 6), tmp3); \ } - //dispatch according to alignment - if ((size_t(output) & 0xf) == 0){ + // dispatch according to alignment + if ((size_t(output) & 0xf) == 0) { convert_sc8_item32_1_to_fc32_1_bswap_guts(_) - } - else{ + } else { convert_sc8_item32_1_to_fc32_1_bswap_guts(u_) } - //convert remainder - item32_sc8_to_xx<uhd::ntohx>(input+i, output+j, num_samps-j, scale_factor); + // convert remainder + item32_sc8_to_xx<uhd::ntohx>(input + i, output + j, num_samps - j, scale_factor); } -DECLARE_CONVERTER(sc8_item32_le, 1, fc32, 1, PRIORITY_SIMD){ - const item32_t *input = reinterpret_cast<const item32_t *>(size_t(inputs[0]) & ~0x3); - fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); +DECLARE_CONVERTER(sc8_item32_le, 1, fc32, 1, PRIORITY_SIMD) +{ + const item32_t* input = reinterpret_cast<const item32_t*>(size_t(inputs[0]) & ~0x3); + fc32_t* output = reinterpret_cast<fc32_t*>(outputs[0]); - const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 24)); - const int shuf = _MM_SHUFFLE(0, 1, 2, 3); + const __m128 scalar = _mm_set_ps1(float(scale_factor) / (1 << 24)); + const int shuf = _MM_SHUFFLE(0, 1, 2, 3); size_t i = 0, j = 0; fc32_t dummy; size_t num_samps = nsamps; - if ((size_t(inputs[0]) & 0x3) != 0){ + if ((size_t(inputs[0]) & 0x3) != 0) { item32_sc8_to_xx<uhd::wtohx>(input++, output++, 1, scale_factor); num_samps--; } - #define convert_sc8_item32_1_to_fc32_1_nswap_guts(_al_) \ - for (; j+7 < num_samps; j+=8, i+=4){ \ - /* load from input */ \ - __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ - \ - /* unpack + swap 8-bit pairs */ \ - __m128 tmp0, tmp1, tmp2, tmp3; \ - unpack_sc32_4x<shuf>(tmpi, tmp0, tmp1, tmp2, tmp3, scalar); \ - \ - /* store to output */ \ - _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+j+0), tmp0); \ - _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+j+2), tmp1); \ - _mm_store ## _al_ ## 
ps(reinterpret_cast<float *>(output+j+4), tmp2); \ - _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+j+6), tmp3); \ +#define convert_sc8_item32_1_to_fc32_1_nswap_guts(_al_) \ + for (; j + 7 < num_samps; j += 8, i += 4) { \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i*>(input + i)); \ + \ + /* unpack + swap 8-bit pairs */ \ + __m128 tmp0, tmp1, tmp2, tmp3; \ + unpack_sc32_4x<shuf>(tmpi, tmp0, tmp1, tmp2, tmp3, scalar); \ + \ + /* store to output */ \ + _mm_store##_al_##ps(reinterpret_cast<float*>(output + j + 0), tmp0); \ + _mm_store##_al_##ps(reinterpret_cast<float*>(output + j + 2), tmp1); \ + _mm_store##_al_##ps(reinterpret_cast<float*>(output + j + 4), tmp2); \ + _mm_store##_al_##ps(reinterpret_cast<float*>(output + j + 6), tmp3); \ } - //dispatch according to alignment - if ((size_t(output) & 0xf) == 0){ + // dispatch according to alignment + if ((size_t(output) & 0xf) == 0) { convert_sc8_item32_1_to_fc32_1_nswap_guts(_) - } - else{ + } else { convert_sc8_item32_1_to_fc32_1_nswap_guts(u_) } - //convert remainder - item32_sc8_to_xx<uhd::wtohx>(input+i, output+j, num_samps-j, scale_factor); + // convert remainder + item32_sc8_to_xx<uhd::wtohx>(input + i, output + j, num_samps - j, scale_factor); } diff --git a/host/lib/convert/sse2_sc8_to_fc64.cpp b/host/lib/convert/sse2_sc8_to_fc64.cpp index f5b406152..3cc2fefd0 100644 --- a/host/lib/convert/sse2_sc8_to_fc64.cpp +++ b/host/lib/convert/sse2_sc8_to_fc64.cpp @@ -13,129 +13,133 @@ using namespace uhd::convert; static const __m128i zeroi = _mm_setzero_si128(); -UHD_INLINE void unpack_sc32_8x( - const __m128i &in, - __m128d &out0, __m128d &out1, - __m128d &out2, __m128d &out3, - __m128d &out4, __m128d &out5, - __m128d &out6, __m128d &out7, - const __m128d &scalar -){ +UHD_INLINE void unpack_sc32_8x(const __m128i& in, + __m128d& out0, + __m128d& out1, + __m128d& out2, + __m128d& out3, + __m128d& out4, + __m128d& out5, + __m128d& out6, + __m128d& out7, + const __m128d& scalar) +{ const int shuf = _MM_SHUFFLE(1, 0, 3, 2); __m128i tmp; const __m128i tmplo = _mm_unpacklo_epi8(zeroi, in); /* value in upper 8 bits */ - tmp = _mm_unpacklo_epi16(zeroi, tmplo); /* value in upper 16 bits */ - out0 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); - tmp = _mm_shuffle_epi32(tmp, shuf); - out1 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); - tmp = _mm_unpackhi_epi16(zeroi, tmplo); - out2 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); - tmp = _mm_shuffle_epi32(tmp, shuf); - out3 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); + tmp = _mm_unpacklo_epi16(zeroi, tmplo); /* value in upper 16 bits */ + out0 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); + tmp = _mm_shuffle_epi32(tmp, shuf); + out1 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); + tmp = _mm_unpackhi_epi16(zeroi, tmplo); + out2 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); + tmp = _mm_shuffle_epi32(tmp, shuf); + out3 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); const __m128i tmphi = _mm_unpackhi_epi8(zeroi, in); - tmp = _mm_unpacklo_epi16(zeroi, tmphi); - out4 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); - tmp = _mm_shuffle_epi32(tmp, shuf); - out5 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); - tmp = _mm_unpackhi_epi16(zeroi, tmphi); - out6 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); - tmp = _mm_shuffle_epi32(tmp, shuf); - out7 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); + tmp = _mm_unpacklo_epi16(zeroi, tmphi); + out4 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); + tmp = _mm_shuffle_epi32(tmp, shuf); + out5 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); + tmp = 
_mm_unpackhi_epi16(zeroi, tmphi); + out6 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); + tmp = _mm_shuffle_epi32(tmp, shuf); + out7 = _mm_mul_pd(_mm_cvtepi32_pd(tmp), scalar); } -DECLARE_CONVERTER(sc8_item32_be, 1, fc64, 1, PRIORITY_SIMD){ - const item32_t *input = reinterpret_cast<const item32_t *>(size_t(inputs[0]) & ~0x3); - fc64_t *output = reinterpret_cast<fc64_t *>(outputs[0]); +DECLARE_CONVERTER(sc8_item32_be, 1, fc64, 1, PRIORITY_SIMD) +{ + const item32_t* input = reinterpret_cast<const item32_t*>(size_t(inputs[0]) & ~0x3); + fc64_t* output = reinterpret_cast<fc64_t*>(outputs[0]); - const __m128d scalar = _mm_set1_pd(scale_factor/(1 << 24)); + const __m128d scalar = _mm_set1_pd(scale_factor / (1 << 24)); size_t i = 0, j = 0; fc32_t dummy; size_t num_samps = nsamps; - if ((size_t(inputs[0]) & 0x3) != 0){ + if ((size_t(inputs[0]) & 0x3) != 0) { item32_sc8_to_xx<uhd::ntohx>(input++, output++, 1, scale_factor); num_samps--; } - #define convert_sc8_item32_1_to_fc64_1_bswap_guts(_al_) \ - for (; j+7 < num_samps; j+=8, i+=4){ \ - /* load from input */ \ - __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ - \ - /* unpack */ \ - __m128d tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; \ +#define convert_sc8_item32_1_to_fc64_1_bswap_guts(_al_) \ + for (; j + 7 < num_samps; j += 8, i += 4) { \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i*>(input + i)); \ + \ + /* unpack */ \ + __m128d tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; \ unpack_sc32_8x(tmpi, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, scalar); \ - \ - /* store to output */ \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+0), tmp0); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+1), tmp1); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+2), tmp2); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+3), tmp3); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+4), tmp4); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+5), tmp5); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+6), tmp6); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+7), tmp7); \ + \ + /* store to output */ \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 0), tmp0); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 1), tmp1); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 2), tmp2); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 3), tmp3); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 4), tmp4); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 5), tmp5); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 6), tmp6); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 7), tmp7); \ } - //dispatch according to alignment - if ((size_t(output) & 0xf) == 0){ + // dispatch according to alignment + if ((size_t(output) & 0xf) == 0) { convert_sc8_item32_1_to_fc64_1_bswap_guts(_) - } - else{ + } else { convert_sc8_item32_1_to_fc64_1_bswap_guts(u_) } - //convert remainder - item32_sc8_to_xx<uhd::ntohx>(input+i, output+j, num_samps-j, scale_factor); + // convert remainder + item32_sc8_to_xx<uhd::ntohx>(input + i, output + j, num_samps - j, scale_factor); } -DECLARE_CONVERTER(sc8_item32_le, 1, fc64, 1, PRIORITY_SIMD){ - const item32_t *input = reinterpret_cast<const item32_t *>(size_t(inputs[0]) & ~0x3); - fc64_t *output = reinterpret_cast<fc64_t *>(outputs[0]); 
+DECLARE_CONVERTER(sc8_item32_le, 1, fc64, 1, PRIORITY_SIMD) +{ + const item32_t* input = reinterpret_cast<const item32_t*>(size_t(inputs[0]) & ~0x3); + fc64_t* output = reinterpret_cast<fc64_t*>(outputs[0]); - const __m128d scalar = _mm_set1_pd(scale_factor/(1 << 24)); + const __m128d scalar = _mm_set1_pd(scale_factor / (1 << 24)); size_t i = 0, j = 0; fc32_t dummy; size_t num_samps = nsamps; - if ((size_t(inputs[0]) & 0x3) != 0){ + if ((size_t(inputs[0]) & 0x3) != 0) { item32_sc8_to_xx<uhd::wtohx>(input++, output++, 1, scale_factor); num_samps--; } - #define convert_sc8_item32_1_to_fc64_1_nswap_guts(_al_) \ - for (; j+7 < num_samps; j+=8, i+=4){ \ - /* load from input */ \ - __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ - \ - /* unpack */ \ - __m128d tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; \ - tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); /*byteswap*/\ - unpack_sc32_8x(tmpi, tmp1, tmp0, tmp3, tmp2, tmp5, tmp4, tmp7, tmp6, scalar); \ - \ - /* store to output */ \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+0), tmp0); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+1), tmp1); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+2), tmp2); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+3), tmp3); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+4), tmp4); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+5), tmp5); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+6), tmp6); \ - _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+j+7), tmp7); \ +#define convert_sc8_item32_1_to_fc64_1_nswap_guts(_al_) \ + for (; j + 7 < num_samps; j += 8, i += 4) { \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i*>(input + i)); \ + \ + /* unpack */ \ + __m128d tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; \ + tmpi = \ + _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); /*byteswap*/ \ + unpack_sc32_8x(tmpi, tmp1, tmp0, tmp3, tmp2, tmp5, tmp4, tmp7, tmp6, scalar); \ + \ + /* store to output */ \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 0), tmp0); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 1), tmp1); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 2), tmp2); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 3), tmp3); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 4), tmp4); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 5), tmp5); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 6), tmp6); \ + _mm_store##_al_##pd(reinterpret_cast<double*>(output + j + 7), tmp7); \ } - //dispatch according to alignment - if ((size_t(output) & 0xf) == 0){ + // dispatch according to alignment + if ((size_t(output) & 0xf) == 0) { convert_sc8_item32_1_to_fc64_1_nswap_guts(_) - } - else{ + } else { convert_sc8_item32_1_to_fc64_1_nswap_guts(u_) } - //convert remainder - item32_sc8_to_xx<uhd::wtohx>(input+i, output+j, num_samps-j, scale_factor); + // convert remainder + item32_sc8_to_xx<uhd::wtohx>(input + i, output + j, num_samps - j, scale_factor); } |
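
The converters touched by this diff all share one structure: peel scalar samples until the pointer on the hot side is 16-byte aligned, run an SSE2 loop (hidden behind the *_GUTS macros) over four or eight samples per iteration, then hand any remainder back to the generic item32 routines. In the sc8 paths the unpack helpers leave each 8-bit value in the top byte of a 32-bit lane, which is why the scale factor is pre-divided by 1 << 24. The following is a minimal, self-contained sketch of that head/SIMD/tail pattern for an fc32 -> interleaved int16 conversion; the function name and buffer layout are illustrative only (this is not the UHD converter API), and the wire-format 16-bit swapping done by the real converters is omitted for brevity.

// Illustrative sketch of the alignment-dispatch pattern used by these
// converters. Names and layout are hypothetical, not UHD's.
#include <emmintrin.h> // SSE2 intrinsics
#include <cmath>
#include <complex>
#include <cstddef>
#include <cstdint>

using fc32 = std::complex<float>;

// Scalar fallback used for the unaligned head and the tail samples.
static inline int16_t to_i16(float v, float scale)
{
    return static_cast<int16_t>(std::lrintf(v * scale));
}

void fc32_to_sc16_interleaved(const fc32* in, int16_t* out, size_t nsamps, float scale)
{
    const __m128 scalar = _mm_set_ps1(scale);
    size_t i = 0;

    // Head: if the input is 8-byte but not 16-byte aligned, convert one
    // complex sample (8 bytes) in scalar code so the loop below may use
    // aligned loads.
    if ((reinterpret_cast<size_t>(in) & 0xF) == 0x8 && nsamps >= 1) {
        out[0] = to_i16(in[0].real(), scale);
        out[1] = to_i16(in[0].imag(), scale);
        i = 1;
    }

    const bool aligned = (reinterpret_cast<size_t>(in + i) & 0xF) == 0;
    for (; i + 3 < nsamps; i += 4) {
        // Four complex floats = eight floats = two __m128 registers.
        __m128 lo = aligned ? _mm_load_ps(reinterpret_cast<const float*>(in + i))
                            : _mm_loadu_ps(reinterpret_cast<const float*>(in + i));
        __m128 hi = aligned ? _mm_load_ps(reinterpret_cast<const float*>(in + i + 2))
                            : _mm_loadu_ps(reinterpret_cast<const float*>(in + i + 2));
        // Scale, round to int32, then saturate-pack to eight int16 values.
        __m128i ilo = _mm_cvtps_epi32(_mm_mul_ps(lo, scalar));
        __m128i ihi = _mm_cvtps_epi32(_mm_mul_ps(hi, scalar));
        _mm_storeu_si128(
            reinterpret_cast<__m128i*>(out + 2 * i), _mm_packs_epi32(ilo, ihi));
    }

    // Tail: convert the last 0..3 samples in scalar code.
    for (; i < nsamps; i++) {
        out[2 * i + 0] = to_i16(in[i].real(), scale);
        out[2 * i + 1] = to_i16(in[i].imag(), scale);
    }
}

As in the diff above, the aligned load variant is only selected once the head peel has made the pointer 16-byte aligned; any other alignment falls through to the unaligned variant, mirroring the switch on (size_t(ptr) & 0xf) in each converter.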