summaryrefslogtreecommitdiffstats
path: root/host/lib/transport
diff options
context:
space:
mode:
Diffstat (limited to 'host/lib/transport')
-rw-r--r--host/lib/transport/CMakeLists.txt6
-rw-r--r--host/lib/transport/convert_types_impl.hpp48
2 files changed, 54 insertions, 0 deletions
diff --git a/host/lib/transport/CMakeLists.txt b/host/lib/transport/CMakeLists.txt
index b9ec7a6ad..b95d46381 100644
--- a/host/lib/transport/CMakeLists.txt
+++ b/host/lib/transport/CMakeLists.txt
@@ -58,6 +58,12 @@ IF(HAVE_EMMINTRIN_H)
ADD_DEFINITIONS(-DHAVE_EMMINTRIN_H)
ENDIF(HAVE_EMMINTRIN_H)
+INCLUDE(CheckIncludeFileCXX)
+CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_ARM_NEON_H)
+
+IF(HAVE_ARM_NEON_H)
+ ADD_DEFINITIONS(-DHAVE_ARM_NEON_H)
+ENDIF(HAVE_ARM_NEON_H)
########################################################################
# Setup defines for interface address discovery
########################################################################
diff --git a/host/lib/transport/convert_types_impl.hpp b/host/lib/transport/convert_types_impl.hpp
index 90618dec6..48ff99725 100644
--- a/host/lib/transport/convert_types_impl.hpp
+++ b/host/lib/transport/convert_types_impl.hpp
@@ -32,6 +32,14 @@
#include <emmintrin.h>
#endif
+#ifdef HAVE_ARM_NEON_H
+ #define USE_ARM_NEON_H
+#endif
+
+#if defined(USE_ARM_NEON_H)
+ #include <arm_neon.h>
+#endif
+
/***********************************************************************
* Typedefs
**********************************************************************/
@@ -135,6 +143,26 @@ static UHD_INLINE void fc32_to_item32_nswap(
}
}
+#elif defined(USE_ARM_NEON_H)
+static UHD_INLINE void fc32_to_item32_nswap(
+ const fc32_t *input, item32_t *output, size_t nsamps)
+{
+ size_t i;
+
+ float32x4_t Q0 = vdupq_n_f32(shorts_per_float);
+ for (i=0; i < (nsamps & ~0x03); i+=2) {
+ float32x4_t Q1 = vld1q_f32(reinterpret_cast<const float *>(&input[i]));
+ float32x4_t Q2 = vmulq_f32(Q1, Q0);
+ int32x4_t Q3 = vcvtq_s32_f32(Q2);
+ int16x4_t D8 = vmovn_s32(Q3);
+ int16x4_t D9 = vrev32_s16(D8);
+ vst1_s16((reinterpret_cast<int16_t *>(&output[i])), D9);
+ }
+
+ for (; i < nsamps; i++)
+ output[i] = fc32_to_item32(input[i]);
+}
+
#else
static UHD_INLINE void fc32_to_item32_nswap(
const fc32_t *input, item32_t *output, size_t nsamps
@@ -238,6 +266,26 @@ static UHD_INLINE void item32_to_fc32_nswap(
}
}
+#elif defined(USE_ARM_NEON_H)
+static UHD_INLINE void item32_to_fc32_nswap(
+ const item32_t *input, fc32_t *output, size_t nsamps)
+{
+ size_t i;
+
+ float32x4_t Q1 = vdupq_n_f32(floats_per_short);
+ for (i=0; i < (nsamps & ~0x03); i+=2) {
+ int16x4_t D0 = vld1_s16(reinterpret_cast<const int16_t *>(&input[i]));
+ int16x4_t D1 = vrev32_s16(D0);
+ int32x4_t Q2 = vmovl_s16(D1);
+ float32x4_t Q3 = vcvtq_f32_s32(Q2);
+ float32x4_t Q4 = vmulq_f32(Q3, Q1);
+ vst1q_f32((reinterpret_cast<float *>(&output[i])), Q4);
+ }
+
+ for (; i < nsamps; i++)
+ output[i] = item32_to_fc32(input[i]);
+}
+
#else
static UHD_INLINE void item32_to_fc32_nswap(
const item32_t *input, fc32_t *output, size_t nsamps