From 4a757e64cbb513e6461e547f4d095b0539ae0b09 Mon Sep 17 00:00:00 2001
From: Philip Balister <philip@opensdr.com>
Date: Thu, 9 Sep 2010 15:55:35 -0400
Subject: Convert fc32_to_item32_nswap to use ARM NEON if available.

---
 host/lib/transport/CMakeLists.txt         |  6 ++++++
 host/lib/transport/convert_types_impl.hpp | 24 ++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/host/lib/transport/CMakeLists.txt b/host/lib/transport/CMakeLists.txt
index 753fd5e85..43449d732 100644
--- a/host/lib/transport/CMakeLists.txt
+++ b/host/lib/transport/CMakeLists.txt
@@ -45,6 +45,12 @@ IF(HAVE_EMMINTRIN_H)
     ADD_DEFINITIONS(-DHAVE_EMMINTRIN_H)
 ENDIF(HAVE_EMMINTRIN_H)
 
+INCLUDE(CheckIncludeFileCXX) # provides the CHECK_INCLUDE_FILE_CXX command used below
+CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_ARM_NEON_H) # can a C++ source include the ARM NEON intrinsics header?
+
+IF(HAVE_ARM_NEON_H)
+    ADD_DEFINITIONS(-DHAVE_ARM_NEON_H) # tested by convert_types_impl.hpp to enable its NEON code path
+ENDIF(HAVE_ARM_NEON_H)
 ########################################################################
 # Setup defines for interface address discovery
 ########################################################################
diff --git a/host/lib/transport/convert_types_impl.hpp b/host/lib/transport/convert_types_impl.hpp
index 90618dec6..6cfc8fed6 100644
--- a/host/lib/transport/convert_types_impl.hpp
+++ b/host/lib/transport/convert_types_impl.hpp
@@ -32,6 +32,14 @@
     #include <emmintrin.h>
 #endif
 
+#ifdef HAVE_ARM_NEON_H // defined by the build (see CMakeLists.txt) when <arm_neon.h> is usable
+    #define USE_ARM_NEON_H // switch that selects the NEON code path in this header
+#endif
+
+#if defined(USE_ARM_NEON_H)
+    #include <arm_neon.h> // NEON vector types and intrinsics
+#endif
+
 /***********************************************************************
  * Typedefs
  **********************************************************************/
@@ -135,6 +143,22 @@ static UHD_INLINE void fc32_to_item32_nswap(
     }
 }
 
+#elif defined(USE_ARM_NEON_H)
+// NEON: converts two fc32 samples (four floats) per pass; a scalar loop finishes an odd tail.
+static UHD_INLINE void fc32_to_item32_nswap(
+    const fc32_t *input, item32_t *output, size_t nsamps)
+{
+    const float32x4_t scale = vdupq_n_f32(shorts_per_float);
+    size_t i = 0;
+    for (; i + 1 < nsamps; i += 2) { // step 2: each 128-bit load consumes input[i] and input[i+1]
+        const float32x4_t in = vld1q_f32(reinterpret_cast<const float *>(&input[i])); // re0 im0 re1 im1
+        const int16x4_t narrow = vmovn_s32(vcvtq_s32_f32(vmulq_f32(in, scale)));
+        // swap each half-word pair: item32 is expected to carry re in its high 16 bits (as fc32_to_item32 packs it)
+        vst1_s16(reinterpret_cast<int16_t *>(&output[i]), vrev32_s16(narrow));
+    }
+    for (; i < nsamps; i++) output[i] = fc32_to_item32(input[i]); // leftover sample, if nsamps is odd
+}
+
 #else
 static UHD_INLINE void fc32_to_item32_nswap(
     const fc32_t *input, item32_t *output, size_t nsamps
-- 
cgit v1.2.3