aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Matthias P. Braendli <matthias.braendli@mpb.li> 2024-11-11 09:42:47 +0100
committer: Matthias P. Braendli <matthias.braendli@mpb.li> 2024-11-11 09:42:47 +0100
commit: 767d69622770a4bb886f527eaa2e1e2a15a71309 (patch)
tree: 71fc194b4593dd9f258b5c56f096942260576965
parent: 51199fd8605368d62edc464e984f847ad5281aab (diff)
download: dabmod-767d69622770a4bb886f527eaa2e1e2a15a71309.tar.gz
dabmod-767d69622770a4bb886f527eaa2e1e2a15a71309.tar.bz2
dabmod-767d69622770a4bb886f527eaa2e1e2a15a71309.zip
Use ARM NEON in FormatConverter
-rw-r--r-- src/Buffer.h 2
-rw-r--r-- src/CicEqualizer.h 3
-rw-r--r-- src/FormatConverter.cpp 43
-rw-r--r-- src/Utils.cpp 3
4 files changed, 38 insertions, 13 deletions
diff --git a/src/Buffer.h b/src/Buffer.h
index d5aa802..2c2a65e 100644
--- a/src/Buffer.h
+++ b/src/Buffer.h
@@ -43,7 +43,7 @@ typedef std::complex<fixed_16> complexfix;
typedef std::complex<fpm::fixed_16_16> complexfix_wide;
/* Buffer is a container for a byte array, which is memory-aligned
- * to 32 bytes for SSE performance.
+ * to 32 bytes for SIMD performance.
*
* The allocation/freeing of the data is handled internally.
*/
diff --git a/src/CicEqualizer.h b/src/CicEqualizer.h
index 70c3ae9..4510d0c 100644
--- a/src/CicEqualizer.h
+++ b/src/CicEqualizer.h
@@ -29,9 +29,6 @@
#include <vector>
#include <sys/types.h>
-#ifdef __SSE__
-# include <xmmintrin.h>
-#endif
class CicEqualizer : public ModCodec
{
diff --git a/src/FormatConverter.cpp b/src/FormatConverter.cpp
index 0821191..94dfa2c 100644
--- a/src/FormatConverter.cpp
+++ b/src/FormatConverter.cpp
@@ -28,11 +28,15 @@
#include "FormatConverter.h"
#include "PcDebug.h"
+#include "Log.h"
-#include <sys/types.h>
-#include <string.h>
#include <stdexcept>
+#include <cstring>
#include <assert.h>
+#include <sys/types.h>
+#if defined(__ARM_NEON)
+#include <arm_neon.h>
+#endif
FormatConverter::FormatConverter(bool input_is_complexfix_wide, const std::string& format_out) :
ModCodec(),
@@ -42,9 +46,16 @@ FormatConverter::FormatConverter(bool input_is_complexfix_wide, const std::strin
FormatConverter::~FormatConverter()
{
- etiLog.level(debug) << "FormatConverter: "
- << m_num_clipped_samples.load() <<
- " clipped samples";
+ if (
+#if defined(__ARM_NEON)
+ not m_input_complexfix_wide
+#else
+ true
+#endif
+ ) {
+ etiLog.level(debug) << "FormatConverter: " <<
+ m_num_clipped_samples.load() << " clipped";
+ }
}
@@ -56,16 +67,29 @@ int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut)
size_t num_clipped_samples = 0;
-
if (m_input_complexfix_wide) {
size_t sizeIn = dataIn->getLength() / sizeof(int32_t);
- int32_t* in = reinterpret_cast<int32_t*>(dataIn->getData());
if (m_format_out == "s16") {
dataOut->setLength(sizeIn * sizeof(int16_t));
+ const int32_t *in = reinterpret_cast<int32_t*>(dataIn->getData());
int16_t* out = reinterpret_cast<int16_t*>(dataOut->getData());
+ constexpr int shift = 7;
+
+#if defined(__ARM_NEON)
+ if (sizeIn % 4 != 0) {
+ throw std::logic_error("Unexpected length not multiple of 4");
+ }
+
+ for (size_t i = 0; i < sizeIn; i += 4) {
+ int32x4_t input_vec = vld1q_s32(&in[i]);
+ // Apply shift right, saturate on conversion to int16_t
+ int16x4_t output_vec = vqshrn_n_s32(input_vec, shift);
+ vst1_s16(&out[i], output_vec);
+ }
+#else
for (size_t i = 0; i < sizeIn; i++) {
- const int32_t val = in[i] >> 7;
+ const int32_t val = in[i] >> shift;
if (val < INT16_MIN) {
out[i] = INT16_MIN;
num_clipped_samples++;
@@ -78,6 +102,7 @@ int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut)
out[i] = val;
}
}
+#endif
}
else {
throw std::runtime_error("FormatConverter: Invalid fix format " + m_format_out);
@@ -85,7 +110,7 @@ int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut)
}
else {
size_t sizeIn = dataIn->getLength() / sizeof(float);
- float* in = reinterpret_cast<float*>(dataIn->getData());
+ const float* in = reinterpret_cast<float*>(dataIn->getData());
if (m_format_out == "s16") {
dataOut->setLength(sizeIn * sizeof(int16_t));
diff --git a/src/Utils.cpp b/src/Utils.cpp
index 0065bc1..f54122c 100644
--- a/src/Utils.cpp
+++ b/src/Utils.cpp
@@ -63,6 +63,9 @@ static void printHeader()
#if defined(__SSE__)
"SSE " <<
#endif
+#if defined(__ARM_NEON)
+ "NEON " <<
+#endif
"\n";
}