From 289ca8255ec7341530327a4b118372276cc3147e Mon Sep 17 00:00:00 2001 From: "Matthias P. Braendli" Date: Thu, 25 Dec 2014 15:56:46 +0100 Subject: FormatConverter: correct SSE code --- src/FormatConverter.cpp | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/FormatConverter.cpp b/src/FormatConverter.cpp index 4f7d95c..8b510bd 100644 --- a/src/FormatConverter.cpp +++ b/src/FormatConverter.cpp @@ -35,6 +35,9 @@ #include #include +#ifdef __SSE__ +# include <xmmintrin.h> +#endif FormatConverter::FormatConverter(void) : ModCodec(ModFormat(sizeof(complexf)), @@ -50,13 +53,20 @@ int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut) dataOut->setLength(sizeIn * sizeof(int8_t)); float* in = reinterpret_cast<float*>(dataIn->getData()); - int8_t* out = reinterpret_cast<int8_t*>(dataOut->getData()); -#if 0 - // WARNING: Untested Code Ahead +#ifdef __SSE__ + /* + _mm_cvtps_pi8 does: + |<----------- 128 bits ------------>| + __m128 | I1 | Q1 | I2 | Q2 | in float + __m64 |I1Q1I2Q2|00000000| in int8_t + */ + + uint32_t* out = reinterpret_cast<uint32_t*>(dataOut->getData()); + assert(sizeIn % 16 == 0); assert((uintptr_t)in % 16 == 0); - for(int i = 0; i < sizeIn; i+=16) + for(size_t i = 0, j = 0; i < sizeIn; i+=16, j+=4) { __m128 a1 = _mm_load_ps(in+i+0); __m128 a2 = _mm_load_ps(in+i+4); @@ -66,23 +76,17 @@ int FormatConverter::process(Buffer* const dataIn, Buffer* dataOut) __m64 b2 = _mm_cvtps_pi8(a2); __m64 b3 = _mm_cvtps_pi8(a3); __m64 b4 = _mm_cvtps_pi8(a4); - _mm_store_ps(out+i+0, b1); - _mm_store_ps(out+i+4, b2); - _mm_store_ps(out+i+8, b3); - _mm_store_ps(out+i+12, b4); + out[j+0] = b1[0]; + out[j+1] = b2[0]; + out[j+2] = b3[0]; + out[j+3] = b4[0]; } #else + int8_t* out = reinterpret_cast<int8_t*>(dataOut->getData()); + // Slow implementation that uses _ftol() for (size_t i = 0; i < sizeIn; i++) { - if (in[i] > 127.0f) { - out[i] = 127; - } - else if (in[i] < -127.0f) { - out[i] = -127; - } - else { - out[i] = in[i]; - } + out[i] = 
in[i]; } #endif -- cgit v1.2.3