diff options
author | Matthias P. Braendli <matthias.braendli@mpb.li> | 2014-01-02 21:55:13 +0100 |
---|---|---|
committer | Matthias P. Braendli <matthias.braendli@mpb.li> | 2014-01-02 21:55:13 +0100 |
commit | a31630e0d5b9880c716d9004ef4154396ba41ebc (patch) | |
tree | aebbd3b132e5f2dd31bc34750ccded2378fc687a /sumsq_sse2.c | |
parent | 9aaac5be9db5e1537badc65242412ef14c5096e3 (diff) | |
download | ka9q-fec-a31630e0d5b9880c716d9004ef4154396ba41ebc.tar.gz ka9q-fec-a31630e0d5b9880c716d9004ef4154396ba41ebc.tar.bz2 ka9q-fec-a31630e0d5b9880c716d9004ef4154396ba41ebc.zip |
Extract fec-3.0.1
Diffstat (limited to 'sumsq_sse2.c')
-rw-r--r-- | sumsq_sse2.c | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/sumsq_sse2.c b/sumsq_sse2.c new file mode 100644 index 0000000..b05d2e9 --- /dev/null +++ b/sumsq_sse2.c @@ -0,0 +1,33 @@ +/* Compute the sum of the squares of a vector of signed shorts + + * The SSE2 and MMX assist routines both operate on multiples of + * 8 words; they differ only in their alignment requirements (8 bytes + * for MMX, 16 bytes for SSE2) + + * Copyright 2004 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser Public License (LGPL) + */ + +long long sumsq_sse2_assist(signed short *,int); + +long long sumsq_sse2(signed short *in,int cnt){ + long long sum = 0; + + /* Handle stuff before the next 8-byte boundary */ + while(((int)in & 15) != 0 && cnt != 0){ + sum += (long)in[0] * in[0]; + in++; + cnt--; + } + sum += sumsq_sse2_assist(in,cnt); + in += cnt & ~7; + cnt &= 7; + + /* Handle up to 7 trailing words */ + while(cnt != 0){ + sum += (long)in[0] * in[0]; + in++; + cnt--; + } + return sum; +} |