diff options
Diffstat (limited to 'sumsq_sse2.c')
-rw-r--r-- | sumsq_sse2.c | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/sumsq_sse2.c b/sumsq_sse2.c new file mode 100644 index 0000000..b05d2e9 --- /dev/null +++ b/sumsq_sse2.c @@ -0,0 +1,33 @@ +/* Compute the sum of the squares of a vector of signed shorts + + * The SSE2 and MMX assist routines both operate on multiples of + * 8 words; they differ only in their alignment requirements (8 bytes + * for MMX, 16 bytes for SSE2) + + * Copyright 2004 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser Public License (LGPL) + */ + +long long sumsq_sse2_assist(signed short *,int); + +long long sumsq_sse2(signed short *in,int cnt){ + long long sum = 0; + + /* Handle stuff before the next 8-byte boundary */ + while(((int)in & 15) != 0 && cnt != 0){ + sum += (long)in[0] * in[0]; + in++; + cnt--; + } + sum += sumsq_sse2_assist(in,cnt); + in += cnt & ~7; + cnt &= 7; + + /* Handle up to 7 trailing words */ + while(cnt != 0){ + sum += (long)in[0] * in[0]; + in++; + cnt--; + } + return sum; +} |