diff options
Diffstat (limited to 'sumsq_mmx.c')
-rw-r--r-- | sumsq_mmx.c | 35 |
1 files changed, 35 insertions, 0 deletions
/* Compute the sum of the squares of a vector of signed shorts

 * MMX-assisted version (also used on SSE)

 * The SSE2 and MMX assist routines both operate on multiples of
 * 8 words; they differ only in their alignment requirements (8 bytes
 * for MMX, 16 bytes for SSE2)

 * Copyright 2004 Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser Public License (LGPL)
 */

#include <stdint.h>

/* Assist routine, defined elsewhere. Per the note above it works on
 * multiples of 8 words from an 8-byte-aligned pointer.
 */
long long sumsq_mmx_assist(signed short *,int);

/* Return the sum of in[i]*in[i] for i in [0, cnt).
 *
 * in  - vector of signed 16-bit samples; any alignment is accepted
 * cnt - number of samples in the vector; a negative count yields 0
 *
 * The work is split in three parts: a scalar prologue that runs until
 * 'in' reaches an 8-byte boundary, a call to the assist routine for
 * the bulk, and a scalar epilogue for the (at most 7) leftover samples.
 */
long long sumsq_mmx(signed short *in,int cnt){
  long long sum = 0;

  /* A negative count would otherwise make the loops below walk
   * outside the buffer.
   */
  if(cnt < 0)
    return 0;

  /* Handle stuff before the next 8-byte boundary.
   * The address is inspected through uintptr_t; converting a pointer
   * to plain int is not representable on 64-bit targets.
   */
  while(((uintptr_t)in & 7) != 0 && cnt != 0){
    sum += (long long)in[0] * in[0];
    in++;
    cnt--;
  }

  /* The assist routine is handed the full remaining count; the code
   * below assumes it consumed exactly the largest multiple of 8
   * (cnt & ~7) -- NOTE(review): confirm against the assist source.
   */
  sum += sumsq_mmx_assist(in,cnt);
  in += cnt & ~7;
  cnt &= 7;

  /* Handle up to 7 words at end */
  while(cnt != 0){
    sum += (long long)in[0] * in[0];
    in++;
    cnt--;
  }
  return sum;
}