aboutsummaryrefslogtreecommitdiffstats
path: root/sumsq_sse2.c
diff options
context:
space:
mode:
authorMatthias P. Braendli <matthias.braendli@mpb.li>2014-01-02 21:55:13 +0100
committerMatthias P. Braendli <matthias.braendli@mpb.li>2014-01-02 21:55:13 +0100
commita31630e0d5b9880c716d9004ef4154396ba41ebc (patch)
treeaebbd3b132e5f2dd31bc34750ccded2378fc687a /sumsq_sse2.c
parent9aaac5be9db5e1537badc65242412ef14c5096e3 (diff)
downloadka9q-fec-a31630e0d5b9880c716d9004ef4154396ba41ebc.tar.gz
ka9q-fec-a31630e0d5b9880c716d9004ef4154396ba41ebc.tar.bz2
ka9q-fec-a31630e0d5b9880c716d9004ef4154396ba41ebc.zip
Extract fec-3.0.1
Diffstat (limited to 'sumsq_sse2.c')
-rw-r--r--sumsq_sse2.c33
1 files changed, 33 insertions, 0 deletions
diff --git a/sumsq_sse2.c b/sumsq_sse2.c
new file mode 100644
index 0000000..b05d2e9
--- /dev/null
+++ b/sumsq_sse2.c
@@ -0,0 +1,33 @@
+/* Compute the sum of the squares of a vector of signed shorts
+
+ * The SSE2 and MMX assist routines both operate on multiples of
+ * 8 words; they differ only in their alignment requirements (8 bytes
+ * for MMX, 16 bytes for SSE2)
+
+ * Copyright 2004 Phil Karn, KA9Q
+ * May be used under the terms of the GNU Lesser Public License (LGPL)
+ */
+
+#include <stdint.h>
+
+/* SSE2 assist routine, defined outside this file.  As used below it is
+ * handed a 16-byte-aligned pointer together with the remaining word count,
+ * and is relied on to sum the squares of the first (cnt & ~7) words only.
+ */
+long long sumsq_sse2_assist(signed short *in,int cnt);
+
+/* Return the sum of the squares of the cnt signed 16-bit words at in.
+ *
+ * The buffer is processed in three stages: leading words up to the next
+ * 16-byte boundary are squared one at a time, the aligned middle section
+ * (a multiple of 8 words) is handed to sumsq_sse2_assist(), and the
+ * remaining 0..7 words are again squared one at a time.
+ *
+ * in  - first word of the buffer; need not be 16-byte aligned
+ * cnt - number of words in the buffer; a count <= 0 yields 0
+ */
+long long sumsq_sse2(signed short *in,int cnt){
+  long long sum = 0;
+
+  /* Nothing to do for an empty buffer; a negative count would otherwise
+   * make the pointer arithmetic below step outside the buffer.
+   */
+  if(cnt <= 0){
+    return 0;
+  }
+
+  /* Handle words before the next 16-byte boundary.  uintptr_t holds the
+   * whole pointer value, so the alignment test does not depend on a
+   * narrowing pointer-to-int conversion.
+   */
+  while(((uintptr_t)in & 15) != 0 && cnt > 0){
+    sum += (long long)in[0] * in[0];
+    in++;
+    cnt--;
+  }
+
+  /* Aligned middle section: the assist covers whole groups of 8 words */
+  sum += sumsq_sse2_assist(in,cnt);
+  in += cnt & ~7;
+  cnt &= 7;
+
+  /* Handle up to 7 trailing words */
+  while(cnt > 0){
+    sum += (long long)in[0] * in[0];
+    in++;
+    cnt--;
+  }
+  return sum;
+}