From a31630e0d5b9880c716d9004ef4154396ba41ebc Mon Sep 17 00:00:00 2001 From: "Matthias P. Braendli" Date: Thu, 2 Jan 2014 21:55:13 +0100 Subject: Extract fec-3.0.1 --- mmxbfly27.s | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 mmxbfly27.s (limited to 'mmxbfly27.s') diff --git a/mmxbfly27.s b/mmxbfly27.s new file mode 100644 index 0000000..4abbf48 --- /dev/null +++ b/mmxbfly27.s @@ -0,0 +1,148 @@ +/* Intel SIMD MMX implementation of Viterbi ACS butterflies + for 64-state (k=7) convolutional code + Copyright 2004 Phil Karn, KA9Q + This code may be used under the terms of the GNU Lesser General Public License (LGPL) + + int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits) ; +*/ + # MMX (64-bit SIMD) version + # requires Pentium-MMX, Pentium-II or better + + # These are offsets into struct v27, defined in viterbi27_mmx.c + .set DP,128 + .set OLDMETRICS,132 + .set NEWMETRICS,136 + .text + .global update_viterbi27_blk_mmx,Mettab27_1,Mettab27_2 + .type update_viterbi27_blk_mmx,@function + .align 16 + +update_viterbi27_blk_mmx: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %edx + pushl %ebx + + movl 8(%ebp),%edx # edx = vp + testl %edx,%edx + jnz 0f + movl -1,%eax + jmp err +0: movl OLDMETRICS(%edx),%esi # esi -> old metrics + movl NEWMETRICS(%edx),%edi # edi -> new metrics + movl DP(%edx),%edx # edx -> decisions + +1: movl 16(%ebp),%eax # eax = nbits + decl %eax + jl 2f # passed zero, we're done + movl %eax,16(%ebp) + + movl 12(%ebp),%ebx # ebx = syms + movw (%ebx),%ax # ax = second symbol : first symbol + addl $2,%ebx + movl %ebx,12(%ebp) + + movb %ah,%bl + andl $255,%eax + andl $255,%ebx + + # shift into first array index dimension slot + shll $5,%eax + shll $5,%ebx + + # each invocation of this macro will do 8 butterflies in parallel + .MACRO butterfly GROUP + # Compute branch metrics + movq (Mettab27_1+8*\GROUP)(%eax),%mm3 + movq fifteens,%mm0 + + paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3 + paddb ones,%mm3 # emulate pavgb - this may not be necessary + psrlq $1,%mm3 + pand %mm0,%mm3 + + movq (8*\GROUP)(%esi),%mm6 # Incoming path metric, high bit = 0 + movq ((8*\GROUP)+32)(%esi),%mm2 # Incoming path metric, high bit = 1 + movq %mm6,%mm1 + movq %mm2,%mm7 + + paddb %mm3,%mm6 + paddb %mm3,%mm2 + pxor %mm0,%mm3 # invert branch metric + paddb %mm3,%mm7 # path metric for inverted symbols + paddb %mm3,%mm1 + + # live registers 1 2 6 7 + # Compare mm6 and mm7; mm1 and mm2 + pxor %mm3,%mm3 + movq %mm6,%mm4 + movq %mm1,%mm5 + psubb %mm7,%mm4 # mm4 = mm6 - mm7 + psubb %mm2,%mm5 # mm5 = mm1 - mm2 + pcmpgtb %mm3,%mm4 # mm4 = first set of decisions (ff = 1 better) + pcmpgtb %mm3,%mm5 # mm5 = second set of decisions + + # live registers 1 2 4 5 6 7 + # select survivors + movq %mm4,%mm0 + pand %mm4,%mm7 + movq %mm5,%mm3 + pand %mm5,%mm2 + pandn %mm6,%mm0 + pandn %mm1,%mm3 + por %mm0,%mm7 # mm7 = first set of survivors + por %mm3,%mm2 # mm2 = second set of survivors + + # live registers 2 4 5 7 + # interleave & store decisions in mm4, mm5 + # interleave & store new branch metrics in mm2, mm7 + movq %mm4,%mm3 + movq %mm7,%mm0 + punpckhbw %mm5,%mm4 + punpcklbw %mm5,%mm3 + punpcklbw %mm2,%mm7 # interleave second 8 new metrics + punpckhbw %mm2,%mm0 # interleave first 8 new metrics + movq %mm4,(16*\GROUP+8)(%edx) + movq %mm3,(16*\GROUP)(%edx) + movq %mm7,(16*\GROUP)(%edi) + movq %mm0,(16*\GROUP+8)(%edi) + + .endm + +# invoke macro 4 times for a total of 32 butterflies + butterfly GROUP=0 + butterfly GROUP=1 + butterfly GROUP=2 + butterfly GROUP=3 + + addl $64,%edx # bump decision pointer + + # swap metrics + movl %esi,%eax + movl %edi,%esi + movl %eax,%edi + jmp 1b + +2: emms + movl 8(%ebp),%ebx # ebx = vp + # stash metric pointers + movl %esi,OLDMETRICS(%ebx) + movl %edi,NEWMETRICS(%ebx) + movl %edx,DP(%ebx) # stash incremented value of vp->dp + xorl %eax,%eax +err: popl %ebx + popl %edx + popl %edi + popl %esi + popl %ebp + ret + + .data + .align 8 +fifteens: + .byte 15,15,15,15,15,15,15,15 + + .align 8 +ones: .byte 1,1,1,1,1,1,1,1 -- cgit v1.2.3