/* Intel SIMD MMX implementation of Viterbi ACS butterflies
   for 64-state (k=7) convolutional code
   Copyright 2004 Phil Karn, KA9Q
   This code may be used under the terms of the GNU Lesser General Public License (LGPL)

   int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits) ;

   Arguments are read from the stack relative to %ebp:
      8(%ebp) = vp     decoder state; only the DP, OLDMETRICS and NEWMETRICS
                       pointer fields (offsets below) are touched here
     12(%ebp) = syms   input symbols, two bytes consumed per decoded bit
     16(%ebp) = nbits  number of loop iterations to run

   Each iteration runs 32 butterflies (4 macro invocations of 8), writes
   64 bytes of decisions through vp->dp, and swaps the old/new metric pointers.
   On exit the advanced decision pointer and the metric pointers are written
   back into *vp.

   Returns 0 in %eax on success, or -1 if vp is NULL (nothing is touched then).
*/
	# MMX (64-bit SIMD) version
	# requires Pentium-MMX, Pentium-II or better

	# These are offsets into struct v27, defined in viterbi27_mmx.c
	.set DP,128
	.set OLDMETRICS,132
	.set NEWMETRICS,136
	.text
	.global update_viterbi27_blk_mmx,Mettab27_1,Mettab27_2
	.type update_viterbi27_blk_mmx,@function
	.align 16

update_viterbi27_blk_mmx:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %edx
	pushl %ebx

	movl 8(%ebp),%edx	# edx = vp
	testl %edx,%edx
	jnz  0f
	# NULL vp: return -1.  The $ prefix is required; a bare -1 is taken
	# by the assembler as a memory operand (a load from address -1).
	movl $-1,%eax
	jmp  err
0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl DP(%edx),%edx	# edx -> decisions

	# Main loop: the nbits and syms stack slots are used as the loop
	# counter and the running input pointer, and are updated in place.
1:	movl 16(%ebp),%eax	# eax = nbits
	decl %eax
	jl   2f			# passed zero, we're done
	movl %eax,16(%ebp)

	movl 12(%ebp),%ebx	# ebx = syms
	movw (%ebx),%ax		# ax = second symbol : first symbol
	addl $2,%ebx
	movl %ebx,12(%ebp)

	# Split the two symbols: al = first, bl = second.  The upper bits of
	# eax and ebx still hold stale counter and pointer bits, so mask them.
	movb %ah,%bl
	andl $255,%eax
	andl $255,%ebx

	# shift into first array index dimension slot
	# (each symbol value selects a 32-byte row of its metric table)
	shll $5,%eax
	shll $5,%ebx

	# each invocation of this macro will do 8 butterflies in parallel
	.MACRO butterfly GROUP
	# Compute branch metrics
	movq (Mettab27_1+8*\GROUP)(%eax),%mm3
	movq fifteens,%mm0

	paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3
	paddb ones,%mm3	# emulate pavgb - this may not be necessary
	psrlq $1,%mm3
	pand %mm0,%mm3	# strip bits shifted in from the neighbouring byte

	movq (8*\GROUP)(%esi),%mm6	# Incoming path metric, high bit = 0
	movq ((8*\GROUP)+32)(%esi),%mm2	# Incoming path metric, high bit = 1
	movq %mm6,%mm1
	movq %mm2,%mm7

	paddb %mm3,%mm6
	paddb %mm3,%mm2
	pxor %mm0,%mm3		# invert branch metric
	paddb %mm3,%mm7		# path metric for inverted symbols
	paddb %mm3,%mm1

	# live registers 1 2 6 7
	# Compare mm6 and mm7; mm1 and mm2
	pxor %mm3,%mm3		# mm3 = 0 for the signed compares below
	movq %mm6,%mm4
	movq %mm1,%mm5
	psubb %mm7,%mm4		# mm4 = mm6 - mm7
	psubb %mm2,%mm5		# mm5 = mm1 - mm2
	pcmpgtb %mm3,%mm4	# mm4 = first set of decisions (ff = 1 better)
	pcmpgtb %mm3,%mm5	# mm5 = second set of decisions

	# live registers 1 2 4 5 6 7
	# select survivors: per byte, take mm7 (resp. mm2) where the decision
	# mask is ff, else mm6 (resp. mm1)
	movq %mm4,%mm0
	pand %mm4,%mm7
	movq %mm5,%mm3
	pand %mm5,%mm2
	pandn %mm6,%mm0
	pandn %mm1,%mm3
	por %mm0,%mm7		# mm7 = first set of survivors
	por %mm3,%mm2		# mm2 = second set of survivors

	# live registers 2 4 5 7
	# interleave & store decisions in mm4, mm5
	# interleave & store new path metrics in mm2, mm7
	movq %mm4,%mm3
	movq %mm7,%mm0
	punpckhbw %mm5,%mm4	# high halves of both decision sets
	punpcklbw %mm5,%mm3	# low halves of both decision sets
	punpcklbw %mm2,%mm7	# low halves of both survivor sets
	punpckhbw %mm2,%mm0	# high halves of both survivor sets
	movq %mm4,(16*\GROUP+8)(%edx)
	movq %mm3,(16*\GROUP)(%edx)
	movq %mm7,(16*\GROUP)(%edi)
	movq %mm0,(16*\GROUP+8)(%edi)

	.endm

# invoke macro 4 times for a total of 32 butterflies
	butterfly GROUP=0
	butterfly GROUP=1
	butterfly GROUP=2
	butterfly GROUP=3

	addl $64,%edx		# bump decision pointer

	# swap metrics
	movl %esi,%eax
	movl %edi,%esi
	movl %eax,%edi
	jmp  1b

2:	emms			# leave MMX state before returning to the caller
	movl 8(%ebp),%ebx	# ebx = vp
	# stash metric pointers
	movl %esi,OLDMETRICS(%ebx)
	movl %edi,NEWMETRICS(%ebx)
	movl %edx,DP(%ebx)	# stash incremented value of vp->dp
	xorl %eax,%eax		# return 0
	# common exit: restore registers in reverse push order
err:	popl %ebx
	popl %edx
	popl %edi
	popl %esi
	popl %ebp
	ret

	.data
	.align 8
	# byte mask 0x0f in every lane: used after the shift and to invert
	# the branch metric
fifteens:
	.byte 15,15,15,15,15,15,15,15

	.align 8
	# rounding increment added before the shift right by one
ones:	.byte 1,1,1,1,1,1,1,1