/* Intel SIMD MMX implementation of Viterbi ACS butterflies
   for 64-state (k=7) convolutional code
   Copyright 2004 Phil Karn, KA9Q
   This code may be used under the terms of the GNU Lesser General Public License (LGPL)

   int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits) ;

   Source: /aosp_15_r20/external/fec/mmxbfly27.s (revision 638691a093b4f9473cd6ee8f3e0139deef159a86)
*/
8*638691a0SAndroid Build Coastguard Worker	# MMX (64-bit SIMD) version
9*638691a0SAndroid Build Coastguard Worker	# requires Pentium-MMX, Pentium-II or better
10*638691a0SAndroid Build Coastguard Worker
11*638691a0SAndroid Build Coastguard Worker	# These are offsets into struct v27, defined in viterbi27_mmx.c
12*638691a0SAndroid Build Coastguard Worker	.set DP,128
13*638691a0SAndroid Build Coastguard Worker	.set OLDMETRICS,132
14*638691a0SAndroid Build Coastguard Worker	.set NEWMETRICS,136
15*638691a0SAndroid Build Coastguard Worker	.text
16*638691a0SAndroid Build Coastguard Worker	.global update_viterbi27_blk_mmx,Mettab27_1,Mettab27_2
17*638691a0SAndroid Build Coastguard Worker	.type update_viterbi27_blk_mmx,@function
18*638691a0SAndroid Build Coastguard Worker	.align 16
19*638691a0SAndroid Build Coastguard Worker
# ---------------------------------------------------------------------
# int update_viterbi27_blk_mmx(struct v27 *vp, unsigned char *syms, int nbits)
#
# Runs the add-compare-select butterflies for nbits decoded bits.
#
# Arguments are taken from the stack relative to the frame pointer:
#   vp    at  8(%ebp)   pointer to the decoder state; only the fields at
#                       offsets DP, OLDMETRICS and NEWMETRICS are used
#   syms  at 12(%ebp)   input symbols, two bytes consumed per bit
#   nbits at 16(%ebp)   number of bits to process
# The syms and nbits stack slots are reused as loop state and are
# overwritten while the loop runs.
#
# Per bit the loop reads two symbol bytes, writes 64 bytes through the
# decision pointer (which then advances by 64), writes 64 bytes of new
# metrics, and swaps the old and new metric pointers.  On the normal exit
# the two metric pointers and the advanced decision pointer are stored
# back into *vp.
#
# Returns: eax = 0 after the loop finishes, eax = -1 if vp is NULL
#          (in which case nothing is read or written through vp).
# Registers: ebx, esi, edi (and also edx) are pushed on entry and popped
#          on exit; eax and mm0-mm7 are overwritten.  emms is executed
#          before the normal return.
# External data: Mettab27_1, Mettab27_2 (not defined in this file) and
#          the fifteens / ones constants of this file.
# ---------------------------------------------------------------------

	# butterfly GROUP
	# Each invocation of this macro does eight butterflies in parallel.
	# It is invoked below with GROUP = 0, 1, 2 and 3.
	# Expects on entry:
	#   eax = first symbol  * 32   (byte offset into Mettab27_1)
	#   ebx = second symbol * 32   (byte offset into Mettab27_2)
	#   esi -> old metrics, edi -> new metrics, edx -> decisions
	# Overwrites mm0-mm7; the integer registers are left untouched.
	.macro butterfly GROUP
	# Compute branch metrics: add the two table entries bytewise, add one,
	# shift the whole quadword right by one and keep the low four bits of
	# every byte.  The mask also drops the bit that the 64-bit shift moves
	# across each byte boundary.
	movq (Mettab27_1+8*\GROUP)(%eax),%mm3
	movq fifteens,%mm0

	paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3
	paddb ones,%mm3  # emulate pavgb - this may not be necessary
	psrlq $1,%mm3
	pand %mm0,%mm3

	movq (8*\GROUP)(%esi),%mm6	# Incoming path metric, high bit = 0
	movq ((8*\GROUP)+32)(%esi),%mm2 # Incoming path metric, high bit = 1
	movq %mm6,%mm1
	movq %mm2,%mm7

	paddb %mm3,%mm6		 # mm6 = metric(high bit 0) + branch metric
	paddb %mm3,%mm2		 # mm2 = metric(high bit 1) + branch metric
	pxor  %mm0,%mm3		 # invert branch metric (xor with fifteen)
	paddb %mm3,%mm7		 # mm7 = metric(high bit 1) + inverted branch metric
	paddb %mm3,%mm1		 # mm1 = metric(high bit 0) + inverted branch metric

	# live registers mm1 mm2 mm6 mm7
	# Compare mm6 and mm7;  mm1 and mm2
	pxor %mm3,%mm3		# mm3 = all zeroes for the signed compares
	movq %mm6,%mm4
	movq %mm1,%mm5
	psubb %mm7,%mm4		# mm4 = mm6 - mm7
	psubb %mm2,%mm5		# mm5 = mm1 - mm2
	pcmpgtb %mm3,%mm4	# mm4 = first set of decisions (ff where mm6 - mm7 > 0)
	pcmpgtb %mm3,%mm5	# mm5 = second set of decisions (ff where mm1 - mm2 > 0)

	# live registers mm1 mm2 mm4 mm5 mm6 mm7
	# select survivors: where the decision byte is ff keep mm7 (resp. mm2),
	# elsewhere keep mm6 (resp. mm1)
	movq %mm4,%mm0
	pand %mm4,%mm7
	movq %mm5,%mm3
	pand %mm5,%mm2
	pandn %mm6,%mm0		# mm0 = mm6 where decision is zero
	pandn %mm1,%mm3		# mm3 = mm1 where decision is zero
	por %mm0,%mm7		# mm7 = first set of survivors
	por %mm3,%mm2		# mm2 = second set of survivors

	# live registers mm2 mm4 mm5 mm7
	# interleave & store decisions in mm4, mm5
	# interleave & store new path metrics in mm7, mm2
	movq %mm4,%mm3
	movq %mm7,%mm0
	punpckhbw %mm5,%mm4	# high halves of the decisions, stored at offset 16*GROUP+8
	punpcklbw %mm5,%mm3	# low halves of the decisions, stored at offset 16*GROUP
	punpcklbw %mm2,%mm7	# low halves of the survivors, stored at offset 16*GROUP
	punpckhbw %mm2,%mm0	# high halves of the survivors, stored at offset 16*GROUP+8
	movq %mm4,(16*\GROUP+8)(%edx)
	movq %mm3,(16*\GROUP)(%edx)
	movq %mm7,(16*\GROUP)(%edi)
	movq %mm0,(16*\GROUP+8)(%edi)

	.endm

update_viterbi27_blk_mmx:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %edx
	pushl %ebx

	movl 8(%ebp),%edx	# edx = vp
	testl %edx,%edx
	jnz  0f
	movl $-1,%eax		# NULL vp: return -1 (immediate operand, hence the $)
	jmp  err
0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl DP(%edx),%edx	# edx -> decisions

	# Top of the per-bit loop; the nbits stack slot is the loop counter
1:	movl 16(%ebp),%eax	# eax = nbits
	decl %eax
	jl   2f			# passed zero, we're done
	movl %eax,16(%ebp)

	movl 12(%ebp),%ebx	# ebx = syms
	movw (%ebx),%ax		# ax = second symbol : first symbol
	addl $2,%ebx
	movl %ebx,12(%ebp)	# syms slot now points at the next symbol pair

	movb %ah,%bl		# bl = second symbol
	andl $255,%eax		# eax = first symbol, zero-extended
	andl $255,%ebx		# ebx = second symbol, zero-extended

	# shift into first array index dimension slot
	# (each symbol value selects a row of 32 bytes in its table)
	shll $5,%eax
	shll $5,%ebx

	# invoke macro four times for a total of 32 butterflies
	butterfly GROUP=0
	butterfly GROUP=1
	butterfly GROUP=2
	butterfly GROUP=3

	addl $64,%edx		# bump decision pointer

	# swap metrics: the metrics just written become the old metrics
	movl %esi,%eax
	movl %edi,%esi
	movl %eax,%edi
	jmp 1b

2:	emms			# leave MMX state before returning
	movl 8(%ebp),%ebx	# ebx = vp
	# stash metric pointers
	movl %esi,OLDMETRICS(%ebx)
	movl %edi,NEWMETRICS(%ebx)
	movl %edx,DP(%ebx)	# stash incremented value of vp->dp
	xorl %eax,%eax		# return 0
	# Common exit, also reached from the NULL check with eax = -1
err:	popl %ebx
	popl %edx
	popl %edi
	popl %esi
	popl %ebp
	ret
	.size update_viterbi27_blk_mmx, .-update_viterbi27_blk_mmx

142*638691a0SAndroid Build Coastguard Worker	.data
143*638691a0SAndroid Build Coastguard Worker	.align 8
144*638691a0SAndroid Build Coastguard Workerfifteens:
145*638691a0SAndroid Build Coastguard Worker	.byte 15,15,15,15,15,15,15,15
146*638691a0SAndroid Build Coastguard Worker
147*638691a0SAndroid Build Coastguard Worker	.align 8
148*638691a0SAndroid Build Coastguard Workerones:	.byte 1,1,1,1,1,1,1,1
149