ilsp.S - OpenGrok cross reference for /linux-6.14.4/arch/m68k/ifpsp060/src/ilsp.S

Lines Matching +full:compare +full:- +full:and +full:- +full:swap
3 M68000 Hi-Performance Microprocessor Division
5 Production Release P1.00 -- October 10, 1994
9 THE SOFTWARE is provided on an "AS IS" basis and without warranty.
13 and any warranty against infringement with regard to the SOFTWARE
14 (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
21 Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
23 You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24 so long as this entire notice is retained without alteration in any modified and/or
25 redistributed versions, and that such modified versions are clearly identified as such.
31 # and contains the entry points into the package. The user, in
63 #	_060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.	#
64 #	_060LSP__idivs64_(): Emulate 64-bit signed div instruction.	#
67 #	and therefore does not work exactly like the 680X0 div{s,u}.l	#
68 #	64-bit divide instruction.					#
84 #	If the operands are signed, make them unsigned and save the	#
85 # sign info for later. Separate out special cases like divide-by-zero	#
86 # or 32-bit divides if possible. Else, use a special math algorithm	#
90 # zero, then perform a divide-by-zero using a 16-bit implemented	#
96 set	POSNEG,		-1
97 set	NDIVISOR,	-2
98 set	NDIVIDEND,	-3
99 set	DDSECOND,	-4
100 set	DDNORMAL,	-8
101 set	DDQUOTIENT,	-12
102 set	DIV64_CC,	-16
110 	link.w		%a6,&-16
111 	movm.l		&0x3f00,-(%sp)		# save d2-d7
112 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
125 	link.w		%a6,&-16
126 	movm.l		&0x3f00,-(%sp)		# save d2-d7
127 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
141 # separate signed and unsigned divide
164 #	- is (dividend == 0) ?
165 #	- is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
186 #	- is hi(dividend) >= divisor ? if yes, then overflow
193 # separate into signed and unsigned finishes.
207 # 0x80000000 is the largest number representable as a 32-bit negative
209 	cmpi.l		%d6, &0x80000000	# will (-quot) fit in 32 bits?
212 	neg.l		%d6			# make (-quot) 2's comp
227 # here, the result is in d1 and d0. the current strategy is to save
235 	movm.l		(%sp)+,&0x00fc		# restore d2-d7
260 	movm.l		(%sp)+,&0x00fc		# restore d2-d7
271 # For this implementation b=2**16, and the target is U1U2U3U4/V1V2,	#
272 # where U,V are words of the quadword dividend and longword divisor,	#
273 # and U1, V1 are the most significant words.				#
276 # in %d6. The divisor must be in the variable ddivisor, and the		#
289 # Since the divisor is only a word (and larger than the mslw of the dividend),
295 # longword of the dividend as (0) remainder (see Knuth) and merely complete
296 # the last two divisions to get a quotient longword and word remainder:
299 	swap		%d5			# same as r*b if previous step rqd
300 	swap		%d6			# get u3 to lsw position
306 	swap		%d6			# get u4
311 	swap		%d1
314 	swap		%d5			# now remainder
315 	mov.l		%d1, %d6		# and quotient
322 # digit (word). After subtraction, the dividend is shifted and the
323 # process repeated. Before beginning, the divisor and quotient are
344 	swap		%d2
345 	swap		%d3
358 # now test the trial quotient and adjust. This step plus the
361 	mov.l		%d6, -(%sp)
363 	swap		%d6			# in lsw position
367 	swap		%d3
370 	sub.l		%d3, %d4		# U1U2 - V1q
372 	swap		%d4
380 #	add.l		%d6, %d4		# (U1U2 - V1q) + U3
383 	bls.b		lddadjd1		# is V2q > (U1U2-V1q) + U3 ?
384 	subq.l		&0x1, %d1		# yes, decrement and recheck
387 # now test the word by multiplying it by the divisor (V1V2) and comparing
389 	mov.l		%d5, -(%sp)		# save %d5 (%d6 already saved)
391 	swap		%d6			# shift answer to ms 3 words
403 # - according to Knuth, this is done only 2 out of 65536 times for random
407 	swap		%d3
413 	swap		%d3			# aligned with 2nd word of dividend
418 # first quotient digit now correct. store digit and shift the
422 	swap		%d5
423 	swap		%d6
433 	swap		%d6
434 	swap		%d5
448 # factors for the 32X32->64 multiplication are in %d5 and %d6.
456 	swap		%d3
457 	swap		%d4
458 	mulu.w		%d5, %d6		# %d6 <- lsw*lsw
459 	mulu.w		%d3, %d5		# %d5 <- msw-dest*lsw-source
460 	mulu.w		%d4, %d2		# %d2 <- msw-source*lsw-dest
461 	mulu.w		%d4, %d3		# %d3 <- msw*msw
462 # now use swap and addx to consolidate to two longwords
464 	swap		%d6
469 	swap		%d6			# %d6 is low 32 bits of final product
472 	swap		%d5			# now use msws of longwords
473 	swap		%d2
480 #	_060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction	#
481 #	_060LSP__imuls64_(): Emulate 64-bit signed mul instruction.	#
484 #	and therefore does not work exactly like the 680X0 mul{s,u}.l	#
485 #	64-bit multiply instruction.					#
493 #	0xc(sp) = pointer to location to place 64-bit result		#
496 #	0xc(sp) = points to location of 64-bit result			#
499 #	Perform the multiply in pieces using 16x16->32 unsigned		#
500 # multiplies and "add" instructions.					#
506 set MUL64_CC, -4
512 	link.w		%a6,&-4
513 	movm.l		&0x3800,-(%sp)		# save d2-d4
514 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
527 #	----------------------------					#
529 #	----------------------------					#
530 #		     -----------------------------			#
532 #		     -----------------------------			#
533 #		     -----------------------------			#
535 #		     -----------------------------			#
536 #	  |			   -----------------------------	#
537 #	--|--			   | lo(mplier) * lo(mplicand) |	#
538 #	  |			   -----------------------------	#
540 #	--------------------------------------------------------	#
542 #	--------------------------------------------------------	#
549 	swap		%d3			# hi(mr) in lo d3
550 	swap		%d4			# hi(md) in lo d4
562 	swap		%d0			# hi([1]) <==> lo([1])
567 	swap		%d0			# lo([1]) <==> hi([1])
570 # now, clear lo, put hi in lo reg, and add to [4]
573 	swap		%d1			# hi([2]) in lo d1
574 	swap		%d2			# hi([3]) in lo d2
588 # here, the result is in d1 and d0. the current strategy is to save
597 	movm.l		(%sp)+,&0x001c		# restore d2-d4
604 # save the zero result to the register file and set the 'Z' ccode bit.
623 	link.w		%a6,&-4
624 	movm.l		&0x3c00,-(%sp)		# save d2-d5
625 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
653 #	----------------------------					#
655 #	----------------------------					#
656 #		     -----------------------------			#
658 #		     -----------------------------			#
659 #		     -----------------------------			#
661 #		     -----------------------------			#
662 #	  |			   -----------------------------	#
663 #	--|--			   | lo(mplier) * lo(mplicand) |	#
664 #	  |			   -----------------------------	#
666 #	--------------------------------------------------------	#
668 #	--------------------------------------------------------	#
675 	swap		%d3			# hi(mr) in lo d3
676 	swap		%d4			# hi(md) in lo d4
688 	swap		%d0			# hi([1]) <==> lo([1])
693 	swap		%d0			# lo([1]) <==> hi([1])
696 # now, clear lo, put hi in lo reg, and add to [4]
699 	swap		%d1			# hi([2]) in lo d1
700 	swap		%d2			# hi([3]) in lo d2
709 #   -negate all bits and add 1
725 # here, the result is in d1 and d0. the current strategy is to save
734 	movm.l		(%sp)+,&0x003c		# restore d2-d5
741 # save the zero result to the register file and set the 'Z' ccode bit.
763 #	and therefore does not work exactly like the 680X0 "cmp2"	#
786 set	CMP2_CC,	-4
792 	link.w		%a6,&-4
793 	movm.l		&0x3800,-(%sp)		# save d2-d4
794 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
805 	bra.w		l_cmp2_cmp		# go do the compare emulation
811 	link.w		%a6,&-4
812 	movm.l		&0x3800,-(%sp)		# save d2-d4
813 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
824 	bra.w		l_cmp2_cmp		# go do the compare emulation
830 	link.w		%a6,&-4
831 	movm.l		&0x3800,-(%sp)		# save d2-d4
832 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
840 	bra.w		l_cmp2_cmp		# go do the compare emulation
846 	link.w		%a6,&-4
847 	movm.l		&0x3800,-(%sp)		# save d2-d4
848 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
860 # operation is a data register compare.
863 	bra.w		l_cmp2_cmp		# go do the compare emulation
869 	link.w		%a6,&-4
870 	movm.l		&0x3800,-(%sp)		# save d2-d4
871 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
883 # operation is a data register compare.
886 	bra.w		l_cmp2_cmp		# go emulate compare
892 	link.w		%a6,&-4
893 	movm.l		&0x3800,-(%sp)		# save d2-d4
894 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
905 #	(1) save 'Z' bit from (Rn - lo)
906 #	(2) save 'Z' and 'C' bits from ((Rn - lo) - (hi - lo))
907 #	(3) keep 'X', 'N', and 'V' from before instruction
911 	sub.l		%d0, %d2		# (Rn - lo)
914 	sub.l		%d0, %d1		# (hi - lo)
915 	cmp.l		%d1,%d2			# ((Rn - lo) - (hi - lo))
919 	andi.b		&0x5, %d3		# keep 'Z' and 'C'
928 	movm.l		(%sp)+,&0x001c		# restore d2-d4