Lines Matching +full:compare +full:- +full:and +full:- +full:swap

3 M68000 Hi-Performance Microprocessor Division
5 Production Release P1.00 -- October 10, 1994
9 THE SOFTWARE is provided on an "AS IS" basis and without warranty.
13 and any warranty against infringement with regard to the SOFTWARE label
14 (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
21 Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
23 You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24 so long as this entire notice is retained without alteration in any modified and/or
25 redistributed versions, and that such modified versions are clearly identified as such.
31 # and contains the entry points into the package. The user, in
63 # _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction. #
64 # _060LSP__idivs64_(): Emulate 64-bit signed div instruction. #
67 # and therefore does not work exactly like the 680X0 div{s,u}.l #
68 # 64-bit divide instruction. #
84 # If the operands are signed, make them unsigned and save the #
85 # sign info for later. Separate out special cases like divide-by-zero #
86 # or 32-bit divides if possible. Else, use a special math algorithm #
90 # zero, then perform a divide-by-zero using a 16-bit implemented #
96 set POSNEG, -1
97 set NDIVISOR, -2
98 set NDIVIDEND, -3
99 set DDSECOND, -4
100 set DDNORMAL, -8
101 set DDQUOTIENT, -12
102 set DIV64_CC, -16
110 link.w %a6,&-16
111 movm.l &0x3f00,-(%sp) # save d2-d7
112 # fmovm.l &0x0,-(%sp) # save no fpregs
125 link.w %a6,&-16
126 movm.l &0x3f00,-(%sp) # save d2-d7
127 # fmovm.l &0x0,-(%sp) # save no fpregs
141 # separate signed and unsigned divide
164 # - is (dividend == 0) ?
165 # - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
186 # - is hi(dividend) >= divisor ? if yes, then overflow
193 # separate into signed and unsigned finishes.
207 # 0x80000000 is the largest number representable as a 32-bit negative
209 cmpi.l %d6, &0x80000000 # will (-quot) fit in 32 bits?
212 neg.l %d6 # make (-quot) 2's comp
227 # here, the result is in d1 and d0. the current strategy is to save
235 movm.l (%sp)+,&0x00fc # restore d2-d7
260 movm.l (%sp)+,&0x00fc # restore d2-d7
271 # For this implementation b=2**16, and the target is U1U2U3U4/V1V2, #
272 # where U,V are words of the quadword dividend and longword divisor, #
273 # and U1, V1 are the most significant words. #
276 # in %d6. The divisor must be in the variable ddivisor, and the #
289 # Since the divisor is only a word (and larger than the mslw of the dividend),
295 # longword of the dividend as (0) remainder (see Knuth) and merely complete
296 # the last two divisions to get a quotient longword and word remainder:
299 swap %d5 # same as r*b if previous step rqd
300 swap %d6 # get u3 to lsw position
306 swap %d6 # get u4
311 swap %d1
314 swap %d5 # now remainder
315 mov.l %d1, %d6 # and quotient
322 # digit (word). After subtraction, the dividend is shifted and the
323 # process repeated. Before beginning, the divisor and quotient are
344 swap %d2
345 swap %d3
358 # now test the trial quotient and adjust. This step plus the
361 mov.l %d6, -(%sp)
363 swap %d6 # in lsw position
367 swap %d3
370 sub.l %d3, %d4 # U1U2 - V1q
372 swap %d4
380 # add.l %d6, %d4 # (U1U2 - V1q) + U3
383 bls.b lddadjd1 # is V2q > (U1U2-V1q) + U3 ?
384 subq.l &0x1, %d1 # yes, decrement and recheck
387 # now test the word by multiplying it by the divisor (V1V2) and comparing
389 mov.l %d5, -(%sp) # save %d5 (%d6 already saved)
391 swap %d6 # shift answer to ms 3 words
403 # - according to Knuth, this is done only 2 out of 65536 times for random
407 swap %d3
413 swap %d3 # aligned with 2nd word of dividend
418 # first quotient digit now correct. store digit and shift the
422 swap %d5
423 swap %d6
433 swap %d6
434 swap %d5
448 # factors for the 32X32->64 multiplication are in %d5 and %d6.
456 swap %d3
457 swap %d4
458 mulu.w %d5, %d6 # %d6 <- lsw*lsw
459 mulu.w %d3, %d5 # %d5 <- msw-dest*lsw-source
460 mulu.w %d4, %d2 # %d2 <- msw-source*lsw-dest
461 mulu.w %d4, %d3 # %d3 <- msw*msw
462 # now use swap and addx to consolidate to two longwords
464 swap %d6
469 swap %d6 # %d6 is low 32 bits of final product
472 swap %d5 # now use msws of longwords
473 swap %d2
480 # _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction #
481 # _060LSP__imuls64_(): Emulate 64-bit signed mul instruction. #
484 # and therefore does not work exactly like the 680X0 mul{s,u}.l #
485 # 64-bit multiply instruction. #
493 # 0xc(sp) = pointer to location to place 64-bit result #
496 # 0xc(sp) = points to location of 64-bit result #
499 # Perform the multiply in pieces using 16x16->32 unsigned #
500 # multiplies and "add" instructions. #
506 set MUL64_CC, -4
512 link.w %a6,&-4
513 movm.l &0x3800,-(%sp) # save d2-d4
514 # fmovm.l &0x0,-(%sp) # save no fpregs
527 # ---------------------------- #
529 # ---------------------------- #
530 # ----------------------------- #
532 # ----------------------------- #
533 # ----------------------------- #
535 # ----------------------------- #
536 # | ----------------------------- #
537 # --|-- | lo(mplier) * lo(mplicand) | #
538 # | ----------------------------- #
540 # -------------------------------------------------------- #
542 # -------------------------------------------------------- #
549 swap %d3 # hi(mr) in lo d3
550 swap %d4 # hi(md) in lo d4
562 swap %d0 # hi([1]) <==> lo([1])
567 swap %d0 # lo([1]) <==> hi([1])
570 # now, clear lo, put hi in lo reg, and add to [4]
573 swap %d1 # hi([2]) in lo d1
574 swap %d2 # hi([3]) in lo d2
588 # here, the result is in d1 and d0. the current strategy is to save
597 movm.l (%sp)+,&0x001c # restore d2-d4
604 # save the zero result to the register file and set the 'Z' ccode bit.
623 link.w %a6,&-4
624 movm.l &0x3c00,-(%sp) # save d2-d5
625 # fmovm.l &0x0,-(%sp) # save no fpregs
653 # ---------------------------- #
655 # ---------------------------- #
656 # ----------------------------- #
658 # ----------------------------- #
659 # ----------------------------- #
661 # ----------------------------- #
662 # | ----------------------------- #
663 # --|-- | lo(mplier) * lo(mplicand) | #
664 # | ----------------------------- #
666 # -------------------------------------------------------- #
668 # -------------------------------------------------------- #
675 swap %d3 # hi(mr) in lo d3
676 swap %d4 # hi(md) in lo d4
688 swap %d0 # hi([1]) <==> lo([1])
693 swap %d0 # lo([1]) <==> hi([1])
696 # now, clear lo, put hi in lo reg, and add to [4]
699 swap %d1 # hi([2]) in lo d1
700 swap %d2 # hi([3]) in lo d2
709 # -negate all bits and add 1
725 # here, the result is in d1 and d0. the current strategy is to save
734 movm.l (%sp)+,&0x003c # restore d2-d5
741 # save the zero result to the register file and set the 'Z' ccode bit.
763 # and therefore does not work exactly like the 680X0 "cmp2" #
786 set CMP2_CC, -4
792 link.w %a6,&-4
793 movm.l &0x3800,-(%sp) # save d2-d4
794 # fmovm.l &0x0,-(%sp) # save no fpregs
805 bra.w l_cmp2_cmp # go do the compare emulation
811 link.w %a6,&-4
812 movm.l &0x3800,-(%sp) # save d2-d4
813 # fmovm.l &0x0,-(%sp) # save no fpregs
824 bra.w l_cmp2_cmp # go do the compare emulation
830 link.w %a6,&-4
831 movm.l &0x3800,-(%sp) # save d2-d4
832 # fmovm.l &0x0,-(%sp) # save no fpregs
840 bra.w l_cmp2_cmp # go do the compare emulation
846 link.w %a6,&-4
847 movm.l &0x3800,-(%sp) # save d2-d4
848 # fmovm.l &0x0,-(%sp) # save no fpregs
860 # operation is a data register compare.
863 bra.w l_cmp2_cmp # go do the compare emulation
869 link.w %a6,&-4
870 movm.l &0x3800,-(%sp) # save d2-d4
871 # fmovm.l &0x0,-(%sp) # save no fpregs
883 # operation is a data register compare.
886 bra.w l_cmp2_cmp # go emulate compare
892 link.w %a6,&-4
893 movm.l &0x3800,-(%sp) # save d2-d4
894 # fmovm.l &0x0,-(%sp) # save no fpregs
905 # (1) save 'Z' bit from (Rn - lo)
906 # (2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
907 # (3) keep 'X', 'N', and 'V' from before instruction
911 sub.l %d0, %d2 # (Rn - lo)
914 sub.l %d0, %d1 # (hi - lo)
915 cmp.l %d1,%d2 # ((hi - lo) - (Rn - lo))
919 andi.b &0x5, %d3 # keep 'Z' and 'C'
928 movm.l (%sp)+,&0x001c # restore d2-d4