1*7c3d14c8STreehugger Robot// This file is dual licensed under the MIT and the University of Illinois Open 2*7c3d14c8STreehugger Robot// Source Licenses. See LICENSE.TXT for details. 3*7c3d14c8STreehugger Robot 4*7c3d14c8STreehugger Robot#include "../assembly.h" 5*7c3d14c8STreehugger Robot 6*7c3d14c8STreehugger Robot// di_int __divdi3(di_int a, di_int b); 7*7c3d14c8STreehugger Robot 8*7c3d14c8STreehugger Robot// result = a / b. 9*7c3d14c8STreehugger Robot// both inputs and the output are 64-bit signed integers. 10*7c3d14c8STreehugger Robot// This will do whatever the underlying hardware is set to do on division by zero. 11*7c3d14c8STreehugger Robot// No other exceptions are generated, as the divide cannot overflow. 12*7c3d14c8STreehugger Robot// 13*7c3d14c8STreehugger Robot// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 14*7c3d14c8STreehugger Robot// on x86_64. The performance goal is ~40 cycles per divide, which is faster than 15*7c3d14c8STreehugger Robot// currently possible via simulation of integer divides on the x87 unit. 16*7c3d14c8STreehugger Robot// 17*7c3d14c8STreehugger Robot// Stephen Canon, December 2008 18*7c3d14c8STreehugger Robot 19*7c3d14c8STreehugger Robot#ifdef __i386__ 20*7c3d14c8STreehugger Robot 21*7c3d14c8STreehugger Robot.text 22*7c3d14c8STreehugger Robot.balign 4 23*7c3d14c8STreehugger RobotDEFINE_COMPILERRT_FUNCTION(__divdi3) 24*7c3d14c8STreehugger Robot 25*7c3d14c8STreehugger Robot/* This is currently implemented by wrapping the unsigned divide up in an absolute 26*7c3d14c8STreehugger Robot value, then restoring the correct sign at the end of the computation. This could 27*7c3d14c8STreehugger Robot certainly be improved upon. */ 28*7c3d14c8STreehugger Robot 29*7c3d14c8STreehugger Robot pushl %esi 30*7c3d14c8STreehugger Robot movl 20(%esp), %edx // high word of b 31*7c3d14c8STreehugger Robot movl 16(%esp), %eax // low word of b 32*7c3d14c8STreehugger Robot movl %edx, %ecx 33*7c3d14c8STreehugger Robot sarl $31, %ecx // (b < 0) ? -1 : 0 34*7c3d14c8STreehugger Robot xorl %ecx, %eax 35*7c3d14c8STreehugger Robot xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b 36*7c3d14c8STreehugger Robot subl %ecx, %eax 37*7c3d14c8STreehugger Robot sbbl %ecx, %edx // EDX:EAX = abs(b) 38*7c3d14c8STreehugger Robot movl %edx, 20(%esp) 39*7c3d14c8STreehugger Robot movl %eax, 16(%esp) // store abs(b) back to stack 40*7c3d14c8STreehugger Robot movl %ecx, %esi // set aside sign of b 41*7c3d14c8STreehugger Robot 42*7c3d14c8STreehugger Robot movl 12(%esp), %edx // high word of b 43*7c3d14c8STreehugger Robot movl 8(%esp), %eax // low word of b 44*7c3d14c8STreehugger Robot movl %edx, %ecx 45*7c3d14c8STreehugger Robot sarl $31, %ecx // (a < 0) ? -1 : 0 46*7c3d14c8STreehugger Robot xorl %ecx, %eax 47*7c3d14c8STreehugger Robot xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a 48*7c3d14c8STreehugger Robot subl %ecx, %eax 49*7c3d14c8STreehugger Robot sbbl %ecx, %edx // EDX:EAX = abs(a) 50*7c3d14c8STreehugger Robot movl %edx, 12(%esp) 51*7c3d14c8STreehugger Robot movl %eax, 8(%esp) // store abs(a) back to stack 52*7c3d14c8STreehugger Robot xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b) 53*7c3d14c8STreehugger Robot 54*7c3d14c8STreehugger Robot pushl %ebx 55*7c3d14c8STreehugger Robot movl 24(%esp), %ebx // Find the index i of the leading bit in b. 56*7c3d14c8STreehugger Robot bsrl %ebx, %ecx // If the high word of b is zero, jump to 57*7c3d14c8STreehugger Robot jz 9f // the code to handle that special case [9]. 58*7c3d14c8STreehugger Robot 59*7c3d14c8STreehugger Robot /* High word of b is known to be non-zero on this branch */ 60*7c3d14c8STreehugger Robot 61*7c3d14c8STreehugger Robot movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 62*7c3d14c8STreehugger Robot 63*7c3d14c8STreehugger Robot shrl %cl, %eax // Practically, this means that bhi is given by: 64*7c3d14c8STreehugger Robot shrl %eax // 65*7c3d14c8STreehugger Robot notl %ecx // bhi = (high word of b) << (31 - i) | 66*7c3d14c8STreehugger Robot shll %cl, %ebx // (low word of b) >> (1 + i) 67*7c3d14c8STreehugger Robot orl %eax, %ebx // 68*7c3d14c8STreehugger Robot movl 16(%esp), %edx // Load the high and low words of a, and jump 69*7c3d14c8STreehugger Robot movl 12(%esp), %eax // to [1] if the high word is larger than bhi 70*7c3d14c8STreehugger Robot cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 71*7c3d14c8STreehugger Robot jae 1f 72*7c3d14c8STreehugger Robot 73*7c3d14c8STreehugger Robot /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 74*7c3d14c8STreehugger Robot 75*7c3d14c8STreehugger Robot divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 76*7c3d14c8STreehugger Robot 77*7c3d14c8STreehugger Robot pushl %edi 78*7c3d14c8STreehugger Robot notl %ecx 79*7c3d14c8STreehugger Robot shrl %eax 80*7c3d14c8STreehugger Robot shrl %cl, %eax // q = qs >> (1 + i) 81*7c3d14c8STreehugger Robot movl %eax, %edi 82*7c3d14c8STreehugger Robot mull 24(%esp) // q*blo 83*7c3d14c8STreehugger Robot movl 16(%esp), %ebx 84*7c3d14c8STreehugger Robot movl 20(%esp), %ecx // ECX:EBX = a 85*7c3d14c8STreehugger Robot subl %eax, %ebx 86*7c3d14c8STreehugger Robot sbbl %edx, %ecx // ECX:EBX = a - q*blo 87*7c3d14c8STreehugger Robot movl 28(%esp), %eax 88*7c3d14c8STreehugger Robot imull %edi, %eax // q*bhi 89*7c3d14c8STreehugger Robot subl %eax, %ecx // ECX:EBX = a - q*b 90*7c3d14c8STreehugger Robot sbbl $0, %edi // decrement q if remainder is negative 91*7c3d14c8STreehugger Robot xorl %edx, %edx 92*7c3d14c8STreehugger Robot movl %edi, %eax 93*7c3d14c8STreehugger Robot 94*7c3d14c8STreehugger Robot addl %esi, %eax // Restore correct sign to result 95*7c3d14c8STreehugger Robot adcl %esi, %edx 96*7c3d14c8STreehugger Robot xorl %esi, %eax 97*7c3d14c8STreehugger Robot xorl %esi, %edx 98*7c3d14c8STreehugger Robot popl %edi // Restore callee-save registers 99*7c3d14c8STreehugger Robot popl %ebx 100*7c3d14c8STreehugger Robot popl %esi 101*7c3d14c8STreehugger Robot retl // Return 102*7c3d14c8STreehugger Robot 103*7c3d14c8STreehugger Robot 104*7c3d14c8STreehugger Robot1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 105*7c3d14c8STreehugger Robot 106*7c3d14c8STreehugger Robot subl %ebx, %edx // subtract bhi from ahi so that divide will not 107*7c3d14c8STreehugger Robot divl %ebx // overflow, and find q and r such that 108*7c3d14c8STreehugger Robot // 109*7c3d14c8STreehugger Robot // ahi:alo = (1:q)*bhi + r 110*7c3d14c8STreehugger Robot // 111*7c3d14c8STreehugger Robot // Note that q is a number in (31-i).(1+i) 112*7c3d14c8STreehugger Robot // fix point. 113*7c3d14c8STreehugger Robot 114*7c3d14c8STreehugger Robot pushl %edi 115*7c3d14c8STreehugger Robot notl %ecx 116*7c3d14c8STreehugger Robot shrl %eax 117*7c3d14c8STreehugger Robot orl $0x80000000, %eax 118*7c3d14c8STreehugger Robot shrl %cl, %eax // q = (1:qs) >> (1 + i) 119*7c3d14c8STreehugger Robot movl %eax, %edi 120*7c3d14c8STreehugger Robot mull 24(%esp) // q*blo 121*7c3d14c8STreehugger Robot movl 16(%esp), %ebx 122*7c3d14c8STreehugger Robot movl 20(%esp), %ecx // ECX:EBX = a 123*7c3d14c8STreehugger Robot subl %eax, %ebx 124*7c3d14c8STreehugger Robot sbbl %edx, %ecx // ECX:EBX = a - q*blo 125*7c3d14c8STreehugger Robot movl 28(%esp), %eax 126*7c3d14c8STreehugger Robot imull %edi, %eax // q*bhi 127*7c3d14c8STreehugger Robot subl %eax, %ecx // ECX:EBX = a - q*b 128*7c3d14c8STreehugger Robot sbbl $0, %edi // decrement q if remainder is negative 129*7c3d14c8STreehugger Robot xorl %edx, %edx 130*7c3d14c8STreehugger Robot movl %edi, %eax 131*7c3d14c8STreehugger Robot 132*7c3d14c8STreehugger Robot addl %esi, %eax // Restore correct sign to result 133*7c3d14c8STreehugger Robot adcl %esi, %edx 134*7c3d14c8STreehugger Robot xorl %esi, %eax 135*7c3d14c8STreehugger Robot xorl %esi, %edx 136*7c3d14c8STreehugger Robot popl %edi // Restore callee-save registers 137*7c3d14c8STreehugger Robot popl %ebx 138*7c3d14c8STreehugger Robot popl %esi 139*7c3d14c8STreehugger Robot retl // Return 140*7c3d14c8STreehugger Robot 141*7c3d14c8STreehugger Robot 142*7c3d14c8STreehugger Robot9: /* High word of b is zero on this branch */ 143*7c3d14c8STreehugger Robot 144*7c3d14c8STreehugger Robot movl 16(%esp), %eax // Find qhi and rhi such that 145*7c3d14c8STreehugger Robot movl 20(%esp), %ecx // 146*7c3d14c8STreehugger Robot xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b 147*7c3d14c8STreehugger Robot divl %ecx // 148*7c3d14c8STreehugger Robot movl %eax, %ebx // 149*7c3d14c8STreehugger Robot movl 12(%esp), %eax // Find qlo such that 150*7c3d14c8STreehugger Robot divl %ecx // 151*7c3d14c8STreehugger Robot movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b 152*7c3d14c8STreehugger Robot 153*7c3d14c8STreehugger Robot addl %esi, %eax // Restore correct sign to result 154*7c3d14c8STreehugger Robot adcl %esi, %edx 155*7c3d14c8STreehugger Robot xorl %esi, %eax 156*7c3d14c8STreehugger Robot xorl %esi, %edx 157*7c3d14c8STreehugger Robot popl %ebx // Restore callee-save registers 158*7c3d14c8STreehugger Robot popl %esi 159*7c3d14c8STreehugger Robot retl // Return 160*7c3d14c8STreehugger RobotEND_COMPILERRT_FUNCTION(__divdi3) 161*7c3d14c8STreehugger Robot 162*7c3d14c8STreehugger Robot#endif // __i386__ 163*7c3d14c8STreehugger Robot 164*7c3d14c8STreehugger RobotNO_EXEC_STACK_DIRECTIVE 165*7c3d14c8STreehugger Robot 166