1*7c3d14c8STreehugger Robot// This file is dual licensed under the MIT and the University of Illinois Open 2*7c3d14c8STreehugger Robot// Source Licenses. See LICENSE.TXT for details. 3*7c3d14c8STreehugger Robot 4*7c3d14c8STreehugger Robot#include "../assembly.h" 5*7c3d14c8STreehugger Robot 6*7c3d14c8STreehugger Robot// du_int __udivdi3(du_int a, du_int b); 7*7c3d14c8STreehugger Robot 8*7c3d14c8STreehugger Robot// result = a / b. 9*7c3d14c8STreehugger Robot// both inputs and the output are 64-bit unsigned integers. 10*7c3d14c8STreehugger Robot// This will do whatever the underlying hardware is set to do on division by zero. 11*7c3d14c8STreehugger Robot// No other exceptions are generated, as the divide cannot overflow. 12*7c3d14c8STreehugger Robot// 13*7c3d14c8STreehugger Robot// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 14*7c3d14c8STreehugger Robot// on x86_64. The performance goal is ~40 cycles per divide, which is faster than 15*7c3d14c8STreehugger Robot// currently possible via simulation of integer divides on the x87 unit. 16*7c3d14c8STreehugger Robot// 17*7c3d14c8STreehugger Robot// Stephen Canon, December 2008 18*7c3d14c8STreehugger Robot 19*7c3d14c8STreehugger Robot#ifdef __i386__ 20*7c3d14c8STreehugger Robot 21*7c3d14c8STreehugger Robot.text 22*7c3d14c8STreehugger Robot.balign 4 23*7c3d14c8STreehugger RobotDEFINE_COMPILERRT_FUNCTION(__udivdi3) 24*7c3d14c8STreehugger Robot 25*7c3d14c8STreehugger Robot pushl %ebx 26*7c3d14c8STreehugger Robot movl 20(%esp), %ebx // Find the index i of the leading bit in b. 27*7c3d14c8STreehugger Robot bsrl %ebx, %ecx // If the high word of b is zero, jump to 28*7c3d14c8STreehugger Robot jz 9f // the code to handle that special case [9]. 29*7c3d14c8STreehugger Robot 30*7c3d14c8STreehugger Robot /* High word of b is known to be non-zero on this branch */ 31*7c3d14c8STreehugger Robot 32*7c3d14c8STreehugger Robot movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 33*7c3d14c8STreehugger Robot 34*7c3d14c8STreehugger Robot shrl %cl, %eax // Practically, this means that bhi is given by: 35*7c3d14c8STreehugger Robot shrl %eax // 36*7c3d14c8STreehugger Robot notl %ecx // bhi = (high word of b) << (31 - i) | 37*7c3d14c8STreehugger Robot shll %cl, %ebx // (low word of b) >> (1 + i) 38*7c3d14c8STreehugger Robot orl %eax, %ebx // 39*7c3d14c8STreehugger Robot movl 12(%esp), %edx // Load the high and low words of a, and jump 40*7c3d14c8STreehugger Robot movl 8(%esp), %eax // to [1] if the high word is larger than bhi 41*7c3d14c8STreehugger Robot cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 42*7c3d14c8STreehugger Robot jae 1f 43*7c3d14c8STreehugger Robot 44*7c3d14c8STreehugger Robot /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 45*7c3d14c8STreehugger Robot 46*7c3d14c8STreehugger Robot divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 47*7c3d14c8STreehugger Robot 48*7c3d14c8STreehugger Robot pushl %edi 49*7c3d14c8STreehugger Robot notl %ecx 50*7c3d14c8STreehugger Robot shrl %eax 51*7c3d14c8STreehugger Robot shrl %cl, %eax // q = qs >> (1 + i) 52*7c3d14c8STreehugger Robot movl %eax, %edi 53*7c3d14c8STreehugger Robot mull 20(%esp) // q*blo 54*7c3d14c8STreehugger Robot movl 12(%esp), %ebx 55*7c3d14c8STreehugger Robot movl 16(%esp), %ecx // ECX:EBX = a 56*7c3d14c8STreehugger Robot subl %eax, %ebx 57*7c3d14c8STreehugger Robot sbbl %edx, %ecx // ECX:EBX = a - q*blo 58*7c3d14c8STreehugger Robot movl 24(%esp), %eax 59*7c3d14c8STreehugger Robot imull %edi, %eax // q*bhi 60*7c3d14c8STreehugger Robot subl %eax, %ecx // ECX:EBX = a - q*b 61*7c3d14c8STreehugger Robot sbbl $0, %edi // decrement q if remainder is negative 62*7c3d14c8STreehugger Robot xorl %edx, %edx 63*7c3d14c8STreehugger Robot movl %edi, %eax 64*7c3d14c8STreehugger Robot popl %edi 65*7c3d14c8STreehugger Robot popl %ebx 66*7c3d14c8STreehugger Robot retl 67*7c3d14c8STreehugger Robot 68*7c3d14c8STreehugger Robot 69*7c3d14c8STreehugger Robot1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 70*7c3d14c8STreehugger Robot 71*7c3d14c8STreehugger Robot subl %ebx, %edx // subtract bhi from ahi so that divide will not 72*7c3d14c8STreehugger Robot divl %ebx // overflow, and find q and r such that 73*7c3d14c8STreehugger Robot // 74*7c3d14c8STreehugger Robot // ahi:alo = (1:q)*bhi + r 75*7c3d14c8STreehugger Robot // 76*7c3d14c8STreehugger Robot // Note that q is a number in (31-i).(1+i) 77*7c3d14c8STreehugger Robot // fix point. 78*7c3d14c8STreehugger Robot 79*7c3d14c8STreehugger Robot pushl %edi 80*7c3d14c8STreehugger Robot notl %ecx 81*7c3d14c8STreehugger Robot shrl %eax 82*7c3d14c8STreehugger Robot orl $0x80000000, %eax 83*7c3d14c8STreehugger Robot shrl %cl, %eax // q = (1:qs) >> (1 + i) 84*7c3d14c8STreehugger Robot movl %eax, %edi 85*7c3d14c8STreehugger Robot mull 20(%esp) // q*blo 86*7c3d14c8STreehugger Robot movl 12(%esp), %ebx 87*7c3d14c8STreehugger Robot movl 16(%esp), %ecx // ECX:EBX = a 88*7c3d14c8STreehugger Robot subl %eax, %ebx 89*7c3d14c8STreehugger Robot sbbl %edx, %ecx // ECX:EBX = a - q*blo 90*7c3d14c8STreehugger Robot movl 24(%esp), %eax 91*7c3d14c8STreehugger Robot imull %edi, %eax // q*bhi 92*7c3d14c8STreehugger Robot subl %eax, %ecx // ECX:EBX = a - q*b 93*7c3d14c8STreehugger Robot sbbl $0, %edi // decrement q if remainder is negative 94*7c3d14c8STreehugger Robot xorl %edx, %edx 95*7c3d14c8STreehugger Robot movl %edi, %eax 96*7c3d14c8STreehugger Robot popl %edi 97*7c3d14c8STreehugger Robot popl %ebx 98*7c3d14c8STreehugger Robot retl 99*7c3d14c8STreehugger Robot 100*7c3d14c8STreehugger Robot 101*7c3d14c8STreehugger Robot9: /* High word of b is zero on this branch */ 102*7c3d14c8STreehugger Robot 103*7c3d14c8STreehugger Robot movl 12(%esp), %eax // Find qhi and rhi such that 104*7c3d14c8STreehugger Robot movl 16(%esp), %ecx // 105*7c3d14c8STreehugger Robot xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b 106*7c3d14c8STreehugger Robot divl %ecx // 107*7c3d14c8STreehugger Robot movl %eax, %ebx // 108*7c3d14c8STreehugger Robot movl 8(%esp), %eax // Find qlo such that 109*7c3d14c8STreehugger Robot divl %ecx // 110*7c3d14c8STreehugger Robot movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b 111*7c3d14c8STreehugger Robot popl %ebx // 112*7c3d14c8STreehugger Robot retl // and return qhi:qlo 113*7c3d14c8STreehugger RobotEND_COMPILERRT_FUNCTION(__udivdi3) 114*7c3d14c8STreehugger Robot 115*7c3d14c8STreehugger Robot#endif // __i386__ 116*7c3d14c8STreehugger Robot 117*7c3d14c8STreehugger RobotNO_EXEC_STACK_DIRECTIVE 118*7c3d14c8STreehugger Robot 119