1*7c3d14c8STreehugger Robot// This file is dual licensed under the MIT and the University of Illinois Open 2*7c3d14c8STreehugger Robot// Source Licenses. See LICENSE.TXT for details. 3*7c3d14c8STreehugger Robot 4*7c3d14c8STreehugger Robot#include "../assembly.h" 5*7c3d14c8STreehugger Robot 6*7c3d14c8STreehugger Robot// du_int __umoddi3(du_int a, du_int b); 7*7c3d14c8STreehugger Robot 8*7c3d14c8STreehugger Robot// result = remainder of a / b. 9*7c3d14c8STreehugger Robot// both inputs and the output are 64-bit unsigned integers. 10*7c3d14c8STreehugger Robot// This will do whatever the underlying hardware is set to do on division by zero. 11*7c3d14c8STreehugger Robot// No other exceptions are generated, as the divide cannot overflow. 12*7c3d14c8STreehugger Robot// 13*7c3d14c8STreehugger Robot// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 14*7c3d14c8STreehugger Robot// on x86_64. The performance goal is ~40 cycles per divide, which is faster than 15*7c3d14c8STreehugger Robot// currently possible via simulation of integer divides on the x87 unit. 16*7c3d14c8STreehugger Robot// 17*7c3d14c8STreehugger Robot 18*7c3d14c8STreehugger Robot// Stephen Canon, December 2008 19*7c3d14c8STreehugger Robot 20*7c3d14c8STreehugger Robot#ifdef __i386__ 21*7c3d14c8STreehugger Robot 22*7c3d14c8STreehugger Robot.text 23*7c3d14c8STreehugger Robot.balign 4 24*7c3d14c8STreehugger RobotDEFINE_COMPILERRT_FUNCTION(__umoddi3) 25*7c3d14c8STreehugger Robot 26*7c3d14c8STreehugger Robot pushl %ebx 27*7c3d14c8STreehugger Robot movl 20(%esp), %ebx // Find the index i of the leading bit in b. 28*7c3d14c8STreehugger Robot bsrl %ebx, %ecx // If the high word of b is zero, jump to 29*7c3d14c8STreehugger Robot jz 9f // the code to handle that special case [9]. 30*7c3d14c8STreehugger Robot 31*7c3d14c8STreehugger Robot /* High word of b is known to be non-zero on this branch */ 32*7c3d14c8STreehugger Robot 33*7c3d14c8STreehugger Robot movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 34*7c3d14c8STreehugger Robot 35*7c3d14c8STreehugger Robot shrl %cl, %eax // Practically, this means that bhi is given by: 36*7c3d14c8STreehugger Robot shrl %eax // 37*7c3d14c8STreehugger Robot notl %ecx // bhi = (high word of b) << (31 - i) | 38*7c3d14c8STreehugger Robot shll %cl, %ebx // (low word of b) >> (1 + i) 39*7c3d14c8STreehugger Robot orl %eax, %ebx // 40*7c3d14c8STreehugger Robot movl 12(%esp), %edx // Load the high and low words of a, and jump 41*7c3d14c8STreehugger Robot movl 8(%esp), %eax // to [2] if the high word is larger than bhi 42*7c3d14c8STreehugger Robot cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 43*7c3d14c8STreehugger Robot jae 2f 44*7c3d14c8STreehugger Robot 45*7c3d14c8STreehugger Robot /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 46*7c3d14c8STreehugger Robot 47*7c3d14c8STreehugger Robot divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 48*7c3d14c8STreehugger Robot 49*7c3d14c8STreehugger Robot pushl %edi 50*7c3d14c8STreehugger Robot notl %ecx 51*7c3d14c8STreehugger Robot shrl %eax 52*7c3d14c8STreehugger Robot shrl %cl, %eax // q = qs >> (1 + i) 53*7c3d14c8STreehugger Robot movl %eax, %edi 54*7c3d14c8STreehugger Robot mull 20(%esp) // q*blo 55*7c3d14c8STreehugger Robot movl 12(%esp), %ebx 56*7c3d14c8STreehugger Robot movl 16(%esp), %ecx // ECX:EBX = a 57*7c3d14c8STreehugger Robot subl %eax, %ebx 58*7c3d14c8STreehugger Robot sbbl %edx, %ecx // ECX:EBX = a - q*blo 59*7c3d14c8STreehugger Robot movl 24(%esp), %eax 60*7c3d14c8STreehugger Robot imull %edi, %eax // q*bhi 61*7c3d14c8STreehugger Robot subl %eax, %ecx // ECX:EBX = a - q*b 62*7c3d14c8STreehugger Robot 63*7c3d14c8STreehugger Robot jnc 1f // if positive, this is the result. 64*7c3d14c8STreehugger Robot addl 20(%esp), %ebx // otherwise 65*7c3d14c8STreehugger Robot adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result 66*7c3d14c8STreehugger Robot1: movl %ebx, %eax 67*7c3d14c8STreehugger Robot movl %ecx, %edx 68*7c3d14c8STreehugger Robot 69*7c3d14c8STreehugger Robot popl %edi 70*7c3d14c8STreehugger Robot popl %ebx 71*7c3d14c8STreehugger Robot retl 72*7c3d14c8STreehugger Robot 73*7c3d14c8STreehugger Robot 74*7c3d14c8STreehugger Robot2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 75*7c3d14c8STreehugger Robot 76*7c3d14c8STreehugger Robot subl %ebx, %edx // subtract bhi from ahi so that divide will not 77*7c3d14c8STreehugger Robot divl %ebx // overflow, and find q and r such that 78*7c3d14c8STreehugger Robot // 79*7c3d14c8STreehugger Robot // ahi:alo = (1:q)*bhi + r 80*7c3d14c8STreehugger Robot // 81*7c3d14c8STreehugger Robot // Note that q is a number in (31-i).(1+i) 82*7c3d14c8STreehugger Robot // fix point. 83*7c3d14c8STreehugger Robot 84*7c3d14c8STreehugger Robot pushl %edi 85*7c3d14c8STreehugger Robot notl %ecx 86*7c3d14c8STreehugger Robot shrl %eax 87*7c3d14c8STreehugger Robot orl $0x80000000, %eax 88*7c3d14c8STreehugger Robot shrl %cl, %eax // q = (1:qs) >> (1 + i) 89*7c3d14c8STreehugger Robot movl %eax, %edi 90*7c3d14c8STreehugger Robot mull 20(%esp) // q*blo 91*7c3d14c8STreehugger Robot movl 12(%esp), %ebx 92*7c3d14c8STreehugger Robot movl 16(%esp), %ecx // ECX:EBX = a 93*7c3d14c8STreehugger Robot subl %eax, %ebx 94*7c3d14c8STreehugger Robot sbbl %edx, %ecx // ECX:EBX = a - q*blo 95*7c3d14c8STreehugger Robot movl 24(%esp), %eax 96*7c3d14c8STreehugger Robot imull %edi, %eax // q*bhi 97*7c3d14c8STreehugger Robot subl %eax, %ecx // ECX:EBX = a - q*b 98*7c3d14c8STreehugger Robot 99*7c3d14c8STreehugger Robot jnc 3f // if positive, this is the result. 100*7c3d14c8STreehugger Robot addl 20(%esp), %ebx // otherwise 101*7c3d14c8STreehugger Robot adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result 102*7c3d14c8STreehugger Robot3: movl %ebx, %eax 103*7c3d14c8STreehugger Robot movl %ecx, %edx 104*7c3d14c8STreehugger Robot 105*7c3d14c8STreehugger Robot popl %edi 106*7c3d14c8STreehugger Robot popl %ebx 107*7c3d14c8STreehugger Robot retl 108*7c3d14c8STreehugger Robot 109*7c3d14c8STreehugger Robot 110*7c3d14c8STreehugger Robot 111*7c3d14c8STreehugger Robot9: /* High word of b is zero on this branch */ 112*7c3d14c8STreehugger Robot 113*7c3d14c8STreehugger Robot movl 12(%esp), %eax // Find qhi and rhi such that 114*7c3d14c8STreehugger Robot movl 16(%esp), %ecx // 115*7c3d14c8STreehugger Robot xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b 116*7c3d14c8STreehugger Robot divl %ecx // 117*7c3d14c8STreehugger Robot movl %eax, %ebx // 118*7c3d14c8STreehugger Robot movl 8(%esp), %eax // Find rlo such that 119*7c3d14c8STreehugger Robot divl %ecx // 120*7c3d14c8STreehugger Robot movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b 121*7c3d14c8STreehugger Robot popl %ebx // 122*7c3d14c8STreehugger Robot xorl %edx, %edx // and return 0:rlo 123*7c3d14c8STreehugger Robot retl // 124*7c3d14c8STreehugger RobotEND_COMPILERRT_FUNCTION(__umoddi3) 125*7c3d14c8STreehugger Robot 126*7c3d14c8STreehugger Robot#endif // __i386__ 127*7c3d14c8STreehugger Robot 128*7c3d14c8STreehugger RobotNO_EXEC_STACK_DIRECTIVE 129*7c3d14c8STreehugger Robot 130