// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.

#include "../assembly.h"

// di_int __ashldi3(di_int input, int count);

// This routine has some extra memory traffic: by default it loads the 64-bit
// input via two 32-bit loads and merges the halves in a register, rather than
// issuing a single 64-bit load. This is to avoid a write-small, read-large stall.
// However, if callers of this routine can be safely assumed to store the argument
// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
#ifdef __i386__
#ifdef __SSE2__

// SSE2 variant of __ashldi3.
//
// Stack arguments on entry:
//    4(%esp)   low  32 bits of input
//    8(%esp)   high 32 bits of input
//   12(%esp)   count
// The shifted 64-bit value is left in %edx:%eax.
// Scratch registers written: %xmm0, %xmm1 (two-load path only), %xmm2.

.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(__ashldi3)
        movd            12(%esp), %xmm2         // xmm2 = count (upper bits zeroed by movd)
#ifdef TRUST_CALLERS_USE_64_BIT_STORES
        movq            4(%esp), %xmm0          // xmm0 = input, one 64-bit load
#else
        movd            4(%esp), %xmm0          // xmm0 = low word of input
        movd            8(%esp), %xmm1          // xmm1 = high word of input
        punpckldq       %xmm1, %xmm0            // xmm0 = high:low, i.e. the 64-bit input
#endif
        psllq           %xmm2, %xmm0            // xmm0 = input << count
        movd            %xmm0, %eax             // eax = low word of result
        psrlq           $32, %xmm0              // bring the high word down to bits 31:0
        movd            %xmm0, %edx             // edx = high word of result
        ret
END_COMPILERRT_FUNCTION(__ashldi3)

#else // Use GPRs instead of SSE2 instructions, if they aren't available.

// General-purpose-register variant of __ashldi3.
//
// Same stack arguments as the SSE2 variant; the result is left in %edx:%eax.
// Scratch registers written: %ecx and the flags.

.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(__ashldi3)
        movl            12(%esp), %ecx          // ecx = count
        movl            4(%esp), %eax           // eax = low word of input
        movl            8(%esp), %edx           // edx = high word of input

        testl           $0x20, %ecx             // bit 5 set <=> count >= 32 (for counts 0..63)
        jnz             1f                      // whole-word shift handled at 1:
        shldl           %cl, %eax, %edx         // high = (high << count) | top bits of low
        shll            %cl, %eax               // low <<= count
        ret

1:      movl            %eax, %edx              // low word becomes the high word
        xorl            %eax, %eax              // low word of the result is zero
        shll            %cl, %edx               // shll uses only the low 5 bits of %cl,
                                                // so this shifts by count - 32
        ret
END_COMPILERRT_FUNCTION(__ashldi3)

#endif // __SSE2__
#endif // __i386__

NO_EXEC_STACK_DIRECTIVE
