//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes FMA (Fused Multiply-Add) instructions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// FMA3 - Intel 3 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//

// For all FMA opcodes declared in fma3p_rm and fma3s_rm multiclasses defined
// below, both the register and memory variants are commutable.
// For the register form the commutable operands are 1, 2 and 3.
// For the memory variant the folded operand must be in 3. Thus,
// in that case, only the operands 1 and 2 can be swapped.
// Commuting some of the operands may require an opcode change.
// FMA*213*:
//   operands 1 and 2 (memory & register forms): *213* --> *213*(no changes);
//   operands 1 and 3 (register forms only):     *213* --> *231*;
//   operands 2 and 3 (register forms only):     *213* --> *132*.
// FMA*132*:
//   operands 1 and 2 (memory & register forms): *132* --> *231*;
//   operands 1 and 3 (register forms only):     *132* --> *132*(no changes);
//   operands 2 and 3 (register forms only):     *132* --> *213*.
// FMA*231*:
//   operands 1 and 2 (memory & register forms): *231* --> *132*;
//   operands 1 and 3 (register forms only):     *231* --> *213*;
//   operands 2 and 3 (register forms only):     *231* --> *231*(no changes).

// Packed FMA3 definitions for a single opcode. Emits the 128-bit register (r)
// and memory (m) variants followed by the 256-bit ones (rY and mY, both tagged
// VEX_L). In all four, $src1 is tied to $dst, and the selection pattern hands
// the operands to Op in the order $src2, $src1, $src3. Op defaults to
// null_frag when the instantiation does not supply one.
let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
                    PatFrag MemFrag128, PatFrag MemFrag256,
                    ValueType OpVT128, ValueType OpVT256,
                    SDPatternOperator Op = null_frag> {
  // 128-bit, all operands in registers.
  let usesCustomInserter = 1 in {
    def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2, VR128:$src3),
                 OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                 [(set VR128:$dst,
                       (OpVT128 (Op VR128:$src2, VR128:$src1, VR128:$src3)))]>;
  }

  // 128-bit, third operand loaded from memory.
  let mayLoad = 1 in {
    def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2, f128mem:$src3),
                 OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                 [(set VR128:$dst,
                       (OpVT128 (Op VR128:$src2, VR128:$src1,
                                    (MemFrag128 addr:$src3))))]>;
  }

  // 256-bit, all operands in registers.
  let usesCustomInserter = 1 in {
    def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
                  (ins VR256:$src1, VR256:$src2, VR256:$src3),
                  OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                  [(set VR256:$dst,
                        (OpVT256 (Op VR256:$src2, VR256:$src1, VR256:$src3)))]>,
             VEX_L;
  }

  // 256-bit, third operand loaded from memory.
  let mayLoad = 1 in {
    def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
                  (ins VR256:$src1, VR256:$src2, f256mem:$src3),
                  OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                  [(set VR256:$dst,
                        (OpVT256 (Op VR256:$src2, VR256:$src1,
                                     (MemFrag256 addr:$src3))))]>,
             VEX_L;
  }
}

// Instantiates fma3p_rm once per operand ordering (213, 132, 231). The
// mnemonic is built as OpcodeStr + ordering + PackTy. Only the 213 ordering is
// given the selection node Op; the other two keep fma3p_rm's default.
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                       string OpcodeStr, string PackTy,
                       PatFrag MemFrag128, PatFrag MemFrag256,
                       SDNode Op, ValueType OpTy128, ValueType OpTy256> {
  defm r213 : fma3p_rm<opc213, OpcodeStr # "213" # PackTy,
                       MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
  defm r132 : fma3p_rm<opc132, OpcodeStr # "132" # PackTy,
                       MemFrag128, MemFrag256, OpTy128, OpTy256>;
  defm r231 : fma3p_rm<opc231, OpcodeStr # "231" # PackTy,
                       MemFrag128, MemFrag256, OpTy128, OpTy256>;
}

// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
  defm VFMADDPS    : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps",
                                 loadv4f32, loadv8f32, X86Fmadd,
                                 v4f32, v8f32>;
  defm VFMSUBPS    : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps",
                                 loadv4f32, loadv8f32, X86Fmsub,
                                 v4f32, v8f32>;
  defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps",
                                 loadv4f32, loadv8f32, X86Fmaddsub,
                                 v4f32, v8f32>;
  defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps",
                                 loadv4f32, loadv8f32, X86Fmsubadd,
                                 v4f32, v8f32>;
}

let ExeDomain = SSEPackedDouble in {
  defm VFMADDPD    : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd",
                                 loadv2f64, loadv4f64, X86Fmadd,
                                 v2f64, v4f64>, VEX_W;
  defm VFMSUBPD    : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd",
                                 loadv2f64, loadv4f64, X86Fmsub,
                                 v2f64, v4f64>, VEX_W;
  defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd",
                                 loadv2f64, loadv4f64, X86Fmaddsub,
                                 v2f64, v4f64>, VEX_W;
  defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd",
                                 loadv2f64, loadv4f64, X86Fmsubadd,
                                 v2f64, v4f64>, VEX_W;
}

// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
  defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps",
                               loadv4f32, loadv8f32, X86Fnmadd,
                               v4f32, v8f32>;
  defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps",
                               loadv4f32, loadv8f32, X86Fnmsub,
                               v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
  defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd",
                               loadv2f64, loadv4f64, X86Fnmadd,
                               v2f64, v4f64>, VEX_W;
  defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd",
                               loadv2f64, loadv4f64, X86Fnmsub,
                               v2f64, v4f64>, VEX_W;
}

// All source register operands of the FMA opcodes defined in the fma3s_rm
// multiclass can be commuted. In many cases such a commute transformation
// requires an opcode adjustment. For example, commuting operands 1 and 2 in
// the FMA*132 form requires an opcode change to FMA*231:
//     FMA*132* reg1, reg2, reg3; // reg1 * reg3 + reg2;
//     -->
//     FMA*231* reg2, reg1, reg3; // reg1 * reg3 + reg2;
// Please see the more detailed comment at the very beginning of the section
// defining FMA3 opcodes above.
let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
multiclass fma3s_rm<bits<8> opc, string OpcodeStr,
                    X86MemOperand x86memop, RegisterClass RC,
                    SDPatternOperator OpNode = null_frag> {
  // Register form: every source operand lives in RC.
  let usesCustomInserter = 1 in {
    def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, RC:$src3),
                 OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                 [(set RC:$dst, (OpNode RC:$src2, RC:$src1, RC:$src3))]>;
  }

  // Memory form: the third operand is loaded from x86memop.
  let mayLoad = 1 in {
    def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, x86memop:$src3),
                 OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                 [(set RC:$dst,
                       (OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>;
  }
}

// These FMA*_Int instructions are defined specially for use when the scalar
// FMA intrinsics are lowered to machine instructions. In that sense they are
// similar to the existing ADD*_Int, SUB*_Int, MUL*_Int, etc. instructions.
//
// All of the FMA*_Int opcodes are defined as commutable here.
// Commuting the 2nd and 3rd source register operands of FMAs is quite trivial
// and the corresponding optimizations have been developed.
// Commuting the 1st operand of FMA*_Int requires some additional analysis:
// the commute optimization is legal only if all users of FMA*_Int use only
// the lowest element of the FMA*_Int instruction. Even though such analysis
// may not be implemented yet, we allow the routines doing the actual commute
// transformation to decide whether a given instruction is commutable or not.
// Scalar FMA3 "_Int" register and memory forms over register class RC.
// Both are codegen-only, tie $src1 to $dst, and carry an empty pattern list.
let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
    hasSideEffects = 0 in
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
                        Operand memopr, RegisterClass RC> {
  // Register form.
  def r_Int : FMA3<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, RC:$src3),
                   OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                   []>;

  // Memory form: the third operand is a memopr memory reference.
  let mayLoad = 1 in {
    def m_Int : FMA3<opc, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, memopr:$src3),
                     OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                     []>;
  }
}

// Instantiates fma3s_rm for the 132, 213 and 231 operand orderings. The
// mnemonic is OpStr + ordering + PackTy. Only the 213 ordering receives the
// selection node OpNode.
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                       string OpStr, string PackTy,
                       SDNode OpNode, RegisterClass RC,
                       X86MemOperand x86memop> {
  defm r132 : fma3s_rm<opc132, OpStr # "132" # PackTy, x86memop, RC>;
  defm r213 : fma3s_rm<opc213, OpStr # "213" # PackTy, x86memop, RC,
                       OpNode>;
  defm r231 : fma3s_rm<opc231, OpStr # "231" # PackTy, x86memop, RC>;
}

// The FMA 213 form is created for lowering of scalar FMA intrinsics
// to machine instructions.
// The FMA 132 form can trivially be obtained by commuting the 2nd and 3rd
// operands of the FMA 213 form.
// The FMA 231 form can be obtained only by commuting the 1st operand of the
// 213 or 132 forms, which is possible only after special analysis of all uses
// of the initial instruction. Such analysis does not exist yet, so the 231
// form of the FMA*_Int instructions is introduced on the optimistic
// assumption that such analysis will be implemented eventually.
// Instantiates fma3s_rm_int for the 132, 213 and 231 operand orderings; the
// mnemonic is OpStr + ordering + PackTy.
multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                           string OpStr, string PackTy,
                           RegisterClass RC, Operand memop> {
  defm r132 : fma3s_rm_int<opc132, OpStr # "132" # PackTy, memop, RC>;
  defm r213 : fma3s_rm_int<opc213, OpStr # "213" # PackTy, memop, RC>;
  defm r231 : fma3s_rm_int<opc231, OpStr # "231" # PackTy, memop, RC>;
}

// Scalar FMA3 family for one operation: the SS and SD instruction groups
// (plain forms plus the "_Int" forms) and the two patterns that select the
// scalar intrinsics IntF32 / IntF64.
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                 string OpStr, Intrinsic IntF32, Intrinsic IntF64,
                 SDNode OpNode> {
  // Single precision.
  let ExeDomain = SSEPackedSingle in
  defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", OpNode,
                        FR32, f32mem>,
            fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", VR128,
                            ssmem>;

  // Double precision.
  let ExeDomain = SSEPackedDouble in
  defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", OpNode,
                        FR64, f64mem>,
            fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", VR128,
                            sdmem>,
            VEX_W;

  // These patterns use the 123 operand ordering rather than 213, even though
  // they map the intrinsic onto the 213 version of the instruction. The reason
  // is that src1 is tied to dest, and the scalar intrinsics require the
  // pass-through values to come from the first source operand, not the
  // second.
  def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
            (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SSr213r_Int")
             $src1, $src2, $src3), VR128)>;

  def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
            (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SDr213r_Int")
             $src1, $src2, $src3), VR128)>;
}

defm VFMADD  : fma3s<0x99, 0xA9, 0xB9, "vfmadd",
                     int_x86_fma_vfmadd_ss, int_x86_fma_vfmadd_sd,
                     X86Fmadd>, VEX_LIG;
defm VFMSUB  : fma3s<0x9B, 0xAB, 0xBB, "vfmsub",
                     int_x86_fma_vfmsub_ss, int_x86_fma_vfmsub_sd,
                     X86Fmsub>, VEX_LIG;

defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd",
                     int_x86_fma_vfnmadd_ss, int_x86_fma_vfnmadd_sd,
                     X86Fnmadd>, VEX_LIG;
defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub",
                     int_x86_fma_vfnmsub_ss, int_x86_fma_vfnmsub_sd,
                     X86Fnmsub>, VEX_LIG;


//===----------------------------------------------------------------------===//
// FMA4 - AMD 4 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//


// Scalar FMA4 forms over register class RC: register-only (rr), memory as the
// third source (rm), memory as the second source (mr), and a pattern-less
// rr_REV record kept for the disassembler.
multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                 X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
                 PatFrag mem_frag> {
  let isCommutable = 1 in {
    def rr : FMA4<opc, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, RC:$src3),
                  OpcodeStr #
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                  [(set RC:$dst,
                        (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
             VEX_W, VEX_LIG, MemOp4;
  }
  def rm : FMA4<opc, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, RC:$src2, x86memop:$src3),
                OpcodeStr #
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [(set RC:$dst,
                      (OpNode RC:$src1, RC:$src2, (mem_frag addr:$src3)))]>,
           VEX_W, VEX_LIG, MemOp4;
  def mr : FMA4<opc, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, x86memop:$src2, RC:$src3),
                OpcodeStr #
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [(set RC:$dst,
                      (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>,
           VEX_LIG;

  // For disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
    def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, RC:$src3),
                      OpcodeStr #
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                      []>,
                 VEX_LIG;
  }
}

// Scalar FMA4 "_Int" forms on VR128, selected directly from the intrinsic Int.
// All three records are codegen-only; memory operands are matched through the
// mem_cpat complex pattern.
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
                     ComplexPattern mem_cpat, Intrinsic Int> {
  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in {
      def rr_Int : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2, VR128:$src3),
                        OpcodeStr #
                        "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                        [(set VR128:$dst,
                              (Int VR128:$src1, VR128:$src2, VR128:$src3))]>,
                   VEX_W, VEX_LIG, MemOp4;
    }
    def rm_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2, memop:$src3),
                      OpcodeStr #
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                      [(set VR128:$dst,
                            (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>,
                 VEX_W, VEX_LIG, MemOp4;
    def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, memop:$src2, VR128:$src3),
                      OpcodeStr #
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                      [(set VR128:$dst,
                            (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>,
                 VEX_LIG;
  } // isCodeGenOnly = 1
}

// Packed FMA4 forms: rr / rm / mr on VR128 and rrY / rmY / mrY on VR256 (the
// latter three tagged VEX_L), plus pattern-less *_REV records kept for the
// disassembler.
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                 ValueType OpVT128, ValueType OpVT256,
                 PatFrag ld_frag128, PatFrag ld_frag256> {
  // 128-bit forms.
  let isCommutable = 1 in {
    def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2, VR128:$src3),
                  OpcodeStr #
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                  [(set VR128:$dst,
                        (OpVT128 (OpNode VR128:$src1, VR128:$src2,
                                         VR128:$src3)))]>,
             VEX_W, MemOp4;
  }
  def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2, f128mem:$src3),
                OpcodeStr #
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [(set VR128:$dst,
                      (OpNode VR128:$src1, VR128:$src2,
                              (ld_frag128 addr:$src3)))]>,
           VEX_W, MemOp4;
  def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, f128mem:$src2, VR128:$src3),
                OpcodeStr #
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [(set VR128:$dst,
                      (OpNode VR128:$src1, (ld_frag128 addr:$src2),
                              VR128:$src3))]>;

  // 256-bit forms.
  let isCommutable = 1 in {
    def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
                   (ins VR256:$src1, VR256:$src2, VR256:$src3),
                   OpcodeStr #
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                   [(set VR256:$dst,
                         (OpVT256 (OpNode VR256:$src1, VR256:$src2,
                                          VR256:$src3)))]>,
              VEX_W, MemOp4, VEX_L;
  }
  def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
                 (ins VR256:$src1, VR256:$src2, f256mem:$src3),
                 OpcodeStr #
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 [(set VR256:$dst,
                       (OpNode VR256:$src1, VR256:$src2,
                               (ld_frag256 addr:$src3)))]>,
            VEX_W, MemOp4, VEX_L;
  def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
                 (ins VR256:$src1, f256mem:$src2, VR256:$src3),
                 OpcodeStr #
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 [(set VR256:$dst,
                       (OpNode VR256:$src1, (ld_frag256 addr:$src2),
                               VR256:$src3))]>,
            VEX_L;

  // For disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
    def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2, VR128:$src3),
                      OpcodeStr #
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                      []>;
    def rrY_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
                       (ins VR256:$src1, VR256:$src2, VR256:$src3),
                       OpcodeStr #
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       []>,
                  VEX_L;
  } // isCodeGenOnly = 1
}

let ExeDomain = SSEPackedSingle in {
  // Scalar Instructions
  defm VFMADDSS4  : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32,
                          X86Fmadd, loadf32>,
                    fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
                              int_x86_fma_vfmadd_ss>;
  defm VFMSUBSS4  : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32,
                          X86Fmsub, loadf32>,
                    fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
                              int_x86_fma_vfmsub_ss>;
  defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
                          X86Fnmadd, loadf32>,
                    fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
                              int_x86_fma_vfnmadd_ss>;
  defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
                          X86Fnmsub, loadf32>,
                    fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
                              int_x86_fma_vfnmsub_ss>;
  // Packed Instructions
  defm VFMADDPS4    : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
                            loadv4f32, loadv8f32>;
  defm VFMSUBPS4    : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
                            loadv4f32, loadv8f32>;
  defm VFNMADDPS4   : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
                            loadv4f32, loadv8f32>;
  defm VFNMSUBPS4   : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
                            loadv4f32, loadv8f32>;
  defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
                            loadv4f32, loadv8f32>;
  defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
                            loadv4f32, loadv8f32>;
}

let ExeDomain = SSEPackedDouble in {
  // Scalar Instructions
  defm VFMADDSD4  : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64,
                          X86Fmadd, loadf64>,
                    fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
                              int_x86_fma_vfmadd_sd>;
  defm VFMSUBSD4  : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64,
                          X86Fmsub, loadf64>,
                    fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
                              int_x86_fma_vfmsub_sd>;
  defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
                          X86Fnmadd, loadf64>,
                    fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
                              int_x86_fma_vfnmadd_sd>;
  defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
                          X86Fnmsub, loadf64>,
                    fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
                              int_x86_fma_vfnmsub_sd>;
  // Packed Instructions
  defm VFMADDPD4    : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
                            loadv2f64, loadv4f64>;
  defm VFMSUBPD4    : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
                            loadv2f64, loadv4f64>;
  defm VFNMADDPD4   : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
                            loadv2f64, loadv4f64>;
  defm VFNMSUBPD4   : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
                            loadv2f64, loadv4f64>;
  defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
                            loadv2f64, loadv4f64>;
  defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
                            loadv2f64, loadv4f64>;
}
