//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Plain i32 operand printed through printNEONModImmOperand; it has no
// dedicated assembly-parser match class.
let PrintMethod = "printNEONModImmOperand" in
def nModImm : Operand<i32>;

// Splat immediates.  Each operand is paired with the asm operand class whose
// Name is given here.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
let PrintMethod = "printNEONModImmOperand" in
def nImmSplatI8 : Operand<i32> {
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
let PrintMethod = "printNEONModImmOperand" in
def nImmSplatI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
let PrintMethod = "printNEONModImmOperand" in
def nImmSplatI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatI32AsmOperand;
}

// The "Not" splat operands only set a parser match class; no PrintMethod is
// assigned to them here.
def nImmSplatNotI16AsmOperand : AsmOperandClass {
  let Name = "NEONi16splatNot";
}
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass {
  let Name = "NEONi32splatNot";
}
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}

def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
let PrintMethod = "printNEONModImmOperand" in
def nImmVMOVI32 : Operand<i32> {
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}

// Byte-replicate asm operand classes.  The VMOV pair and the VMVN pair use the
// same predicates per element size and differ in the render method.
let RenderMethod = "addNEONvmovByteReplicateOperands" in {
  def nImmVMOVI16AsmOperandByteReplicate : AsmOperandClass {
    let Name = "NEONi16vmovByteReplicate";
    let PredicateMethod = "isNEONi16ByteReplicate";
  }
  def nImmVMOVI32AsmOperandByteReplicate : AsmOperandClass {
    let Name = "NEONi32vmovByteReplicate";
    let PredicateMethod = "isNEONi32ByteReplicate";
  }
}
let RenderMethod = "addNEONinvByteReplicateOperands" in {
  def nImmVMVNI16AsmOperandByteReplicate : AsmOperandClass {
    let Name = "NEONi16invByteReplicate";
    let PredicateMethod = "isNEONi16ByteReplicate";
  }
  def nImmVMVNI32AsmOperandByteReplicate : AsmOperandClass {
    let Name = "NEONi32invByteReplicate";
    let PredicateMethod = "isNEONi32ByteReplicate";
  }
}

// Operands bound to the byte-replicate classes above; all four are printed
// through printNEONModImmOperand.
let PrintMethod = "printNEONModImmOperand" in {
  def nImmVMOVI16ByteReplicate : Operand<i32> {
    let ParserMatchClass = nImmVMOVI16AsmOperandByteReplicate;
  }
  def nImmVMOVI32ByteReplicate : Operand<i32> {
    let ParserMatchClass = nImmVMOVI32AsmOperandByteReplicate;
  }
  def nImmVMVNI16ByteReplicate : Operand<i32> {
    let ParserMatchClass = nImmVMVNI16AsmOperandByteReplicate;
  }
  def nImmVMVNI32ByteReplicate : Operand<i32> {
    let ParserMatchClass = nImmVMVNI32AsmOperandByteReplicate;
  }
}

def nImmVMOVI32NegAsmOperand : AsmOperandClass {
  let Name = "NEONi32vmovNeg";
}
let PrintMethod = "printNEONModImmOperand" in
def nImmVMOVI32Neg : Operand<i32> {
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}

// Unlike its neighbours this operand is printed with printFPImmOperand and
// matched with FPImmOperand.
def nImmVMOVF32 : Operand<i32> {
  let ParserMatchClass = FPImmOperand;
  let PrintMethod = "printFPImmOperand";
}

def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
let PrintMethod = "printNEONModImmOperand" in
def nImmSplatI64 : Operand<i32> {
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Vector lane indices.  Each ImmLeaf predicate accepts an immediate strictly
// below the bound written in its body (8, 4 and 2 respectively).
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
let PrintMethod = "printVectorIndex", MIOperandInfo = (ops i32imm) in {
  def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
    let ParserMatchClass = VectorIndex8Operand;
  }
  def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
    let ParserMatchClass = VectorIndex16Operand;
  }
  def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
    let ParserMatchClass = VectorIndex32Operand;
  }
}

// Register list of one D register.
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListOneDAsmOperand : AsmOperandClass { let Name = "VecListOneD"; }
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListDPairAsmOperand : AsmOperandClass { let Name = "VecListDPair"; }
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListThreeDAsmOperand : AsmOperandClass { let Name = "VecListThreeD"; }
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListFourDAsmOperand : AsmOperandClass { let Name = "VecListFourD"; }
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListThreeQAsmOperand : AsmOperandClass { let Name = "VecListThreeQ"; }
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListFourQAsmOperand : AsmOperandClass { let Name = "VecListFourQ"; }
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
}
def VecListOneDAllLanes
    : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
}
def VecListDPairAllLanes
    : RegisterOperand<DPair, "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers),
// with "all lanes" subscripting.
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
}
def VecListDPairSpacedAllLanes
    : RegisterOperand<DPair, "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
}
def VecListThreeDAllLanes
    : RegisterOperand<DPR, "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
}
def VecListThreeQAllLanes
    : RegisterOperand<DPR, "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
}
def VecListFourDAllLanes
    : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
let ParserMethod = "parseVectorList", RenderMethod = "addVecListOperands" in
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
}
def VecListFourQAllLanes
    : RegisterOperand<DPR, "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Register list of one D register, with byte lane subscripting.
// The indexed lists are rendered with addVecListIndexedOperands and carry two
// machine operands: the D register and the lane index.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
}
// ...with half-word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
}
// ...with word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
}

// Register list of two D registers with byte lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
}
// ...with half-word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
}
// ...with word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
}
// Register list of two Q registers with half-word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
}
// ...with word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
}


// Register list of three D registers with byte lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
}
// ...with half-word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
}
// ...with word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
}
// Register list of three Q registers with half-word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
}
// ...with word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
}

// Register list of four D registers with byte lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
}
// ...with half-word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
}
// ...with word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
}
// Register list of four Q registers with half-word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
}
// ...with word lane subscripting.
let ParserMethod = "parseVectorList",
    RenderMethod = "addVecListIndexedOperands" in
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
}
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx) in
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
}

// Load/store fragments filtered by the alignment reported by the memory node.
// Only the dword forms accept "at least" the alignment (>= 8); the word,
// half-word and byte forms require the exact value (4, 2, 1), and the
// non_word forms accept anything below 4.
def dword_alignedload
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Vector compare profiles.
//   SDTARMVCMP  - one result, two operands; the result is an integer type and
//                 the two operands share a type.
//   SDTARMVCMPZ - one result, one operand, no type constraints.
def SDTARMVCMP  : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;

// Two-operand compares use SDTARMVCMP; the "z"-suffixed single-operand forms
// use SDTARMVCMPZ.
def NEONvceq  : SDNode<"ARMISD::VCEQ",  SDTARMVCMP>;
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge  : SDNode<"ARMISD::VCGE",  SDTARMVCMP>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt  : SDNode<"ARMISD::VCGT",  SDTARMVCMP>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst  : SDNode<"ARMISD::VTST",  SDTARMVCMP>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
def SDTARMVSH
    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                           SDTCisVT<2, i32>]>;
def SDTARMVSHX
    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                           SDTCisVT<2, i32>]>;
def SDTARMVSHINS
    : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

// Plain shifts by immediate.
def NEONvshl  : SDNode<"ARMISD::VSHL",  SDTARMVSH>;
def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

// "VRSHR*" nodes.
def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

// "VQSHL*" / "VQSHRN*" nodes.
def NEONvqshls   : SDNode<"ARMISD::VQSHLs",   SDTARMVSH>;
def NEONvqshlu   : SDNode<"ARMISD::VQSHLu",   SDTARMVSH>;
def NEONvqshlsu  : SDNode<"ARMISD::VQSHLsu",  SDTARMVSH>;
def NEONvqshrns  : SDNode<"ARMISD::VQSHRNs",  SDTARMVSHX>;
def NEONvqshrnu  : SDNode<"ARMISD::VQSHRNu",  SDTARMVSHX>;
def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

// "VQRSHRN*" nodes.
def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs",  SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu",  SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert nodes.
def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction: i32 result, an integer-typed first operand and an i32
// second operand.
def SDTARMVGETLN
    : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                           SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Vector move-immediate family: vector result, single i32 operand.
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM",   SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM",   SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

// Vector result, a first operand of the same type and an i32 second operand.
def SDTARMVORRIMM
    : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                           SDTCisVT<2, i32>]>;
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

// VBSL: vector result with three operands, all of the result's type.
def NEONvbsl
    : SDNode<"ARMISD::VBSL",
             SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                  SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>>;

// VDUP: the only constraint is that the result is a vector.
def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane
    : SDNode<"ARMISD::VDUPLANE",
             SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                  SDTCisVT<2, i32>]>>;

// VEXT: two vector operands of the result type followed by an i32 operand.
def SDTARMVEXT
    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Single-input shuffles: result and operand have the same vector type.
def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64  : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32  : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16  : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result, two-operand shuffles: every value shares one vector type.
def SDTARMVSHUF2
    : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// VMULL: integer result; both operands are integer and share a type.  The
// result is not tied to the operand type.
def SDTARMVMULL
    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                           SDTCisSameAs<1, 2>]>;
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

// Matches a NEONvmovImm whose modified-immediate operand decodes to 32-bit
// elements with value 0.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Matches a NEONvmovImm whose modified-immediate operand decodes to 8-bit
// elements with value 0xff.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                             IIC_fpLoad_m, "",
                             [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                             IIC_fpStore_m, "",
                             [(store (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
624class VLDQPseudo<InstrItinClass itin> 625 : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; 626class VLDQWBPseudo<InstrItinClass itin> 627 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 628 (ins addrmode6:$addr, am6offset:$offset), itin, 629 "$addr.addr = $wb">; 630class VLDQWBfixedPseudo<InstrItinClass itin> 631 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 632 (ins addrmode6:$addr), itin, 633 "$addr.addr = $wb">; 634class VLDQWBregisterPseudo<InstrItinClass itin> 635 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 636 (ins addrmode6:$addr, rGPR:$offset), itin, 637 "$addr.addr = $wb">; 638 639class VLDQQPseudo<InstrItinClass itin> 640 : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; 641class VLDQQWBPseudo<InstrItinClass itin> 642 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 643 (ins addrmode6:$addr, am6offset:$offset), itin, 644 "$addr.addr = $wb">; 645class VLDQQWBfixedPseudo<InstrItinClass itin> 646 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 647 (ins addrmode6:$addr), itin, 648 "$addr.addr = $wb">; 649class VLDQQWBregisterPseudo<InstrItinClass itin> 650 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 651 (ins addrmode6:$addr, rGPR:$offset), itin, 652 "$addr.addr = $wb">; 653 654 655class VLDQQQQPseudo<InstrItinClass itin> 656 : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, 657 "$src = $dst">; 658class VLDQQQQWBPseudo<InstrItinClass itin> 659 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 660 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 661 "$addr.addr = $wb, $src = $dst">; 662 663let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 664 665// VLD1 : Vector Load (multiple single elements) 666class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode> 667 : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), 668 (ins AddrMode:$Rn), IIC_VLD1, 669 "vld1", Dt, "$Vd, $Rn", "", []> { 670 let Rm = 0b1111; 671 let Inst{4} = Rn{4}; 672 let DecoderMethod = "DecodeVLDST1Instruction"; 673} 674class VLD1Q<bits<4> op7_4, string 
Dt, Operand AddrMode> 675 : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), 676 (ins AddrMode:$Rn), IIC_VLD1x2, 677 "vld1", Dt, "$Vd, $Rn", "", []> { 678 let Rm = 0b1111; 679 let Inst{5-4} = Rn{5-4}; 680 let DecoderMethod = "DecodeVLDST1Instruction"; 681} 682 683def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>; 684def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>; 685def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>; 686def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>; 687 688def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>; 689def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>; 690def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>; 691def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>; 692 693// ...with address register writeback: 694multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { 695 def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 696 (ins AddrMode:$Rn), IIC_VLD1u, 697 "vld1", Dt, "$Vd, $Rn!", 698 "$Rn.addr = $wb", []> { 699 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 700 let Inst{4} = Rn{4}; 701 let DecoderMethod = "DecodeVLDST1Instruction"; 702 } 703 def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 704 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, 705 "vld1", Dt, "$Vd, $Rn, $Rm", 706 "$Rn.addr = $wb", []> { 707 let Inst{4} = Rn{4}; 708 let DecoderMethod = "DecodeVLDST1Instruction"; 709 } 710} 711multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 712 def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 713 (ins AddrMode:$Rn), IIC_VLD1x2u, 714 "vld1", Dt, "$Vd, $Rn!", 715 "$Rn.addr = $wb", []> { 716 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
717 let Inst{5-4} = Rn{5-4}; 718 let DecoderMethod = "DecodeVLDST1Instruction"; 719 } 720 def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 721 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 722 "vld1", Dt, "$Vd, $Rn, $Rm", 723 "$Rn.addr = $wb", []> { 724 let Inst{5-4} = Rn{5-4}; 725 let DecoderMethod = "DecodeVLDST1Instruction"; 726 } 727} 728 729defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>; 730defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>; 731defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>; 732defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>; 733defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>; 734defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>; 735defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>; 736defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; 737 738// ...with 3 registers 739class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode> 740 : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), 741 (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, 742 "$Vd, $Rn", "", []> { 743 let Rm = 0b1111; 744 let Inst{4} = Rn{4}; 745 let DecoderMethod = "DecodeVLDST1Instruction"; 746} 747multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { 748 def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 749 (ins AddrMode:$Rn), IIC_VLD1x2u, 750 "vld1", Dt, "$Vd, $Rn!", 751 "$Rn.addr = $wb", []> { 752 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
753 let Inst{4} = Rn{4}; 754 let DecoderMethod = "DecodeVLDST1Instruction"; 755 } 756 def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 757 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 758 "vld1", Dt, "$Vd, $Rn, $Rm", 759 "$Rn.addr = $wb", []> { 760 let Inst{4} = Rn{4}; 761 let DecoderMethod = "DecodeVLDST1Instruction"; 762 } 763} 764 765def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>; 766def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>; 767def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>; 768def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>; 769 770defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>; 771defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; 772defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; 773defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; 774 775def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; 776def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>; 777def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>; 778 779// ...with 4 registers 780class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode> 781 : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), 782 (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, 783 "$Vd, $Rn", "", []> { 784 let Rm = 0b1111; 785 let Inst{5-4} = Rn{5-4}; 786 let DecoderMethod = "DecodeVLDST1Instruction"; 787} 788multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { 789 def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), 790 (ins AddrMode:$Rn), IIC_VLD1x2u, 791 "vld1", Dt, "$Vd, $Rn!", 792 "$Rn.addr = $wb", []> { 793 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
794 let Inst{5-4} = Rn{5-4}; 795 let DecoderMethod = "DecodeVLDST1Instruction"; 796 } 797 def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), 798 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 799 "vld1", Dt, "$Vd, $Rn, $Rm", 800 "$Rn.addr = $wb", []> { 801 let Inst{5-4} = Rn{5-4}; 802 let DecoderMethod = "DecodeVLDST1Instruction"; 803 } 804} 805 806def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; 807def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; 808def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; 809def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; 810 811defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; 812defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; 813defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; 814defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; 815 816def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; 817def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>; 818def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>; 819 820// VLD2 : Vector Load (multiple 2-element structures) 821class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, 822 InstrItinClass itin, Operand AddrMode> 823 : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), 824 (ins AddrMode:$Rn), itin, 825 "vld2", Dt, "$Vd, $Rn", "", []> { 826 let Rm = 0b1111; 827 let Inst{5-4} = Rn{5-4}; 828 let DecoderMethod = "DecodeVLDST2Instruction"; 829} 830 831def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, 832 addrmode6align64or128>; 833def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, 834 addrmode6align64or128>; 835def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, 836 addrmode6align64or128>; 837 838def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2, 839 addrmode6align64or128or256>; 840def VLD2q16 : VLD2<0b0011, 
{0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, 841 addrmode6align64or128or256>; 842def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, 843 addrmode6align64or128or256>; 844 845def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>; 846def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; 847def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; 848 849// ...with address register writeback: 850multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, 851 RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> { 852 def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 853 (ins AddrMode:$Rn), itin, 854 "vld2", Dt, "$Vd, $Rn!", 855 "$Rn.addr = $wb", []> { 856 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 857 let Inst{5-4} = Rn{5-4}; 858 let DecoderMethod = "DecodeVLDST2Instruction"; 859 } 860 def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 861 (ins AddrMode:$Rn, rGPR:$Rm), itin, 862 "vld2", Dt, "$Vd, $Rn, $Rm", 863 "$Rn.addr = $wb", []> { 864 let Inst{5-4} = Rn{5-4}; 865 let DecoderMethod = "DecodeVLDST2Instruction"; 866 } 867} 868 869defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, 870 addrmode6align64or128>; 871defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, 872 addrmode6align64or128>; 873defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, 874 addrmode6align64or128>; 875 876defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, 877 addrmode6align64or128or256>; 878defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, 879 addrmode6align64or128or256>; 880defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, 881 addrmode6align64or128or256>; 882 883def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; 884def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; 885def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; 886def VLD2q8PseudoWB_register : 
VLDQQWBregisterPseudo<IIC_VLD2x2u>; 887def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; 888def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; 889 890// ...with double-spaced registers 891def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, 892 addrmode6align64or128>; 893def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, 894 addrmode6align64or128>; 895def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, 896 addrmode6align64or128>; 897defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, 898 addrmode6align64or128>; 899defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, 900 addrmode6align64or128>; 901defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, 902 addrmode6align64or128>; 903 904// VLD3 : Vector Load (multiple 3-element structures) 905class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> 906 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 907 (ins addrmode6:$Rn), IIC_VLD3, 908 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { 909 let Rm = 0b1111; 910 let Inst{4} = Rn{4}; 911 let DecoderMethod = "DecodeVLDST3Instruction"; 912} 913 914def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; 915def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; 916def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; 917 918def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>; 919def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>; 920def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>; 921 922// ...with address register writeback: 923class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 924 : NLdSt<0, 0b10, op11_8, op7_4, 925 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 926 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, 927 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", 928 "$Rn.addr = $wb", []> { 929 let Inst{4} = Rn{4}; 930 let DecoderMethod = "DecodeVLDST3Instruction"; 931} 932 933def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; 
// VLD3 writeback forms (op11_8 = 0b0100) for 16- and 32-bit elements.
def VLD3d16_UPD          : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD          : VLD3DWB<0b0100, {1,0,0,?}, "32">;

// Matching writeback pseudos, one per element size.
def VLD3d8Pseudo_UPD     : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD    : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD    : VLDQQWBPseudo<IIC_VLD3u>;

// ...with double-spaced registers:
// Same classes as above, selected with op11_8 = 0b0101.
def VLD3q8               : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16              : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32              : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD           : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD          : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD          : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD     : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD    : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD    : VLDQQQQWBPseudo<IIC_VLD3u>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo      : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q16oddPseudo     : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q32oddPseudo     : VLDQQQQPseudo<IIC_VLD3>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// VLD4 : Vector Load (multiple 4-element structures)
//
// Base form without writeback: four DPR results, one addrmode6 address.
// Rm is pinned to 0b1111 and Inst{5-4} is taken from Rn{5-4}.
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn),
            IIC_VLD4,
            "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn",
            "", []> {
  let DecoderMethod = "DecodeVLDST4Instruction";
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}

def VLD4d8               : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16              : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32              : VLD4D<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo         : VLDQQPseudo<IIC_VLD4>;
def VLD4d16Pseudo        : VLDQQPseudo<IIC_VLD4>;
def VLD4d32Pseudo        : VLDQQPseudo<IIC_VLD4>;

// ...with address register writeback:
// Adds a GPR $wb result tied to the address ("$Rn.addr = $wb") and an
// am6offset $Rm input; Rm is therefore not fixed in this class.
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm),
            IIC_VLD4u,
            "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLDST4Instruction";
  let Inst{5-4} = Rn{5-4};
}

def VLD4d8_UPD           : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD          : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD          : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD     : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d16Pseudo_UPD    : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d32Pseudo_UPD    : VLDQQWBPseudo<IIC_VLD4u>;

// ...with double-spaced registers:
// Same classes as above, selected with op11_8 = 0b0001.
def VLD4q8               : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16              : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32              : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD           : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD          : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD          : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD     : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16Pseudo_UPD    : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32Pseudo_UPD    : VLDQQQQWBPseudo<IIC_VLD4u>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo      : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q16oddPseudo     : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q32oddPseudo     : VLDQQQQPseudo<IIC_VLD4>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Lane-load pseudos come in three register widths (QPR, QQPR, QQQQPR), each
// with a plain and a writeback ("WB") flavour. Every one ties the vector
// input $src to the result $dst; the WB flavours additionally take an
// am6offset $offset and tie the address to the GPR result $wb.
class VLDQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst),
                  (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                  itin,
                  "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                       nohash_imm:$lane),
                  itin,
                  "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst),
                  (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                  itin,
                  "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                       nohash_imm:$lane),
                  itin,
                  "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQQQPR:$dst),
                  (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                  itin,
                  "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                       nohash_imm:$lane),
                  itin,
                  "$addr.addr = $wb, $src = $dst">;

// VLD1LN : Vector Load (single element to one lane)
//
// Selected for a vector_insert whose inserted scalar is an i32 produced by
// LoadOp from an addrmode6 address. $src is tied to $Vd.
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd),
              (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
              IIC_VLD1ln,
              "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
              "$src = $Vd",
              [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                            (i32 (LoadOp addrmode6:$Rn)),
                                            imm:$lane))]> {
  let DecoderMethod = "DecodeVLD1LN";
  let Rm = 0b1111;
}
// Same as VLD1LN, but the address operand is addrmode6oneL32.
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd),
              (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
              IIC_VLD1ln,
              "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
              "$src = $Vd",
              [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                            (i32 (LoadOp addrmode6oneL32:$Rn)),
                                            imm:$lane))]> {
  let DecoderMethod = "DecodeVLD1LN";
  let Rm = 0b1111;
}
// Q-register pseudo carrying the equivalent vector_insert-of-load pattern.
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

// The `?` entries in op7_4 are filled per definition by the explicit
// `let Inst{...}` assignments below (lane index, and Rn bits where used).
def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

// Floating-point lane inserts reuse the 32-bit integer lane loads.
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// ...with address register writeback:
// No selection pattern; adds the am6offset $Rm input and the GPR $wb result.
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$src, nohash_imm:$lane),
              IIC_VLD1lnu,
              "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm",
              "$src = $Vd, $Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
// Inst{5} and Inst{4} are both driven by Rn{4} here.
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;

// VLD2LN : Vector Load (single 2-element structure to one lane)
//
// Two DPR results, each tied to its own DPR input; Inst{4} comes from Rn{4}.
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2),
              (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
              IIC_VLD2ln,
              "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
              "$src1 = $Vd, $src2 = $dst2", []> {
  let DecoderMethod = "DecodeVLD2LN";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; }

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; }

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$src1, DPR:$src2, nohash_imm:$lane),
              IIC_VLD2lnu,
              "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
              "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD2LN";
  let Inst{4} = Rn{4};
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
//
// Three DPR results tied to three DPR inputs. Unlike VLD2LN/VLD4LN, this
// class assigns no Rn bit into Inst; bit 0 of op7_4 is 0 in every use below.
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
              (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
                   nohash_imm:$lane),
              IIC_VLD3ln,
              "vld3", Dt,
              "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
              "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
  let DecoderMethod = "DecodeVLD3LN";
  let Rm = 0b1111;
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8">  { let Inst{7-5} = lane{2-0}; }
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; }

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; }

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
              IIC_VLD3lnu,
              "vld3", Dt,
              "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
              "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
              []> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
//
// Four DPR results tied to four DPR inputs; Inst{4} comes from Rn{4}. The
// 32-bit definitions additionally set Inst{5} from Rn{5}.
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
              (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
                   nohash_imm:$lane),
              IIC_VLD4ln,
              "vld4", Dt,
              "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
              "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
  let DecoderMethod = "DecodeVLD4LN";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
    : NLdStLn<1, 0b10, op11_8, op7_4,
              (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
              (ins addrmode6:$Rn, am6offset:$Rm,
                   DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
                   nohash_imm:$lane),
              IIC_VLD4lnu,
              "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
              []> {
  let DecoderMethod = "DecodeVLD4LN";
  let Inst{4} = Rn{4};
}

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
//
// Selected for NEONvdup of an i32 produced by LoadOp; the result is a
// VecListOneDAllLanes operand.
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
              Operand AddrMode>
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListOneDAllLanes:$Vd),
            (ins AddrMode:$Rn),
            IIC_VLD1dup,
            "vld1", Dt, "$Vd, $Rn",
            "",
            [(set VecListOneDAllLanes:$Vd,
                  (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let DecoderMethod = "DecodeVLD1DupInstruction";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
                         addrmode6dupalignNone>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
                         addrmode6dupalign16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                         addrmode6dupalign32>;

// f32 splat-of-load reuses the 32-bit integer form.
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;

// As VLD1DUP, but the result is a VecListDPairAllLanes operand.
class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
               Operand AddrMode>
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListDPairAllLanes:$Vd),
            (ins AddrMode:$Rn),
            IIC_VLD1dup,
            "vld1", Dt, "$Vd, $Rn",
            "",
            [(set VecListDPairAllLanes:$Vd,
                  (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let DecoderMethod = "DecodeVLD1DupInstruction";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
                          addrmode6dupalignNone>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
                          addrmode6dupalign16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
                          addrmode6dupalign32>;

def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
// Each multiclass yields a `_fixed` form ("$Rn!", Rm = 0b1101) and a
// `_register` form that takes an explicit rGPR $Rm.
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn),
                     IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm),
                        IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let Inst{4} = Rn{4};
  }
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn),
                     IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm),
                        IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let Inst{4} = Rn{4};
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
//
// The register-list operand type and the address operand are parameters, so
// the same class serves both the adjacent and the spaced register lists.
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
    : NLdSt<1, 0b10, 0b1101, op7_4,
            (outs VdTy:$Vd),
            (ins AddrMode:$Rn),
            IIC_VLD2dup,
            "vld2", Dt, "$Vd, $Rn",
            "", []> {
  let DecoderMethod = "DecodeVLD2DupInstruction";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
                         addrmode6dupalign16>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
                         addrmode6dupalign32>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
                         addrmode6dupalign64>;

// HACK: VLD2DUPd8x2 must be changed at the same time as VLD2b8, or
// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
// Same VLD2DUP class, using the spaced all-lanes register list and bit 1 of
// op7_4 set.
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
                           addrmode6dupalign16>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                           addrmode6dupalign32>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                           addrmode6dupalign64>;

// ...with address register writeback:
// Produces a `_fixed` form ("$Rn!", Rm = 0b1101) and a `_register` form that
// takes an explicit rGPR $Rm; both tie the address to the GPR result $wb.
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn),
                     IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let DecoderMethod = "DecodeVLD2DupInstruction";
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm),
                        IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let DecoderMethod = "DecodeVLD2DupInstruction";
    let Inst{4} = Rn{4};
  }
}

defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
                                addrmode6dupalign64>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                                addrmode6dupalign64>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
//
// Three DPR results from an addrmode6dup address. Inst{4} is forced to 0
// rather than taken from Rn.
class VLD3DUP<bits<4> op7_4, string Dt>
    : NLdSt<1, 0b10, 0b1110, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6dup:$Rn),
            IIC_VLD3dup,
            "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn",
            "", []> {
  let DecoderMethod = "DecodeVLD3DupInstruction";
  let Rm = 0b1111;
  let Inst{4} = 0;
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

// ...with address register writeback:
// The address operand type is a parameter here; every use below passes
// addrmode6dupalign64.
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<1, 0b10, 0b1110, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins AddrMode:$Rn, am6offset:$Rm),
            IIC_VLD3dupu,
            "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD3DupInstruction";
  let Inst{4} = 0;
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
//
// Four DPR results from an addrmode6dup address; Inst{4} comes from Rn{4}.
// The 32-bit definitions leave bit 2 of op7_4 as `?` and set Inst{6} from
// Rn{5} instead.
class VLD4DUP<bits<4> op7_4, string Dt>
    : NLdSt<1, 0b10, 0b1111, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6dup:$Rn),
            IIC_VLD4dup,
            "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn",
            "", []> {
  let DecoderMethod = "DecodeVLD4DupInstruction";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> {
  let Inst{6} = Rn{5};
}

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> {
  let Inst{6} = Rn{5};
}

// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
    : NLdSt<1, 0b10, 0b1111, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6dup:$Rn, am6offset:$Rm),
            IIC_VLD4dupu,
            "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD4DupInstruction";
  let Inst{4} = Rn{4};
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> {
  let Inst{6} = Rn{5};
}

def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> {
  let Inst{6} = Rn{5};
}

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Naming: Q / QQ / QQQQ is the register class of $src.  "WB" variants add a
// GPR:$wb result tied to $addr.addr; "WBfixed" takes no offset operand and
// "WBregister" takes an rGPR:$offset instead of am6offset.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;

class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
// VST1D stores a VecListOneD and encodes Rn{4} in Inst{4}; VST1Q stores a
// VecListDPair and encodes Rn{5-4} in Inst{5-4}.  Both fix Rm to 0b1111.
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VST1d8  : VST1D<{0,0,0,?}, "8", addrmode6align64>;
def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;

def VST1q8  : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>;
def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
// Each multiclass produces a "_fixed" record ("$Rn!" syntax, Rm fixed to
// 0b1101) and a "_register" record (explicit rGPR:$Rm operand).
// NOTE(review): the store writeback records in this section use load
// itineraries (IIC_VLD1u, IIC_VLD1x2u, ...) whereas the non-writeback stores
// use IIC_VST* -- confirm this is intentional before changing scheduling.
multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VLD1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8", addrmode6align64>;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
// NOTE(review): VST1D3 encodes only Rn{4} while VST1D3WB encodes Rn{5-4},
// although both are instantiated with addrmode6align64 -- confirm.
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins AddrMode:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8", addrmode6align64>;
def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

// NOTE(review): the "_register" pseudo below derives from VSTQQWBPseudo
// (am6offset operand), not VSTQQWBregisterPseudo as the VST2q pseudos do.
def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins AddrMode:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;

// VST2 : Vector Store (multiple 2-element structures)
// op11_8, the register-list operand type (VdTy), the itinerary and the
// address operand class are all supplied by the individual defs.
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
            InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VST2d8   : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
                    addrmode6align64or128>;
def VST2d16  : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
                    addrmode6align64or128>;
def VST2d32  : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
                    addrmode6align64or128>;

def VST2q8   : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
                    addrmode6align64or128or256>;
def VST2q16  : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
                    addrmode6align64or128or256>;
def VST2q32  : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
                    addrmode6align64or128or256>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;

// ...with address register writeback:
// NOTE(review): both multiclasses below use IIC_VLD1u for every record,
// while the matching VST2q pseudos use IIC_VST2x2u -- confirm.
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VST2d8wb    : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair,
                           addrmode6align64or128>;
defm VST2d16wb   : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
                           addrmode6align64or128>;
defm VST2d32wb   : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
                           addrmode6align64or128>;

defm VST2q8wb    : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VST2q16wb   : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb   : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;

// ...with double-spaced registers
def VST2b8      : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
                       addrmode6align64or128>;
def VST2b16     : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
                       addrmode6align64or128>;
def VST2b32     : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
                       addrmode6align64or128>;
defm VST2b8wb   : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced,
                          addrmode6align64or128>;
defm VST2b16wb  : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                          addrmode6align64or128>;
defm VST2b32wb  : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                          addrmode6align64or128>;

// VST3 : Vector Store (multiple 3-element structures)
// The three source registers are separate DPR operands ($Vd, $src2, $src3).
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8   : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16  : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32  : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;

// ...with double-spaced registers:
def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// VST4 : Vector Store (multiple 4-element structures)
// The four source registers are separate DPR operands; Rn{5-4} is encoded
// in Inst{5-4}.
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8   : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16  : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32  : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;

// ...with double-spaced registers:
def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Each pseudo takes the address, a Q/QQ/QQQQ source register and a lane
// immediate; the "WB" variants add am6offset:$offset and a GPR:$wb result
// tied to $addr.addr.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
// Selected for (StoreOp (ExtractOp vec, lane), addr).  The lane number is
// placed into Inst{7-x} by each def because its width depends on the
// element size.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
// Q-register counterpart of VST1LN, expressed as a pseudo that carries the
// same selection pattern on a QPR source.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Floating-point lane stores reuse the 32-bit integer lane-store records.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// ...with address register writeback:
// The pattern matches a post-indexed store and defines $wb as its result.
// (The address-operand parameter is spelled "AdrMode" here, unlike the
// "AddrMode" used by VST1LN.)
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins AdrMode:$Rn, am6offset:$Rm,
           DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
          "\\{$Vd[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb",
          [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                  AdrMode:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
// No selection pattern is attached; Rn{4} is encoded in Inst{4} for every
// def derived from this class.
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
          IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
          "", []> {
  let Rm = 0b1111;
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
// (These defs repeat the Inst{4} assignment that the VST2LN class body
// already makes.)
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{4}   = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
          "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

// Writeback VST2LN records: each def places the lane number into the top
// bits of Inst{7-4}, using as many bits as the element size leaves for it.
def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;

// VST3LN : Vector Store (single 3-element structure from one lane)
// Unlike VST2LN, no Rn bits are copied into Inst here; every def passes an
// op7_4 whose lowest bit is 0.
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
// Adds am6offset:$Rm and a GPR:$wb result tied to $Rn.addr; Rm is therefore
// not fixed in the class body.
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VST3lnu, "vst3", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;

// VST4LN : Vector Store (single 4-element structure from one lane)
// Rn{4} is encoded in Inst{4} by the class; the 32-bit defs additionally
// encode Rn{5} in Inst{5}.
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
           nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
          "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VST4lnu, "vst4", Dt,
  "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Use vld1/vst1 for unaligned f64 load / store
// Little-endian targets pick the element size that matches the known
// alignment (16-bit, then 8-bit); big-endian targets use the 64-bit form for
// any non-word-aligned access.
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
          (VLD1d16 addrmode6:$addr)>,
      Requires<[IsLE]>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d16 addrmode6:$addr, DPR:$value)>,
      Requires<[IsLE]>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
          (VLD1d8 addrmode6:$addr)>,
      Requires<[IsLE]>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d8 addrmode6:$addr, DPR:$value)>,
      Requires<[IsLE]>;
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
          (VLD1d64 addrmode6:$addr)>,
      Requires<[IsBE]>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d64 addrmode6:$addr, DPR:$value)>,
      Requires<[IsBE]>;

// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
// load / store if it's legal.
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
          (VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q64 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
          (VLD1q32 addrmode6:$addr)>,
      Requires<[IsLE]>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q32 addrmode6:$addr, QPR:$value)>,
      Requires<[IsLE]>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VLD1q16 addrmode6:$addr)>,
      Requires<[IsLE]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, QPR:$value)>,
      Requires<[IsLE]>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VLD1q8 addrmode6:$addr)>,
      Requires<[IsLE]>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, QPR:$value)>,
      Requires<[IsLE]>;

//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// Each transform turns a lane immediate into a dsub_N index by dividing the
// lane number by a per-element-type constant (8, 4, 2, or 1 for f64).
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  const uint64_t SubRegIdx = ARM::dsub_0 + N->getZExtValue()/8;
  return CurDAG->getTargetConstant(SubRegIdx, SDLoc(N), MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  const uint64_t SubRegIdx = ARM::dsub_0 + N->getZExtValue()/4;
  return CurDAG->getTargetConstant(SubRegIdx, SDLoc(N), MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  const uint64_t SubRegIdx = ARM::dsub_0 + N->getZExtValue()/2;
  return CurDAG->getTargetConstant(SubRegIdx, SDLoc(N), MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  const uint64_t SubRegIdx = ARM::dsub_0 + N->getZExtValue();
  return CurDAG->getTargetConstant(SubRegIdx, SDLoc(N), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  const uint64_t SubRegIdx = ARM::ssub_0 + N->getZExtValue();
  return CurDAG->getTargetConstant(SubRegIdx, SDLoc(N), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// Only the low bits of the lane number are kept (mask 7, 3 or 1).
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  const uint64_t DLane = N->getZExtValue() & 7;
  return CurDAG->getTargetConstant(DLane, SDLoc(N), MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  const uint64_t DLane = N->getZExtValue() & 3;
  return CurDAG->getTargetConstant(DLane, SDLoc(N), MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  const uint64_t DLane = N->getZExtValue() & 1;
  return CurDAG->getTargetConstant(DLane, SDLoc(N), MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// N2VD: DPR source $Vm -> DPR result $Vd, fixed itinerary IIC_VUNAD.
// Selects (ResTy (OpNode (OpTy $Vm))).
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
           bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;

// N2VQ: QPR counterpart of N2VD (sixth N2V argument is 1, itinerary
// IIC_VUNAQ).
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
           bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Unlike N2VD/N2VQ the itinerary is a parameter and the operator is an
// SDPatternOperator.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as above, but not predicated.
// N2VDIntnp / N2VQIntnp: DPR and QPR forms built on N2Vnp.  The fifth N2Vnp
// argument is fixed to 0 for the D form and 1 for the Q form.
class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 0,
          (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 1,
          (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
// Note the parameter order is (op6, op7) while N2Vnp receives (op7, op6).
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8,
                 bit op6, bit op7,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as N2VQIntXnp but with Vd as a src register.
// $src is tied to $Vd and is the first operand of the matched intrinsic.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8,
                  bit op6, bit op7,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

// Narrow 2-register operations.
// N2VN: QPR source $Vm -> DPR result $Vd; selects (TyD (OpNode (TyQ $Vm))).
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
           bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd), (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd), (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
// DPR source $Vm -> QPR result $Vd; note the type parameters are (TyQ, TyD).
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
           bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd), (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd), (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are written: $Vd and $Vm are outputs tied to $src1 and
// $src2 respectively.  No selection pattern is attached.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7,
                  string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$Vd, DPR:$Vm), (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$Vd, QPR:$Vm), (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
         N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd,
               (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// D-register by-lane forms: the second operand is NEONvduplane of $Vm at
// $lane.  The 32-bit variant takes $Vm from DPR_VFP2, the 16-bit variant
// from DPR_8 with the fixed itinerary IIC_VMULi16D.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,
              "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Q-register counterparts of N3VD / N3VDX.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
         N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd,
               (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Q-register by-lane forms: result and $Vn are ResTy in QPR, while the lane
// source $Vm is an OpTy D register.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,
              "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Basic 3-register intrinsics, both double- and quad-register.
// N3VDInt: DPR x DPR -> DPR intrinsic with caller-supplied Format and
// itinerary.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// N3VDIntnp: D-register intrinsic built on N3Vnp.
// The Format parameter `f` is forwarded to N3Vnp, matching N3VQIntnp.  It was
// previously ignored in favour of a hard-coded N3RegFrm, so a caller passing
// any other Format silently got N3RegFrm.
// NOTE(review): `Commutable` is accepted but not referenced by this class.
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// D-register by-lane intrinsics: second operand is NEONvduplane of $Vm at
// $lane ($Vm from DPR_VFP2 for 32-bit lanes, DPR_8 for 16-bit lanes).
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty,
                  SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// N3VDIntSh: operand order is swapped relative to N3VDInt -- the ins list,
// the assembly string and the pattern all take $Vm first, then $Vn, and the
// two-operand alias ties $Vm (not $Vn) to $Vd.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// N3VQInt: Q-register counterpart of N3VDInt.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// N3VQIntnp: Q-register intrinsic built on N3Vnp; forwards `f`.
// NOTE(review): `Commutable` is accepted but not referenced by this class.
class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Same as N3VQIntnp but with Vd as a src register.
// $src is tied to $Vd and is the first of three intrinsic operands.
// NOTE(review): `Commutable` is accepted but not referenced by this class.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8,
                 bit op6, bit op4,
                 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$src),
                              (OpTy QPR:$Vn),
                              (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

// Q-register by-lane intrinsics: $Vn and the result are ResTy in QPR, the
// lane source $Vm is an OpTy D register duplicated via NEONvduplane.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}

// Q-register form with $Vm listed before $Vn in the ins list, the assembly
// string and the pattern; the two-operand alias ties $Vm to $Vd.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn),
        f, itin, OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd,
              (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let isCommutable = 0;
  let TwoOperandAliasConstraint = "$Vm = $Vd";
}

// Multiply-Add/Sub operations: double- and quad-register.
// Pattern shape: OpNode($src1, MulOp($Vn, $Vm)), with $src1 tied to $Vd.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (Ty (OpNode DPR:$src1,
                          (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

// By-lane D-register multiply-accumulate: the multiplier is NEONvduplane of
// $Vm at $lane.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType Ty,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane
                                               (Ty DPR_VFP2:$Vm),
                                               imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType Ty,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane
                                               (Ty DPR_8:$Vm),
                                               imm:$lane)))))))]>;

// Q-register multiply-accumulate and its by-lane variants.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (Ty (OpNode QPR:$src1,
                          (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (NEONvduplane
                                                        (OpTy DPR_VFP2:$Vm),
                                                        imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (NEONvduplane
                                                        (OpTy DPR_8:$Vm),
                                                        imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Pattern shape: OpNode($src1, IntOp($Vn, $Vm)), with $src1 tied to $Vd.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (Ty (OpNode DPR:$src1,
                          (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (Ty (OpNode QPR:$src1,
                          (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// IntOp takes three OpTy operands ($src1, $Vn, $Vm); $src1 is tied to $Vd.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (ResTy (IntOp (OpTy QPR:$src1),
                            (OpTy QPR:$Vn),
                            (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// QPR accumulator $src1 (tied to $Vd) combined via OpNode with a TyQ product
// of two TyD D-register operands.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (MulOp (TyD DPR:$Vn), (TyD DPR:$Vm)))))]>;

// By-lane variant (32-bit lanes): the second multiplicand is NEONvduplane of
// $Vm (DPR_VFP2) at $lane.
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (NEONvduplane
                                               (TyD DPR_VFP2:$Vm),
                                               imm:$lane))))))]>;
// 16-bit-lane long multiply-accumulate: $Vm comes from DPR_8 and is
// duplicated with NEONvduplane; $src1 is tied to $Vd.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD,
                    SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (NEONvduplane
                                               (TyD DPR_8:$Vm),
                                               imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Pattern shape: OpNode($src1, ExtOp(IntOp($Vn, $Vm))); the intrinsic result
// is TyD and is extended to TyQ before being combined with the accumulator.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                   bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD,
                   SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                              (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// IntOp($src1 : TyQ, $Vn : TyD, $Vm : TyD) -> TyQ, with $src1 tied to $Vd.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1),
                          (TyD DPR:$Vn),
                          (TyD DPR:$Vm))))]>;

// By-lane variants: the third intrinsic operand is NEONvduplane of $Vm at
// $lane (DPR_VFP2 for 32-bit lanes, DPR_8 for 16-bit lanes).
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Two QPR sources -> one DPR result; itinerary is fixed to IIC_VBINi4D.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, IIC_VBINi4D, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Two DPR sources -> one QPR result.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// By-lane long operations: second operand is NEONvduplane of $Vm at $lane.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                    imm:$lane)))))]>;
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                    imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Pattern shape: OpNode(ExtOp($Vn), ExtOp($Vm)); each D operand is extended
// to TyQ before the operation.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// Pattern shape: ExtOp(IntOp($Vn, $Vm)); the TyD intrinsic result is
// extended to TyQ.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD,
                 SDPatternOperator IntOp, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                      (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
// Built on N3Vnp with the format fixed to N3RegFrm (this class has no Format
// parameter).
// NOTE(review): `Commutable` is accepted but not referenced by this class.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8,
                bit op6, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin, OpcodeStr, Dt,
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// By-lane long intrinsics: second operand is NEONvduplane of $Vm at $lane.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Wide 3-register operations.
// QPR $Vn combined with an extended DPR $Vm; itinerary fixed to IIC_VSUBiD.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm),
        N3RegFrm, IIC_VSUBiD, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$Vn),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
// NOTE(review): the Q form below uses the same IIC_VSHLiD itinerary as the
// D form -- confirm that a Q-specific itinerary is not intended.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// D-register accumulate form: $src1 is tied to $Vd and passed to IntOp as the
// first (ResTy) operand; $Vm is the OpTy operand.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm),
        IIC_VPALiD, OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Q-register accumulate form, otherwise identical to N2VDPLInt2.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm),
        IIC_VPALiQ, OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
// OpNode is applied to $Vm and the i32 immediate $SIMM; the operand class of
// the immediate is supplied by the instantiation through ImmTy.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM),
           f, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM),
           f, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// D-register source, Q-register result.  The pattern matches the immediate
// directly as ImmTy:$SIMM.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM),
           N2RegVShLFrm, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;

// Narrow shift by immediate.
// Q-register source, D-register result; the immediate is matched as
// (i32 ImmTy:$SIMM).
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (OpNode (OpTy QPR:$Vm), (i32 ImmTy:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// The pattern is: $Vd = add $src1, (ShOp $Vm, $SIMM), with $src1 tied to $Vd.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
// ShOp takes three operands here: the tied $src1, $Vm and the immediate.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
// D-register convert: IntOp is applied to $Vm and the i32 immediate $SIMM,
// which is a neon_vcvt_imm32 operand.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Q-register convert, otherwise identical to N2VCvtD.
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
// source operand element sizes of 8, 16 and 32 bits:
// The integer variants append the element size to Dt; the floating-point
// variants use a fixed "f32"/"f16" data type, produce an integer vector of the
// same shape, and set instruction bit 10.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4,
                       string opc, string Dt, string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8
    : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
          opc, !strconcat(Dt, "8"), asm, "",
          [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16
    : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
          opc, !strconcat(Dt, "16"), asm, "",
          [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32
    : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
          opc, !strconcat(Dt, "32"), asm, "",
          [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  def v2f32
    : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
          opc, "f32", asm, "",
          [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v4f16
    : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
          opc, "f16", asm, "",
          [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
      Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8
    : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
          opc, !strconcat(Dt, "8"), asm, "",
          [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16
    : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
          opc, !strconcat(Dt, "16"), asm, "",
          [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32
    : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
          opc, !strconcat(Dt, "32"), asm, "",
          [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32
    : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
          opc, "f32", asm, "",
          [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v8f16
    : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
          opc, "f16", asm, "",
          [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
      Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}


// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8
    : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinD,
              OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16
    : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinD,
              OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32
    : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinD,
              OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8
    : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinQ,
              OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16
    : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinQ,
              OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32
    : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinQ,
              OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Narrowing 2-register vector operations,
// source operand element sizes of 16, 32 and 64 bits:
// Each def is named after its (narrow) result type; the Dt suffix is the
// source element size.
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8
    : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, itin,
           OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode>;
  def v4i16
    : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, itin,
           OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode>;
  def v2i32
    : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, itin,
           OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8
    : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, itin,
              OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, IntOp>;
  def v4i16
    : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, itin,
              OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, IntOp>;
  def v2i32
    : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, itin,
              OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
// source operand element sizes of 8, 16 and 32 bits:
// Each def is named after its (wide) result type; the Dt suffix is the source
// element size.
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16
    : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32
    : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64
    : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
// The 8- and 16-bit variants share the *16 itinerary; the 32-bit variants use
// the *32 itinerary.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8
    : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
           OpcodeStr, !strconcat(Dt, "8"),
           v8i8, v8i8, OpNode, Commutable>;
  def v4i16
    : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
           OpcodeStr, !strconcat(Dt, "16"),
           v4i16, v4i16, OpNode, Commutable>;
  def v2i32
    : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
           OpcodeStr, !strconcat(Dt, "32"),
           v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8
    : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
           OpcodeStr, !strconcat(Dt, "8"),
           v16i8, v16i8, OpNode, Commutable>;
  def v8i16
    : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
           OpcodeStr, !strconcat(Dt, "16"),
           v8i16, v8i16, OpNode, Commutable>;
  def v4i32
    : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
           OpcodeStr, !strconcat(Dt, "32"),
           v4i32, v4i32, OpNode, Commutable>;
}

// By-lane variants for 16- and 32-bit elements; the data type strings are
// fixed to "i16" and "i32".
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16
    : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32
    : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16
    : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32
    : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
             v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
// A single D and a single Q itinerary are used for every element size.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4,
            itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64
    : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
           OpcodeStr, !strconcat(Dt, "64"),
           v1i64, v1i64, OpNode, Commutable>;
  def v2i64
    : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
           OpcodeStr, !strconcat(Dt, "64"),
           v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16
    : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
              OpcodeStr, !strconcat(Dt, "16"),
              v4i16, v4i16, IntOp, Commutable>;
  def v2i32
    : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
              OpcodeStr, !strconcat(Dt, "32"),
              v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16
    : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
              OpcodeStr, !strconcat(Dt, "16"),
              v8i16, v8i16, IntOp, Commutable>;
  def v4i32
    : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
              OpcodeStr, !strconcat(Dt, "32"),
              v4i32, v4i32, IntOp, Commutable>;
}
// Same shape as N3VInt_HS, but built on the N3VDIntSh/N3VQIntSh classes and
// without a Commutable argument.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16
    : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                OpcodeStr, !strconcat(Dt, "16"),
                v4i16, v4i16, IntOp>;
  def v2i32
    : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                OpcodeStr, !strconcat(Dt, "32"),
                v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16
    : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                OpcodeStr, !strconcat(Dt, "16"),
                v8i16, v8i16, IntOp>;
  def v4i32
    : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                OpcodeStr, !strconcat(Dt, "32"),
                v4i32, v4i32, IntOp>;
}

// By-lane intrinsic variants for 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v4i16
    : N3VDIntSL16<0b01, op11_8, itinD16,
                  OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32
    : N3VDIntSL<0b10, op11_8, itinD32,
                OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16
    : N3VQIntSL16<0b01, op11_8, itinQ16,
                  OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32
    : N3VQIntSL<0b10, op11_8, itinQ32,
                OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The 8-bit variants reuse the 16-bit itineraries.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f,
              itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8
    : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
              OpcodeStr, !strconcat(Dt, "8"),
              v8i8, v8i8, IntOp, Commutable>;
  def v16i8
    : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
              OpcodeStr, !strconcat(Dt, "8"),
              v16i8, v16i8, IntOp, Commutable>;
}
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f,
                itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8
    : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                OpcodeStr, !strconcat(Dt, "8"),
                v8i8, v8i8, IntOp>;
  def v16i8
    : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                OpcodeStr, !strconcat(Dt, "8"),
                v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
// The 64-bit variants reuse the 32-bit itineraries.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f,
               itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64
    : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
              OpcodeStr, !strconcat(Dt, "64"),
              v1i64, v1i64, IntOp, Commutable>;
  def v2i64
    : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
              OpcodeStr, !strconcat(Dt, "64"),
              v2i64, v2i64, IntOp, Commutable>;
}
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f,
                 itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64
    : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                OpcodeStr, !strconcat(Dt, "64"),
                v1i64, v1i64, IntOp>;
  def v2i64
    : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                OpcodeStr, !strconcat(Dt, "64"),
                v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8
    : N3VNInt<op24, op23, 0b00, op11_8, op4,
              OpcodeStr, !strconcat(Dt, "16"),
              v8i8, v8i16, IntOp, Commutable>;
  def v4i16
    : N3VNInt<op24, op23, 0b01, op11_8, op4,
              OpcodeStr, !strconcat(Dt, "32"),
              v4i16, v4i32, IntOp, Commutable>;
  def v2i32
    : N3VNInt<op24, op23, 0b10, op11_8, op4,
              OpcodeStr, !strconcat(Dt, "64"),
              v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

// Each def is named after its (wide) result type; the Dt suffix is the source
// element size.
multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16
    : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
           OpcodeStr, !strconcat(Dt, "8"),
           v8i16, v8i8, OpNode, Commutable>;
  def v4i32
    : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
           OpcodeStr, !strconcat(Dt, "16"),
           v4i32, v4i16, OpNode, Commutable>;
  def v2i64
    : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
           OpcodeStr, !strconcat(Dt, "32"),
           v2i64, v2i32, OpNode, Commutable>;
}

// By-lane long operations; here the defs are named after the source type.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16
    : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
               !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32
    : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
             !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

// Long operations that take an additional extend node (ExtOp).
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16
    : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
              OpcodeStr, !strconcat(Dt, "8"),
              v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32
    : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
              OpcodeStr, !strconcat(Dt, "16"),
              v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64
    : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
              OpcodeStr, !strconcat(Dt, "32"),
              v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32
    : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
              OpcodeStr, !strconcat(Dt, "16"),
              v4i32, v4i16, IntOp, Commutable>;
  def v2i64
    : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
              OpcodeStr, !strconcat(Dt, "32"),
              v2i64, v2i32, IntOp, Commutable>;
}

// By-lane long intrinsics; the defs are named after the source type.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16
    : N3VLIntSL16<op24, 0b01, op11_8, itin,
                  OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32
    : N3VLIntSL<op24, 0b10, op11_8, itin,
                OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v8i16
    : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
              OpcodeStr, !strconcat(Dt, "8"),
              v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
// A single itinerary is used for all three element sizes.
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16
    : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                 OpcodeStr, !strconcat(Dt, "8"),
                 v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32
    : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                 OpcodeStr, !strconcat(Dt, "16"),
                 v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64
    : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                 OpcodeStr, !strconcat(Dt, "32"),
                 v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
// Each def is named after its (wide) result type.
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16
    : N3VW<op24, op23, 0b00, op11_8, op4,
           OpcodeStr, !strconcat(Dt, "8"),
           v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32
    : N3VW<op24, op23, 0b01, op11_8, op4,
           OpcodeStr, !strconcat(Dt, "16"),
           v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64
    : N3VW<op24, op23, 0b10, op11_8, op4,
           OpcodeStr, !strconcat(Dt, "32"),
           v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// The multiply node is always `mul`; OpNode is the second operation handed to
// the N3VDMulOp/N3VQMulOp classes.
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8
    : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16
    : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32
    : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8
    : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16
    : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32
    : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// By-lane multiply-op variants for 16- and 32-bit elements.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator ShOp> {
  def v4i16
    : N3VDMulOpSL16<0b01, op11_8, itinD16,
                    OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32
    : N3VDMulOpSL<0b10, op11_8, itinD32,
                  OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16
    : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                    OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                    mul, ShOp>;
  def v4i32
    : N3VQMulOpSL<0b10, op11_8, itinQ32,
                  OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                  mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8
    : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16
    : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32
    : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8
    : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16
    : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32
    : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
// element sizes of 16 and 32 bits:
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16
    : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
               OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32
    : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
               OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16
    : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
               OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32
    : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
               OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}

// element sizes of 8, 16 and 32 bits:
// Adds the 8-bit variants on top of N3VInt3_HS, reusing the 16-bit
// itineraries.
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp>
  : N3VInt3_HS<op24, op23, op11_8, op4,
               itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp> {
  // 64-bit vector types.
  def v8i8
    : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
               OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  // 128-bit vector types.
  def v16i8
    : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
               OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
}

// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt,
                         SDNode MulOp, SDNode OpNode> {
  def v8i16
    : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16,
                OpcodeStr, !strconcat(Dt, "8"),
                v8i16, v8i8, MulOp, OpNode>;
  def v4i32
    : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16,
                OpcodeStr, !strconcat(Dt, "16"),
                v4i32, v4i16, MulOp, OpNode>;
  def v2i64
    : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32,
                OpcodeStr, !strconcat(Dt, "32"),
                v2i64, v2i32, MulOp, OpNode>;
}

// By-lane long multiply-op variants; the itineraries are fixed to
// IIC_VMACi16D and IIC_VMACi32D.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8,
                          string OpcodeStr, string Dt,
                          SDNode MulOp, SDNode OpNode> {
  def v4i16
    : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D,
                    OpcodeStr, !strconcat(Dt, "16"),
                    v4i32, v4i16, MulOp, OpNode>;
  def v2i32
    : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D,
                  OpcodeStr, !strconcat(Dt, "32"),
                  v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v4i32
    : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
               OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64
    : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
               OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// By-lane long 3-argument intrinsics; the itineraries are fixed to
// IIC_VMACi16D and IIC_VMACi32D.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp> {
  def v4i16
    : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32
    : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32,
                OpcodeStr, Dt, IntOp> {
  def v8i16
    : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
               OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
// Long 3-argument intrinsic with explicit extend: one def per source element
// size (8, 16 and 32 bits), producing v8i16, v4i32 and v2i64 results.
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp,
                            SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Each def pairs a source vector type with a result type that has half as
// many elements of twice the width (e.g. v8i8 -> v4i16).
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Same type pairing as N2VPLInt_QHS, built on the N2V{D,Q}PLInt2 classes.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// In every shift multiclass below, the fixed high bits of imm6 (Inst{21-16},
// per the per-def comments) select the element size; the 64-bit forms leave
// imm6 unconstrained and pass 1 instead of 0 as the argument after op11_8.
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
// Right-shift counterpart of N2VShL_QHSD: uses N2RegVShRFrm and the
// per-element-size shr_imm{8,16,32,64} immediate operands.
// NOTE: baseOpc is not referenced by any def in this multiclass; it is kept
// so existing instantiations keep their argument list.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// Left variant: i32imm shift operand, N2RegVShLFrm format, NEONvsli node.
// The data-type suffix is the bare element size ("8", "16", ...), with no
// Dt prefix parameter.
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                             // imm6 = xxxxxx
}
// Right variant: shr_imm{8,16,32,64} operands, N2RegVShRFrm format,
// NEONvsri node.
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits:
// Defs are named after the (wider) result type; the immediate operand class
// (imm1_7 / imm1_15 / imm1_31) follows the source element size.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
               OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
               OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits:
// Defs are named after the (narrower) result type, while the data-type
// suffix is the source element size ("16", "32", "64").
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                    OpcodeStr, !strconcat(Dt, "16"),
                    v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.
4114 4115// VADD : Vector Add (integer and floating-point) 4116defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i", 4117 add, 1>; 4118def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", 4119 v2f32, v2f32, fadd, 1>; 4120def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", 4121 v4f32, v4f32, fadd, 1>; 4122def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16", 4123 v4f16, v4f16, fadd, 1>, 4124 Requires<[HasNEON,HasFullFP16]>; 4125def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16", 4126 v8f16, v8f16, fadd, 1>, 4127 Requires<[HasNEON,HasFullFP16]>; 4128// VADDL : Vector Add Long (Q = D + D) 4129defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, 4130 "vaddl", "s", add, sext, 1>; 4131defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, 4132 "vaddl", "u", add, zext, 1>; 4133// VADDW : Vector Add Wide (Q = Q + D) 4134defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; 4135defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; 4136// VHADD : Vector Halving Add 4137defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, 4138 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4139 "vhadd", "s", int_arm_neon_vhadds, 1>; 4140defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm, 4141 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4142 "vhadd", "u", int_arm_neon_vhaddu, 1>; 4143// VRHADD : Vector Rounding Halving Add 4144defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm, 4145 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4146 "vrhadd", "s", int_arm_neon_vrhadds, 1>; 4147defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm, 4148 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4149 "vrhadd", "u", int_arm_neon_vrhaddu, 1>; 4150// VQADD : Vector Saturating Add 4151defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm, 4152 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4153 "vqadd", "s", int_arm_neon_vqadds, 1>; 4154defm VQADDu : N3VInt_QHSD<1, 
0, 0b0000, 1, N3RegFrm, 4155 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4156 "vqadd", "u", int_arm_neon_vqaddu, 1>; 4157// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) 4158defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; 4159// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) 4160defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", 4161 int_arm_neon_vraddhn, 1>; 4162 4163def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), 4164 (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>; 4165def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), 4166 (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>; 4167def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), 4168 (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>; 4169 4170// Vector Multiply Operations. 4171 4172// VMUL : Vector Multiply (integer, polynomial and floating-point) 4173defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, 4174 IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; 4175def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul", 4176 "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; 4177def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul", 4178 "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; 4179def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", 4180 v2f32, v2f32, fmul, 1>; 4181def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", 4182 v4f32, v4f32, fmul, 1>; 4183def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16", 4184 v4f16, v4f16, fmul, 1>, 4185 Requires<[HasNEON,HasFullFP16]>; 4186def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16", 4187 v8f16, v8f16, fmul, 1>, 4188 Requires<[HasNEON,HasFullFP16]>; 4189defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; 4190def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; 4191def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, 4192 v2f32, fmul>; 4193def 
VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>, 4194 Requires<[HasNEON,HasFullFP16]>; 4195def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16, 4196 v4f16, fmul>, 4197 Requires<[HasNEON,HasFullFP16]>; 4198 4199def : Pat<(v8i16 (mul (v8i16 QPR:$src1), 4200 (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), 4201 (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), 4202 (v4i16 (EXTRACT_SUBREG QPR:$src2, 4203 (DSubReg_i16_reg imm:$lane))), 4204 (SubReg_i16_lane imm:$lane)))>; 4205def : Pat<(v4i32 (mul (v4i32 QPR:$src1), 4206 (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), 4207 (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), 4208 (v2i32 (EXTRACT_SUBREG QPR:$src2, 4209 (DSubReg_i32_reg imm:$lane))), 4210 (SubReg_i32_lane imm:$lane)))>; 4211def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), 4212 (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))), 4213 (v4f32 (VMULslfq (v4f32 QPR:$src1), 4214 (v2f32 (EXTRACT_SUBREG QPR:$src2, 4215 (DSubReg_i32_reg imm:$lane))), 4216 (SubReg_i32_lane imm:$lane)))>; 4217 4218 4219def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), 4220 (VMULslfd DPR:$Rn, 4221 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), 4222 (i32 0))>; 4223def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), 4224 (VMULslfq QPR:$Rn, 4225 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), 4226 (i32 0))>; 4227 4228 4229// VQDMULH : Vector Saturating Doubling Multiply Returning High Half 4230defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, 4231 IIC_VMULi16Q, IIC_VMULi32Q, 4232 "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; 4233defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, 4234 IIC_VMULi16Q, IIC_VMULi32Q, 4235 "vqdmulh", "s", int_arm_neon_vqdmulh>; 4236def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), 4237 (v8i16 (NEONvduplane (v8i16 QPR:$src2), 4238 imm:$lane)))), 4239 (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), 4240 (v4i16 (EXTRACT_SUBREG QPR:$src2, 4241 (DSubReg_i16_reg imm:$lane))), 4242 
(SubReg_i16_lane imm:$lane)))>; 4243def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), 4244 (v4i32 (NEONvduplane (v4i32 QPR:$src2), 4245 imm:$lane)))), 4246 (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), 4247 (v2i32 (EXTRACT_SUBREG QPR:$src2, 4248 (DSubReg_i32_reg imm:$lane))), 4249 (SubReg_i32_lane imm:$lane)))>; 4250 4251// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half 4252defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm, 4253 IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q, 4254 "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; 4255defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, 4256 IIC_VMULi16Q, IIC_VMULi32Q, 4257 "vqrdmulh", "s", int_arm_neon_vqrdmulh>; 4258def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), 4259 (v8i16 (NEONvduplane (v8i16 QPR:$src2), 4260 imm:$lane)))), 4261 (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), 4262 (v4i16 (EXTRACT_SUBREG QPR:$src2, 4263 (DSubReg_i16_reg imm:$lane))), 4264 (SubReg_i16_lane imm:$lane)))>; 4265def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), 4266 (v4i32 (NEONvduplane (v4i32 QPR:$src2), 4267 imm:$lane)))), 4268 (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), 4269 (v2i32 (EXTRACT_SUBREG QPR:$src2, 4270 (DSubReg_i32_reg imm:$lane))), 4271 (SubReg_i32_lane imm:$lane)))>; 4272 4273// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) 4274let PostEncoderMethod = "NEONThumb2DataIPostEncoder", 4275 DecoderNamespace = "NEONData" in { 4276 defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, 4277 "vmull", "s", NEONvmulls, 1>; 4278 defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, 4279 "vmull", "u", NEONvmullu, 1>; 4280 def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", 4281 v8i16, v8i8, int_arm_neon_vmullp, 1>; 4282 def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary, 4283 "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>, 4284 Requires<[HasV8, HasCrypto]>; 4285} 4286defm VMULLsls : 
N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; 4287defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; 4288 4289// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) 4290defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, 4291 "vqdmull", "s", int_arm_neon_vqdmull, 1>; 4292defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, 4293 "vqdmull", "s", int_arm_neon_vqdmull>; 4294 4295// Vector Multiply-Accumulate and Multiply-Subtract Operations. 4296 4297// VMLA : Vector Multiply Accumulate (integer and floating-point) 4298defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4299 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4300def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", 4301 v2f32, fmul_su, fadd_mlx>, 4302 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4303def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", 4304 v4f32, fmul_su, fadd_mlx>, 4305 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4306def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16", 4307 v4f16, fmul_su, fadd_mlx>, 4308 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; 4309def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16", 4310 v8f16, fmul_su, fadd_mlx>, 4311 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; 4312defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, 4313 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4314def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", 4315 v2f32, fmul_su, fadd_mlx>, 4316 Requires<[HasNEON, UseFPVMLx]>; 4317def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", 4318 v4f32, v2f32, fmul_su, fadd_mlx>, 4319 Requires<[HasNEON, UseFPVMLx]>; 4320def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16", 4321 v4f16, fmul, fadd>, 4322 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4323def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, 
IIC_VMACQ, "vmla", "f16", 4324 v8f16, v4f16, fmul, fadd>, 4325 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4326 4327def : Pat<(v8i16 (add (v8i16 QPR:$src1), 4328 (mul (v8i16 QPR:$src2), 4329 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), 4330 (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4331 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4332 (DSubReg_i16_reg imm:$lane))), 4333 (SubReg_i16_lane imm:$lane)))>; 4334 4335def : Pat<(v4i32 (add (v4i32 QPR:$src1), 4336 (mul (v4i32 QPR:$src2), 4337 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), 4338 (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4339 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4340 (DSubReg_i32_reg imm:$lane))), 4341 (SubReg_i32_lane imm:$lane)))>; 4342 4343def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1), 4344 (fmul_su (v4f32 QPR:$src2), 4345 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), 4346 (v4f32 (VMLAslfq (v4f32 QPR:$src1), 4347 (v4f32 QPR:$src2), 4348 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4349 (DSubReg_i32_reg imm:$lane))), 4350 (SubReg_i32_lane imm:$lane)))>, 4351 Requires<[HasNEON, UseFPVMLx]>; 4352 4353// VMLAL : Vector Multiply Accumulate Long (Q += D * D) 4354defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4355 "vmlal", "s", NEONvmulls, add>; 4356defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4357 "vmlal", "u", NEONvmullu, add>; 4358 4359defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; 4360defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; 4361 4362let Predicates = [HasNEON, HasV8_1a] in { 4363 // v8.1a Neon Rounding Double Multiply-Op vector operations, 4364 // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long 4365 // (Q += D * D) 4366 defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, 4367 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", 4368 null_frag>; 4369 def : Pat<(v4i16 (int_arm_neon_vqadds 4370 (v4i16 DPR:$src1), 4371 (v4i16 
(int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), 4372 (v4i16 DPR:$Vm))))), 4373 (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4374 def : Pat<(v2i32 (int_arm_neon_vqadds 4375 (v2i32 DPR:$src1), 4376 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), 4377 (v2i32 DPR:$Vm))))), 4378 (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4379 def : Pat<(v8i16 (int_arm_neon_vqadds 4380 (v8i16 QPR:$src1), 4381 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), 4382 (v8i16 QPR:$Vm))))), 4383 (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4384 def : Pat<(v4i32 (int_arm_neon_vqadds 4385 (v4i32 QPR:$src1), 4386 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), 4387 (v4i32 QPR:$Vm))))), 4388 (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4389 4390 defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, 4391 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", 4392 null_frag>; 4393 def : Pat<(v4i16 (int_arm_neon_vqadds 4394 (v4i16 DPR:$src1), 4395 (v4i16 (int_arm_neon_vqrdmulh 4396 (v4i16 DPR:$Vn), 4397 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), 4398 imm:$lane)))))), 4399 (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, 4400 imm:$lane))>; 4401 def : Pat<(v2i32 (int_arm_neon_vqadds 4402 (v2i32 DPR:$src1), 4403 (v2i32 (int_arm_neon_vqrdmulh 4404 (v2i32 DPR:$Vn), 4405 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), 4406 imm:$lane)))))), 4407 (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4408 imm:$lane))>; 4409 def : Pat<(v8i16 (int_arm_neon_vqadds 4410 (v8i16 QPR:$src1), 4411 (v8i16 (int_arm_neon_vqrdmulh 4412 (v8i16 QPR:$src2), 4413 (v8i16 (NEONvduplane (v8i16 QPR:$src3), 4414 imm:$lane)))))), 4415 (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1), 4416 (v8i16 QPR:$src2), 4417 (v4i16 (EXTRACT_SUBREG 4418 QPR:$src3, 4419 (DSubReg_i16_reg imm:$lane))), 4420 (SubReg_i16_lane imm:$lane)))>; 4421 def : Pat<(v4i32 (int_arm_neon_vqadds 4422 (v4i32 QPR:$src1), 4423 (v4i32 (int_arm_neon_vqrdmulh 4424 (v4i32 QPR:$src2), 4425 (v4i32 (NEONvduplane (v4i32 QPR:$src3), 4426 
imm:$lane)))))), 4427 (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), 4428 (v4i32 QPR:$src2), 4429 (v2i32 (EXTRACT_SUBREG 4430 QPR:$src3, 4431 (DSubReg_i32_reg imm:$lane))), 4432 (SubReg_i32_lane imm:$lane)))>; 4433 4434 // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long 4435 // (Q -= D * D) 4436 defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, 4437 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4438 null_frag>; 4439 def : Pat<(v4i16 (int_arm_neon_vqsubs 4440 (v4i16 DPR:$src1), 4441 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), 4442 (v4i16 DPR:$Vm))))), 4443 (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4444 def : Pat<(v2i32 (int_arm_neon_vqsubs 4445 (v2i32 DPR:$src1), 4446 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), 4447 (v2i32 DPR:$Vm))))), 4448 (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4449 def : Pat<(v8i16 (int_arm_neon_vqsubs 4450 (v8i16 QPR:$src1), 4451 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), 4452 (v8i16 QPR:$Vm))))), 4453 (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4454 def : Pat<(v4i32 (int_arm_neon_vqsubs 4455 (v4i32 QPR:$src1), 4456 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), 4457 (v4i32 QPR:$Vm))))), 4458 (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4459 4460 defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, 4461 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4462 null_frag>; 4463 def : Pat<(v4i16 (int_arm_neon_vqsubs 4464 (v4i16 DPR:$src1), 4465 (v4i16 (int_arm_neon_vqrdmulh 4466 (v4i16 DPR:$Vn), 4467 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), 4468 imm:$lane)))))), 4469 (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; 4470 def : Pat<(v2i32 (int_arm_neon_vqsubs 4471 (v2i32 DPR:$src1), 4472 (v2i32 (int_arm_neon_vqrdmulh 4473 (v2i32 DPR:$Vn), 4474 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), 4475 imm:$lane)))))), 4476 (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4477 imm:$lane))>; 4478 def : Pat<(v8i16 (int_arm_neon_vqsubs 
4479 (v8i16 QPR:$src1), 4480 (v8i16 (int_arm_neon_vqrdmulh 4481 (v8i16 QPR:$src2), 4482 (v8i16 (NEONvduplane (v8i16 QPR:$src3), 4483 imm:$lane)))))), 4484 (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), 4485 (v8i16 QPR:$src2), 4486 (v4i16 (EXTRACT_SUBREG 4487 QPR:$src3, 4488 (DSubReg_i16_reg imm:$lane))), 4489 (SubReg_i16_lane imm:$lane)))>; 4490 def : Pat<(v4i32 (int_arm_neon_vqsubs 4491 (v4i32 QPR:$src1), 4492 (v4i32 (int_arm_neon_vqrdmulh 4493 (v4i32 QPR:$src2), 4494 (v4i32 (NEONvduplane (v4i32 QPR:$src3), 4495 imm:$lane)))))), 4496 (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), 4497 (v4i32 QPR:$src2), 4498 (v2i32 (EXTRACT_SUBREG 4499 QPR:$src3, 4500 (DSubReg_i32_reg imm:$lane))), 4501 (SubReg_i32_lane imm:$lane)))>; 4502} 4503// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) 4504defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4505 "vqdmlal", "s", null_frag>; 4506defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; 4507 4508def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), 4509 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4510 (v4i16 DPR:$Vm))))), 4511 (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4512def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), 4513 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4514 (v2i32 DPR:$Vm))))), 4515 (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4516def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), 4517 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4518 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), 4519 imm:$lane)))))), 4520 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4521def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), 4522 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4523 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), 4524 imm:$lane)))))), 4525 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4526 4527// VMLS : Vector Multiply Subtract (integer and floating-point) 4528defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, 
IIC_VMACi32D, 4529 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4530def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", 4531 v2f32, fmul_su, fsub_mlx>, 4532 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4533def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", 4534 v4f32, fmul_su, fsub_mlx>, 4535 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4536def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16", 4537 v4f16, fmul, fsub>, 4538 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; 4539def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16", 4540 v8f16, fmul, fsub>, 4541 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; 4542defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, 4543 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4544def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", 4545 v2f32, fmul_su, fsub_mlx>, 4546 Requires<[HasNEON, UseFPVMLx]>; 4547def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", 4548 v4f32, v2f32, fmul_su, fsub_mlx>, 4549 Requires<[HasNEON, UseFPVMLx]>; 4550def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16", 4551 v4f16, fmul, fsub>, 4552 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4553def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16", 4554 v8f16, v4f16, fmul, fsub>, 4555 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4556 4557def : Pat<(v8i16 (sub (v8i16 QPR:$src1), 4558 (mul (v8i16 QPR:$src2), 4559 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), 4560 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4561 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4562 (DSubReg_i16_reg imm:$lane))), 4563 (SubReg_i16_lane imm:$lane)))>; 4564 4565def : Pat<(v4i32 (sub (v4i32 QPR:$src1), 4566 (mul (v4i32 QPR:$src2), 4567 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), 4568 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4569 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4570 
(DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

// Floating-point variant of the Q-register by-lane multiply-subtract.
def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                           (fmul_su (v4f32 QPR:$src2),
                                    (v4f32 (NEONvduplane (v4f32 QPR:$src3),
                                                         imm:$lane))))),
          (v4f32 (VMLSslfq
                   (v4f32 QPR:$src1),
                   (v4f32 QPR:$src2),
                   (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLMulOp_QHS<0, 1, 0b1010, 0,
                              IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu   : N3VLMulOp_QHS<1, 1, 0b1010, 0,
                              IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
// Records are declared with null_frag; the patterns below do the selection.
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0,
                            IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", null_frag>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;

// vqsubs(acc, vqdmull(n, m)) -> VQDMLSL
def : Pat<(v4i32 (int_arm_neon_vqsubs
                   (v4i32 QPR:$src1),
                   (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                (v4i16 DPR:$Vm))))),
          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs
                   (v2i64 QPR:$src1),
                   (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                (v2i32 DPR:$Vm))))),
          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
// Same, with the second multiplicand being a duplicated lane.
def : Pat<(v4i32 (int_arm_neon_vqsubs
                   (v4i32 QPR:$src1),
                   (v4i32 (int_arm_neon_vqdmull
                            (v4i16 DPR:$Vn),
                            (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                 imm:$lane)))))),
          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs
                   (v2i64 QPR:$src1),
                   (v2i64 (int_arm_neon_vqdmull
                            (v2i32 DPR:$Vn),
                            (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                 imm:$lane)))))),
          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn,
DPR_VFP2:$Vm, imm:$lane)>;

// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
// The f32 forms require VFP4, the f16 forms require full FP16; all of them
// are only selected from separate mul/add nodes when UseFusedMAC holds.
def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def  VFMAhd   : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
                          v4f16, fmul, fadd>,
                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

def  VFMAhq   : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
                          v8f16, fmul, fadd>,
                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

//   Fused Vector Multiply Subtract (floating-point)
def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def  VFMShd   : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
                          v4f16, fmul, fsub>,
                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
def  VFMShq   : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
                          v8f16, fmul, fsub>,
                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

// Match @llvm.fma.* intrinsics
// A Requires<> on a pattern replaces its predicate list, so HasNEON must be
// listed explicitly: the selected VFMA*/VFMS* instructions are themselves
// predicated on HasNEON, and the patterns must not be looser than the
// instructions they produce.
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;
// fma(-n, m, acc) == acc - n*m -> VFMS
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;

// Vector
// Subtract Operations.

//   VSUB     : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND,
                     "vsub", "f32", v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ,
                     "vsub", "f32", v4f32, v4f32, fsub, 0>;
def  VSUBhd   : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND,
                     "vsub", "f16", v4f16, v4f16, fsub, 0>,
                Requires<[HasNEON,HasFullFP16]>;
def  VSUBhq   : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ,
                     "vsub", "f16", v8f16, v8f16, fsub, 0>,
                Requires<[HasNEON,HasFullFP16]>;
//   VSUBL    : Vector Subtract Long (Q = D - D)
defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", sub, sext, 0>;
defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", sub, zext, 0>;
//   VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
//   VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D,
                           IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D,
                           IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
//   VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D,
                            IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D,
                            IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
//   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
// Declared with null_frag; selected by explicit patterns elsewhere.
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
//   VRSUBHN  : Vector Rounding Subtract and
// Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                            int_arm_neon_vrsubhn, 0>;

// trunc((a - b) >>u half-width) is exactly "subtract, keep the high half".
def : Pat<(v8i8  (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;

// Vector Comparisons.

//   VCEQ     : Vector Compare Equal
// Floating-point compares produce an integer vector of the same lane width.
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vceq", "i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND,
                     "vceq", "f32", v2i32, v2f32, NEONvceq, 1>;
def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ,
                     "vceq", "f32", v4i32, v4f32, NEONvceq, 1>;
def  VCEQhd   : N3VD<0,0,0b01,0b1110,0, IIC_VBIND,
                     "vceq", "f16", v4i16, v4f16, NEONvceq, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCEQhq   : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ,
                     "vceq", "f16", v8i16, v8f16, NEONvceq, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// Compare against #0.
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                            "$Vd, $Vm, #0", NEONvceqz>;

//   VCGE     : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vcge", "s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND,
                     "vcge", "f32", v2i32, v2f32, NEONvcge, 0>;
def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ,
                     "vcge", "f32", v4i32, v4f32, NEONvcge, 0>;
def  VCGEhd   : N3VD<1,0,0b01,0b1110,0, IIC_VBIND,
                     "vcge", "f16", v4i16, v4f16, NEONvcge, 0>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCGEhq   :
N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ,
                     "vcge", "f16", v8i16, v8f16, NEONvcge, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// Signed compares against #0 (>= and <=).
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                            "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                            "$Vd, $Vm, #0", NEONvclez>;
}

//   VCGT     : Vector Compare Greater Than
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vcgt", "s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND,
                     "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ,
                     "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>;
def  VCGThd   : N3VD<1,0,0b11,0b1110,0, IIC_VBIND,
                     "vcgt", "f16", v4i16, v4f16, NEONvcgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCGThq   : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ,
                     "vcgt", "f16", v8i16, v8f16, NEONvcgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// Signed compares against #0 (> and <).
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                            "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                            "$Vd, $Vm, #0", NEONvcltz>;
}

//   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEfd  : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND,
                        "vacge", "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
def  VACGEfq  : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                        "vacge", "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
def  VACGEhd  : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND,
                        "vacge", "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
                Requires<[HasNEON, HasFullFP16]>;
def  VACGEhq  : N3VQInt<1, 0, 0b01, 0b1110,
1, N3RegFrm, IIC_VBINQ, "vacge",
                        "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
                Requires<[HasNEON, HasFullFP16]>;
//   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
// Like every other NEON compare in this file, the result is an integer mask
// vector with the same lane width as the floating-point operands.
def  VACGTfd  : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                        "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
def  VACGTfq  : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                        "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
def  VACGThd  : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                        "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;
// Result type is v8i16 (was v8f16): matches VACGThd (v4i16), VACGEhq (v8i16)
// and VCGThq (v8i16), which all return an integer mask for f16 operands.
def  VACGThq  : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                        "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
                Requires<[HasNEON, HasFullFP16]>;
//   VTST     : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// vaclt/vacle are assembler aliases for vacgt/vacge with the two source
// operands swapped ($Vm and $Vn trade places in the result instruction).
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
}

// Two-operand spellings of the same aliases: $Vd doubles as a source.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
4823 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 4824def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", 4825 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 4826def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", 4827 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 4828let Predicates = [HasNEON, HasFullFP16] in { 4829def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm", 4830 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 4831def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm", 4832 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 4833def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm", 4834 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 4835def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm", 4836 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 4837} 4838 4839// Vector Bitwise Operations. 4840 4841def vnotd : PatFrag<(ops node:$in), 4842 (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>; 4843def vnotq : PatFrag<(ops node:$in), 4844 (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>; 4845 4846 4847// VAND : Vector Bitwise AND 4848def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", 4849 v2i32, v2i32, and, 1>; 4850def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", 4851 v4i32, v4i32, and, 1>; 4852 4853// VEOR : Vector Bitwise Exclusive OR 4854def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", 4855 v2i32, v2i32, xor, 1>; 4856def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", 4857 v4i32, v4i32, xor, 1>; 4858 4859// VORR : Vector Bitwise OR 4860def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", 4861 v2i32, v2i32, or, 1>; 4862def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", 4863 v4i32, v4i32, or, 1>; 4864 4865def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, 4866 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), 4867 IIC_VMOVImm, 4868 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", 4869 [(set DPR:$Vd, 4870 (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { 4871 let Inst{9} = SIMM{9}; 4872} 4873 4874def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 
0, 1,
                          (outs DPR:$Vd),
                          (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd),
                          (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd),
                          (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


//   VBIC     : Vector Bitwise Bit Clear (AND NOT)
// Register form: Vd = Vn & ~Vm.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                                (vnotd DPR:$Vm))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                                (vnotq QPR:$Vm))))]>;
}

// Immediate forms; $src is tied to $Vd.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd),
                          (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd),
                          (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd),
                          (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd),
                          (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

//   VORN     : Vector Bitwise OR NOT
// Vd = Vn | ~Vm.
def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                               (vnotd DPR:$Vm))))]>;
def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                               (vnotq QPR:$Vm))))]>;

//   VMVN     : Vector Bitwise NOT (Immediate)

// These take only an immediate, so they are marked rematerializable.
let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1,
                         (outs DPR:$Vd), (ins nImmSplatI16:$SIMM),
                         IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1,
                         (outs QPR:$Vd), (ins nImmSplatI16:$SIMM),
                         IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1,
                         (outs DPR:$Vd), (ins nImmVMOVI32:$SIMM),
                         IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0,
1, 1, 1,
                         (outs QPR:$Vd), (ins nImmVMOVI32:$SIMM),
                         IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

//   VMVN     : Vector Bitwise NOT
// NOTE(review): VMVNq uses the same itinerary (IIC_VSUBiD) as VMVNd --
// confirm whether a Q-register itinerary was intended.
def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

//   VBSL     : Vector Bitwise Select
// $src1 (the select mask) is tied to the destination register.
def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1,
                     (outs DPR:$Vd),
                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VCNTiD,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set DPR:$Vd,
                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
// The vbsl intrinsic at every 64-bit vector type maps to the one instruction.
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

// Open-coded select: (Vn & Vd) | (Vm & ~Vd).
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn,
DPR:$Vm)>,
        Requires<[HasNEON]>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

// Q-register bitwise select; $src1 (the mask) is tied to the destination.
def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1,
                     (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VCNTiQ,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set QPR:$Vd,
                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

// The vbsl intrinsic at every 128-bit vector type maps to the one instruction.
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

// Open-coded select: (Vn & Vd) | (Vm & ~Vd).
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

//   VBIF     : Vector Bitwise Insert if False
//              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
// No selection pattern is attached (empty pattern list).
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd),
                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

//   VBIT     : Vector Bitwise Insert if True
//              like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$Vd),
                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

//   VABD     : Vector Absolute Difference
// The floating-point forms reuse the int_arm_neon_vabds intrinsic.
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D,
                           IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D,
                           IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
                Requires<[HasNEON, HasFullFP16]>;

//   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
// Both signed and unsigned forms widen the (non-negative) result with zext.
defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// (zext(a) - zext(b)) >>s shift
def abd_shr :
  PatFrag<(ops node:$in1, node:$in2, node:$shift),
          (NEONvshrs (sub (zext node:$in1),
                          (zext node:$in2)), (i32 $shift))>;

// With d = zext(a) - zext(b) and s = d >>s (bits-1), these match the
// expanded form  s ^ (d + s)  and select the unsigned VABDL directly.
def : Pat<(xor (v4i32 (bitconvert
                        (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15)))),
               (v4i32 (bitconvert
                        (v8i16 (add (sub (zext (v8i8 DPR:$opA)),
                                         (zext (v8i8 DPR:$opB))),
                                    (v8i16 (abd_shr (v8i8 DPR:$opA),
                                                    (v8i8 DPR:$opB), 15))))))),
          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;

def : Pat<(xor (v4i32 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)),
               (v4i32 (add (sub (zext (v4i16 DPR:$opA)),
                                (zext (v4i16 DPR:$opB))),
                           (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)))),
          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;

def : Pat<(xor (v4i32 (bitconvert (v2i64
(abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
               (v4i32 (bitconvert
                        (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
                                         (zext (v2i32 DPR:$opB))),
                                    (abd_shr (v2i32 DPR:$opA),
                                             (v2i32 DPR:$opB), 63)))))),
          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;

//   VABA     : Vector Absolute Difference and Accumulate
defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "u", int_arm_neon_vabdu, add>;

//   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

//   VMAX     : Vector Maximum
// Integer forms select smax/umax; floating-point forms select fmaxnan.
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D,
                           IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "s", smax, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D,
                           IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "u", umax, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f32", v2f32, v2f32, fmaxnan, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f32", v4f32, v4f32, fmaxnan, 1>;
def  VMAXhd   : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f16", v4f16, v4f16, fmaxnan, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f16", v8f16, v8f16, fmaxnan, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VMAXNM
// ARMv8 NEON max-number forms (fmaxnum); no itinerary is attached.
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMAXNMNDf  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                             N3RegFrm, NoItinerary, "vmaxnm", "f32",
                             v2f32, v2f32, fmaxnum, 1>,
5206 Requires<[HasV8, HasNEON]>; 5207 def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, 5208 N3RegFrm, NoItinerary, "vmaxnm", "f32", 5209 v4f32, v4f32, fmaxnum, 1>, 5210 Requires<[HasV8, HasNEON]>; 5211 def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1, 5212 N3RegFrm, NoItinerary, "vmaxnm", "f16", 5213 v4f16, v4f16, fmaxnum, 1>, 5214 Requires<[HasV8, HasNEON, HasFullFP16]>; 5215 def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1, 5216 N3RegFrm, NoItinerary, "vmaxnm", "f16", 5217 v8f16, v8f16, fmaxnum, 1>, 5218 Requires<[HasV8, HasNEON, HasFullFP16]>; 5219} 5220 5221// VMIN : Vector Minimum 5222defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, 5223 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5224 "vmin", "s", smin, 1>; 5225defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, 5226 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5227 "vmin", "u", umin, 1>; 5228def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, 5229 "vmin", "f32", 5230 v2f32, v2f32, fminnan, 1>; 5231def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, 5232 "vmin", "f32", 5233 v4f32, v4f32, fminnan, 1>; 5234def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND, 5235 "vmin", "f16", 5236 v4f16, v4f16, fminnan, 1>, 5237 Requires<[HasNEON, HasFullFP16]>; 5238def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ, 5239 "vmin", "f16", 5240 v8f16, v8f16, fminnan, 1>, 5241 Requires<[HasNEON, HasFullFP16]>; 5242 5243// VMINNM 5244let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { 5245 def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, 5246 N3RegFrm, NoItinerary, "vminnm", "f32", 5247 v2f32, v2f32, fminnum, 1>, 5248 Requires<[HasV8, HasNEON]>; 5249 def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, 5250 N3RegFrm, NoItinerary, "vminnm", "f32", 5251 v4f32, v4f32, fminnum, 1>, 5252 Requires<[HasV8, HasNEON]>; 5253 def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1, 5254 N3RegFrm, NoItinerary, "vminnm", 
"f16", 5255 v4f16, v4f16, fminnum, 1>, 5256 Requires<[HasV8, HasNEON, HasFullFP16]>; 5257 def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1, 5258 N3RegFrm, NoItinerary, "vminnm", "f16", 5259 v8f16, v8f16, fminnum, 1>, 5260 Requires<[HasV8, HasNEON, HasFullFP16]>; 5261} 5262 5263// Vector Pairwise Operations. 5264 5265// VPADD : Vector Pairwise Add 5266def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD, 5267 "vpadd", "i8", 5268 v8i8, v8i8, int_arm_neon_vpadd, 0>; 5269def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, 5270 "vpadd", "i16", 5271 v4i16, v4i16, int_arm_neon_vpadd, 0>; 5272def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, 5273 "vpadd", "i32", 5274 v2i32, v2i32, int_arm_neon_vpadd, 0>; 5275def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, 5276 IIC_VPBIND, "vpadd", "f32", 5277 v2f32, v2f32, int_arm_neon_vpadd, 0>; 5278def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm, 5279 IIC_VPBIND, "vpadd", "f16", 5280 v4f16, v4f16, int_arm_neon_vpadd, 0>, 5281 Requires<[HasNEON, HasFullFP16]>; 5282 5283// VPADDL : Vector Pairwise Add Long 5284defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", 5285 int_arm_neon_vpaddls>; 5286defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u", 5287 int_arm_neon_vpaddlu>; 5288 5289// VPADAL : Vector Pairwise Add and Accumulate Long 5290defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s", 5291 int_arm_neon_vpadals>; 5292defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", 5293 int_arm_neon_vpadalu>; 5294 5295// VPMAX : Vector Pairwise Maximum 5296def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5297 "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; 5298def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5299 "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; 5300def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5301 "s32", 
v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
// Floating-point forms reuse the signed intrinsic.
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                        "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                        "vpmax", "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
                Requires<[HasNEON, HasFullFP16]>;

//   VPMIN    : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
// Floating-point forms reuse the signed intrinsic.
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                        "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                        "vpmin", "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE : Vector Reciprocal Estimate
// u32 and f32 forms share the int_arm_neon_vrecpe intrinsic; the f16 forms
// additionally require HasFullFP16.
def VRECPEd
  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, IIC_VUNAD, "vrecpe", "u32",
            v2i32, v2i32, int_arm_neon_vrecpe>;
def VRECPEq
  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, IIC_VUNAQ, "vrecpe", "u32",
            v4i32, v4i32, int_arm_neon_vrecpe>;
def VRECPEfd
  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, IIC_VUNAD, "vrecpe", "f32",
            v2f32, v2f32, int_arm_neon_vrecpe>;
def VRECPEfq
  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, IIC_VUNAQ, "vrecpe", "f32",
            v4f32, v4f32, int_arm_neon_vrecpe>;
def VRECPEhd
  : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, IIC_VUNAD, "vrecpe", "f16",
            v4f16, v4f16, int_arm_neon_vrecpe>,
    Requires<[HasNEON, HasFullFP16]>;
def VRECPEhq
  : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, IIC_VUNAQ, "vrecpe", "f16",
            v8f16, v8f16, int_arm_neon_vrecpe>,
    Requires<[HasNEON, HasFullFP16]>;

// VRECPS : Vector Reciprocal Step
def VRECPSfd
  : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, IIC_VRECSD, "vrecps", "f32",
            v2f32, v2f32, int_arm_neon_vrecps, 1>;
def VRECPSfq
  : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, IIC_VRECSQ, "vrecps", "f32",
            v4f32, v4f32, int_arm_neon_vrecps, 1>;
def VRECPShd
  : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, IIC_VRECSD, "vrecps", "f16",
            v4f16, v4f16, int_arm_neon_vrecps, 1>,
    Requires<[HasNEON, HasFullFP16]>;
def VRECPShq
  : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, IIC_VRECSQ, "vrecps", "f16",
            v8f16, v8f16, int_arm_neon_vrecps, 1>,
    Requires<[HasNEON, HasFullFP16]>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def VRSQRTEd
  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, IIC_VUNAD, "vrsqrte", "u32",
            v2i32, v2i32, int_arm_neon_vrsqrte>;
def VRSQRTEq
  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, IIC_VUNAQ, "vrsqrte", "u32",
            v4i32, v4i32, int_arm_neon_vrsqrte>;
def VRSQRTEfd
  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, IIC_VUNAD, "vrsqrte", "f32",
            v2f32, v2f32, int_arm_neon_vrsqrte>;
def VRSQRTEfq
  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, IIC_VUNAQ, "vrsqrte", "f32",
            v4f32, v4f32, int_arm_neon_vrsqrte>;
def VRSQRTEhd
  : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, IIC_VUNAD, "vrsqrte", "f16",
            v4f16, v4f16, int_arm_neon_vrsqrte>,
    Requires<[HasNEON, HasFullFP16]>;
def VRSQRTEhq
  : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, IIC_VUNAQ, "vrsqrte", "f16",
            v8f16, v8f16, int_arm_neon_vrsqrte>,
    Requires<[HasNEON, HasFullFP16]>;

// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd
  : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, IIC_VRECSD, "vrsqrts", "f32",
            v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq
  : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, IIC_VRECSQ, "vrsqrts", "f32",
            v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTShd
  : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, IIC_VRECSD, "vrsqrts", "f16",
            v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
    Requires<[HasNEON, HasFullFP16]>;
def VRSQRTShq
  : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, IIC_VRECSQ, "vrsqrts", "f16",
            v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
    Requires<[HasNEON, HasFullFP16]>;

// Vector Shifts.

// VSHL : Vector Shift
defm VSHLs
  : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                  IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                  "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu
  : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                  IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                  "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
defm VSHLi
  : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs
  : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                NEONvshrs>;
defm VSHRu
  : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                NEONvshru>;

// VSHLL : Vector Shift Left Long
// The selection fragment is a NEONvshl applied to the sign-/zero-extended
// source operand.
defm VSHLLs
  : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
               PatFrag<(ops node:$LHS, node:$RHS),
                       (NEONvshl (sext node:$LHS), node:$RHS)>>;
defm VSHLLu
  : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
               PatFrag<(ops node:$LHS, node:$RHS),
                       (NEONvshl (zext node:$LHS), node:$RHS)>>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// N2VLShMax derives from N2VLSh with null_frag as its pattern, overrides
// Inst{21-16} with the fixed op21_16 value and decodes through
// DecodeVSHLMaxInstruction. Selection is done by the explicit Pats below.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
                bit op7, bit op6, bit op4, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8
  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
              v8i16, v8i8, imm8>;
def VSHLLi16
  : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
              v4i32, v4i16, imm16>;
def VSHLLi32
  : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
              v2i64, v2i32, imm32>;

// Zero-extended source, shift amount equal to the source element width.
def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
// Sign-extended source, same shift amounts, same instructions.
def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;

// VSHRN : Vector Shift Right and Narrow
// The multiclass fragment is a trunc of NEONvshrs; the Pats that follow map
// the NEONvshru form onto the same VSHRN instructions.
defm VSHRN
  : N2VNSh_HSD<0, 1, 0b1000, 0, 0, 1, IIC_VSHLiD, "vshrn", "i",
               PatFrag<(ops node:$Rn, node:$amt),
                       (trunc (NEONvshrs node:$Rn, node:$amt))>>;

def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;

// VRSHL : Vector Rounding Shift
defm VRSHLs
  : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                  IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                  "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu
  : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                  IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                  "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs
  : N2VShR_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                NEONvrshrs>;
defm VRSHRu
  : N2VShR_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN
  : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
               NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs
  : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                  IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                  "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu
  : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                  IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                  "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi
  : N2VShL_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>;
defm VQSHLui
  : N2VShL_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu
  : N2VShL_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs
  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
               NEONvqshrns>;
defm VQSHRNu
  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
               NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN
  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
               NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs
  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                  IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                  "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu
  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                  IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                  "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs
  : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
               NEONvqrshrns>;
defm VQRSHRNu
  : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
               NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN
  : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
               NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding
// Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
// Integer forms come from the multiclass; floating-point forms select on
// fabs, with the f16 ones gated on HasFullFP16.
defm VABS
  : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, IIC_VUNAiD, IIC_VUNAiQ,
               "vabs", "s", int_arm_neon_vabs>;
def VABSfd
  : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs", "f32",
         v2f32, v2f32, fabs>;
def VABSfq
  : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs", "f32",
         v4f32, v4f32, fabs>;
def VABShd
  : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0, "vabs", "f16",
         v4f16, v4f16, fabs>,
    Requires<[HasNEON, HasFullFP16]>;
def VABShq
  : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0, "vabs", "f16",
         v8f16, v8f16, fabs>,
    Requires<[HasNEON, HasFullFP16]>;

// Select VABS for the open-coded form
//   xor (x >>s (bits - 1)), (x + (x >>s (bits - 1)))
// for each D-register and Q-register integer element size.
def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
               (v2i32 (bitconvert
                        (v8i8 (add DPR:$src,
                                   (NEONvshrs DPR:$src, (i32 7))))))),
          (VABSv8i8 DPR:$src)>;
def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
               (v2i32 (bitconvert
                        (v4i16 (add DPR:$src,
                                    (NEONvshrs DPR:$src, (i32 15))))))),
          (VABSv4i16 DPR:$src)>;
def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
               (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
          (VABSv2i32 DPR:$src)>;
def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
               (v4i32 (bitconvert
                        (v16i8 (add QPR:$src,
                                    (NEONvshrs QPR:$src, (i32 7))))))),
          (VABSv16i8 QPR:$src)>;
def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
               (v4i32 (bitconvert
                        (v8i16 (add QPR:$src,
                                    (NEONvshrs QPR:$src, (i32 15))))))),
          (VABSv8i16 QPR:$src)>;
def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
               (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
          (VABSv4i32 QPR:$src)>;

// VQABS : Vector Saturating Absolute Value
defm VQABS
  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, IIC_VQUNAiD, IIC_VQUNAiQ,
               "vqabs", "s", int_arm_neon_vqabs>;

// Vector Negate.

// Integer negate is expressed as a subtraction from an all-zeros vector.
def vnegd
  : PatFrag<(ops node:$in),
            (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq
  : PatFrag<(ops node:$in),
            (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

// D-register and Q-register integer negate; `size` selects the element width.
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def VNEGfd
  : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
        "vneg", "f32", "$Vd, $Vm", "",
        [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def VNEGf32q
  : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
        "vneg", "f32", "$Vd, $Vm", "",
        [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
def VNEGhd
  : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
        "vneg", "f16", "$Vd, $Vm", "",
        [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
    Requires<[HasNEON, HasFullFP16]>;
def VNEGhq
  : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
        "vneg", "f16", "$Vd, $Vm", "",
        [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
    Requires<[HasNEON, HasFullFP16]>;

// Explicit per-type selection of the integer VNEG instructions.
def : Pat<(v8i8  (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG
  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, IIC_VQUNAiD, IIC_VQUNAiQ,
               "vqneg", "s", int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS
  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, IIC_VCNTiD, IIC_VCNTiQ,
               "vcls", "s", int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ
  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, IIC_VCNTiD, IIC_VCNTiQ,
               "vclz", "i", ctlz>;
// VCNT : Vector Count One Bits
def VCNTd
  : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiD, "vcnt", "8",
            v8i8, v8i8, ctpop>;
def VCNTq
  : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ, "vcnt", "8",
            v16i8, v16i8, ctpop>;

// Vector Swap
// Both registers are outputs and each is tied to one of the inputs; no
// selection pattern is attached.
def VSWPd
  : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
         (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
         NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
         []>;
def VSWPq
  : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
         (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
         NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
         []>;

// Vector Move Operations.

// VMOV : Vector Move (Register)
// Register-to-register vmov is accepted as an alias of VORR with both source
// operands equal.
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

// VMOVs are not, strictly speaking, cheap, but they cost the same as the
// register copy they would replace (VORR), so rematerialization should be
// preferred over splitting whenever it applies.
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
  def VMOVv8i8
    : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1,
               (outs DPR:$Vd), (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
               "vmov", "i8", "$Vd, $SIMM", "",
               [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
  def VMOVv16i8
    : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1,
               (outs QPR:$Vd), (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
               "vmov", "i8", "$Vd, $SIMM", "",
               [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

  // i16 forms: the one unspecified cmode bit is filled in from SIMM{9}.
  def VMOVv4i16
    : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1,
               (outs DPR:$Vd), (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
               "vmov", "i16", "$Vd, $SIMM", "",
               [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  def VMOVv8i16
    : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1,
               (outs QPR:$Vd), (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
               "vmov", "i16", "$Vd, $SIMM", "",
               [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
    let Inst{9} = SIMM{9};
  }

  // i32 forms: all four cmode bits are filled in from SIMM{11-8}.
  def VMOVv2i32
    : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1,
               (outs DPR:$Vd), (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
               "vmov", "i32", "$Vd, $SIMM", "",
               [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
    let Inst{11-8} = SIMM{11-8};
  }

  def VMOVv4i32
    : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1,
               (outs QPR:$Vd), (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
               "vmov", "i32", "$Vd, $SIMM", "",
               [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
    let Inst{11-8} = SIMM{11-8};
  }

  def VMOVv1i64
    : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1,
               (outs DPR:$Vd), (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
               "vmov", "i64", "$Vd, $SIMM", "",
               [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
  def VMOVv2i64
    : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1,
               (outs QPR:$Vd), (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
               "vmov", "i64", "$Vd, $SIMM", "",
               [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

  def VMOVv2f32
    : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1,
               (outs DPR:$Vd), (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
               "vmov", "f32", "$Vd, $SIMM", "",
               [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
  def VMOVv4f32
    : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1,
               (outs QPR:$Vd), (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
               "vmov", "f32", "$Vd, $SIMM", "",
               [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable, isAsCheapAsAMove

// Support byte-replicated immediates for GAS compatibility.
// Instructions such as:
//   "vmov.i32 d0, 0xffffffff"
//   "vmov.i32 d0, 0xabababab"
//   "vmov.i16 d0, 0xabab"
// are incorrect as written, but such cases can still be handled.
// For the last two instructions, for example, the following should be
// emitted:
//   "vmov.i8 d0, 0xab"
def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMOVI16ByteReplicate:$Vm,
                              pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMOVI32ByteReplicate:$Vm,
                              pred:$p)>;
def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMOVI16ByteReplicate:$Vm,
                               pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMOVI32ByteReplicate:$Vm,
                               pred:$p)>;

// Also add same support for VMVN instructions.
// So instruction:
//   "vmvn.i32 d0, 0xabababab"
// actually means:
//   "vmov.i8 d0, 0x54"
def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMVNI16ByteReplicate:$Vm,
                              pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMVNI32ByteReplicate:$Vm,
                              pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMVNI16ByteReplicate:$Vm,
                               pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMVNI32ByteReplicate:$Vm,
                               pred:$p)>;

// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// take zero cycles to execute, so they should be used wherever possible to
// set a register to zero.

// Even without these pseudo-instructions the correct instruction would
// probably still be selected, but the general ones could not be marked
// "isAsCheapAsAMove" because they are sometimes rather expensive (in
// general).

let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  def VMOVD0
    : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                      [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
                      (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
      Requires<[HasZCZ]>;
  def VMOVQ0
    : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                      [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
                      (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
      Requires<[HasZCZ]>;
}

// VMOV : Vector Get Lane (move scalar to ARM core register)

// The lane index is split across Inst{21} and Inst{6-5} (8-bit elements),
// Inst{21} and Inst{6} (16-bit elements), or lives in Inst{21} alone (32-bit).
def VGETLNs8
  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
              (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
              IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
              [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16
  : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
              (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
              IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
              [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8
  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
              (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
              IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
              [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16
  : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
              (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
              IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
              [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNi32
  : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
              (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
              IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
              [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                        imm:$lane))]>,
    Requires<[HasVFP2, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

// Q-register lane reads: extract the D subregister holding the lane, then
// use the D-register instruction with the lane index remapped.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
// With HasSlowVGETLNi32, 32-bit integer lane reads are selected as an
// S-subregister extract copied to GPR instead of VGETLNi32.
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))),
            GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))),
            GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
            (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)),
            (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
            (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)),
            (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

// The destination register is tied to the $src1 input for all three forms.
let Constraints = "$src1 = $V" in {
  def VSETLNi8
    : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                             GPR:$R, imm:$lane))]> {
    let Inst{21}  = lane{2};
    let Inst{6-5} = lane{1-0};
  }
  def VSETLNi16
    : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                             GPR:$R, imm:$lane))]> {
    let Inst{21} = lane{1};
    let Inst{6}  = lane{0};
  }
  def VSETLNi32
    : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                         GPR:$R, imm:$lane))]>,
      Requires<[HasVFP2]> {
    let Inst{21} = lane{0};
    // This instruction is equivalent to
    // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
    let isInsertSubreg = 1;
  }
}

// Q-register lane writes: update the D subregister that holds the lane and
// insert it back into the Q register.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                   (v8i8 (VSETLNi8
                           (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                                 (DSubReg_i8_reg imm:$lane))),
                           GPR:$src2, (SubReg_i8_lane imm:$lane))),
                   (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                   (v4i16 (VSETLNi16
                            (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                                   (DSubReg_i16_reg imm:$lane))),
                            GPR:$src2, (SubReg_i16_lane imm:$lane))),
                   (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                   (v2i32 (VSETLNi32
                            (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                                   (DSubReg_i32_reg imm:$lane))),
                            GPR:$src2, (SubReg_i32_lane imm:$lane))),
                   (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: floating-point scalars are inserted as subregister 0 of
// an IMPLICIT_DEF vector; core-register scalars go through VSETLN at lane 0.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

def VDUP8d  : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
              Requires<[HasNEON, HasFastVDUP32]>;
def VDUP8q  : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// NEONvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON, HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;

// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON, HasSlowVDUP32]>;
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))),
          (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON, HasSlowVDUP32]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

// D-register result: duplicate lane $lane of $Vm into every element of $Vd.
//   op19_16   - Inst{19-16} template; the '?' bits are filled by each def.
//   OpcodeStr - assembly mnemonic.
//   Dt        - data-type suffix printed after the mnemonic.
//   Ty        - vector type of both the source and the result.
//   IdxTy     - operand type of the lane index.
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;

// Q-register result from a D-register source.
//   ResTy / OpTy - result (Q) and source (D) vector types.
//   IdxTy        - operand type of the lane index.
// The selection pattern uses IdxTy for the lane operand so that it agrees
// with the `ins` list for every instantiation. It was previously hard-coded
// to VectorIndex32 regardless of the element size, leaving the 8- and 16-bit
// forms matching on the 32-bit index operand type. The 32-bit instantiation
// is unaffected because its IdxTy is VectorIndex32.
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
                                      IdxTy:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.

// The remaining '?' bits of Inst{19-16} carry the lane index: three bits for
// 8-bit elements, two for 16-bit elements and one for 32-bit elements.
def VDUPLN8d
  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d
  : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d
  : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q
  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q
  : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q
  : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

// f32 lane duplication reuses the 32-bit integer instructions.
def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Q-register source: extract the D subregister that holds the lane and
// duplicate from it with the lane index remapped.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q
                   (v8i8 (EXTRACT_SUBREG QPR:$src,
                                         (DSubReg_i8_reg imm:$lane))),
                   (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q
                   (v4i16 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q
                   (v2i32 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q
                   (v2f32 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

// NEONvdup of an SPR scalar: place it in ssub_0 of an IMPLICIT_DEF D register
// and duplicate lane 0.
def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                           SPR:$src, ssub_0),
                            (i32 0)))>;
def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                           SPR:$src, ssub_0),
                            (i32 0)))>;

// VMOVN : Vector Narrowing Move
defm VMOVN
  : N2VN_HSD<0b11, 0b11, 0b10, 0b00100, 0, 0, IIC_VMOVN,
             "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs
  : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 0, 0, IIC_VQUNAiD,
                "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu
  : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 1, 0, IIC_VQUNAiD,
                "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu
  : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00100, 1, 0, IIC_VQUNAiD,
                "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VL_QHS<0b01, 0b10100, 0, 1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11, 0b10100, 0, 1, "vmovl", "u", zext>;
// anyext is selected to the unsigned lengthening move.
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;

// Vector Conversions.

//   VCVT     : Vector Convert Between Floating-Point and Integers

// 32-bit elements, D registers.
def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                    v2i32, v2f32, fp_to_sint>;
def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                    v2i32, v2f32, fp_to_uint>;
def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                    v2f32, v2i32, sint_to_fp>;
def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                    v2f32, v2i32, uint_to_fp>;

// 32-bit elements, Q registers.
def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                    v4i32, v4f32, fp_to_sint>;
def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                    v4i32, v4f32, fp_to_uint>;
def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                    v4f32, v4i32, sint_to_fp>;
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                    v4f32, v4i32, uint_to_fp>;

// 16-bit elements, D registers; gated on HasFullFP16.
def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                    v4i16, v4f16, fp_to_sint>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                    v4i16, v4f16, fp_to_uint>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                    v4f16, v4i16, sint_to_fp>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                    v4f16, v4i16, uint_to_fp>,
               Requires<[HasNEON, HasFullFP16]>;

// 16-bit elements, Q registers; gated on HasFullFP16.
def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                    v8i16, v8f16, fp_to_sint>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                    v8i16, v8f16, fp_to_uint>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                    v8f16, v8i16, sint_to_fp>,
               Requires<[HasNEON, HasFullFP16]>;
def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                    v8f16, v8i16, uint_to_fp>,
               Requires<[HasNEON, HasFullFP16]>;

// VCVT{A, N, P, M}
// One instantiation yields eight records: {S,U} x {D,Q} x {f,h}.
//   Rnd     - letter appended to "vcvt" to form the mnemonic
//   op10_8  - forwarded to the N2V*Intnp base class
//   SInt    - pattern operator used by the signed ("s..") variants
//   UInt    - pattern operator used by the unsigned ("u..") variants
// The f16 variants additionally require HasFullFP16.
multiclass VCVT_FPI<string Rnd, bits<3> op10_8, SDPatternOperator SInt,
                    SDPatternOperator UInt> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder",
      DecoderNamespace = "v8NEON" in {
    def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary,
                        !strconcat("vcvt", Rnd),
                        "s32.f32", v2i32, v2f32, SInt>,
              Requires<[HasV8, HasNEON]>;
    def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary,
                        !strconcat("vcvt", Rnd),
                        "s32.f32", v4i32, v4f32, SInt>,
              Requires<[HasV8, HasNEON]>;
    def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary,
                        !strconcat("vcvt", Rnd),
                        "u32.f32", v2i32, v2f32, UInt>,
              Requires<[HasV8, HasNEON]>;
    def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary,
                        !strconcat("vcvt", Rnd),
                        "u32.f32", v4i32, v4f32, UInt>,
              Requires<[HasV8, HasNEON]>;
    def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary,
                        !strconcat("vcvt", Rnd),
                        "s16.f16", v4i16, v4f16, SInt>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
    def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary,
                        !strconcat("vcvt", Rnd),
                        "s16.f16", v8i16, v8f16, SInt>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
    def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary,
                        !strconcat("vcvt", Rnd),
                        "u16.f16", v4i16, v4f16, UInt>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
    def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary,
                        !strconcat("vcvt", Rnd),
                        "u16.f16", v8i16, v8f16, UInt>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;

//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
// D-register forms share the DecodeVCVTD decoder hook.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
let Predicates = [HasNEON, HasFullFP16] in {
def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
} // Predicates = [HasNEON, HasFullFP16]
} // DecoderMethod = "DecodeVCVTD"

// Q-register forms share the DecodeVCVTQ decoder hook.
let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
let Predicates = [HasNEON, HasFullFP16] in {
def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
} // Predicates = [HasNEON, HasFullFP16]
} // DecoderMethod = "DecodeVCVTQ"

// Assembly aliases: a trailing "#0" operand on vcvt maps to the plain
// integer<->floating-point conversion defined above.
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
                    (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;


//   VCVT     : Vector Convert Between Half-Precision and Single-Precision.
// Both directions are gated on HasFP16; the f16 side is carried as v4i16.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                      IIC_VUNAQ, "vcvt", "f16.f32",
                      v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
              Requires<[HasNEON, HasFP16]>;
def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                      IIC_VUNAQ, "vcvt", "f32.f16",
                      v4f32, v4i16, int_arm_neon_vcvthf2fp>,
              Requires<[HasNEON, HasFP16]>;

// Vector Reverse.

//   VREV64   : Vector Reverse elements within 64-bit doublewords

// D- and Q-register templates. Each takes the Inst{19-18} field value, the
// mnemonic, the data-type suffix and the vector type matched by NEONvrev64.
class VREV64D<bits<2> opc19_18, string Mnemonic, string DtSfx, ValueType VecTy>
  : N2V<0b11, 0b11, opc19_18, 0b00, 0b00000, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        Mnemonic, DtSfx, "$Vd, $Vm", "",
        [(set DPR:$Vd, (VecTy (NEONvrev64 (VecTy DPR:$Vm))))]>;
class VREV64Q<bits<2> opc19_18, string Mnemonic, string DtSfx, ValueType VecTy>
  : N2V<0b11, 0b11, opc19_18, 0b00, 0b00000, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        Mnemonic, DtSfx, "$Vd, $Vm", "",
        [(set QPR:$Vd, (VecTy (NEONvrev64 (VecTy QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
// v2f32 reuses the 32-bit integer instruction.
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
// v4f32 reuses the 32-bit integer instruction.
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;

//   VREV32   : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> opc19_18, string Mnemonic, string DtSfx, ValueType VecTy>
  : N2V<0b11, 0b11, opc19_18, 0b00, 0b00001, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        Mnemonic, DtSfx, "$Vd, $Vm", "",
        [(set DPR:$Vd, (VecTy (NEONvrev32 (VecTy DPR:$Vm))))]>;
class VREV32Q<bits<2> opc19_18, string Mnemonic, string DtSfx, ValueType VecTy>
  : N2V<0b11, 0b11, opc19_18, 0b00, 0b00001, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        Mnemonic, DtSfx, "$Vd, $Vm", "",
        [(set QPR:$Vd, (VecTy (NEONvrev32 (VecTy QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

//   VREV16   : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> opc19_18, string Mnemonic, string DtSfx, ValueType VecTy>
  : N2V<0b11, 0b11, opc19_18, 0b00, 0b00010, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        Mnemonic, DtSfx, "$Vd, $Vm", "",
        [(set DPR:$Vd, (VecTy (NEONvrev16 (VecTy DPR:$Vm))))]>;
class VREV16Q<bits<2> opc19_18, string Mnemonic, string DtSfx, ValueType VecTy>
  : N2V<0b11, 0b11, opc19_18, 0b00, 0b00010, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        Mnemonic, DtSfx, "$Vd, $Vm", "",
        [(set QPR:$Vd, (VecTy (NEONvrev16 (VecTy QPR:$Vm))))]>;

def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

//  Aligned extractions: really just dropping registers

// Selects vector_extract_subvec of a Q register to a plain EXTRACT_SUBREG.
// StartXForm converts the $start immediate into the sub-register index.
class AlignedVEXTq<ValueType OutTy, ValueType InTy, SDNodeXForm StartXForm>
  : Pat<(OutTy (vector_extract_subvec (InTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (InTy QPR:$src), (StartXForm imm:$start))>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;


//   VEXT     : Vector Extract


// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
// D-register VEXT. The class leaves its 4-bit opcode argument unspecified
// and sets Inst{11} = 0 and Inst{10-8} = index; the individual defs below
// override Inst{10-8} according to the element size.
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm, immTy:$index),
        NVExtFrm, IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd,
              (Ty (NEONvext (Ty DPR:$Vn), (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

// Q-register VEXT. Inst{11-8} defaults to the full 4-bit index.
// NOTE(review): immTy is accepted but not used here - the input operand is
// always imm0_15, regardless of what the defs below pass. Kept exactly as
// written; confirm whether the per-size immediate class was intended.
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm, imm0_15:$index),
        NVExtFrm, IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd,
              (Ty (NEONvext (Ty QPR:$Vn), (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
} // TwoOperandAliasConstraint = "$Vn = $Vd"

// For elements wider than a byte the index occupies the upper bits of the
// field and the remaining low bits are forced to zero.
def VEXTd8  : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8} = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10} = index{0};
  let Inst{9-8} = 0b00;
}
// v2f32 reuses the 32-bit integer instruction.
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                           (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

def VEXTq8  : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8} = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8} = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11} = index{0};
  let Inst{10-8} = 0b000;
}
// v4f32 reuses the 32-bit integer instruction.
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                           (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;

//   VTRN     : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

//   VUZP     : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

//   VZIP     : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

//   VTBL     : Vector Table Lookup
// Everything down to the matching close brace uses the
// DecodeTBLInstruction decoder hook.
let DecoderMethod = "DecodeTBLInstruction" in {
// Only the one-register table form carries a selection pattern.
def VTBL1
  : N3V<1,1,0b11,0b1000,0,0,
        (outs DPR:$Vd), (ins VecListOneD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
  : N3V<1,1,0b11,0b1001,0,0,
        (outs DPR:$Vd), (ins VecListDPair:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
  : N3V<1,1,0b11,0b1010,0,0,
        (outs DPR:$Vd), (ins VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
  : N3V<1,1,0b11,0b1011,0,0,
        (outs DPR:$Vd), (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudo forms taking the table as a single QQPR operand.
def VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src),
                IIC_VTB3, "", []>;
def VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src),
                IIC_VTB4, "", []>;

//   VTBX     : Vector Table Extension
// Same shape as VTBL plus an $orig input tied to the destination.
def VTBX1
  : N3V<1,1,0b11,0b1000,1,0,
        (outs DPR:$Vd), (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd,
              (v8i8 (int_arm_neon_vtbx1
                      DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
  : N3V<1,1,0b11,0b1001,1,0,
        (outs DPR:$Vd), (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
  : N3V<1,1,0b11,0b1010,1,0,
        (outs DPR:$Vd), (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX4
  : N3V<1,1,0b11,0b1011,1,0,
        (outs DPR:$Vd), (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// VRINT      : Vector Rounding
// One instantiation yields four instructions ({D,Q} x {f32,f16}) and four
// assembly aliases that accept the doubled type suffix (e.g. ".f32.f32").
//   op    - letter appended to "vrint" to form the mnemonic
//   op9_7 - value written to Inst{9-7} of every instruction
//   Int   - pattern operator passed to the base class
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder",
      DecoderNamespace = "v8NEON" in {
    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f32",
                       v2f32, v2f32, Int>,
             Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f32",
                       v4f32, v4f32, Int>,
             Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f16",
                       v4f16, v4f16, Int>,
             Requires<[HasV8, HasNEON, HasFullFP16]> {
      let Inst{9-7} = op9_7;
    }
    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f16",
                       v8f16, v8f16, Int>,
             Requires<[HasV8, HasNEON, HasFullFP16]> {
      let Inst{9-7} = op9_7;
    }
  }

  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
  let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
  }
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
// All crypto templates require HasV8 and HasCrypto. Note the argument
// order: the templates take (op7, op6) but forward them as (op6, op7).
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  // "aes" + Sfx on v16i8, built on N2VQIntXnp.
  class AES<string Sfx, bit op7, bit op6, SDPatternOperator IntOp>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", Sfx), "8", v16i8, v16i8, IntOp>,
      Requires<[HasV8, HasCrypto]>;
  // "aes" + Sfx on v16i8, built on N2VQIntX2np.
  class AES2Op<string Sfx, bit op7, bit op6, SDPatternOperator IntOp>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                  !strconcat("aes", Sfx), "8", v16i8, v16i8, IntOp>,
      Requires<[HasV8, HasCrypto]>;
  // "sha" + Sfx on v4i32, built on N2VQIntXnp.
  class N2SHA<string Sfx, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator IntOp>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", Sfx), "32", v4i32, v4i32, IntOp>,
      Requires<[HasV8, HasCrypto]>;
  // "sha" + Sfx on v4i32, built on N2VQIntX2np.
  class N2SHA2Op<string Sfx, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                 SDPatternOperator IntOp>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                  !strconcat("sha", Sfx), "32", v4i32, v4i32, IntOp>,
      Requires<[HasV8, HasCrypto]>;
  // "sha" + Sfx on v4i32, built on N3VQInt3np.
  class N3SHA3Op<string Sfx, bits<5> op27_23, bits<2> op21_20,
                 SDPatternOperator IntOp>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                 !strconcat("sha", Sfx), "32", v4i32, v4i32, IntOp, 0>,
      Requires<[HasV8, HasCrypto]>;
}

def AESD   : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE   : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC  : AES<"mc", 1, 0, int_arm_neon_aesmc>;

// The records given null_frag are selected by the explicit patterns below.
def SHA1H     : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
def SHA1SU1   : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C     : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
def SHA1M     : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
def SHA1P     : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
def SHA1SU0   : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H   : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2  : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;

// sha1h takes and returns a scalar i32: copy it into an SPR, widen with
// SUBREG_TO_REG for SHA1H, then take ssub_0 of the result back to a GPR.
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
          (COPY_TO_REGCLASS
            (f32 (EXTRACT_SUBREG
                   (SHA1H (SUBREG_TO_REG (i64 0),
                                         (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
                                         ssub_0)),
                   ssub_0)),
            GPR)>;

// sha1c/sha1m/sha1p take a scalar i32 $hash_e between two v4i32 operands;
// the scalar is copied into an SPR and widened with SUBREG_TO_REG.
def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e,
                                     v4i32:$wk)),
          (SHA1C v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e,
                                     v4i32:$wk)),
          (SHA1M v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e,
                                     v4i32:$wk)),
          (SHA1P v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// Unary scalar f32 op via a v2f32 NEON instruction: insert the scalar as
// ssub_0 of an undefined DPR_VFP2 register, run Inst, extract ssub_0.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

// Binary variant of N2VSPat.
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Matches OpNode($acc, MulNode($a, $b)) and selects the three-operand Inst
// with operands ($acc, $a, $b).
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$acc, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Integer (in a GPR) to f32: the GPR value is first copied into an SPR.
class NVCVTIFPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode GPR:$a)),
              (f32 (EXTRACT_SUBREG
                     (v2f32 (Inst
                       (INSERT_SUBREG
                         (v2f32 (IMPLICIT_DEF)),
                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
                     ssub_0))>;
// f32 (in an SPR) to integer.
class NVCVTFIPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(i32 (OpNode SPR:$a)),
              (i32 (EXTRACT_SUBREG
                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                                 SPR:$a, ssub_0))),
                     ssub_0))>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
// Multiply-accumulate: the VMLA/VMLS and VFMA/VFMS pairs are gated on
// DontUseFusedMAC and UseFusedMAC respectively.
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;

// NEON doesn't have any f64 conversions, so provide patterns to make
// sure the VFP conversions match when extracting from a vector.
def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;


// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
// The same GPR feeds both operands of VMOVDRR; the f32 result is ssub_0.
def : Pat<(f32 (bitconvert GPR:$a)),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
      Requires<[HasNEON, DontUseVMOVSR]>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// Under IsLE every bitconvert is a no-op re-typing of the register.
// Conversions between types with the same element width (e.g. v1i64 <-> f64,
// v2i32 <-> v2f32) are emitted outside the IsLE guard and so apply without
// an endianness predicate. Pattern order is preserved from the original.

// 64-bit (D register) types.
let Predicates = [IsLE] in {
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
}
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
}
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
}
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
}
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;
}

// 128-bit (Q register) types.
let Predicates = [IsLE] in {
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
}
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
}
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
}
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
}
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
}

// Under IsBE a bitconvert between types of different element width selects
// a VREV instruction: VREV<W><reg><E>, where W is the wider and E the
// narrower of the two element widths involved.
let Predicates = [IsBE] in {
  // 64 bit conversions
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (VREV32d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (VREV16d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (VREV32d8  DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;

  // 128 bit conversions
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
}

// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load.
// Example:
//   Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
//
// Parameters:
//   DestLanes, DestTy - "v" # DestLanes # DestTy names the result ValueType
//                       and is also the suffix of the VMOVL instruction.
//   SrcTy             - suffix shared by the "[sz]extloadvi" fragment and
//                       the "VLD1d" load instruction.
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  // AddedComplexity = 10 applies to all three patterns below.
  let AddedComplexity = 10 in {
    // Any-extending load: selected with the unsigned lengthening move.
    def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                     (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                   (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                     (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

    // Zero-extending load: unsigned lengthening move.
    def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                   (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

    // Sign-extending load: signed lengthening move.
    def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                   (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available.
// Example:
//   Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                         (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
//
// Parameters:
//   DestLanes, DestTy - "v" # DestLanes # DestTy names the result ValueType.
//   SrcTy             - suffix of the "[sz]extloadv" fragment (e.g. "i8").
//   InsnLanes, InsnTy - suffix of the VMOVL instruction that does the
//                       lengthening; dsub_0 of its result is extracted.
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  // Any-extending load: unsigned lengthening move.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                     (VLD1LNd32 addrmode6oneL32:$addr,
                                (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0)>;

  // Zero-extending load: unsigned lengthening move.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                   (VLD1LNd32 addrmode6oneL32:$addr,
                              (f64 (IMPLICIT_DEF)), (i32 0))),
                 dsub_0)>;

  // Sign-extending load: signed lengthening move.
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
                   (VLD1LNd32 addrmode6oneL32:$addr,
                              (f64 (IMPLICIT_DEF)), (i32 0))),
                 dsub_0)>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfSingle definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
// RevLanes is appended to "VREV32d" to pick the reverse instruction that is
// applied to the VLD1LNd32 result before the lengthening move.
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy,
                                          string SrcTy, string InsnLanes,
                                          string InsnTy, string RevLanes> {
  // Any-extending load: unsigned lengthening move.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                     (!cast<Instruction>("VREV32d" # RevLanes)
                       (VLD1LNd32 addrmode6oneL32:$addr,
                                  (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0)>;

  // Zero-extending load: unsigned lengthening move.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                   (!cast<Instruction>("VREV32d" # RevLanes)
                     (VLD1LNd32 addrmode6oneL32:$addr,
                                (f64 (IMPLICIT_DEF)), (i32 0)))),
                 dsub_0)>;

  // Sign-extending load: signed lengthening move.
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
                   (!cast<Instruction>("VREV32d" # RevLanes)
                     (VLD1LNd32 addrmode6oneL32:$addr,
                                (f64 (IMPLICIT_DEF)), (i32 0)))),
                 dsub_0)>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Example:
//   Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                           (f64 (IMPLICIT_DEF)),
//                                           (i32 0))),
//                           dsub_0))>;
//
// Parameters:
//   DestLanes, DestTy   - "v" # DestLanes # DestTy names the result ValueType.
//   SrcTy               - suffix of the "[sz]extloadv" fragment.
//   Insn1Lanes, Insn1Ty - suffix of the first (inner) VMOVL.
//   Insn2Lanes, Insn2Ty - suffix of the second (outer) VMOVL.
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty,
                           string Insn2Lanes, string Insn2Ty> {
  // Any-extending load: both lengthening steps are unsigned.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
                 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG
                     (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                       (VLD1LNd32 addrmode6oneL32:$addr,
                                  (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0))>;

  // Zero-extending load: both lengthening steps are unsigned.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                     (VLD1LNd32 addrmode6oneL32:$addr,
                                (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0))>;

  // Sign-extending load: both lengthening steps are signed.
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                     (VLD1LNd32 addrmode6oneL32:$addr,
                                (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0))>;
}

// The following class definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
// RevLanes is appended to "VREV32d" to pick the reverse instruction that is
// applied to the VLD1LNd32 result before the first lengthening move.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy,
                                      string SrcTy, string Insn1Lanes,
                                      string Insn1Ty, string Insn2Lanes,
                                      string Insn2Ty, string RevLanes> {
  // Any-extending load: both lengthening steps are unsigned.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
                 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG
                     (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                       (!cast<Instruction>("VREV32d" # RevLanes)
                         (VLD1LNd32 addrmode6oneL32:$addr,
                                    (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0))>;

  // Zero-extending load: both lengthening steps are unsigned.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                     (!cast<Instruction>("VREV32d" # RevLanes)
                       (VLD1LNd32 addrmode6oneL32:$addr,
                                  (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0))>;

  // Sign-extending load: both lengthening steps are signed.
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                     (!cast<Instruction>("VREV32d" # RevLanes)
                       (VLD1LNd32 addrmode6oneL32:$addr,
                                  (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0))>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Example:
//   Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                           (f64 (IMPLICIT_DEF)), (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
//
// Parameters:
//   DestLanes, DestTy   - "v" # DestLanes # DestTy names the result ValueType.
//   SrcTy               - suffix of the "[sz]extloadv" fragment.
//   Insn1Lanes, Insn1Ty - suffix of the first (inner) VMOVL.
//   Insn2Lanes, Insn2Ty - suffix of the second (outer) VMOVL; dsub_0 of its
//                         result is extracted.
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                               string Insn1Lanes, string Insn1Ty,
                               string Insn2Lanes, string Insn2Ty> {
  // Any-extending load: both lengthening steps are unsigned.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                     (EXTRACT_SUBREG
                       (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                         (VLD1LNd16 addrmode6:$addr,
                                    (f64 (IMPLICIT_DEF)), (i32 0))),
                       dsub_0)),
                   dsub_0)>;

  // Zero-extending load: both lengthening steps are unsigned.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG
                     (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                       (VLD1LNd16 addrmode6:$addr,
                                  (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0)),
                 dsub_0)>;

  // Sign-extending load: both lengthening steps are signed.
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG
                     (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                       (VLD1LNd16 addrmode6:$addr,
                                  (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0)),
                 dsub_0)>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.
// VREV16d8 is applied to the VLD1LNd16 result before the first lengthening
// move; the parameters have the same meaning as for Lengthen_HalfDouble.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy,
                                          string SrcTy, string Insn1Lanes,
                                          string Insn1Ty, string Insn2Lanes,
                                          string Insn2Ty> {
  // Any-extending load: both lengthening steps are unsigned.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG
                   (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                     (EXTRACT_SUBREG
                       (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                         (VREV16d8
                           (VLD1LNd16 addrmode6:$addr,
                                      (f64 (IMPLICIT_DEF)), (i32 0)))),
                       dsub_0)),
                   dsub_0)>;

  // Zero-extending load: both lengthening steps are unsigned.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG
                     (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                       (VREV16d8
                         (VLD1LNd16 addrmode6:$addr,
                                    (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0)),
                 dsub_0)>;

  // Sign-extending load: both lengthening steps are signed.
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG
                 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG
                     (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                       (VREV16d8
                         (VLD1LNd16 addrmode6:$addr,
                                    (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0)),
                 dsub_0)>;
}

// Single lengthening of a full 64-bit load; not predicated on endianness.
defm : Lengthen_Single<"8", "i16", "8">;  // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

// Little-endian instantiations of the partial-width lengthening loads.
let Predicates = [IsLE] in {
  // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">;
  // v2i16 -> v2i32
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">;

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

// Big-endian instantiations; the trailing argument of the HalfSingle and
// Double variants is the RevLanes suffix for VREV32d.
let Predicates = [IsBE] in {
  // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">;
  // v2i16 -> v2i32
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">;

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32",
                                    "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4",
                                        "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64",
                                    "16">;
}

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
let Predicates = [IsLE] in {
  // Any-extending load: three unsigned lengthening moves.
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
            (VMOVLuv2i64
              (EXTRACT_SUBREG
                (VMOVLuv4i32
                  (EXTRACT_SUBREG
                    (VMOVLuv8i16
                      (VLD1LNd16 addrmode6:$addr,
                                 (f64 (IMPLICIT_DEF)), (i32 0))),
                    dsub_0)),
                dsub_0))>;
  // Zero-extending load: three unsigned lengthening moves.
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
            (VMOVLuv2i64
              (EXTRACT_SUBREG
                (VMOVLuv4i32
                  (EXTRACT_SUBREG
                    (VMOVLuv8i16
                      (VLD1LNd16 addrmode6:$addr,
                                 (f64 (IMPLICIT_DEF)), (i32 0))),
                    dsub_0)),
                dsub_0))>;
  // Sign-extending load: three signed lengthening moves.
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
            (VMOVLsv2i64
              (EXTRACT_SUBREG
                (VMOVLsv4i32
                  (EXTRACT_SUBREG
                    (VMOVLsv8i16
                      (VLD1LNd16 addrmode6:$addr,
                                 (f64 (IMPLICIT_DEF)), (i32 0))),
                    dsub_0)),
                dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
// Big-endian triple lengthening (v2i8 -> v2i64): VREV16d8 is applied to the
// VLD1LNd16 result before the three lengthening moves.
let Predicates = [IsBE] in {
  // Any-extending load: three unsigned lengthening moves.
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
            (VMOVLuv2i64
              (EXTRACT_SUBREG
                (VMOVLuv4i32
                  (EXTRACT_SUBREG
                    (VMOVLuv8i16
                      (VREV16d8
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0)))),
                    dsub_0)),
                dsub_0))>;
  // Zero-extending load: three unsigned lengthening moves.
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
            (VMOVLuv2i64
              (EXTRACT_SUBREG
                (VMOVLuv4i32
                  (EXTRACT_SUBREG
                    (VMOVLuv8i16
                      (VREV16d8
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0)))),
                    dsub_0)),
                dsub_0))>;
  // Sign-extending load: three signed lengthening moves.
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
            (VMOVLsv2i64
              (EXTRACT_SUBREG
                (VMOVLsv4i32
                  (EXTRACT_SUBREG
                    (VMOVLsv8i16
                      (VREV16d8
                        (VLD1LNd16 addrmode6:$addr,
                                   (f64 (IMPLICIT_DEF)), (i32 0)))),
                    dsub_0)),
                dsub_0))>;
}

//===----------------------------------------------------------------------===//
// Assembler aliases
//

// fmdhr / fmdlr are accepted as aliases of VSETLNi32 on lane 1 and lane 0
// of the D register, respectively.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
// Three-operand forms: for each mnemonic, the D-register variant is followed
// by the Q-register variant.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases: $Vdn is used as both the destination and the
// first source operand.
// NOTE(review): no two-operand vbic alias is defined in this group - confirm
// whether that is intentional.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ...
// immediates: "vand" with an immediate operand is mapped onto the
// VBIC-immediate instructions using the nImmSplatNot* operand classes.
// NOTE(review): presumably the asm parser complements the immediate for the
// "Not" operand classes - confirm against the ARM asm parser.
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;


// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain forms take "$list, $addr"; the WB_fixed forms take "$addr!" and the
// WB_register forms add an rGPR $Rm operand.
def VLD1LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
    (ins VecListOneDByteIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD1LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
    (ins VecListOneDHWordIndexed:$list,
         addrmode6align16:$addr, pred:$p)>;
def VLD1LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
    (ins VecListOneDWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;

def VLD1LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
    (ins VecListOneDByteIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
    (ins VecListOneDHWordIndexed:$list,
         addrmode6align16:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
    (ins VecListOneDWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
    (ins VecListOneDByteIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
    (ins VecListOneDHWordIndexed:$list,
         addrmode6align16:$addr, rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
    (ins VecListOneDWordIndexed:$list,
         addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST1LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
    (ins VecListOneDByteIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VST1LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
    (ins VecListOneDHWordIndexed:$list,
         addrmode6align16:$addr, pred:$p)>;
def VST1LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
    (ins VecListOneDWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;

def VST1LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
    (ins VecListOneDByteIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
    (ins VecListOneDHWordIndexed:$list,
         addrmode6align16:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
    (ins VecListOneDWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VST1LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
    (ins VecListOneDByteIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
    (ins VecListOneDHWordIndexed:$list,
         addrmode6align16:$addr, rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
    (ins VecListOneDWordIndexed:$list,
         addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// The "d" defs use the VecListTwoD* operands and the "q" defs the
// VecListTwoQ* operands; only .16 and .32 have "q" defs.
def VLD2LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
    (ins VecListTwoDByteIndexed:$list,
         addrmode6align16:$addr, pred:$p)>;
def VLD2LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
    (ins VecListTwoDHWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VLD2LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
    (ins VecListTwoDWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
def VLD2LNqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
    (ins VecListTwoQHWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VLD2LNqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
    (ins VecListTwoQWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;

def VLD2LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
    (ins VecListTwoDByteIndexed:$list,
         addrmode6align16:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
    (ins VecListTwoDHWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
    (ins VecListTwoDWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
    (ins VecListTwoQHWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
    (ins VecListTwoQWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
    (ins VecListTwoDByteIndexed:$list,
         addrmode6align16:$addr, rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
    (ins VecListTwoDHWordIndexed:$list,
         addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
    (ins VecListTwoDWordIndexed:$list,
         addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
    (ins VecListTwoQHWordIndexed:$list,
         addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
    (ins VecListTwoQWordIndexed:$list,
         addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// VST2 single-lane asm pseudos. Plain forms take "$list, $addr"; the
// WB_fixed forms take "$addr!" and the WB_register forms add an rGPR $Rm
// operand. The "d" defs use the VecListTwoD* operands and the "q" defs the
// VecListTwoQ* operands.
def VST2LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
    (ins VecListTwoDByteIndexed:$list,
         addrmode6align16:$addr, pred:$p)>;
def VST2LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
    (ins VecListTwoDHWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VST2LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
    (ins VecListTwoDWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
def VST2LNqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
    (ins VecListTwoQHWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VST2LNqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
    (ins VecListTwoQWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;

def VST2LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
    (ins VecListTwoDByteIndexed:$list,
         addrmode6align16:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
    (ins VecListTwoDHWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
    (ins VecListTwoDWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
    (ins VecListTwoQHWordIndexed:$list,
         addrmode6align32:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
    (ins VecListTwoQWordIndexed:$list,
         addrmode6align64:$addr, pred:$p)>;
def VST2LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
    (ins VecListTwoDByteIndexed:$list,
         addrmode6align16:$addr, rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
    (ins VecListTwoDHWordIndexed:$list,
         addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
    (ins VecListTwoDWordIndexed:$list,
         addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
    (ins VecListTwoQHWordIndexed:$list,
         addrmode6align32:$addr, rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
    (ins VecListTwoQWordIndexed:$list,
         addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Every def here uses addrmode6dupalignNone; the "d" defs take
// VecListThreeDAllLanes and the "q" defs VecListThreeQAllLanes.
def VLD3DUPdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;

def VLD3DUPdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeDAllLanes:$list,
         addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQAllLanes:$list,
         addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Every def here uses addrmode6alignNone; only .16 and .32 have "q" defs.
def VLD3LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeDByteIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeDHWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeDWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeQHWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeQWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;

def VLD3LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeDByteIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeDHWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeDWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQHWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQWordIndexed:$list,
         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeDByteIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeDHWordIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeDWordIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQHWordIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQWordIndexed:$list,
         addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Operand syntax "$list, $addr".
def VLD3dAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Operand syntax "$list, $addr!" (the WB_fixed forms).
def VLD3dWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Operand syntax "$list, $addr, $Rm" (the WB_register forms).
def VLD3dWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Operand syntax "$list, $addr". The indexed list operand class is chosen
// per element size (Byte/HWord/Word); there is no q-form for ".8".
def VST3LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
    (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
    (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
    (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
    (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
    (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;

// Operand syntax "$list, $addr!" (the WB_fixed forms).
def VST3LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
    (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
    (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
    (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;

// Operand syntax "$list, $addr, $Rm" (the WB_register forms).
def VST3LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Operand syntax "$list, $addr".
def VST3dAsm_8 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Operand syntax "$list, $addr!" (the WB_fixed forms).
def VST3dWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Operand syntax "$list, $addr, $Rm" (the WB_register forms).
def VST3dWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Operand syntax "$list, $addr". The address operand class differs per
// element size: dupalign32 (.8), dupalign64 (.16), dupalign64or128 (.32).
def VLD4DUPdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
         pred:$p)>;
def VLD4DUPqAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
         pred:$p)>;

// Operand syntax "$list, $addr!" (the WB_fixed forms).
def VLD4DUPdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
         pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
         pred:$p)>;

// Operand syntax "$list, $addr, $Rm" (the WB_register forms).
def VLD4DUPdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
         rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Operand syntax "$list, $addr". Both the indexed list class and the
// address class vary with element size: align32 (.8), align64 (.16),
// align64or128 (.32). There is no q-form for ".8".
def VLD4LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
    (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VLD4LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD4LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
    (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
         pred:$p)>;
def VLD4LNqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD4LNqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
    (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
         pred:$p)>;

// Operand syntax "$list, $addr!" (the WB_fixed forms).
def VLD4LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
         pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
         pred:$p)>;

// Operand syntax "$list, $addr, $Rm" (the WB_register forms).
def VLD4LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
         rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Operand syntax "$list, $addr".
def VLD4dAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4dAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4dAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4qAsm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4qAsm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4qAsm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;

// Operand syntax "$list, $addr!" (the WB_fixed forms).
def VLD4dWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;

// Operand syntax "$list, $addr, $Rm" (the WB_register forms).
def VLD4dWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Operand syntax "$list, $addr". Both the indexed list class and the
// address class vary with element size: align32 (.8), align64 (.16),
// align64or128 (.32). There is no q-form for ".8".
def VST4LNdAsm_8 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
    (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST4LNdAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST4LNdAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
    (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
         pred:$p)>;
def VST4LNqAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST4LNqAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
    (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
         pred:$p)>;

// Operand syntax "$list, $addr!" (the WB_fixed forms).
def VST4LNdWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
    (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
    (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
         pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
    (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
         pred:$p)>;

// Operand syntax "$list, $addr, $Rm" (the WB_register forms).
def VST4LNdWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
         rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
         rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
         rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Operand syntax "$list, $addr".
def VST4dAsm_8 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VST4dAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VST4dAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VST4qAsm_8 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VST4qAsm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VST4qAsm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;

// Operand syntax "$list, $addr!" (the WB_fixed forms).
def VST4dWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;

// Operand syntax "$list, $addr, $Rm" (the WB_register forms).
def VST4dWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
  NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;

// VMOV/VMVN (register) take an optional datatype suffix. The vmov aliases
// expand to VORR with $Vm supplied as both source operands.
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
// Each alias swaps $Dn and $Dm when building the VCGE instruction.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
        (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
        (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
        (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
        (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
        (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
        (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
        (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The .f16 form is additionally predicated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
        (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
}
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
        (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
        (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
        (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
        (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
        (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
        (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
        (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
        (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
}

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
// Each alias swaps $Dn and $Dm when building the VCGT instruction.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
        (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
        (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
        (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
        (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
        (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
        (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
        (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The .f16 form is additionally predicated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
        (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
}
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
        (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
        (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
        (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
        (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
        (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
        (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
        (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
        (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
}

// VSWP allows, but does not require, a type suffix.
// One any-type alias set per register width (D, then Q).
defm : NEONDTAnyInstAlias<
    "vswp${p}", "$Vd, $Vm",
    (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
    "vswp${p}", "$Vd, $Vm",
    (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
// D-register forms first, then the Q-register forms.
defm : NEONDTAnyInstAlias<
    "vbif${p}", "$Vd, $Vn, $Vm",
    (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
    "vbit${p}", "$Vd, $Vn, $Vm",
    (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
    "vbsl${p}", "$Vd, $Vn, $Vm",
    (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
    "vbif${p}", "$Vd, $Vn, $Vm",
    (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
    "vbit${p}", "$Vd, $Vn, $Vm",
    (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
    "vbsl${p}", "$Vd, $Vn, $Vm",
    (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Vd, #-imm" can be handled via "vmvn", and likewise "vmvn" via "vmov":
// both directions match the immediate with the nImmVMOVI32Neg operand.
def : NEONInstAlias<
    "vmov${p}.i32 $Vd, $imm",
    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<
    "vmov${p}.i32 $Vd, $imm",
    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<
    "vmvn${p}.i32 $Vd, $imm",
    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<
    "vmvn${p}.i32 $Vd, $imm",
    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should be restricted to just the Q-register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
// Bitwise logic.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

// Moves.
def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// The floating-point spellings are listed explicitly so that the FPImm
// variants get handled early; the parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

// Arithmetic.
def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

// Shifts.
def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

// Conversions.
def : NEONMnemonicAlias<"vcvtq", "vcvt">;

// Comparisons.
def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

// Permutes.
def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

// Reciprocal estimate; only the .f32 and .u32 spellings are aliased.
def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Aliases for loading floating-point immediates that are not representable
// in the vmov.f32 encoding but whose bit pattern is representable in the
// .i32 encoding.
def : NEONInstAlias<
    "vmov${p}.f32 $Vd, $imm",
    (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<
    "vmov${p}.f32 $Vd, $imm",
    (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;