// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ppc64

import (
	"cmd/compile/internal/base"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/objw"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/ssagen"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/ppc64"
	"internal/buildcfg"
	"math"
	"strings"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	//	flive := b.FlagsLiveAtEnd
	//	if b.Control != nil && b.Control.Type.IsFlags() {
	//		flive = true
	//	}
	//	for i := len(b.Values) - 1; i >= 0; i-- {
	//		v := b.Values[i]
	//		if flive && v.Op == ssa.OpPPC64MOVDconst {
	//			// The "mark" is any non-nil Aux value.
	//			v.Aux = v
	//		}
	//		if v.Type.IsFlags() {
	//			flive = false
	//		}
	//		for _, a := range v.Args {
	//			if a.Type.IsFlags() {
	//				flive = true
	//			}
	//		}
	//	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			if t.IsSigned() {
				return ppc64.AMOVB
			} else {
				return ppc64.AMOVBZ
			}
		case 2:
			if t.IsSigned() {
				return ppc64.AMOVH
			} else {
				return ppc64.AMOVHZ
			}
		case 4:
			if t.IsSigned() {
				return ppc64.AMOVW
			} else {
				return ppc64.AMOVWZ
			}
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad load type")
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			return ppc64.AMOVB
		case 2:
			return ppc64.AMOVH
		case 4:
			return ppc64.AMOVW
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad store type")
}

// ssaGenValue emits the machine instructions for a single SSA value.
func ssaGenValue(s *ssagen.State, v *ssa.Value) {
	switch v.Op {
	case ssa.OpCopy:
		t := v.Type
		if t.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			rt := obj.TYPE_REG
			op := ppc64.AMOVD

			if t.IsFloat() {
				op = ppc64.AFMOVD
			}
			p := s.Prog(op)
			p.From.Type = rt
			p.From.Reg = x
			p.To.Type = rt
			p.To.Reg = y
		}

	case ssa.OpPPC64LoweredAtomicAnd8,
		ssa.OpPPC64LoweredAtomicAnd32,
		ssa.OpPPC64LoweredAtomicOr8,
		ssa.OpPPC64LoweredAtomicOr32:
		// LWSYNC
		// LBAR/LWAR	(Rarg0), Rtmp
		// AND/OR	Rarg1, Rtmp
		// STBCCC/STWCCC Rtmp, (Rarg0)
		// BNE		-3(PC)
		ld := ppc64.ALBAR
		st := ppc64.ASTBCCC
		if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
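		// What follows is a standard load-reserve/store-conditional
		// retry loop: if the conditional store fails because the
		// reservation was lost, the final BNE branches back to the
		// load and the read-modify-write is retried.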
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LBAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		// AND/OR reg1,out
		p1 := s.Prog(v.Op.Asm())
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = ppc64.REGTMP
		// STBCCC or STWCCC
		p2 := s.Prog(st)
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = ppc64.REGTMP
		p2.To.Type = obj.TYPE_MEM
		p2.To.Reg = r0
		p2.RegTo2 = ppc64.REGTMP
		// BNE retry
		p3 := s.Prog(ppc64.ABNE)
		p3.To.Type = obj.TYPE_BRANCH
		p3.To.SetTarget(p)

	case ssa.OpPPC64LoweredAtomicAdd32,
		ssa.OpPPC64LoweredAtomicAdd64:
		// LWSYNC
		// LDAR/LWAR	(Rarg0), Rout
		// ADD		Rarg1, Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		// BNE		-3(PC)
		// MOVWZ	Rout, Rout (if Add32)
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// ADD reg1,out
		p1 := s.Prog(ppc64.AADD)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Reg = out
		p1.To.Type = obj.TYPE_REG
		// STDCCC or STWCCC
		p3 := s.Prog(st)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = out
		p3.To.Type = obj.TYPE_MEM
		p3.To.Reg = r0
		// BNE retry
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		p4.To.SetTarget(p)

		// Ensure a 32 bit result
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			p5 := s.Prog(ppc64.AMOVWZ)
			p5.To.Type = obj.TYPE_REG
			p5.To.Reg = out
			p5.From.Type = obj.TYPE_REG
			p5.From.Reg = out
		}

	case ssa.OpPPC64LoweredAtomicExchange32,
		ssa.OpPPC64LoweredAtomicExchange64:
		// LWSYNC
		// LDAR/LWAR	(Rarg0), Rout
		// STDCCC/STWCCC Rarg1, (Rarg0)
		// BNE		-2(PC)
		// ISYNC
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		out := v.Reg0()
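		// The BNE/ISYNC pair emitted below is a common Power idiom for
		// acquire ordering: the ISYNC does not complete until the
		// branch that depends on the reserved load has resolved.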
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// STDCCC or STWCCC
		p1 := s.Prog(st)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = r0
		// BNE retry
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		p2.To.SetTarget(p)
		// ISYNC
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE

	case ssa.OpPPC64LoweredAtomicLoad8,
		ssa.OpPPC64LoweredAtomicLoad32,
		ssa.OpPPC64LoweredAtomicLoad64,
		ssa.OpPPC64LoweredAtomicLoadPtr:
		// SYNC
		// MOVB/MOVD/MOVW (Rarg0), Rout
		// CMP	Rout, Rout
		// BNE	1(PC)
		// ISYNC
		ld := ppc64.AMOVD
		cmp := ppc64.ACMP
		switch v.Op {
		case ssa.OpPPC64LoweredAtomicLoad8:
			ld = ppc64.AMOVBZ
		case ssa.OpPPC64LoweredAtomicLoad32:
			ld = ppc64.AMOVWZ
			cmp = ppc64.ACMPW
		}
		arg0 := v.Args[0].Reg()
		out := v.Reg0()
		// SYNC when AuxInt == 1; otherwise, load-acquire
		if v.AuxInt == 1 {
			psync := s.Prog(ppc64.ASYNC)
			psync.To.Type = obj.TYPE_NONE
		}
		// Load
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = arg0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// CMP
		p1 := s.Prog(cmp)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = out
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = out
		// BNE
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		// ISYNC
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE
		p2.To.SetTarget(pisync)

	case ssa.OpPPC64LoweredAtomicStore8,
		ssa.OpPPC64LoweredAtomicStore32,
		ssa.OpPPC64LoweredAtomicStore64:
		// SYNC or LWSYNC
		// MOVB/MOVW/MOVD arg1, (arg0)
		st := ppc64.AMOVD
		switch v.Op {
		case ssa.OpPPC64LoweredAtomicStore8:
			st = ppc64.AMOVB
		case ssa.OpPPC64LoweredAtomicStore32:
			st = ppc64.AMOVW
		}
		arg0 := v.Args[0].Reg()
		arg1 := v.Args[1].Reg()
		// If AuxInt == 0, LWSYNC (Store-Release), else SYNC
		syncOp := ppc64.ASYNC
		if v.AuxInt == 0 {
			syncOp = ppc64.ALWSYNC
		}
		psync := s.Prog(syncOp)
		psync.To.Type = obj.TYPE_NONE
		// Store
		p := s.Prog(st)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = arg0
		p.From.Type = obj.TYPE_REG
		p.From.Reg = arg1

	case ssa.OpPPC64LoweredAtomicCas64,
		ssa.OpPPC64LoweredAtomicCas32:
		// MOVD	$0, Rout
		// LWSYNC
		// loop:
		// LDAR	(Rarg0), MutexHint, Rtmp
		// CMP	Rarg1, Rtmp
		// BNE	end
		// STDCCC Rarg2, (Rarg0)
		// BNE	loop
		// MOVD	$1, Rout
		// end:
		// LWSYNC // Only for sequential consistency; not required in CasRel.
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		cmp := ppc64.ACMP
		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
			cmp = ppc64.ACMPW
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		r2 := v.Args[2].Reg()
		out := v.Reg0()
		// Initialize return value to false
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync1 := s.Prog(ppc64.ALWSYNC)
		plwsync1.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p0 := s.Prog(ld)
		p0.From.Type = obj.TYPE_MEM
		p0.From.Reg = r0
		p0.To.Type = obj.TYPE_REG
		p0.To.Reg = ppc64.REGTMP
		// If it is a Compare-and-Swap-Release operation, set the EH field with
		// the release hint.
		if v.AuxInt == 0 {
			p0.AddRestSourceConst(0)
		}
		// CMP reg1,reg2
		p1 := s.Prog(cmp)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Reg = ppc64.REGTMP
		p1.To.Type = obj.TYPE_REG
		// BNE done with return value = false
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		// STDCCC or STWCCC
		p3 := s.Prog(st)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = r2
		p3.To.Type = obj.TYPE_MEM
		p3.To.Reg = r0
		// BNE retry
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		p4.To.SetTarget(p0)
		// return value true
		p5 := s.Prog(ppc64.AMOVD)
		p5.From.Type = obj.TYPE_CONST
		p5.From.Offset = 1
		p5.To.Type = obj.TYPE_REG
		p5.To.Reg = out
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
		// If the operation is a CAS-Release, then synchronization is not necessary.
		if v.AuxInt != 0 {
			plwsync2 := s.Prog(ppc64.ALWSYNC)
			plwsync2.To.Type = obj.TYPE_NONE
			p2.To.SetTarget(plwsync2)
		} else {
			// done (label)
			p6 := s.Prog(obj.ANOP)
			p2.To.SetTarget(p6)
		}

	case ssa.OpPPC64LoweredPubBarrier:
		// LWSYNC
		s.Prog(v.Op.Asm())

	case ssa.OpPPC64LoweredGetClosurePtr:
		// Closure pointer is R11 (already).
		ssagen.CheckLoweredGetClosurePtr(v)

	case ssa.OpPPC64LoweredGetCallerSP:
		// caller's SP is FixedFrameSize below the address of the first arg
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64LoweredGetCallerPC:
		p := s.Prog(obj.AGETCALLERPC)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
		// input is already rounded

	case ssa.OpLoadReg:
		loadOp := loadByType(v.Type)
		p := s.Prog(loadOp)
		ssagen.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		storeOp := storeByType(v.Type)
		p := s.Prog(storeOp)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddrAuto(&p.To, v)

	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
		// The assembler needs to wrap the entry safepoint/stack growth code
		// with spill/unspill of the register args. The loop only runs once.
		for _, a := range v.Block.Func.RegArgs {
			// Pass the spill/unspill information along to the assembler, offset by size of
			// the saved LR slot.
			addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
			s.FuncInfo().AddSpill(
				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
		}
		v.Block.Func.RegArgs = nil

		ssagen.CheckArgReg(v)

	case ssa.OpPPC64DIVD:
		// For now,
		//
		// cmp arg1, -1
		// be  ahead
		// v = arg0 / arg1
		// b over
		// ahead: v = - arg0
		// over: nop
		r := v.Reg()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()

		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = -1

		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH

		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH

		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r0
		pbahead.To.SetTarget(p)

		p = s.Prog(obj.ANOP)
		pbover.To.SetTarget(p)

	case ssa.OpPPC64DIVW:
		// word-width version of above
		r := v.Reg()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()

		p := s.Prog(ppc64.ACMPW)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = -1

		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH

		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH

		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r0
		pbahead.To.SetTarget(p)

		p = s.Prog(obj.ANOP)
		pbover.To.SetTarget(p)

	case ssa.OpPPC64CLRLSLWI:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		shifts := v.AuxInt
		p := s.Prog(v.Op.Asm())
		// clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64CLRLSLDI:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		shifts := v.AuxInt
		p := s.Prog(v.Op.Asm())
		// clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
		ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW, ssa.OpPPC64XSMINJDP, ssa.OpPPC64XSMAXJDP:
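		// All of these emit "op Rarg1, Rarg0, Rresult": in the Go
		// assembler's three-operand form, p.From holds the second
		// source operand, p.Reg the first, and p.To the destination,
		// so Rresult = Rarg0 op Rarg1.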
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
		ssa.OpPPC64ANDNCC:
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpPPC64NEGCC, ssa.OpPPC64CNTLZDCC:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()

	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	// Auxint holds encoded rotate + mask
	case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
		p := s.Prog(v.Op.Asm())
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.Reg = v.Args[0].Reg()
		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(sh)}
		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})

	// Auxint holds mask
	case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICLCC, ssa.OpPPC64RLDICR:
		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
		p := s.Prog(v.Op.Asm())
		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: sh}
		switch v.Op {
		case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICLCC:
			p.AddRestSourceConst(mb)
		case ssa.OpPPC64RLDICR:
			p.AddRestSourceConst(me)
		}
		p.Reg = v.Args[0].Reg()
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.ResultReg()}

	case ssa.OpPPC64RLWNM:
		_, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
		p := s.Prog(v.Op.Asm())
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.Reg = v.Args[0].Reg()
		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})

	case ssa.OpPPC64MADDLD:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		r3 := v.Args[2].Reg()
		// r = r1*r2 + r3
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r2
		p.AddRestSourceReg(r3)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		r3 := v.Args[2].Reg()
		// r = r1*r2 ± r3
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r3
		p.AddRestSourceReg(r2)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
		ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
		ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
		ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
	case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
		ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst,
		ssa.OpPPC64ANDconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
		r := v.Reg0() // CA is the first, implied argument.
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ADDZE:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_R0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64ADDCconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		// Output is a pair, the second is the CA, which is implied.
		p.To.Reg = v.Reg0()

	case ssa.OpPPC64SUBCconst:
		p := s.Prog(v.Op.Asm())
		p.AddRestSourceConst(v.AuxInt)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpPPC64SUBFCconst:
		p := s.Prog(v.Op.Asm())
		p.AddRestSourceConst(v.AuxInt)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64ADDCCconst, ssa.OpPPC64ANDCCconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpPPC64MOVDaddr:
		switch v.Aux.(type) {
		default:
			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
		case nil:
			// If aux offset and aux int are both 0, and the same
			// input and output regs are used, no instruction
			// needs to be generated, since it would just be
			// addi rx, rx, 0.
			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_ADDR
				p.From.Reg = v.Args[0].Reg()
				p.From.Offset = v.AuxInt
				p.To.Type = obj.TYPE_REG
				p.To.Reg = v.Reg()
			}

		case *obj.LSym, ir.Node:
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			ssagen.AddAux(&p.From, v)
		}

	case ssa.OpPPC64MOVDconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()

	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt

	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
		// Sign or zero extend in the register to the required size.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Reg = v.Reg()
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:

		// MOVDload and MOVWload are DS form instructions that are restricted to
		// offsets that are a multiple of 4. If the offset is not a multiple of 4,
		// then the address of the symbol to be loaded is computed (base + offset)
		// and used as the new base register and the offset field in the instruction
		// can be set to zero.

		// This same problem can happen with gostrings since the final offset is not
		// known yet, but could be unaligned after the relocation is resolved.
		// So gostrings are handled the same way.

		// This allows the MOVDload and MOVWload to be generated in more cases and
		// eliminates some offset and alignment checking in the rules file.

		fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
		ssagen.AddAux(&fromAddr, v)

		genAddr := false

		switch fromAddr.Name {
		case obj.NAME_EXTERN, obj.NAME_STATIC:
			// Special case for a rule that combines the bytes of a gostring.
			// The alignment of v might seem OK, but we don't want to load it
			// using an offset because the relocation comes later.
			genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
		default:
			genAddr = fromAddr.Offset%4 != 0
		}
		if genAddr {
			// Load the full address into the temp register.
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			ssagen.AddAux(&p.From, v)
			// Load target using temp as base register
			// and offset zero. Setting NAME_NONE
			// prevents any extra offsets from being
			// added.
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
			fromAddr.Reg = ppc64.REGTMP
			// Clear the offset field and other
			// information that might be used
			// by the assembler to add to the
			// final offset value.
			fromAddr.Offset = 0
			fromAddr.Name = obj.NAME_NONE
			fromAddr.Sym = nil
		}
		p := s.Prog(v.Op.Asm())
		p.From = fromAddr
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64DCBT:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt

	case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:

		// MOVDstore and MOVDstorezero become DS form instructions that are restricted
		// to offset values that are a multiple of 4. If the offset field is not a
		// multiple of 4, then the full address of the store target is computed (base +
		// offset) and used as the new base register and the offset in the instruction
		// is set to 0.

		// This allows the MOVDstore and MOVDstorezero to be generated in more cases,
		// and prevents checking of the offset value and alignment in the rules.

		toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
		ssagen.AddAux(&toAddr, v)

		if toAddr.Offset%4 != 0 {
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			ssagen.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
			toAddr.Reg = ppc64.REGTMP
			// Clear the offset field and other
			// information that might be used
			// by the assembler to add to the
			// final offset value.
			toAddr.Offset = 0
			toAddr.Name = obj.NAME_NONE
			toAddr.Sym = nil
		}
		p := s.Prog(v.Op.Asm())
		p.To = toAddr
		p.From.Type = obj.TYPE_REG
		if v.Op == ssa.OpPPC64MOVDstorezero {
			p.From.Reg = ppc64.REGZERO
		} else {
			p.From.Reg = v.Args[1].Reg()
		}

	case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
		ssa.OpPPC64MOVHBRstoreidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()

	case ssa.OpPPC64ISEL, ssa.OpPPC64ISELZ:
		// ISEL  AuxInt ? arg0 : arg1
		// ISELZ is a special case of ISEL where arg1 is implicitly $0.
		//
		// AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
		// ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
		// Convert the condition to a CR bit argument by the following conversion:
		//
		// AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
		// AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
		p := s.Prog(v.Op.Asm())
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.Reg = v.Args[0].Reg()
		if v.Op == ssa.OpPPC64ISEL {
			p.AddRestSourceReg(v.Args[1].Reg())
		} else {
			p.AddRestSourceReg(ppc64.REG_R0)
		}
		// AuxInt values 4,5,6 are implemented with reverse operand order from 0,1,2
		if v.AuxInt > 3 {
			p.Reg, p.GetFrom3().Reg = p.GetFrom3().Reg, p.Reg
		}
		p.From.SetConst(v.AuxInt & 3)

	case ssa.OpPPC64SETBC, ssa.OpPPC64SETBCR:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = int16(ppc64.REG_CR0LT + v.AuxInt)

	case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
		// The LoweredQuad code generation
		// generates STXV instructions on
		// power9. The Short variation is used
		// if no loop is generated.
		//
		// Sizes >= 64 generate a loop as follows
		// (XXLXOR clears VS32; the loop counter
		// is set up in CTR, used by BC):
		//	XXLXOR VS32,VS32,VS32
		//	MOVD len/64,REG_TMP
		//	MOVD REG_TMP,CTR
		// loop:
		//	STXV VS32,0(R20)
		//	STXV VS32,16(R20)
		//	STXV VS32,32(R20)
		//	STXV VS32,48(R20)
		//	ADD $64,R20
		//	BC 16,0,loop

		// Number of 64 byte iterations
		ctr := v.AuxInt / 64

		// Remainder bytes
		rem := v.AuxInt % 64

		// Only generate a loop if there is more
		// than 1 iteration.
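		// For example, AuxInt = 200 gives ctr = 3 and rem = 8: the loop
		// clears 192 bytes and a single MOVD clears the remaining
		// doubleword.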
		if ctr > 1 {
			// Set up VS32 (V0) to hold 0s
			p := s.Prog(ppc64.AXXLXOR)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			p.Reg = ppc64.REG_VS32

			// Set up CTR loop counter
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Don't generate padding for
			// loops with few iterations.
			if ctr > 3 {
				p = s.Prog(obj.APCALIGN)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 16
			}

			// generate 4 STXVs to zero 64 bytes
			var top *obj.Prog

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()

			// Save the top of loop
			if top == nil {
				top = p
			}
			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = 16

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = 32

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = 48

			// Increment address for the
			// 64 bytes just zeroed.
			p = s.Prog(ppc64.AADD)
			p.Reg = v.Args[0].Reg()
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 64
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Args[0].Reg()

			// Branch back to top of loop
			// based on CTR.
			// BC with BO_BCTR generates bdnz.
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_CR0LT
			p.To.Type = obj.TYPE_BRANCH
			p.To.SetTarget(top)
		}
		// When ctr == 1 the loop was not generated but
		// there are at least 64 bytes to clear, so add
		// that to the remainder to generate the code
		// to clear those doublewords.
		if ctr == 1 {
			rem += 64
		}

		// Clear the remainder starting at offset zero
		offset := int64(0)

		if rem >= 16 && ctr <= 1 {
			// If the XXLXOR hasn't already been
			// generated, do it here to initialize
			// VS32 (V0) to 0.
			p := s.Prog(ppc64.AXXLXOR)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			p.Reg = ppc64.REG_VS32
		}
		// Generate STXV for 32 or 64
		// bytes.
		for rem >= 32 {
			p := s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset + 16
			offset += 32
			rem -= 32
		}
		// Generate 16 bytes
		if rem >= 16 {
			p := s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset
			offset += 16
			rem -= 16
		}

		// first clear as many doublewords as possible
		// then clear remaining sizes as available
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			p := s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:

		// Unaligned data doesn't hurt performance
		// for these instructions on power8.
		//
		// For sizes >= 64 generate a loop as follows
		// (the loop counter is set up in CTR, used by BC):
		//	XXLXOR VS32,VS32,VS32
		//	MOVD len/32,REG_TMP
		//	MOVD REG_TMP,CTR
		//	MOVD $16,REG_TMP
		// loop:
		//	STXVD2X VS32,(R0)(R20)
		//	STXVD2X VS32,(R31)(R20)
		//	ADD $32,R20
		//	BC 16,0,loop
		//
		// any remainder is done as described below
		//
		// for sizes < 64 bytes, first clear as many doublewords as possible,
		// then handle the remainder
		//	MOVD R0,(R20)
		//	MOVD R0,8(R20)
		//	... etc.
		//
		// the remainder bytes are cleared using one or more
		// of the following instructions with the appropriate
		// offsets depending which instructions are needed
		//	MOVW R0,n1(R20)	// 4 bytes
		//	MOVH R0,n2(R20)	// 2 bytes
		//	MOVB R0,n3(R20)	// 1 byte
		//
		// 7 bytes: MOVW, MOVH, MOVB
		// 6 bytes: MOVW, MOVH
		// 5 bytes: MOVW, MOVB
		// 3 bytes: MOVH, MOVB

		// each loop iteration does 32 bytes
		ctr := v.AuxInt / 32

		// remainder bytes
		rem := v.AuxInt % 32

		// only generate a loop if there is more
		// than 1 iteration.
		if ctr > 1 {
			// Set up VS32 (V0) to hold 0s
			p := s.Prog(ppc64.AXXLXOR)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			p.Reg = ppc64.REG_VS32

			// Set up CTR loop counter
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Set up R31 to hold index value 16
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
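
			// STXVD2X supports only indexed addressing, so the two
			// stores in the loop body use REGZERO (R0, kept zero by
			// convention) and REGTMP (holding 16) as index registers
			// to cover offsets 0 and 16.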

			// Don't add padding for alignment
			// with few loop iterations.
			if ctr > 3 {
				p = s.Prog(obj.APCALIGN)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 16
			}

			// Generate 2 STXVD2Xs, each zeroing 16 bytes.
			// When this is a loop, the top must be saved.
			var top *obj.Prog
			// This is the top of loop

			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Index = ppc64.REGZERO
			// Save the top of loop
			if top == nil {
				top = p
			}
			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Index = ppc64.REGTMP

			// Increment address for the
			// 4 doublewords just zeroed.
			p = s.Prog(ppc64.AADD)
			p.Reg = v.Args[0].Reg()
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Args[0].Reg()

			// Branch back to top of loop
			// based on CTR.
			// BC with BO_BCTR generates bdnz.
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_CR0LT
			p.To.Type = obj.TYPE_BRANCH
			p.To.SetTarget(top)
		}

		// when ctr == 1 the loop was not generated but
		// there are at least 32 bytes to clear, so add
		// that to the remainder to generate the code
		// to clear those doublewords.
		if ctr == 1 {
			rem += 32
		}

		// clear the remainder starting at offset zero
		offset := int64(0)

		// first clear as many doublewords as possible
		// then clear remaining sizes as available
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			p := s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:

		bytesPerLoop := int64(32)
		// This will be used when moving more
		// than 8 bytes. Moves start with
		// as many 8 byte moves as possible, then
		// 4, 2, or 1 byte(s) as remaining. This will
		// work and be efficient for power8 or later.
		// If there are 64 or more bytes, then a
		// loop is generated to move 32 bytes and
		// update the src and dst addresses on each
		// iteration. When < 64 bytes, the appropriate
		// number of moves are generated based on the
		// size.
		//
		// When moving >= 64 bytes a loop is used
		//	MOVD len/32,REG_TMP
		//	MOVD REG_TMP,CTR
		//	MOVD $16,REG_TMP
		// top:
		//	LXVD2X (R0)(R21),VS32
		//	LXVD2X (R31)(R21),VS33
		//	ADD $32,R21
		//	STXVD2X VS32,(R0)(R20)
		//	STXVD2X VS33,(R31)(R20)
		//	ADD $32,R20
		//	BC 16,0,top
		//
		// Bytes not moved by this loop are moved
		// with a combination of the following instructions,
		// starting with the largest sizes and generating as
		// many as needed, using the appropriate offset value.
		//	MOVD n(R21),R31
		//	MOVD R31,n(R20)
		//	MOVW n1(R21),R31
		//	MOVW R31,n1(R20)
		//	MOVH n2(R21),R31
		//	MOVH R31,n2(R20)
		//	MOVB n3(R21),R31
		//	MOVB R31,n3(R20)

		// Each loop iteration moves 32 bytes
		ctr := v.AuxInt / bytesPerLoop

		// Remainder after the loop
		rem := v.AuxInt % bytesPerLoop

		dstReg := v.Args[0].Reg()
		srcReg := v.Args[1].Reg()

		// The set of registers used here must match the clobbered reg list
		// in PPC64Ops.go.
		offset := int64(0)

		// top of the loop
		var top *obj.Prog
		// Only generate looping code when loop counter is > 1 for >= 64 bytes
		if ctr > 1 {
			// Set up the CTR
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Use REGTMP as index reg
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			// Don't add padding for alignment
			// with small iteration counts.
			if ctr > 3 {
				p = s.Prog(obj.APCALIGN)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 16
			}

			// Generate 16 byte loads and stores.
			// Use temp register for index (16)
			// on the second one.

			p = s.Prog(ppc64.ALXVD2X)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Index = ppc64.REGZERO
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			if top == nil {
				top = p
			}
			p = s.Prog(ppc64.ALXVD2X)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Index = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS33

			// increment the src reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = srcReg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = bytesPerLoop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = srcReg

			// generate 16 byte stores
			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Index = ppc64.REGZERO

			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS33
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Index = ppc64.REGTMP

			// increment the dst reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = dstReg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = bytesPerLoop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = dstReg

			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
			// to loop top.
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_CR0LT
			p.To.Type = obj.TYPE_BRANCH
			p.To.SetTarget(top)

			// srcReg and dstReg were incremented in the loop, so
			// later instructions start with offset 0.
			offset = int64(0)
		}

		// No loop was generated for one iteration, so
		// add 32 bytes to the remainder to move those bytes.
		if ctr == 1 {
			rem += bytesPerLoop
		}
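
		// For example, a 41 byte move has ctr = 1, so no loop is
		// generated and rem becomes 41: two 16 byte LXVD2X/STXVD2X
		// pairs are emitted, then a MOVD and a MOVB move the final
		// 9 bytes.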
		if rem >= 16 {
			// Generate 16 byte loads and stores.
			// Use temp register for index (value 16)
			// on the second one.
			p := s.Prog(ppc64.ALXVD2X)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Index = ppc64.REGZERO
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32

			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Index = ppc64.REGZERO

			offset = 16
			rem -= 16

			if rem >= 16 {
				// Use REGTMP as index reg
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 16
				p.To.Type = obj.TYPE_REG
				p.To.Reg = ppc64.REGTMP

				p = s.Prog(ppc64.ALXVD2X)
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = srcReg
				p.From.Index = ppc64.REGTMP
				p.To.Type = obj.TYPE_REG
				p.To.Reg = ppc64.REG_VS32

				p = s.Prog(ppc64.ASTXVD2X)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = ppc64.REG_VS32
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = dstReg
				p.To.Index = ppc64.REGTMP

				offset = 32
				rem -= 16
			}
		}

		// Generate all the remaining load and store pairs, starting with
		// as many 8 byte moves as possible, then 4, 2, 1.
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVWZ, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			// Load
			p := s.Prog(op)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset

			// Store
			p = s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
		bytesPerLoop := int64(64)
		// This is used when moving more
		// than 8 bytes on power9. Moves start with
		// as many 8 byte moves as possible, then
		// 4, 2, or 1 byte(s) as remaining. This will
		// work and be efficient for power8 or later.
		// If there are 64 or more bytes, then a
		// loop is generated to move 64 bytes and
		// update the src and dst addresses on each
		// iteration. When < 64 bytes, the appropriate
		// number of moves are generated based on the
		// size.
		//
		// When moving >= 64 bytes a loop is used
		//	MOVD len/64,REG_TMP
		//	MOVD REG_TMP,CTR
		// top:
		//	LXV 0(R21),VS32
		//	LXV 16(R21),VS33
		//	STXV VS32,0(R20)
		//	STXV VS33,16(R20)
		//	LXV 32(R21),VS32
		//	LXV 48(R21),VS33
		//	STXV VS32,32(R20)
		//	STXV VS33,48(R20)
		//	ADD $64,R21
		//	ADD $64,R20
		//	BC 16,0,top
		//
		// Bytes not moved by this loop are moved
		// with a combination of the following instructions,
		// starting with the largest sizes and generating as
		// many as needed, using the appropriate offset value.
		//	MOVD n(R21),R31
		//	MOVD R31,n(R20)
		//	MOVW n1(R21),R31
		//	MOVW R31,n1(R20)
		//	MOVH n2(R21),R31
		//	MOVH R31,n2(R20)
		//	MOVB n3(R21),R31
		//	MOVB R31,n3(R20)

		// Each loop iteration moves 64 bytes
		ctr := v.AuxInt / bytesPerLoop

		// Remainder after the loop
		rem := v.AuxInt % bytesPerLoop

		dstReg := v.Args[0].Reg()
		srcReg := v.Args[1].Reg()

		offset := int64(0)

		// top of the loop
		var top *obj.Prog

		// Only generate looping code when loop counter is > 1 for >= 64 bytes
		if ctr > 1 {
			// Set up the CTR
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			p = s.Prog(obj.APCALIGN)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 16

			// Generate 16 byte loads and stores.
			p = s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			if top == nil {
				top = p
			}
			p = s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset + 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS33

			// generate 16 byte stores
			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS33
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset + 16

			// Generate 16 byte loads and stores.
			p = s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset + 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32

			p = s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset + 48
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS33

			// generate 16 byte stores
			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset + 32

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS33
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset + 48

			// increment the src reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = srcReg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = bytesPerLoop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = srcReg

			// increment the dst reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = dstReg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = bytesPerLoop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = dstReg

			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
			// to loop top.
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_CR0LT
			p.To.Type = obj.TYPE_BRANCH
			p.To.SetTarget(top)

			// srcReg and dstReg were incremented in the loop, so
			// later instructions start with offset 0.
			offset = int64(0)
		}

		// No loop was generated for one iteration, so
		// add 64 bytes to the remainder to move those bytes.
		if ctr == 1 {
			rem += bytesPerLoop
		}
		if rem >= 32 {
			p := s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32

			p = s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS33

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS33
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = 16

			offset = 32
			rem -= 32
		}

		if rem >= 16 {
			// Generate 16 byte loads and stores.
			p := s.Prog(ppc64.ALXV)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32

			p = s.Prog(ppc64.ASTXV)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset

			offset += 16
			rem -= 16

			if rem >= 16 {
				p := s.Prog(ppc64.ALXV)
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = srcReg
				p.From.Offset = offset
				p.To.Type = obj.TYPE_REG
				p.To.Reg = ppc64.REG_VS32

				p = s.Prog(ppc64.ASTXV)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = ppc64.REG_VS32
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = dstReg
				p.To.Offset = offset

				offset += 16
				rem -= 16
			}
		}
		// Generate all the remaining load and store pairs, starting with
		// as many 8 byte moves as possible, then 4, 2, 1.
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVWZ, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			// Load
			p := s.Prog(op)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = srcReg
			p.From.Offset = offset

			// Store
			p = s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dstReg
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64CALLstatic:
		s.Call(v)

	case ssa.OpPPC64CALLtail:
		s.TailCall(v)

	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_LR

		if v.Args[0].Reg() != ppc64.REG_R12 {
			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
		}

		pp := s.Call(v)

		// Convert the call into a blrl with a hint that this is not a subroutine return.
		// The full bclrl opcode must be specified when passing a hint.
		pp.As = ppc64.ABCL
		pp.From.Type = obj.TYPE_CONST
		pp.From.Offset = ppc64.BO_ALWAYS
		pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored.
		pp.To.Reg = ppc64.REG_LR
		pp.AddRestSourceConst(1) // BH = 1 (the branch is not a subroutine return).

		if ppc64.NeedTOCpointer(base.Ctxt) {
			// When compiling Go into PIC, the function we just
			// called via pointer might have been implemented in
			// a separate module and so overwritten the TOC
			// pointer in R2; reload it.
			q := s.Prog(ppc64.AMOVD)
			q.From.Type = obj.TYPE_MEM
			q.From.Offset = 24
			q.From.Reg = ppc64.REGSP
			q.To.Type = obj.TYPE_REG
			q.To.Reg = ppc64.REG_R2
		}

	case ssa.OpPPC64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		// AuxInt encodes how many buffer entries we need.
		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]

	case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(16) // space used in callee args area by assembly stubs

	case ssa.OpPPC64LoweredNilCheck:
		if buildcfg.GOOS == "aix" {
			// CMP Rarg0, $0
			// BNE 2(PC)
			// STW R0, 0(R0)
			// NOP (so the BNE has somewhere to land)

			// CMP Rarg0, $0
			p := s.Prog(ppc64.ACMP)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_CONST
			p.To.Offset = 0

			// BNE 2(PC)
			p2 := s.Prog(ppc64.ABNE)
			p2.To.Type = obj.TYPE_BRANCH

			// STW R0, 0(R0)
			// Writing at address 0 is forbidden and will trigger a SIGSEGV.
			p = s.Prog(ppc64.AMOVW)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = ppc64.REG_R0

			// NOP (so the BNE has somewhere to land)
			nop := s.Prog(obj.ANOP)
			p2.To.SetTarget(nop)

		} else {
			// Issue a load which will fault if arg is nil.
			p := s.Prog(ppc64.AMOVBZ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = v.Args[0].Reg()
			ssagen.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
		}
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			base.WarnfAt(v.Pos, "generated nil check")
		}

	// These should be resolved by rules and not make it here.
	case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
		ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
		ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
		v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
	case ssa.OpPPC64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpClobber, ssa.OpClobberReg:
		// TODO: implement for clobberdead experiment. Nop is ok for now.
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

// blockJump gives the branch instruction (asm) and its inverse (invasm) for
// each conditional block kind. For the floating-point kinds, asmeq emits an
// extra BEQ when branching on the positive condition, and invasmun an extra
// BVS (branch on unordered) when branching on the inverted one.
var blockJump = [...]struct {
	asm, invasm     obj.As
	asmeq, invasmun bool
}{
	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},

	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},

	// TODO: need to work FP comparisons into block jumps
	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
}

// ssaGenBlock emits the control flow that ends block b; next is the block
// laid out immediately after b, so a branch to next can be omitted.
func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockDefer:
		// defer returns in R3:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_R3
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = 0

		p = s.Prog(ppc64.ABNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}

	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
	case ssa.BlockRet:
		s.Prog(obj.ARET)

	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
			if jmp.invasmun {
				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
				s.Br(ppc64.ABVS, b.Succs[1].Block())
			}
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
			if jmp.asmeq {
				s.Br(ppc64.ABEQ, b.Succs[0].Block())
			}
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				if jmp.asmeq {
					s.Br(ppc64.ABEQ, b.Succs[0].Block())
				}
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				if jmp.invasmun {
					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
					s.Br(ppc64.ABVS, b.Succs[1].Block())
				}
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}

// loadRegResult emits a load of result value n (at frame offset off) into reg.
func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p := s.Prog(loadByType(t))
	p.From.Type = obj.TYPE_MEM
	p.From.Name = obj.NAME_AUTO
	p.From.Sym = n.Linksym()
	p.From.Offset = n.FrameOffset() + off
	p.To.Type = obj.TYPE_REG
	p.To.Reg = reg
	return p
}

// spillArgReg appends a store of reg to the stack slot of argument n (at
// frame offset off) after p.
func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
	p.To.Name = obj.NAME_PARAM
	p.To.Sym = n.Linksym()
	p.Pos = p.Pos.WithNotStmt()
	return p
}