1// asmcheck 2 3// Copyright 2018 The Go Authors. All rights reserved. 4// Use of this source code is governed by a BSD-style 5// license that can be found in the LICENSE file. 6 7package codegen 8 9import "math/bits" 10 11// ----------------------- // 12// bits.LeadingZeros // 13// ----------------------- // 14 15func LeadingZeros(n uint) int { 16 // amd64/v1,amd64/v2:"BSRQ" 17 // amd64/v3:"LZCNTQ", -"BSRQ" 18 // s390x:"FLOGR" 19 // arm:"CLZ" arm64:"CLZ" 20 // mips:"CLZ" 21 // wasm:"I64Clz" 22 // ppc64x:"CNTLZD" 23 return bits.LeadingZeros(n) 24} 25 26func LeadingZeros64(n uint64) int { 27 // amd64/v1,amd64/v2:"BSRQ" 28 // amd64/v3:"LZCNTQ", -"BSRQ" 29 // s390x:"FLOGR" 30 // arm:"CLZ" arm64:"CLZ" 31 // mips:"CLZ" 32 // wasm:"I64Clz" 33 // ppc64x:"CNTLZD" 34 return bits.LeadingZeros64(n) 35} 36 37func LeadingZeros32(n uint32) int { 38 // amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ" 39 // amd64/v3: "LZCNTL",- "BSRL" 40 // s390x:"FLOGR" 41 // arm:"CLZ" arm64:"CLZW" 42 // mips:"CLZ" 43 // wasm:"I64Clz" 44 // ppc64x:"CNTLZW" 45 return bits.LeadingZeros32(n) 46} 47 48func LeadingZeros16(n uint16) int { 49 // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ" 50 // amd64/v3: "LZCNTL",- "BSRL" 51 // s390x:"FLOGR" 52 // arm:"CLZ" arm64:"CLZ" 53 // mips:"CLZ" 54 // wasm:"I64Clz" 55 // ppc64x:"CNTLZD" 56 return bits.LeadingZeros16(n) 57} 58 59func LeadingZeros8(n uint8) int { 60 // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ" 61 // amd64/v3: "LZCNTL",- "BSRL" 62 // s390x:"FLOGR" 63 // arm:"CLZ" arm64:"CLZ" 64 // mips:"CLZ" 65 // wasm:"I64Clz" 66 // ppc64x:"CNTLZD" 67 return bits.LeadingZeros8(n) 68} 69 70// --------------- // 71// bits.Len* // 72// --------------- // 73 74func Len(n uint) int { 75 // amd64/v1,amd64/v2:"BSRQ" 76 // amd64/v3: "LZCNTQ" 77 // s390x:"FLOGR" 78 // arm:"CLZ" arm64:"CLZ" 79 // mips:"CLZ" 80 // wasm:"I64Clz" 81 // ppc64x:"SUBC","CNTLZD" 82 return bits.Len(n) 83} 84 85func Len64(n uint64) int { 86 // amd64/v1,amd64/v2:"BSRQ" 87 // amd64/v3: "LZCNTQ" 88 // s390x:"FLOGR" 89 // arm:"CLZ" arm64:"CLZ" 90 // mips:"CLZ" 91 // wasm:"I64Clz" 92 // ppc64x:"SUBC","CNTLZD" 93 return bits.Len64(n) 94} 95 96func SubFromLen64(n uint64) int { 97 // ppc64x:"CNTLZD",-"SUBC" 98 return 64 - bits.Len64(n) 99} 100 101func Len32(n uint32) int { 102 // amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ" 103 // amd64/v3: "LZCNTL" 104 // s390x:"FLOGR" 105 // arm:"CLZ" arm64:"CLZ" 106 // mips:"CLZ" 107 // wasm:"I64Clz" 108 // ppc64x: "CNTLZW" 109 return bits.Len32(n) 110} 111 112func Len16(n uint16) int { 113 // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ" 114 // amd64/v3: "LZCNTL" 115 // s390x:"FLOGR" 116 // arm:"CLZ" arm64:"CLZ" 117 // mips:"CLZ" 118 // wasm:"I64Clz" 119 // ppc64x:"SUBC","CNTLZD" 120 return bits.Len16(n) 121} 122 123func Len8(n uint8) int { 124 // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ" 125 // amd64/v3: "LZCNTL" 126 // s390x:"FLOGR" 127 // arm:"CLZ" arm64:"CLZ" 128 // mips:"CLZ" 129 // wasm:"I64Clz" 130 // ppc64x:"SUBC","CNTLZD" 131 return bits.Len8(n) 132} 133 134// -------------------- // 135// bits.OnesCount // 136// -------------------- // 137 138// TODO(register args) Restore a m d 6 4 / v 1 :.*x86HasPOPCNT when only one ABI is tested. 139func OnesCount(n uint) int { 140 // amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT" 141 // amd64:"POPCNTQ" 142 // arm64:"VCNT","VUADDLV" 143 // s390x:"POPCNT" 144 // ppc64x:"POPCNTD" 145 // wasm:"I64Popcnt" 146 return bits.OnesCount(n) 147} 148 149func OnesCount64(n uint64) int { 150 // amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT" 151 // amd64:"POPCNTQ" 152 // arm64:"VCNT","VUADDLV" 153 // s390x:"POPCNT" 154 // ppc64x:"POPCNTD" 155 // wasm:"I64Popcnt" 156 return bits.OnesCount64(n) 157} 158 159func OnesCount32(n uint32) int { 160 // amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT" 161 // amd64:"POPCNTL" 162 // arm64:"VCNT","VUADDLV" 163 // s390x:"POPCNT" 164 // ppc64x:"POPCNTW" 165 // wasm:"I64Popcnt" 166 return bits.OnesCount32(n) 167} 168 169func OnesCount16(n uint16) int { 170 // amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT" 171 // amd64:"POPCNTL" 172 // arm64:"VCNT","VUADDLV" 173 // s390x:"POPCNT" 174 // ppc64x:"POPCNTW" 175 // wasm:"I64Popcnt" 176 return bits.OnesCount16(n) 177} 178 179func OnesCount8(n uint8) int { 180 // s390x:"POPCNT" 181 // ppc64x:"POPCNTB" 182 // wasm:"I64Popcnt" 183 return bits.OnesCount8(n) 184} 185 186// ----------------------- // 187// bits.ReverseBytes // 188// ----------------------- // 189 190func ReverseBytes(n uint) uint { 191 // amd64:"BSWAPQ" 192 // 386:"BSWAPL" 193 // s390x:"MOVDBR" 194 // arm64:"REV" 195 return bits.ReverseBytes(n) 196} 197 198func ReverseBytes64(n uint64) uint64 { 199 // amd64:"BSWAPQ" 200 // 386:"BSWAPL" 201 // s390x:"MOVDBR" 202 // arm64:"REV" 203 // ppc64x/power10: "BRD" 204 return bits.ReverseBytes64(n) 205} 206 207func ReverseBytes32(n uint32) uint32 { 208 // amd64:"BSWAPL" 209 // 386:"BSWAPL" 210 // s390x:"MOVWBR" 211 // arm64:"REVW" 212 // ppc64x/power10: "BRW" 213 return bits.ReverseBytes32(n) 214} 215 216func ReverseBytes16(n uint16) uint16 { 217 // amd64:"ROLW" 218 // arm64:"REV16W",-"UBFX",-"ORR" 219 // arm/5:"SLL","SRL","ORR" 220 // arm/6:"REV16" 221 // arm/7:"REV16" 222 // ppc64x/power10: "BRH" 223 return bits.ReverseBytes16(n) 224} 225 226// --------------------- // 227// bits.RotateLeft // 228// --------------------- // 229 230func RotateLeft64(n uint64) uint64 { 231 // amd64:"ROLQ" 232 // arm64:"ROR" 233 // ppc64x:"ROTL" 234 // s390x:"RISBGZ\t[$]0, [$]63, [$]37, " 235 // wasm:"I64Rotl" 236 return bits.RotateLeft64(n, 37) 237} 238 239func RotateLeft32(n uint32) uint32 { 240 // amd64:"ROLL" 386:"ROLL" 241 // arm:`MOVW\tR[0-9]+@>23` 242 // arm64:"RORW" 243 // ppc64x:"ROTLW" 244 // s390x:"RLL" 245 // wasm:"I32Rotl" 246 return bits.RotateLeft32(n, 9) 247} 248 249func RotateLeft16(n uint16, s int) uint16 { 250 // amd64:"ROLW" 386:"ROLW" 251 // arm64:"RORW",-"CSEL" 252 return bits.RotateLeft16(n, s) 253} 254 255func RotateLeft8(n uint8, s int) uint8 { 256 // amd64:"ROLB" 386:"ROLB" 257 // arm64:"LSL","LSR",-"CSEL" 258 return bits.RotateLeft8(n, s) 259} 260 261func RotateLeftVariable(n uint, m int) uint { 262 // amd64:"ROLQ" 263 // arm64:"ROR" 264 // ppc64x:"ROTL" 265 // s390x:"RLLG" 266 // wasm:"I64Rotl" 267 return bits.RotateLeft(n, m) 268} 269 270func RotateLeftVariable64(n uint64, m int) uint64 { 271 // amd64:"ROLQ" 272 // arm64:"ROR" 273 // ppc64x:"ROTL" 274 // s390x:"RLLG" 275 // wasm:"I64Rotl" 276 return bits.RotateLeft64(n, m) 277} 278 279func RotateLeftVariable32(n uint32, m int) uint32 { 280 // arm:`MOVW\tR[0-9]+@>R[0-9]+` 281 // amd64:"ROLL" 282 // arm64:"RORW" 283 // ppc64x:"ROTLW" 284 // s390x:"RLL" 285 // wasm:"I32Rotl" 286 return bits.RotateLeft32(n, m) 287} 288 289// ------------------------ // 290// bits.TrailingZeros // 291// ------------------------ // 292 293func TrailingZeros(n uint) int { 294 // amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ" 295 // amd64/v3:"TZCNTQ" 296 // 386:"BSFL" 297 // arm:"CLZ" 298 // arm64:"RBIT","CLZ" 299 // s390x:"FLOGR" 300 // ppc64x/power8:"ANDN","POPCNTD" 301 // ppc64x/power9: "CNTTZD" 302 // wasm:"I64Ctz" 303 return bits.TrailingZeros(n) 304} 305 306func TrailingZeros64(n uint64) int { 307 // amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ" 308 // amd64/v3:"TZCNTQ" 309 // 386:"BSFL" 310 // arm64:"RBIT","CLZ" 311 // s390x:"FLOGR" 312 // ppc64x/power8:"ANDN","POPCNTD" 313 // ppc64x/power9: "CNTTZD" 314 // wasm:"I64Ctz" 315 return bits.TrailingZeros64(n) 316} 317 318func TrailingZeros64Subtract(n uint64) int { 319 // ppc64x/power8:"NEG","SUBC","ANDN","POPCNTD" 320 // ppc64x/power9:"SUBC","CNTTZD" 321 return bits.TrailingZeros64(1 - n) 322} 323 324func TrailingZeros32(n uint32) int { 325 // amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ" 326 // amd64/v3:"TZCNTL" 327 // 386:"BSFL" 328 // arm:"CLZ" 329 // arm64:"RBITW","CLZW" 330 // s390x:"FLOGR","MOVWZ" 331 // ppc64x/power8:"ANDN","POPCNTW" 332 // ppc64x/power9: "CNTTZW" 333 // wasm:"I64Ctz" 334 return bits.TrailingZeros32(n) 335} 336 337func TrailingZeros16(n uint16) int { 338 // amd64:"BSFL","ORL\\t\\$65536" 339 // 386:"BSFL\t" 340 // arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR" 341 // arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t" 342 // s390x:"FLOGR","OR\t\\$65536" 343 // ppc64x/power8:"POPCNTD","ORIS\\t\\$1" 344 // ppc64x/power9:"CNTTZD","ORIS\\t\\$1" 345 // wasm:"I64Ctz" 346 return bits.TrailingZeros16(n) 347} 348 349func TrailingZeros8(n uint8) int { 350 // amd64:"BSFL","ORL\\t\\$256" 351 // 386:"BSFL" 352 // arm:"ORR\t\\$256","CLZ",-"MOVBU\tR" 353 // arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t" 354 // s390x:"FLOGR","OR\t\\$256" 355 // wasm:"I64Ctz" 356 return bits.TrailingZeros8(n) 357} 358 359// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be non-zero. 360 361func IterateBits(n uint) int { 362 i := 0 363 for n != 0 { 364 // amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ" 365 // amd64/v3:"TZCNTQ" 366 i += bits.TrailingZeros(n) 367 n &= n - 1 368 } 369 return i 370} 371 372func IterateBits64(n uint64) int { 373 i := 0 374 for n != 0 { 375 // amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ" 376 // amd64/v3:"TZCNTQ" 377 i += bits.TrailingZeros64(n) 378 n &= n - 1 379 } 380 return i 381} 382 383func IterateBits32(n uint32) int { 384 i := 0 385 for n != 0 { 386 // amd64/v1,amd64/v2:"BSFL",-"BTSQ" 387 // amd64/v3:"TZCNTL" 388 i += bits.TrailingZeros32(n) 389 n &= n - 1 390 } 391 return i 392} 393 394func IterateBits16(n uint16) int { 395 i := 0 396 for n != 0 { 397 // amd64/v1,amd64/v2:"BSFL",-"BTSL" 398 // amd64/v3:"TZCNTL" 399 // arm64:"RBITW","CLZW",-"ORR" 400 i += bits.TrailingZeros16(n) 401 n &= n - 1 402 } 403 return i 404} 405 406func IterateBits8(n uint8) int { 407 i := 0 408 for n != 0 { 409 // amd64/v1,amd64/v2:"BSFL",-"BTSL" 410 // amd64/v3:"TZCNTL" 411 // arm64:"RBITW","CLZW",-"ORR" 412 i += bits.TrailingZeros8(n) 413 n &= n - 1 414 } 415 return i 416} 417 418// --------------- // 419// bits.Add* // 420// --------------- // 421 422func Add(x, y, ci uint) (r, co uint) { 423 // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" 424 // amd64:"NEGL","ADCQ","SBBQ","NEGQ" 425 // ppc64x: "ADDC", "ADDE", "ADDZE" 426 // s390x:"ADDE","ADDC\t[$]-1," 427 // riscv64: "ADD","SLTU" 428 return bits.Add(x, y, ci) 429} 430 431func AddC(x, ci uint) (r, co uint) { 432 // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" 433 // amd64:"NEGL","ADCQ","SBBQ","NEGQ" 434 // loong64: "ADDV", "SGTU" 435 // ppc64x: "ADDC", "ADDE", "ADDZE" 436 // s390x:"ADDE","ADDC\t[$]-1," 437 // mips64:"ADDV","SGTU" 438 // riscv64: "ADD","SLTU" 439 return bits.Add(x, 7, ci) 440} 441 442func AddZ(x, y uint) (r, co uint) { 443 // arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP" 444 // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ" 445 // loong64: "ADDV", "SGTU" 446 // ppc64x: "ADDC", -"ADDE", "ADDZE" 447 // s390x:"ADDC",-"ADDC\t[$]-1," 448 // mips64:"ADDV","SGTU" 449 // riscv64: "ADD","SLTU" 450 return bits.Add(x, y, 0) 451} 452 453func AddR(x, y, ci uint) uint { 454 // arm64:"ADDS","ADCS",-"ADD\t",-"CMP" 455 // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ" 456 // loong64: "ADDV", -"SGTU" 457 // ppc64x: "ADDC", "ADDE", -"ADDZE" 458 // s390x:"ADDE","ADDC\t[$]-1," 459 // mips64:"ADDV",-"SGTU" 460 // riscv64: "ADD",-"SLTU" 461 r, _ := bits.Add(x, y, ci) 462 return r 463} 464 465func AddM(p, q, r *[3]uint) { 466 var c uint 467 r[0], c = bits.Add(p[0], q[0], c) 468 // arm64:"ADCS",-"ADD\t",-"CMP" 469 // amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ" 470 // s390x:"ADDE",-"ADDC\t[$]-1," 471 r[1], c = bits.Add(p[1], q[1], c) 472 r[2], c = bits.Add(p[2], q[2], c) 473} 474 475func Add64(x, y, ci uint64) (r, co uint64) { 476 // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" 477 // amd64:"NEGL","ADCQ","SBBQ","NEGQ" 478 // loong64: "ADDV", "SGTU" 479 // ppc64x: "ADDC", "ADDE", "ADDZE" 480 // s390x:"ADDE","ADDC\t[$]-1," 481 // mips64:"ADDV","SGTU" 482 // riscv64: "ADD","SLTU" 483 return bits.Add64(x, y, ci) 484} 485 486func Add64C(x, ci uint64) (r, co uint64) { 487 // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" 488 // amd64:"NEGL","ADCQ","SBBQ","NEGQ" 489 // loong64: "ADDV", "SGTU" 490 // ppc64x: "ADDC", "ADDE", "ADDZE" 491 // s390x:"ADDE","ADDC\t[$]-1," 492 // mips64:"ADDV","SGTU" 493 // riscv64: "ADD","SLTU" 494 return bits.Add64(x, 7, ci) 495} 496 497func Add64Z(x, y uint64) (r, co uint64) { 498 // arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP" 499 // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ" 500 // loong64: "ADDV", "SGTU" 501 // ppc64x: "ADDC", -"ADDE", "ADDZE" 502 // s390x:"ADDC",-"ADDC\t[$]-1," 503 // mips64:"ADDV","SGTU" 504 // riscv64: "ADD","SLTU" 505 return bits.Add64(x, y, 0) 506} 507 508func Add64R(x, y, ci uint64) uint64 { 509 // arm64:"ADDS","ADCS",-"ADD\t",-"CMP" 510 // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ" 511 // loong64: "ADDV", -"SGTU" 512 // ppc64x: "ADDC", "ADDE", -"ADDZE" 513 // s390x:"ADDE","ADDC\t[$]-1," 514 // mips64:"ADDV",-"SGTU" 515 // riscv64: "ADD",-"SLTU" 516 r, _ := bits.Add64(x, y, ci) 517 return r 518} 519 520func Add64M(p, q, r *[3]uint64) { 521 var c uint64 522 r[0], c = bits.Add64(p[0], q[0], c) 523 // arm64:"ADCS",-"ADD\t",-"CMP" 524 // amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ" 525 // ppc64x: -"ADDC", "ADDE", -"ADDZE" 526 // s390x:"ADDE",-"ADDC\t[$]-1," 527 r[1], c = bits.Add64(p[1], q[1], c) 528 r[2], c = bits.Add64(p[2], q[2], c) 529} 530 531func Add64M0(p, q, r *[3]uint64) { 532 var c uint64 533 r[0], c = bits.Add64(p[0], q[0], 0) 534 // ppc64x: -"ADDC", -"ADDE", "ADDZE\tR[1-9]" 535 r[1], c = bits.Add64(p[1], 0, c) 536 // ppc64x: -"ADDC", "ADDE", -"ADDZE" 537 r[2], c = bits.Add64(p[2], p[2], c) 538} 539 540func Add64MSaveC(p, q, r, c *[2]uint64) { 541 // ppc64x: "ADDC\tR", "ADDZE" 542 r[0], c[0] = bits.Add64(p[0], q[0], 0) 543 // ppc64x: "ADDC\t[$]-1", "ADDE", "ADDZE" 544 r[1], c[1] = bits.Add64(p[1], q[1], c[0]) 545} 546 547func Add64PanicOnOverflowEQ(a, b uint64) uint64 { 548 r, c := bits.Add64(a, b, 0) 549 // s390x:"BRC\t[$]3,",-"ADDE" 550 if c == 1 { 551 panic("overflow") 552 } 553 return r 554} 555 556func Add64PanicOnOverflowNE(a, b uint64) uint64 { 557 r, c := bits.Add64(a, b, 0) 558 // s390x:"BRC\t[$]3,",-"ADDE" 559 if c != 0 { 560 panic("overflow") 561 } 562 return r 563} 564 565func Add64PanicOnOverflowGT(a, b uint64) uint64 { 566 r, c := bits.Add64(a, b, 0) 567 // s390x:"BRC\t[$]3,",-"ADDE" 568 if c > 0 { 569 panic("overflow") 570 } 571 return r 572} 573 574func Add64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 { 575 var r [2]uint64 576 var c uint64 577 r[0], c = bits.Add64(a[0], b[0], c) 578 r[1], c = bits.Add64(a[1], b[1], c) 579 // s390x:"BRC\t[$]3," 580 if c == 1 { 581 panic("overflow") 582 } 583 return r 584} 585 586func Add64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 { 587 var r [2]uint64 588 var c uint64 589 r[0], c = bits.Add64(a[0], b[0], c) 590 r[1], c = bits.Add64(a[1], b[1], c) 591 // s390x:"BRC\t[$]3," 592 if c != 0 { 593 panic("overflow") 594 } 595 return r 596} 597 598func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 { 599 var r [2]uint64 600 var c uint64 601 r[0], c = bits.Add64(a[0], b[0], c) 602 r[1], c = bits.Add64(a[1], b[1], c) 603 // s390x:"BRC\t[$]3," 604 if c > 0 { 605 panic("overflow") 606 } 607 return r 608} 609 610// Verify independent carry chain operations are scheduled efficiently 611// and do not cause unnecessary save/restore of the CA bit. 612// 613// This is an example of why CarryChainTail priority must be lower 614// (earlier in the block) than Memory. f[0]=f1 could be scheduled 615// after the first two lower 64 bit limb adds, but before either 616// high 64 bit limbs are added. 617// 618// This is what happened on PPC64 when compiling 619// crypto/internal/edwards25519/field.feMulGeneric. 620func Add64MultipleChains(a, b, c, d [2]uint64) { 621 var cx, d1, d2 uint64 622 a1, a2 := a[0], a[1] 623 b1, b2 := b[0], b[1] 624 c1, c2 := c[0], c[1] 625 626 // ppc64x: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" 627 d1, cx = bits.Add64(a1, b1, 0) 628 // ppc64x: "ADDE", -"ADDC", -"MOVD\t.*, XER" 629 d2, _ = bits.Add64(a2, b2, cx) 630 631 // ppc64x: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" 632 d1, cx = bits.Add64(c1, d1, 0) 633 // ppc64x: "ADDE", -"ADDC", -"MOVD\t.*, XER" 634 d2, _ = bits.Add64(c2, d2, cx) 635 d[0] = d1 636 d[1] = d2 637} 638 639// --------------- // 640// bits.Sub* // 641// --------------- // 642 643func Sub(x, y, ci uint) (r, co uint) { 644 // amd64:"NEGL","SBBQ","NEGQ" 645 // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP" 646 // loong64:"SUBV","SGTU" 647 // ppc64x:"SUBC", "SUBE", "SUBZE", "NEG" 648 // s390x:"SUBE" 649 // mips64:"SUBV","SGTU" 650 // riscv64: "SUB","SLTU" 651 return bits.Sub(x, y, ci) 652} 653 654func SubC(x, ci uint) (r, co uint) { 655 // amd64:"NEGL","SBBQ","NEGQ" 656 // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP" 657 // loong64:"SUBV","SGTU" 658 // ppc64x:"SUBC", "SUBE", "SUBZE", "NEG" 659 // s390x:"SUBE" 660 // mips64:"SUBV","SGTU" 661 // riscv64: "SUB","SLTU" 662 return bits.Sub(x, 7, ci) 663} 664 665func SubZ(x, y uint) (r, co uint) { 666 // amd64:"SUBQ","SBBQ","NEGQ",-"NEGL" 667 // arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP" 668 // loong64:"SUBV","SGTU" 669 // ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG" 670 // s390x:"SUBC" 671 // mips64:"SUBV","SGTU" 672 // riscv64: "SUB","SLTU" 673 return bits.Sub(x, y, 0) 674} 675 676func SubR(x, y, ci uint) uint { 677 // amd64:"NEGL","SBBQ",-"NEGQ" 678 // arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP" 679 // loong64:"SUBV",-"SGTU" 680 // ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG" 681 // s390x:"SUBE" 682 // riscv64: "SUB",-"SLTU" 683 r, _ := bits.Sub(x, y, ci) 684 return r 685} 686func SubM(p, q, r *[3]uint) { 687 var c uint 688 r[0], c = bits.Sub(p[0], q[0], c) 689 // amd64:"SBBQ",-"NEGL",-"NEGQ" 690 // arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP" 691 // ppc64x:-"SUBC", "SUBE", -"SUBZE", -"NEG" 692 // s390x:"SUBE" 693 r[1], c = bits.Sub(p[1], q[1], c) 694 r[2], c = bits.Sub(p[2], q[2], c) 695} 696 697func Sub64(x, y, ci uint64) (r, co uint64) { 698 // amd64:"NEGL","SBBQ","NEGQ" 699 // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP" 700 // loong64:"SUBV","SGTU" 701 // ppc64x:"SUBC", "SUBE", "SUBZE", "NEG" 702 // s390x:"SUBE" 703 // mips64:"SUBV","SGTU" 704 // riscv64: "SUB","SLTU" 705 return bits.Sub64(x, y, ci) 706} 707 708func Sub64C(x, ci uint64) (r, co uint64) { 709 // amd64:"NEGL","SBBQ","NEGQ" 710 // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP" 711 // loong64:"SUBV","SGTU" 712 // ppc64x:"SUBC", "SUBE", "SUBZE", "NEG" 713 // s390x:"SUBE" 714 // mips64:"SUBV","SGTU" 715 // riscv64: "SUB","SLTU" 716 return bits.Sub64(x, 7, ci) 717} 718 719func Sub64Z(x, y uint64) (r, co uint64) { 720 // amd64:"SUBQ","SBBQ","NEGQ",-"NEGL" 721 // arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP" 722 // loong64:"SUBV","SGTU" 723 // ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG" 724 // s390x:"SUBC" 725 // mips64:"SUBV","SGTU" 726 // riscv64: "SUB","SLTU" 727 return bits.Sub64(x, y, 0) 728} 729 730func Sub64R(x, y, ci uint64) uint64 { 731 // amd64:"NEGL","SBBQ",-"NEGQ" 732 // arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP" 733 // loong64:"SUBV",-"SGTU" 734 // ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG" 735 // s390x:"SUBE" 736 // riscv64: "SUB",-"SLTU" 737 r, _ := bits.Sub64(x, y, ci) 738 return r 739} 740func Sub64M(p, q, r *[3]uint64) { 741 var c uint64 742 r[0], c = bits.Sub64(p[0], q[0], c) 743 // amd64:"SBBQ",-"NEGL",-"NEGQ" 744 // arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP" 745 // s390x:"SUBE" 746 r[1], c = bits.Sub64(p[1], q[1], c) 747 r[2], c = bits.Sub64(p[2], q[2], c) 748} 749 750func Sub64MSaveC(p, q, r, c *[2]uint64) { 751 // ppc64x:"SUBC\tR\\d+, R\\d+,", "SUBZE", "NEG" 752 r[0], c[0] = bits.Sub64(p[0], q[0], 0) 753 // ppc64x:"SUBC\tR\\d+, [$]0,", "SUBE", "SUBZE", "NEG" 754 r[1], c[1] = bits.Sub64(p[1], q[1], c[0]) 755} 756 757func Sub64PanicOnOverflowEQ(a, b uint64) uint64 { 758 r, b := bits.Sub64(a, b, 0) 759 // s390x:"BRC\t[$]12,",-"ADDE",-"SUBE" 760 if b == 1 { 761 panic("overflow") 762 } 763 return r 764} 765 766func Sub64PanicOnOverflowNE(a, b uint64) uint64 { 767 r, b := bits.Sub64(a, b, 0) 768 // s390x:"BRC\t[$]12,",-"ADDE",-"SUBE" 769 if b != 0 { 770 panic("overflow") 771 } 772 return r 773} 774 775func Sub64PanicOnOverflowGT(a, b uint64) uint64 { 776 r, b := bits.Sub64(a, b, 0) 777 // s390x:"BRC\t[$]12,",-"ADDE",-"SUBE" 778 if b > 0 { 779 panic("overflow") 780 } 781 return r 782} 783 784func Sub64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 { 785 var r [2]uint64 786 var c uint64 787 r[0], c = bits.Sub64(a[0], b[0], c) 788 r[1], c = bits.Sub64(a[1], b[1], c) 789 // s390x:"BRC\t[$]12," 790 if c == 1 { 791 panic("overflow") 792 } 793 return r 794} 795 796func Sub64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 { 797 var r [2]uint64 798 var c uint64 799 r[0], c = bits.Sub64(a[0], b[0], c) 800 r[1], c = bits.Sub64(a[1], b[1], c) 801 // s390x:"BRC\t[$]12," 802 if c != 0 { 803 panic("overflow") 804 } 805 return r 806} 807 808func Sub64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 { 809 var r [2]uint64 810 var c uint64 811 r[0], c = bits.Sub64(a[0], b[0], c) 812 r[1], c = bits.Sub64(a[1], b[1], c) 813 // s390x:"BRC\t[$]12," 814 if c > 0 { 815 panic("overflow") 816 } 817 return r 818} 819 820// --------------- // 821// bits.Mul* // 822// --------------- // 823 824func Mul(x, y uint) (hi, lo uint) { 825 // amd64:"MULQ" 826 // arm64:"UMULH","MUL" 827 // ppc64x:"MULHDU","MULLD" 828 // s390x:"MLGR" 829 // mips64: "MULVU" 830 // riscv64:"MULHU","MUL" 831 return bits.Mul(x, y) 832} 833 834func Mul64(x, y uint64) (hi, lo uint64) { 835 // amd64:"MULQ" 836 // arm64:"UMULH","MUL" 837 // ppc64x:"MULHDU","MULLD" 838 // s390x:"MLGR" 839 // mips64: "MULVU" 840 // riscv64:"MULHU","MUL" 841 return bits.Mul64(x, y) 842} 843 844func Mul64HiOnly(x, y uint64) uint64 { 845 // arm64:"UMULH",-"MUL" 846 // riscv64:"MULHU",-"MUL\t" 847 hi, _ := bits.Mul64(x, y) 848 return hi 849} 850 851func Mul64LoOnly(x, y uint64) uint64 { 852 // arm64:"MUL",-"UMULH" 853 // riscv64:"MUL\t",-"MULHU" 854 _, lo := bits.Mul64(x, y) 855 return lo 856} 857 858// --------------- // 859// bits.Div* // 860// --------------- // 861 862func Div(hi, lo, x uint) (q, r uint) { 863 // amd64:"DIVQ" 864 return bits.Div(hi, lo, x) 865} 866 867func Div32(hi, lo, x uint32) (q, r uint32) { 868 // arm64:"ORR","UDIV","MSUB",-"UREM" 869 return bits.Div32(hi, lo, x) 870} 871 872func Div64(hi, lo, x uint64) (q, r uint64) { 873 // amd64:"DIVQ" 874 return bits.Div64(hi, lo, x) 875} 876 877func Div64degenerate(x uint64) (q, r uint64) { 878 // amd64:-"DIVQ" 879 return bits.Div64(0, x, 5) 880} 881