// Copyright (c) 2017, Google Inc.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

// delocate performs several transformations of textual assembly code. See
// crypto/fipsmodule/FIPS.md for an overview.
package main

import (
	"bytes"
	"errors"
	"flag"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"

	"boringssl.googlesource.com/boringssl/util/ar"
	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
)

// inputFile represents a textual assembly file.
type inputFile struct {
	// path presumably names where the input was read from — confirm against
	// the code that populates it, which is not in this part of the file.
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file. AST nodes refer to
	// sub-ranges of this string via their begin and end offsets.
	contents string
	// ast points to the head of the syntax tree.
	ast *node32
}

// stringWriter is the only capability that the delocation code needs from its
// output sink: appending a string.
type stringWriter interface {
	WriteString(string) (int, error)
}

// processorType identifies the architecture whose assembly is being
// processed. The constants start at one, so the zero value of processorType
// matches none of them.
type processorType int

const (
	x86_64 processorType = iota + 1
	aarch64
)

// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor selects which instruction rewriter is applied to
	// instruction statements.
	processor processorType
	// output receives the rewritten assembly.
	output stringWriter
	// commentIndicator starts a comment, e.g. "//" or "#"
	commentIndicator string

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}
	// gotDeltaNeeded is true if the code needs to load the value of
	// _GLOBAL_OFFSET_TABLE_.
	gotDeltaNeeded bool
	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
	gotOffsetsNeeded map[string]struct{}
	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
	gotOffOffsetsNeeded map[string]struct{}

	// currentInput is the file being processed; AST node offsets are
	// resolved against its contents.
	currentInput inputFile
}

// contents returns the text of the current input that node spans, i.e. the
// bytes from node.begin up to, but not including, node.end.
func (d *delocation) contents(node *node32) string {
	return d.currentInput.contents[node.begin:node.end]
}

// writeNode writes out an AST node.
98func (d *delocation) writeNode(node *node32) { 99 if _, err := d.output.WriteString(d.contents(node)); err != nil { 100 panic(err) 101 } 102} 103 104func (d *delocation) writeCommentedNode(node *node32) { 105 line := d.contents(node) 106 if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil { 107 panic(err) 108 } 109} 110 111func locateError(err error, with *node32, in inputFile) error { 112 posMap := translatePositions([]rune(in.contents), []int{int(with.begin)}) 113 var line int 114 for _, pos := range posMap { 115 line = pos.line 116 } 117 118 return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err) 119} 120 121func (d *delocation) processInput(input inputFile) (err error) { 122 d.currentInput = input 123 124 var origStatement *node32 125 defer func() { 126 if err := recover(); err != nil { 127 panic(locateError(fmt.Errorf("%s", err), origStatement, input)) 128 } 129 }() 130 131 for statement := input.ast.up; statement != nil; statement = statement.next { 132 assertNodeType(statement, ruleStatement) 133 origStatement = statement 134 135 node := skipWS(statement.up) 136 if node == nil { 137 d.writeNode(statement) 138 continue 139 } 140 141 switch node.pegRule { 142 case ruleGlobalDirective, ruleComment, ruleLocationDirective: 143 d.writeNode(statement) 144 case ruleDirective: 145 statement, err = d.processDirective(statement, node.up) 146 case ruleLabelContainingDirective: 147 statement, err = d.processLabelContainingDirective(statement, node.up) 148 case ruleLabel: 149 statement, err = d.processLabel(statement, node.up) 150 case ruleInstruction: 151 switch d.processor { 152 case x86_64: 153 statement, err = d.processIntelInstruction(statement, node.up) 154 case aarch64: 155 statement, err = d.processAarch64Instruction(statement, node.up) 156 default: 157 panic("unknown processor") 158 } 159 default: 160 panic(fmt.Sprintf("unknown top-level statement type 
%q", rul3s[node.pegRule])) 161 } 162 163 if err != nil { 164 return locateError(err, origStatement, input) 165 } 166 } 167 168 return nil 169} 170 171func (d *delocation) processDirective(statement, directive *node32) (*node32, error) { 172 assertNodeType(directive, ruleDirectiveName) 173 directiveName := d.contents(directive) 174 175 var args []string 176 forEachPath(directive, func(arg *node32) { 177 // If the argument is a quoted string, use the raw contents. 178 // (Note that this doesn't unescape the string, but that's not 179 // needed so far. 180 if arg.up != nil { 181 arg = arg.up 182 assertNodeType(arg, ruleQuotedArg) 183 if arg.up == nil { 184 args = append(args, "") 185 return 186 } 187 arg = arg.up 188 assertNodeType(arg, ruleQuotedText) 189 } 190 args = append(args, d.contents(arg)) 191 }, ruleArgs, ruleArg) 192 193 switch directiveName { 194 case "comm", "lcomm": 195 if len(args) < 1 { 196 return nil, errors.New("comm directive has no arguments") 197 } 198 d.bssAccessorsNeeded[args[0]] = args[0] 199 d.writeNode(statement) 200 201 case "data": 202 // ASAN and some versions of MSAN are adding a .data section, 203 // and adding references to symbols within it to the code. We 204 // will have to work around this in the future. 205 return nil, errors.New(".data section found in module") 206 207 case "section": 208 section := args[0] 209 210 if section == ".data.rel.ro" { 211 // In a normal build, this is an indication of a 212 // problem but any references from the module to this 213 // section will result in a relocation and thus will 214 // break the integrity check. ASAN can generate these 215 // sections and so we will likely have to work around 216 // that in the future. 217 return nil, errors.New(".data.rel.ro section found in module") 218 } 219 220 sectionType, ok := sectionType(section) 221 if !ok { 222 // Unknown sections are permitted in order to be robust 223 // to different compiler modes. 
224 d.writeNode(statement) 225 break 226 } 227 228 switch sectionType { 229 case ".rodata", ".text": 230 // Move .rodata to .text so it may be accessed without 231 // a relocation. GCC with -fmerge-constants will place 232 // strings into separate sections, so we move all 233 // sections named like .rodata. Also move .text.startup 234 // so the self-test function is also in the module. 235 d.writeCommentedNode(statement) 236 d.output.WriteString(".text\n") 237 238 case ".data": 239 // See above about .data 240 return nil, errors.New(".data section found in module") 241 242 case ".init_array", ".fini_array", ".ctors", ".dtors": 243 // init_array/ctors/dtors contains function 244 // pointers to constructor/destructor 245 // functions. These contain relocations, but 246 // they're in a different section anyway. 247 d.writeNode(statement) 248 break 249 250 case ".debug", ".note": 251 d.writeNode(statement) 252 break 253 254 case ".bss": 255 d.writeNode(statement) 256 return d.handleBSS(statement) 257 } 258 259 default: 260 d.writeNode(statement) 261 } 262 263 return statement, nil 264} 265 266func (d *delocation) processSymbolExpr(expr *node32, b *strings.Builder) bool { 267 changed := false 268 assertNodeType(expr, ruleSymbolExpr) 269 270 for expr != nil { 271 atom := expr.up 272 assertNodeType(atom, ruleSymbolAtom) 273 274 for term := atom.up; term != nil; term = skipWS(term.next) { 275 if term.pegRule == ruleSymbolExpr { 276 changed = d.processSymbolExpr(term, b) || changed 277 continue 278 } 279 280 if term.pegRule != ruleLocalSymbol { 281 b.WriteString(d.contents(term)) 282 continue 283 } 284 285 oldSymbol := d.contents(term) 286 newSymbol := d.mapLocalSymbol(oldSymbol) 287 if newSymbol != oldSymbol { 288 changed = true 289 } 290 291 b.WriteString(newSymbol) 292 } 293 294 next := skipWS(atom.next) 295 if next == nil { 296 break 297 } 298 assertNodeType(next, ruleSymbolOperator) 299 b.WriteString(d.contents(next)) 300 next = skipWS(next.next) 301 
assertNodeType(next, ruleSymbolExpr) 302 expr = next 303 } 304 return changed 305} 306 307func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) { 308 // The symbols within directives need to be mapped so that local 309 // symbols in two different .s inputs don't collide. 310 changed := false 311 assertNodeType(directive, ruleLabelContainingDirectiveName) 312 name := d.contents(directive) 313 314 node := directive.next 315 assertNodeType(node, ruleWS) 316 317 node = node.next 318 assertNodeType(node, ruleSymbolArgs) 319 320 var args []string 321 for node = skipWS(node.up); node != nil; node = skipWS(node.next) { 322 assertNodeType(node, ruleSymbolArg) 323 arg := node.up 324 assertNodeType(arg, ruleSymbolExpr) 325 326 var b strings.Builder 327 changed = d.processSymbolExpr(arg, &b) || changed 328 329 args = append(args, b.String()) 330 } 331 332 if !changed { 333 d.writeNode(statement) 334 } else { 335 d.writeCommentedNode(statement) 336 d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n") 337 } 338 339 return statement, nil 340} 341 342func (d *delocation) processLabel(statement, label *node32) (*node32, error) { 343 symbol := d.contents(label) 344 345 switch label.pegRule { 346 case ruleLocalLabel: 347 d.output.WriteString(symbol + ":\n") 348 case ruleLocalSymbol: 349 // symbols need to be mapped so that local symbols from two 350 // different .s inputs don't collide. 351 d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n") 352 case ruleSymbolName: 353 d.output.WriteString(localTargetName(symbol) + ":\n") 354 d.writeNode(statement) 355 default: 356 return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule]) 357 } 358 359 return statement, nil 360} 361 362// instructionArgs collects all the arguments to an instruction. 
func instructionArgs(node *node32) (argNodes []*node32) {
	// Every argument is wrapped in an InstructionArg node; it is the wrapped
	// child that is collected.
	for node = skipWS(node); node != nil; node = skipWS(node.next) {
		assertNodeType(node, ruleInstructionArg)
		argNodes = append(argNodes, node.up)
	}

	return argNodes
}

// Aarch64 support

// gotHelperName returns the name of a synthesised function that returns an
// address from the GOT.
func gotHelperName(symbol string) string {
	return ".Lboringssl_loadgot_" + symbol
}

// loadAarch64Address emits instructions to put the address of |symbol|
// (optionally adjusted by |offsetStr|) into |targetReg|. The original
// statement is emitted as a comment first. It panics if |offsetStr| is
// non-empty and the symbol has to be loaded via a helper function.
func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) {
	// There are two paths here: either the symbol is known to be local in which
	// case adr is used to get the address (within 1MiB), or a GOT reference is
	// really needed in which case the code needs to jump to a helper function.
	//
	// A helper function is needed because using code appears to be the only way
	// to load a GOT value. On other platforms we have ".quad foo@GOT" outside of
	// the module, but on Aarch64 that results in a "COPY" relocation and linker
	// comments suggest it's a weird hack. So, for each GOT symbol needed, we emit
	// a function outside of the module that returns the address from the GOT in
	// x0.

	d.writeCommentedNode(statement)

	_, isKnown := d.symbols[symbol]
	isLocal := strings.HasPrefix(symbol, ".L")
	if isKnown || isLocal || isSynthesized(symbol) {
		if isLocal {
			symbol = d.mapLocalSymbol(symbol)
		} else if isKnown {
			symbol = localTargetName(symbol)
		}

		d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n")

		return statement, nil
	}

	if len(offsetStr) != 0 {
		panic("non-zero offset for helper-based reference")
	}

	var helperFunc string
	if symbol == "OPENSSL_armcap_P" {
		helperFunc = ".LOPENSSL_armcap_P_addr"
	} else {
		// GOT helpers also dereference the GOT entry, thus the subsequent ldr
		// instruction, which would normally do the dereferencing, needs to be
		// dropped. GOT helpers have to include the dereference because the
		// assembler doesn't support ":got_lo12:foo" offsets except in an ldr
		// instruction.
		d.gotExternalsNeeded[symbol] = struct{}{}
		helperFunc = gotHelperName(symbol)
	}

	// Clear the red-zone. I can't find a definitive answer about whether Linux
	// Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a
	// 128-byte one. Thus conservatively clear a 128-byte red-zone.
	d.output.WriteString("\tsub sp, sp, 128\n")

	// Save x0 (which will be stomped by the return value) and the link register
	// to the stack. Then save the program counter into the link register and
	// jump to the helper function.
	d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n")
	d.output.WriteString("\tbl " + helperFunc + "\n")

	if targetReg == "x0" {
		// If the target happens to be x0 then restore the link register from the
		// stack and send the saved value of x0 to the zero register.
		d.output.WriteString("\tldp xzr, lr, [sp], #16\n")
	} else {
		// Otherwise move the result into place and restore registers.
		d.output.WriteString("\tmov " + targetReg + ", x0\n")
		d.output.WriteString("\tldp x0, lr, [sp], #16\n")
	}

	// Revert the red-zone adjustment.
	d.output.WriteString("\tadd sp, sp, 128\n")

	return statement, nil
}

// processAarch64Instruction handles a single AArch64 instruction statement.
// |instruction| is the instruction-name node of |statement|.
//
// Depending on the instruction and its arguments, the statement is written out
// unchanged, replaced by a rewritten form (with the original kept as a
// comment), or dropped apart from that comment. Argument shapes that are not
// understood cause a panic.
func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) {
	assertNodeType(instruction, ruleInstructionName)
	instructionName := d.contents(instruction)

	argNodes := instructionArgs(instruction.next)

	switch instructionName {
	case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg":
		// These functions are special because they take a condition-code name as
		// an argument and that looks like a symbol reference.
		d.writeNode(statement)
		return statement, nil

	case "mrs":
		// Functions that take special register names also look like a symbol
		// reference to the parser.
		d.writeNode(statement)
		return statement, nil

	case "adrp":
		// adrp always generates a relocation, even when the target symbol is in the
		// same segment, because the page-offset of the code isn't known until link
		// time. Thus adrp instructions are turned into either adr instructions
		// (limiting the module to 1MiB offsets) or calls to helper functions, both of
		// which load the full address. Later instructions, which add the low 12 bits
		// of offset, are tweaked to remove the offset since it's already included.
		// Loads of GOT symbols are slightly more complex because it's not possible to
		// avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr
		// instruction, which would normally do the dereferencing, is dropped
		// completely. (Or turned into a mov if it targets a different register.)
		assertNodeType(argNodes[0], ruleRegisterOrConstant)
		targetReg := d.contents(argNodes[0])
		if !strings.HasPrefix(targetReg, "x") {
			panic("adrp targetting register " + targetReg + ", which has the wrong size")
		}

		var symbol, offset string
		switch argNodes[1].pegRule {
		case ruleGOTSymbolOffset:
			symbol = d.contents(argNodes[1].up)
		case ruleMemoryRef:
			assertNodeType(argNodes[1].up, ruleSymbolRef)
			// Offsets may only follow the symbol here, never precede it.
			node, empty := d.gatherOffsets(argNodes[1].up.up, "")
			if len(empty) != 0 {
				panic("prefix offsets found for adrp")
			}
			symbol = d.contents(node)
			_, offset = d.gatherOffsets(node.next, "")
		default:
			panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule])
		}

		return d.loadAarch64Address(statement, targetReg, symbol, offset)
	}

	var args []string
	changed := false

	for _, arg := range argNodes {
		fullArg := arg

		switch arg.pegRule {
		case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak:
			args = append(args, d.contents(fullArg))

		case ruleGOTSymbolOffset:
			// These should only be arguments to adrp and thus unreachable.
			panic("unreachable")

		case ruleMemoryRef:
			ref := arg.up

			switch ref.pegRule {
			case ruleSymbolRef:
				// This is a branch. Either the target needs to be written to a local
				// version of the symbol to ensure that no relocations are emitted, or
				// it needs to jump to a redirector function.
				symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up)
				changed = didChange

				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !symbolIsLocal && !isSynthesized(symbol) {
					redirector := redirectorName(symbol)
					d.redirectors[symbol] = redirector
					symbol = redirector
					changed = true
				} else if didChange && symbolIsLocal && len(offset) > 0 {
					// didChange is set when the inputFile index is not 0; which is the index of the
					// first file copied to the output, which is the generated assembly of bcm.c.
					// In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index)
					// in order to ensure they don't collide. `index` gets incremented per file.
					// If there is offset after the symbol, append the `offset`.
					symbol = symbol + offset
				}

				args = append(args, symbol)

			case ruleARMBaseIndexScale:
				parts := ref.up
				assertNodeType(parts, ruleARMRegister)
				baseAddrReg := d.contents(parts)
				parts = skipWS(parts.next)

				// Only two forms need special handling. First there's memory references
				// like "[x*, :got_lo12:foo]". The base register here will have been the
				// target of an adrp instruction to load the page address, but the adrp
				// will have turned into loading the full address *and dereferencing it*,
				// above. Thus this instruction needs to be dropped otherwise we'll be
				// dereferencing twice.
				//
				// Second there are forms like "[x*, :lo12:foo]" where the code has used
				// adrp to load the page address into x*. That adrp will have been turned
				// into loading the full address so just the offset needs to be dropped.

				if parts != nil {
					if parts.pegRule == ruleARMGOTLow12 {
						if instructionName != "ldr" {
							panic("Symbol reference outside of ldr instruction")
						}

						if skipWS(parts.next) != nil || parts.up.next != nil {
							panic("can't handle tweak or post-increment with symbol references")
						}

						// The GOT helper already dereferenced the entry so, at most, just a mov
						// is needed to put things in the right register.
						d.writeCommentedNode(statement)
						if baseAddrReg != args[0] {
							d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n")
						}
						return statement, nil
					} else if parts.pegRule == ruleLow12BitsSymbolRef {
						if instructionName != "ldr" {
							panic("Symbol reference outside of ldr instruction")
						}

						if skipWS(parts.next) != nil || parts.up.next != nil {
							panic("can't handle tweak or post-increment with symbol references")
						}

						// Suppress the offset; adrp loaded the full address.
						args = append(args, "["+baseAddrReg+"]")
						changed = true
						continue
					}
				}

				args = append(args, d.contents(fullArg))

			case ruleLow12BitsSymbolRef:
				// These are the second instruction in a pair:
				//   adrp x0, symbol           // Load the page address into x0
				//   add x1, x0, :lo12:symbol  // Adds the page offset.
				//
				// The adrp instruction will have been turned into a sequence that loads
				// the full address, above, thus the offset is turned into zero. If that
				// results in the instruction being a nop, then it is deleted.
				if instructionName != "add" {
					panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName))
				}

				if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") {
					panic("address arithmetic with incorrectly sized register")
				}

				if args[0] == args[1] {
					d.writeCommentedNode(statement)
					return statement, nil
				}

				args = append(args, "#0")
				changed = true

			default:
				panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule]))
			}

		default:
			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
		}
	}

	if changed {
		d.writeCommentedNode(statement)
		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
		d.output.WriteString(replacement)
	} else {
		d.writeNode(statement)
	}

	return statement, nil
}

// gatherOffsets consumes the run of consecutive Offset nodes that starts at
// |symRef| and appends the text of each one to |offsets|, adding a leading "+"
// to any offset that does not already start with "+" or "-". It returns the
// first node that is not an Offset (nil if the siblings ran out) together with
// the accumulated offsets.
func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
	for symRef != nil && symRef.pegRule == ruleOffset {
		offset := d.contents(symRef)
		if offset[0] != '+' && offset[0] != '-' {
			offset = "+" + offset
		}
		offsets = offsets + offset
		symRef = symRef.next
	}
	return symRef, offsets
}

// parseMemRef takes apart a memory reference.
//
// If |memRef| is not a SymbolRef then every result is zero and |nextRef| is
// |memRef| itself. Otherwise:
//   - symbol is the referenced symbol; local symbols are returned under their
//     mapped name.
//   - offset is the concatenation of all offsets found before and after the
//     symbol and after the section.
//   - section is the text of the Section node, or empty if there is none.
//   - didChange is true if a local symbol was mapped to a different name.
//   - symbolIsLocal is true if the symbol is a LocalSymbol.
//   - nextRef is the node that follows |memRef|.
//
// It panics if the SymbolRef contains anything unexpected.
func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
	if memRef.pegRule != ruleSymbolRef {
		return "", "", "", false, false, memRef
	}

	symRef := memRef.up
	nextRef = memRef.next

	// (Offset* '+')?
	symRef, offset = d.gatherOffsets(symRef, offset)

	// (LocalSymbol / SymbolName)
	symbol = d.contents(symRef)
	if symRef.pegRule == ruleLocalSymbol {
		symbolIsLocal = true
		mapped := d.mapLocalSymbol(symbol)
		if mapped != symbol {
			symbol = mapped
			didChange = true
		}
	}
	symRef = symRef.next

	// Offset*
	symRef, offset = d.gatherOffsets(symRef, offset)

	// ('@' Section / Offset*)?
	if symRef != nil {
		assertNodeType(symRef, ruleSection)
		section = d.contents(symRef)
		symRef = symRef.next

		symRef, offset = d.gatherOffsets(symRef, offset)
	}

	if symRef != nil {
		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
	}

	return
}

/* Intel */

// instructionType is the category that classifyInstruction assigns to an
// x86-64 instruction.
type instructionType int

const (
	instrPush instructionType = iota
	instrMove
	// instrTransformingMove is essentially a move, but it performs some
	// transformation of the data during the process.
	instrTransformingMove
	instrJump
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for example
	// a 2-operand bitwise operation.
	instrCombine
	// instrMemoryVectorCombine is similar to instrCombine, but the source
	// register must be a memory reference and the destination register
	// must be a vector register.
	instrMemoryVectorCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrCompare takes two arguments and writes outputs to the flags register.
	instrCompare
	instrOther
)

// classifyInstruction returns the category of the instruction named |instr|.
// A category is only reported when the number of |args| is the one expected
// for that category; every other case, including unrecognised instruction
// names, yields instrOther.
func classifyInstruction(instr string, args []*node32) instructionType {
	switch instr {
	case "push", "pushq":
		if len(args) == 1 {
			return instrPush
		}

	case "mov", "movq", "vmovq", "movsd", "vmovsd":
		if len(args) == 2 {
			return instrMove
		}

	case "cmovneq", "cmoveq":
		if len(args) == 2 {
			return instrConditionalMove
		}

	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
		if len(args) == 1 {
			return instrJump
		}

	case "orq", "andq", "xorq":
		if len(args) == 2 {
			return instrCombine
		}

	case "cmpq":
		if len(args) == 2 {
			return instrCompare
		}

	case "sarxq", "shlxq", "shrxq":
		if len(args) == 3 {
			return instrThreeArg
		}

	case "vpbroadcastq":
		if len(args) == 2 {
			return instrTransformingMove
		}

	case "movlps", "movhps":
		if len(args) == 2 {
			return instrMemoryVectorCombine
		}
	}

	return instrOther
}

// push returns a wrapper that emits "pushq %rax" before running k and
// "xchg %rax, (%rsp)" after it, so that whatever k's code leaves in %rax ends
// up on the stack while %rax gets its previous value back.
func push(w stringWriter) wrapperFunc {
	return func(k func()) {
		w.WriteString("\tpushq %rax\n")
		k()
		w.WriteString("\txchg %rax, (%rsp)\n")
	}
}

// compare returns a wrapper that runs k and then emits the instruction
// "instr a, b".
func compare(w stringWriter, instr, a, b string) wrapperFunc {
	return func(k func()) {
		k()
		w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b))
	}
}

// loadFromGOT records that an external delta symbol is needed for
// symbol@section and returns a wrapper that emits code which loads the value
// of that GOT entry into |destination|, with the flags saved and restored
// around it. Unless |redzoneCleared| is set, the emitted code also moves the
// stack pointer past the red zone first and restores it afterwards.
//
// Note that the returned wrapper never calls k.
func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}

	return func(k func()) {
		if !redzoneCleared {
			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
		}
		w.WriteString("\tpushf\n")
		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
		w.WriteString("\tpopf\n")
		if !redzoneCleared {
			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
		}
	}
}

// saveFlags returns a wrapper that emits pushfq before running k and popfq
// after it. Unless |redzoneCleared| is set, the stack pointer is additionally
// moved past the red zone before the push and moved back after the pop.
func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
	return func(k func()) {
		if !redzoneCleared {
			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
			// Deferred so that the stack pointer is restored only after popfq
			// has been emitted.
			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
		}
		w.WriteString("\tpushfq\n")
		k()
		w.WriteString("\tpopfq\n")
	}
}

// saveRegister picks the first of %rax, %rbx, %rcx and %rdx that is not listed
// in |avoidRegs| and returns it together with a wrapper that preserves it: the
// wrapper emits code that moves the stack pointer past the red zone and pushes
// the register, runs k, and then emits the pop and the stack pointer
// restoration. It panics if all four candidates are excluded.
func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}

	var reg string
NextCandidate:
	for _, candidate := range candidates {
		for _, avoid := range avoidRegs {
			if candidate == avoid {
				continue NextCandidate
			}
		}

		reg = candidate
		break
	}

	if len(reg) == 0 {
		panic("too many excluded registers")
	}

	return func(k func()) {
		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
		w.WriteString("\tpushq " + reg + "\n")
		k()
		w.WriteString("\tpopq " + reg + "\n")
		w.WriteString("\tleaq 128(%rsp), %rsp\n")
	}, reg
}

// moveTo returns a wrapper that runs k and then emits a movq (vmovq if |isAVX|
// is set) from |source| to |target|.
func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
	return func(k func()) {
		k()
		prefix := ""
		if isAVX {
			prefix = "v"
		}
		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
	}
}

// finalTransform returns a wrapper that runs k and then emits
// |transformInstruction| with |reg| as both source and destination.
func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
	return func(k func()) {
		k()
		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
	}
}

// combineOp returns a wrapper that runs k and then emits
// "instructionName source, dest".
func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
	return func(k func()) {
		k()
		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
	}
}

// threeArgCombineOp returns a wrapper that runs k and then emits
// "instructionName source1, source2, dest".
func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
	return func(k func()) {
		k()
		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
	}
}

// memoryVectorCombineOp returns a wrapper that runs k and then applies
// |instructionName| to |dest| using the value of the register |source|, which
// is passed via the stack.
func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
	return func(k func()) {
		k()
		// These instructions can only read from memory, so push
		// tempReg and read from the stack. Note we assume the red zone
		// was previously cleared by saveRegister().
		w.WriteString("\tpushq " + source + "\n")
		w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n")
		w.WriteString("\tleaq 8(%rsp), %rsp\n")
	}
}

// isValidLEATarget reports whether |reg| is something other than an %xmm, %ymm
// or %zmm register.
func isValidLEATarget(reg string) bool {
	return !strings.HasPrefix(reg, "%xmm") && !strings.HasPrefix(reg, "%ymm") && !strings.HasPrefix(reg, "%zmm")
}

// undoConditionalMove returns a wrapper that makes the code emitted by k
// conditional: it emits a jump with the inverse of |instr|'s condition to the
// local label 999, runs k, and then emits that label. Only cmoveq and cmovneq
// are supported; any other instruction causes a panic.
func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
	var invertedCondition string

	switch instr {
	case "cmoveq":
		invertedCondition = "ne"
	case "cmovneq":
		invertedCondition = "e"
	default:
		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
	}

	return func(k func()) {
		w.WriteString("\tj" + invertedCondition + " 999f\n")
		k()
		w.WriteString("999:\n")
	}
}

// isRIPRelative reports whether |node| is a BaseIndexScale node whose text is
// exactly "(%rip)". A nil node is not RIP-relative.
func (d *delocation) isRIPRelative(node *node32) bool {
	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
}

func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
	var prefix string
	if instruction.pegRule == ruleInstructionPrefix {
		prefix = d.contents(instruction)
		instruction = skipWS(instruction.next)
	}

	assertNodeType(instruction, ruleInstructionName)
	instructionName := d.contents(instruction)

	argNodes := instructionArgs(instruction.next)

	var wrappers wrapperStack
	var args []string
	changed := false

Args:
	for i, arg := range argNodes {
		fullArg := arg
		isIndirect := false

		if arg.pegRule == ruleIndirectionIndicator {
			arg = arg.next
			isIndirect = true
		}

		switch arg.pegRule {
		case ruleRegisterOrConstant, ruleLocalLabelRef:
			args = append(args, d.contents(fullArg))

		case ruleMemoryRef:
			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
			changed = didChange

			if symbol == "OPENSSL_ia32cap_P" && section == "" {
				if instructionName != "leaq" {
					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
				}

				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
				}

				target := argNodes[1]
				assertNodeType(target, ruleRegisterOrConstant)
				reg := d.contents(target)

				if !strings.HasPrefix(reg, "%r") {
					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
				}

				changed = true

				// Flag-altering instructions (i.e. addq) are going to be used so the
				// flags need to be preserved.
				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))

				wrappers = append(wrappers, func(k func()) {
					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
				})

				break Args
			}

			switch section {
			case "":
				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				}

			case "PLT":
				if classifyInstruction(instructionName, argNodes) != instrJump {
					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
				}

				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !symbolIsLocal && !isSynthesized(symbol) {
					// Unknown symbol via PLT is an
					// out-call from the module, e.g.
					// memcpy.
					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
					symbol = redirectorName(symbol)
				}

				changed = true

			case "GOTPCREL":
				if len(offset) > 0 {
					return nil, errors.New("loading from GOT with offset is unsupported")
				}
				if !d.isRIPRelative(memRef) {
					return nil, errors.New("GOT access must be IP-relative")
				}

				useGOT := false
				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !isSynthesized(symbol) {
					useGOT = true
				}

				classification := classifyInstruction(instructionName, argNodes)
				if classification != instrThreeArg && classification != instrCompare && i != 0 {
					return nil, errors.New("GOT access must be source operand")
				}

				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
				var targetReg string
				var redzoneCleared bool
				switch classification {
				case instrPush:
					wrappers = append(wrappers, push(d.output))
					targetReg = "%rax"
				case instrConditionalMove:
					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
					fallthrough
				case instrMove:
					assertNodeType(argNodes[1], ruleRegisterOrConstant)
					targetReg = d.contents(argNodes[1])
				case instrCompare:
					otherSource := d.contents(argNodes[i^1])
					saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource})
					redzoneCleared = true
					wrappers = append(wrappers, saveRegWrapper)
					if i == 0 {
						wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource))
					} else {
						wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg))
					}
					targetReg = tempReg
				case instrTransformingMove:
					assertNodeType(argNodes[1], ruleRegisterOrConstant)
					targetReg = d.contents(argNodes[1])
					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg))
1064 if isValidLEATarget(targetReg) { 1065 return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.") 1066 } 1067 case instrCombine: 1068 targetReg = d.contents(argNodes[1]) 1069 if !isValidLEATarget(targetReg) { 1070 return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers") 1071 } 1072 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg}) 1073 redzoneCleared = true 1074 wrappers = append(wrappers, saveRegWrapper) 1075 1076 wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg)) 1077 targetReg = tempReg 1078 case instrMemoryVectorCombine: 1079 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1080 targetReg = d.contents(argNodes[1]) 1081 if isValidLEATarget(targetReg) { 1082 return nil, errors.New("target register must be an XMM register") 1083 } 1084 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1085 wrappers = append(wrappers, saveRegWrapper) 1086 redzoneCleared = true 1087 wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg)) 1088 targetReg = tempReg 1089 case instrThreeArg: 1090 if n := len(argNodes); n != 3 { 1091 return nil, fmt.Errorf("three-argument instruction has %d arguments", n) 1092 } 1093 if i != 0 && i != 1 { 1094 return nil, errors.New("GOT access must be from source operand") 1095 } 1096 targetReg = d.contents(argNodes[2]) 1097 1098 otherSource := d.contents(argNodes[1]) 1099 if i == 1 { 1100 otherSource = d.contents(argNodes[0]) 1101 } 1102 1103 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource}) 1104 redzoneCleared = true 1105 wrappers = append(wrappers, saveRegWrapper) 1106 1107 if i == 0 { 1108 wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg)) 1109 } else { 1110 wrappers = append(wrappers, threeArgCombineOp(d.output, 
instructionName, otherSource, tempReg, targetReg)) 1111 } 1112 targetReg = tempReg 1113 default: 1114 return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName) 1115 } 1116 1117 if !isValidLEATarget(targetReg) { 1118 // Sometimes the compiler will load from the GOT to an 1119 // XMM register, which is not a valid target of an LEA 1120 // instruction. 1121 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1122 wrappers = append(wrappers, saveRegWrapper) 1123 isAVX := strings.HasPrefix(instructionName, "v") 1124 wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg)) 1125 targetReg = tempReg 1126 if redzoneCleared { 1127 return nil, fmt.Errorf("internal error: Red Zone was already cleared") 1128 } 1129 redzoneCleared = true 1130 } 1131 1132 if symbol == "OPENSSL_ia32cap_P" { 1133 // Flag-altering instructions (i.e. addq) are going to be used so the 1134 // flags need to be preserved. 1135 wrappers = append(wrappers, saveFlags(d.output, redzoneCleared)) 1136 wrappers = append(wrappers, func(k func()) { 1137 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n") 1138 d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n") 1139 }) 1140 } else if useGOT { 1141 wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared)) 1142 } else { 1143 wrappers = append(wrappers, func(k func()) { 1144 d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg)) 1145 }) 1146 } 1147 changed = true 1148 break Args 1149 1150 default: 1151 return nil, fmt.Errorf("Unknown section type %q", section) 1152 } 1153 1154 if !changed && len(section) > 0 { 1155 panic("section was not handled") 1156 } 1157 section = "" 1158 1159 argStr := "" 1160 if isIndirect { 1161 argStr += "*" 1162 } 1163 argStr += symbol 1164 argStr += offset 1165 1166 for ; memRef != nil; memRef = memRef.next { 1167 argStr += d.contents(memRef) 1168 } 1169 1170 args = 
append(args, argStr) 1171 1172 case ruleGOTAddress: 1173 if instructionName != "leaq" { 1174 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ used outside of lea") 1175 } 1176 if i != 0 || len(argNodes) != 2 { 1177 return nil, fmt.Errorf("Load of _GLOBAL_OFFSET_TABLE_ address didn't have expected form") 1178 } 1179 d.gotDeltaNeeded = true 1180 changed = true 1181 targetReg := d.contents(argNodes[1]) 1182 args = append(args, ".Lboringssl_got_delta(%rip)") 1183 wrappers = append(wrappers, func(k func()) { 1184 k() 1185 d.output.WriteString(fmt.Sprintf("\taddq .Lboringssl_got_delta(%%rip), %s\n", targetReg)) 1186 }) 1187 1188 case ruleGOTLocation: 1189 if instructionName != "movabsq" { 1190 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq") 1191 } 1192 if i != 0 || len(argNodes) != 2 { 1193 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form") 1194 } 1195 1196 d.gotDeltaNeeded = true 1197 changed = true 1198 instructionName = "movq" 1199 assertNodeType(arg.up, ruleLocalSymbol) 1200 baseSymbol := d.mapLocalSymbol(d.contents(arg.up)) 1201 targetReg := d.contents(argNodes[1]) 1202 args = append(args, ".Lboringssl_got_delta(%rip)") 1203 wrappers = append(wrappers, func(k func()) { 1204 k() 1205 d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg)) 1206 }) 1207 1208 case ruleGOTSymbolOffset: 1209 if instructionName != "movabsq" { 1210 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq") 1211 } 1212 if i != 0 || len(argNodes) != 2 { 1213 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form") 1214 } 1215 1216 assertNodeType(arg.up, ruleSymbolName) 1217 symbol := d.contents(arg.up) 1218 if strings.HasPrefix(symbol, ".L") { 1219 symbol = d.mapLocalSymbol(symbol) 1220 } 1221 targetReg := d.contents(argNodes[1]) 1222 1223 var prefix string 1224 isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF") 1225 if isGOTOFF { 1226 prefix = 
"gotoff" 1227 d.gotOffOffsetsNeeded[symbol] = struct{}{} 1228 } else { 1229 prefix = "got" 1230 d.gotOffsetsNeeded[symbol] = struct{}{} 1231 } 1232 changed = true 1233 1234 wrappers = append(wrappers, func(k func()) { 1235 // Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time 1236 // of writing) emits 64-bit relocations anyway, so the following four bytes 1237 // get stomped. Thus we use 64-bit offsets. 1238 d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg)) 1239 }) 1240 1241 default: 1242 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 1243 } 1244 } 1245 1246 if changed { 1247 d.writeCommentedNode(statement) 1248 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 1249 if len(prefix) != 0 { 1250 replacement = "\t" + prefix + replacement 1251 } 1252 wrappers.do(func() { 1253 d.output.WriteString(replacement) 1254 }) 1255 } else { 1256 d.writeNode(statement) 1257 } 1258 1259 return statement, nil 1260} 1261 1262func (d *delocation) handleBSS(statement *node32) (*node32, error) { 1263 lastStatement := statement 1264 for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next { 1265 node := skipWS(statement.up) 1266 if node == nil { 1267 d.writeNode(statement) 1268 continue 1269 } 1270 1271 switch node.pegRule { 1272 case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective: 1273 d.writeNode(statement) 1274 1275 case ruleDirective: 1276 directive := node.up 1277 assertNodeType(directive, ruleDirectiveName) 1278 directiveName := d.contents(directive) 1279 if directiveName == "text" || directiveName == "section" || directiveName == "data" { 1280 return lastStatement, nil 1281 } 1282 d.writeNode(statement) 1283 1284 case ruleLabel: 1285 label := node.up 1286 d.writeNode(statement) 1287 1288 if label.pegRule != ruleLocalSymbol { 1289 symbol := d.contents(label) 1290 localSymbol := 
localTargetName(symbol) 1291 d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol)) 1292 1293 d.bssAccessorsNeeded[symbol] = localSymbol 1294 } 1295 1296 case ruleLabelContainingDirective: 1297 var err error 1298 statement, err = d.processLabelContainingDirective(statement, node.up) 1299 if err != nil { 1300 return nil, err 1301 } 1302 1303 default: 1304 return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement)) 1305 } 1306 } 1307 1308 return lastStatement, nil 1309} 1310 1311func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) { 1312 w.WriteString(".p2align 2\n") 1313 w.WriteString(".hidden " + funcName + "\n") 1314 w.WriteString(".type " + funcName + ", @function\n") 1315 w.WriteString(funcName + ":\n") 1316 w.WriteString(".cfi_startproc\n") 1317 // We insert a landing pad (`bti c` instruction) unconditionally at the beginning of 1318 // every generated function so that they can be called indirectly (with `blr` or 1319 // `br x16/x17`). The instruction is encoded in the HINT space as `hint #34` and is 1320 // a no-op on machines or program states not supporting BTI (Branch Target Identification). 1321 // None of the generated function bodies call other functions (with bl or blr), so we only 1322 // insert a landing pad instead of signing and validating $lr with `paciasp` and `autiasp`. 1323 // Normally we would also generate a .note.gnu.property section to annotate the assembly 1324 // file as BTI-compatible, but if the input assembly files are BTI-compatible, they should 1325 // already have those sections so there is no need to add an extra one ourselves. 1326 w.WriteString("\thint #34 // bti c\n") 1327 writeContents(w) 1328 w.WriteString(".cfi_endproc\n") 1329 w.WriteString(".size " + funcName + ", .-" + funcName + "\n") 1330} 1331 1332func transform(w stringWriter, inputs []inputFile) error { 1333 // symbols contains all defined symbols. 
1334 symbols := make(map[string]struct{}) 1335 // fileNumbers is the set of IDs seen in .file directives. 1336 fileNumbers := make(map[int]struct{}) 1337 // maxObservedFileNumber contains the largest seen file number in a 1338 // .file directive. Zero is not a valid number. 1339 maxObservedFileNumber := 0 1340 // fileDirectivesContainMD5 is true if the compiler is outputting MD5 1341 // checksums in .file directives. If it does so, then this script needs 1342 // to match that behaviour otherwise warnings result. 1343 fileDirectivesContainMD5 := false 1344 1345 // OPENSSL_ia32cap_get will be synthesized by this script. 1346 symbols["OPENSSL_ia32cap_get"] = struct{}{} 1347 1348 for _, input := range inputs { 1349 forEachPath(input.ast.up, func(node *node32) { 1350 symbol := input.contents[node.begin:node.end] 1351 if _, ok := symbols[symbol]; ok { 1352 panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path)) 1353 } 1354 symbols[symbol] = struct{}{} 1355 }, ruleStatement, ruleLabel, ruleSymbolName) 1356 1357 forEachPath(input.ast.up, func(node *node32) { 1358 assertNodeType(node, ruleLocationDirective) 1359 directive := input.contents[node.begin:node.end] 1360 if !strings.HasPrefix(directive, ".file") { 1361 return 1362 } 1363 parts := strings.Fields(directive) 1364 if len(parts) == 2 { 1365 // This is a .file directive with just a 1366 // filename. Clang appears to generate just one 1367 // of these at the beginning of the output for 1368 // the compilation unit. Ignore it. 
1369 return 1370 } 1371 fileNo, err := strconv.Atoi(parts[1]) 1372 if err != nil { 1373 panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive)) 1374 } 1375 1376 if _, ok := fileNumbers[fileNo]; ok { 1377 panic(fmt.Sprintf("Duplicate file number %d observed", fileNo)) 1378 } 1379 fileNumbers[fileNo] = struct{}{} 1380 1381 if fileNo > maxObservedFileNumber { 1382 maxObservedFileNumber = fileNo 1383 } 1384 1385 for _, token := range parts[2:] { 1386 if token == "md5" { 1387 fileDirectivesContainMD5 = true 1388 } 1389 } 1390 }, ruleStatement, ruleLocationDirective) 1391 } 1392 1393 processor := x86_64 1394 if len(inputs) > 0 { 1395 processor = detectProcessor(inputs[0]) 1396 } 1397 1398 commentIndicator := "#" 1399 if processor == aarch64 { 1400 commentIndicator = "//" 1401 } 1402 1403 d := &delocation{ 1404 symbols: symbols, 1405 processor: processor, 1406 commentIndicator: commentIndicator, 1407 output: w, 1408 redirectors: make(map[string]string), 1409 bssAccessorsNeeded: make(map[string]string), 1410 gotExternalsNeeded: make(map[string]struct{}), 1411 gotOffsetsNeeded: make(map[string]struct{}), 1412 gotOffOffsetsNeeded: make(map[string]struct{}), 1413 } 1414 1415 w.WriteString(".text\n") 1416 var fileTrailing string 1417 if fileDirectivesContainMD5 { 1418 fileTrailing = " md5 0x00000000000000000000000000000000" 1419 } 1420 w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing)) 1421 w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1)) 1422 w.WriteString("BORINGSSL_bcm_text_start:\n") 1423 1424 for _, input := range inputs { 1425 if err := d.processInput(input); err != nil { 1426 return err 1427 } 1428 } 1429 1430 w.WriteString(".text\n") 1431 w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1)) 1432 w.WriteString("BORINGSSL_bcm_text_end:\n") 1433 1434 // Emit redirector functions. Each is a single jump instruction. 
1435 var redirectorNames []string 1436 for name := range d.redirectors { 1437 redirectorNames = append(redirectorNames, name) 1438 } 1439 sort.Strings(redirectorNames) 1440 1441 for _, name := range redirectorNames { 1442 redirector := d.redirectors[name] 1443 switch d.processor { 1444 case aarch64: 1445 writeAarch64Function(w, redirector, func(w stringWriter) { 1446 w.WriteString("\tb " + name + "\n") 1447 }) 1448 1449 case x86_64: 1450 w.WriteString(".type " + redirector + ", @function\n") 1451 w.WriteString(redirector + ":\n") 1452 w.WriteString("\tjmp\t" + name + "\n") 1453 } 1454 } 1455 1456 var accessorNames []string 1457 for accessor := range d.bssAccessorsNeeded { 1458 accessorNames = append(accessorNames, accessor) 1459 } 1460 sort.Strings(accessorNames) 1461 1462 // Emit BSS accessor functions. Each is a single LEA followed by RET. 1463 for _, name := range accessorNames { 1464 funcName := accessorName(name) 1465 target := d.bssAccessorsNeeded[name] 1466 1467 switch d.processor { 1468 case x86_64: 1469 w.WriteString(".type " + funcName + ", @function\n") 1470 w.WriteString(funcName + ":\n") 1471 w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n") 1472 1473 case aarch64: 1474 writeAarch64Function(w, funcName, func(w stringWriter) { 1475 w.WriteString("\tadrp x0, " + target + "\n") 1476 w.WriteString("\tadd x0, x0, :lo12:" + target + "\n") 1477 w.WriteString("\tret\n") 1478 }) 1479 } 1480 } 1481 1482 switch d.processor { 1483 case aarch64: 1484 externalNames := sortedSet(d.gotExternalsNeeded) 1485 for _, symbol := range externalNames { 1486 writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) { 1487 w.WriteString("\tadrp x0, :got:" + symbol + "\n") 1488 w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n") 1489 w.WriteString("\tret\n") 1490 }) 1491 } 1492 1493 writeAarch64Function(w, ".LOPENSSL_armcap_P_addr", func(w stringWriter) { 1494 w.WriteString("\tadrp x0, OPENSSL_armcap_P\n") 1495 w.WriteString("\tadd x0, x0, 
:lo12:OPENSSL_armcap_P\n") 1496 w.WriteString("\tret\n") 1497 }) 1498 1499 case x86_64: 1500 externalNames := sortedSet(d.gotExternalsNeeded) 1501 for _, name := range externalNames { 1502 parts := strings.SplitN(name, "@", 2) 1503 symbol, section := parts[0], parts[1] 1504 w.WriteString(".type " + symbol + "_" + section + "_external, @object\n") 1505 w.WriteString(".size " + symbol + "_" + section + "_external, 8\n") 1506 w.WriteString(symbol + "_" + section + "_external:\n") 1507 // Ideally this would be .quad foo@GOTPCREL, but clang's 1508 // assembler cannot emit a 64-bit GOTPCREL relocation. Instead, 1509 // we manually sign-extend the value, knowing that the GOT is 1510 // always at the end, thus foo@GOTPCREL has a positive value. 1511 w.WriteString("\t.long " + symbol + "@" + section + "\n") 1512 w.WriteString("\t.long 0\n") 1513 } 1514 1515 w.WriteString(".type OPENSSL_ia32cap_get, @function\n") 1516 w.WriteString(".globl OPENSSL_ia32cap_get\n") 1517 w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n") 1518 w.WriteString("OPENSSL_ia32cap_get:\n") 1519 w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n") 1520 w.WriteString("\tret\n") 1521 1522 w.WriteString(".extern OPENSSL_ia32cap_P\n") 1523 w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n") 1524 w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n") 1525 w.WriteString("OPENSSL_ia32cap_addr_delta:\n") 1526 w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n") 1527 1528 if d.gotDeltaNeeded { 1529 w.WriteString(".Lboringssl_got_delta:\n") 1530 w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n") 1531 } 1532 1533 for _, name := range sortedSet(d.gotOffsetsNeeded) { 1534 w.WriteString(".Lboringssl_got_" + name + ":\n") 1535 w.WriteString("\t.quad " + name + "@GOT\n") 1536 } 1537 for _, name := range sortedSet(d.gotOffOffsetsNeeded) { 1538 w.WriteString(".Lboringssl_gotoff_" + name + ":\n") 1539 w.WriteString("\t.quad " + name + "@GOTOFF\n") 1540 } 1541 } 
1542 1543 w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n") 1544 w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n") 1545 w.WriteString("BORINGSSL_bcm_text_hash:\n") 1546 for _, b := range fipscommon.UninitHashValue { 1547 w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n") 1548 } 1549 1550 return nil 1551} 1552 1553// preprocess runs source through the C preprocessor. 1554func preprocess(cppCommand []string, path string) ([]byte, error) { 1555 var args []string 1556 args = append(args, cppCommand...) 1557 args = append(args, path) 1558 1559 cpp := exec.Command(args[0], args[1:]...) 1560 cpp.Stderr = os.Stderr 1561 var result bytes.Buffer 1562 cpp.Stdout = &result 1563 1564 if err := cpp.Run(); err != nil { 1565 return nil, err 1566 } 1567 1568 return result.Bytes(), nil 1569} 1570 1571func parseInputs(inputs []inputFile, cppCommand []string) error { 1572 for i, input := range inputs { 1573 var contents string 1574 1575 if input.isArchive { 1576 arFile, err := os.Open(input.path) 1577 if err != nil { 1578 return err 1579 } 1580 defer arFile.Close() 1581 1582 ar, err := ar.ParseAR(arFile) 1583 if err != nil { 1584 return err 1585 } 1586 1587 if len(ar) != 1 { 1588 return fmt.Errorf("expected one file in archive, but found %d", len(ar)) 1589 } 1590 1591 for _, c := range ar { 1592 contents = string(c) 1593 } 1594 } else { 1595 var inBytes []byte 1596 var err error 1597 1598 if len(cppCommand) > 0 { 1599 inBytes, err = preprocess(cppCommand, input.path) 1600 } else { 1601 inBytes, err = os.ReadFile(input.path) 1602 } 1603 if err != nil { 1604 return err 1605 } 1606 1607 contents = string(inBytes) 1608 } 1609 1610 asm := Asm{Buffer: contents, Pretty: true} 1611 asm.Init() 1612 if err := asm.Parse(); err != nil { 1613 return fmt.Errorf("error while parsing %q: %s", input.path, err) 1614 } 1615 ast := asm.AST() 1616 1617 inputs[i].contents = contents 1618 inputs[i].ast = ast 1619 } 1620 1621 return nil 1622} 1623 1624// 
includePathFromHeaderFilePath returns an include directory path based on the 1625// path of a specific header file. It walks up the path and assumes that the 1626// include files are rooted in a directory called "openssl". 1627func includePathFromHeaderFilePath(path string) (string, error) { 1628 dir := path 1629 for { 1630 var file string 1631 dir, file = filepath.Split(dir) 1632 1633 if file == "openssl" { 1634 return dir, nil 1635 } 1636 1637 if len(dir) == 0 { 1638 break 1639 } 1640 dir = dir[:len(dir)-1] 1641 } 1642 1643 return "", fmt.Errorf("failed to find 'openssl' path element in header file path %q", path) 1644} 1645 1646func main() { 1647 // The .a file, if given, is expected to be an archive of textual 1648 // assembly sources. That's odd, but CMake really wants to create 1649 // archive files so it's the only way that we can make it work. 1650 arInput := flag.String("a", "", "Path to a .a file containing assembly sources") 1651 outFile := flag.String("o", "", "Path to output assembly") 1652 ccPath := flag.String("cc", "", "Path to the C compiler for preprocessing inputs") 1653 ccFlags := flag.String("cc-flags", "", "Flags for the C compiler when preprocessing") 1654 1655 flag.Parse() 1656 1657 if len(*outFile) == 0 { 1658 fmt.Fprintf(os.Stderr, "Must give argument to -o.\n") 1659 os.Exit(1) 1660 } 1661 1662 var inputs []inputFile 1663 if len(*arInput) > 0 { 1664 inputs = append(inputs, inputFile{ 1665 path: *arInput, 1666 index: 0, 1667 isArchive: true, 1668 }) 1669 } 1670 1671 includePaths := make(map[string]struct{}) 1672 1673 for i, path := range flag.Args() { 1674 if len(path) == 0 { 1675 continue 1676 } 1677 1678 // Header files are not processed but their path is remembered 1679 // and passed as -I arguments when invoking the preprocessor. 
1680 if strings.HasSuffix(path, ".h") { 1681 dir, err := includePathFromHeaderFilePath(path) 1682 if err != nil { 1683 fmt.Fprintf(os.Stderr, "%s\n", err) 1684 os.Exit(1) 1685 } 1686 includePaths[dir] = struct{}{} 1687 continue 1688 } 1689 1690 inputs = append(inputs, inputFile{ 1691 path: path, 1692 index: i + 1, 1693 }) 1694 } 1695 1696 var cppCommand []string 1697 if len(*ccPath) > 0 { 1698 cppCommand = append(cppCommand, *ccPath) 1699 cppCommand = append(cppCommand, strings.Fields(*ccFlags)...) 1700 // Some of ccFlags might be superfluous when running the 1701 // preprocessor, but we don't want the compiler complaining that 1702 // "argument unused during compilation". 1703 cppCommand = append(cppCommand, "-Wno-unused-command-line-argument") 1704 1705 for includePath := range includePaths { 1706 cppCommand = append(cppCommand, "-I"+includePath) 1707 } 1708 1709 // -E requests only preprocessing. 1710 cppCommand = append(cppCommand, "-E") 1711 } 1712 1713 if err := parseInputs(inputs, cppCommand); err != nil { 1714 fmt.Fprintf(os.Stderr, "%s\n", err) 1715 os.Exit(1) 1716 } 1717 1718 out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) 1719 if err != nil { 1720 panic(err) 1721 } 1722 defer out.Close() 1723 1724 if err := transform(out, inputs); err != nil { 1725 fmt.Fprintf(os.Stderr, "%s\n", err) 1726 os.Exit(1) 1727 } 1728} 1729 1730func forEachPath(node *node32, cb func(*node32), rules ...pegRule) { 1731 if node == nil { 1732 return 1733 } 1734 1735 if len(rules) == 0 { 1736 cb(node) 1737 return 1738 } 1739 1740 rule := rules[0] 1741 childRules := rules[1:] 1742 1743 for ; node != nil; node = node.next { 1744 if node.pegRule != rule { 1745 continue 1746 } 1747 1748 if len(childRules) == 0 { 1749 cb(node) 1750 } else { 1751 forEachPath(node.up, cb, childRules...) 
1752 } 1753 } 1754} 1755 1756func skipNodes(node *node32, ruleToSkip pegRule) *node32 { 1757 for ; node != nil && node.pegRule == ruleToSkip; node = node.next { 1758 } 1759 return node 1760} 1761 1762func skipWS(node *node32) *node32 { 1763 return skipNodes(node, ruleWS) 1764} 1765 1766func assertNodeType(node *node32, expected pegRule) { 1767 if rule := node.pegRule; rule != expected { 1768 panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected])) 1769 } 1770} 1771 1772type wrapperFunc func(func()) 1773 1774type wrapperStack []wrapperFunc 1775 1776func (w *wrapperStack) do(baseCase func()) { 1777 if len(*w) == 0 { 1778 baseCase() 1779 return 1780 } 1781 1782 wrapper := (*w)[0] 1783 *w = (*w)[1:] 1784 wrapper(func() { w.do(baseCase) }) 1785} 1786 1787// localTargetName returns the name of the local target label for a global 1788// symbol named name. 1789func localTargetName(name string) string { 1790 return ".L" + name + "_local_target" 1791} 1792 1793func isSynthesized(symbol string) bool { 1794 return strings.HasSuffix(symbol, "_bss_get") || 1795 symbol == "OPENSSL_ia32cap_get" || 1796 strings.HasPrefix(symbol, "BORINGSSL_bcm_text_") 1797} 1798 1799func redirectorName(symbol string) string { 1800 return "bcm_redirector_" + symbol 1801} 1802 1803// sectionType returns the type of a section. I.e. a section called “.text.foo” 1804// is a “.text” section. 1805func sectionType(section string) (string, bool) { 1806 if len(section) == 0 || section[0] != '.' { 1807 return "", false 1808 } 1809 1810 i := strings.Index(section[1:], ".") 1811 if i != -1 { 1812 section = section[:i+1] 1813 } 1814 1815 if strings.HasPrefix(section, ".debug_") { 1816 return ".debug", true 1817 } 1818 1819 return section, true 1820} 1821 1822// accessorName returns the name of the accessor function for a BSS symbol 1823// named name. 
1824func accessorName(name string) string { 1825 return name + "_bss_get" 1826} 1827 1828func (d *delocation) mapLocalSymbol(symbol string) string { 1829 if d.currentInput.index == 0 { 1830 return symbol 1831 } 1832 return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index) 1833} 1834 1835func detectProcessor(input inputFile) processorType { 1836 for statement := input.ast.up; statement != nil; statement = statement.next { 1837 node := skipNodes(statement.up, ruleWS) 1838 if node == nil || node.pegRule != ruleInstruction { 1839 continue 1840 } 1841 1842 instruction := node.up 1843 instructionName := input.contents[instruction.begin:instruction.end] 1844 1845 switch instructionName { 1846 case "movq", "call", "leaq": 1847 return x86_64 1848 case "str", "bl", "ldr", "st1": 1849 return aarch64 1850 } 1851 } 1852 1853 panic("processed entire input and didn't recognise any instructions.") 1854} 1855 1856func sortedSet(m map[string]struct{}) []string { 1857 ret := make([]string, 0, len(m)) 1858 for key := range m { 1859 ret = append(ret, key) 1860 } 1861 sort.Strings(ret) 1862 return ret 1863} 1864