xref: /aosp_15_r20/external/boringssl/src/util/fipstools/delocate/delocate.go (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1// Copyright (c) 2017, Google Inc.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// delocate performs several transformations of textual assembly code. See
16// crypto/fipsmodule/FIPS.md for an overview.
17package main
18
19import (
20	"bytes"
21	"errors"
22	"flag"
23	"fmt"
24	"os"
25	"os/exec"
26	"path/filepath"
27	"sort"
28	"strconv"
29	"strings"
30
31	"boringssl.googlesource.com/boringssl/util/ar"
32	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
33)
34
35// inputFile represents a textual assembly file.
36type inputFile struct {
37	path string
38	// index is a unique identifier given to this file. It's used for
39	// mapping local symbols.
40	index int
41	// isArchive indicates that the input should be processed as an ar
42	// file.
43	isArchive bool
44	// contents contains the contents of the file.
45	contents string
46	// ast points to the head of the syntax tree.
47	ast *node32
48}
49
// stringWriter is the minimal sink that output is emitted through. Any value
// with a matching WriteString method (for example *strings.Builder or
// *bufio.Writer) satisfies it.
type stringWriter interface {
	WriteString(s string) (n int, err error)
}
53
// processorType identifies the instruction set of the assembly being
// processed. The zero value does not name any processor.
type processorType int

const (
	// x86_64 selects the Intel instruction handling.
	x86_64 processorType = 1 + iota
	// aarch64 selects the Aarch64 instruction handling.
	aarch64
)
60
61// delocation holds the state needed during a delocation operation.
62type delocation struct {
63	processor processorType
64	output    stringWriter
65	// commentIndicator starts a comment, e.g. "//" or "#"
66	commentIndicator string
67
68	// symbols is the set of symbols defined in the module.
69	symbols map[string]struct{}
70	// redirectors maps from out-call symbol name to the name of a
71	// redirector function for that symbol. E.g. “memcpy” ->
72	// “bcm_redirector_memcpy”.
73	redirectors map[string]string
74	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
75	// should be used to reference it. E.g. “P384_data_storage” ->
76	// “P384_data_storage”.
77	bssAccessorsNeeded map[string]string
78	// gotExternalsNeeded is a set of symbol names for which we need
79	// “delta” symbols: symbols that contain the offset from their location
80	// to the memory in question.
81	gotExternalsNeeded map[string]struct{}
82	// gotDeltaNeeded is true if the code needs to load the value of
83	// _GLOBAL_OFFSET_TABLE_.
84	gotDeltaNeeded bool
85	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
86	gotOffsetsNeeded map[string]struct{}
87	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
88	gotOffOffsetsNeeded map[string]struct{}
89
90	currentInput inputFile
91}
92
93func (d *delocation) contents(node *node32) string {
94	return d.currentInput.contents[node.begin:node.end]
95}
96
97// writeNode writes out an AST node.
98func (d *delocation) writeNode(node *node32) {
99	if _, err := d.output.WriteString(d.contents(node)); err != nil {
100		panic(err)
101	}
102}
103
104func (d *delocation) writeCommentedNode(node *node32) {
105	line := d.contents(node)
106	if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil {
107		panic(err)
108	}
109}
110
111func locateError(err error, with *node32, in inputFile) error {
112	posMap := translatePositions([]rune(in.contents), []int{int(with.begin)})
113	var line int
114	for _, pos := range posMap {
115		line = pos.line
116	}
117
118	return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err)
119}
120
121func (d *delocation) processInput(input inputFile) (err error) {
122	d.currentInput = input
123
124	var origStatement *node32
125	defer func() {
126		if err := recover(); err != nil {
127			panic(locateError(fmt.Errorf("%s", err), origStatement, input))
128		}
129	}()
130
131	for statement := input.ast.up; statement != nil; statement = statement.next {
132		assertNodeType(statement, ruleStatement)
133		origStatement = statement
134
135		node := skipWS(statement.up)
136		if node == nil {
137			d.writeNode(statement)
138			continue
139		}
140
141		switch node.pegRule {
142		case ruleGlobalDirective, ruleComment, ruleLocationDirective:
143			d.writeNode(statement)
144		case ruleDirective:
145			statement, err = d.processDirective(statement, node.up)
146		case ruleLabelContainingDirective:
147			statement, err = d.processLabelContainingDirective(statement, node.up)
148		case ruleLabel:
149			statement, err = d.processLabel(statement, node.up)
150		case ruleInstruction:
151			switch d.processor {
152			case x86_64:
153				statement, err = d.processIntelInstruction(statement, node.up)
154			case aarch64:
155				statement, err = d.processAarch64Instruction(statement, node.up)
156			default:
157				panic("unknown processor")
158			}
159		default:
160			panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule]))
161		}
162
163		if err != nil {
164			return locateError(err, origStatement, input)
165		}
166	}
167
168	return nil
169}
170
171func (d *delocation) processDirective(statement, directive *node32) (*node32, error) {
172	assertNodeType(directive, ruleDirectiveName)
173	directiveName := d.contents(directive)
174
175	var args []string
176	forEachPath(directive, func(arg *node32) {
177		// If the argument is a quoted string, use the raw contents.
178		// (Note that this doesn't unescape the string, but that's not
179		// needed so far.
180		if arg.up != nil {
181			arg = arg.up
182			assertNodeType(arg, ruleQuotedArg)
183			if arg.up == nil {
184				args = append(args, "")
185				return
186			}
187			arg = arg.up
188			assertNodeType(arg, ruleQuotedText)
189		}
190		args = append(args, d.contents(arg))
191	}, ruleArgs, ruleArg)
192
193	switch directiveName {
194	case "comm", "lcomm":
195		if len(args) < 1 {
196			return nil, errors.New("comm directive has no arguments")
197		}
198		d.bssAccessorsNeeded[args[0]] = args[0]
199		d.writeNode(statement)
200
201	case "data":
202		// ASAN and some versions of MSAN are adding a .data section,
203		// and adding references to symbols within it to the code. We
204		// will have to work around this in the future.
205		return nil, errors.New(".data section found in module")
206
207	case "section":
208		section := args[0]
209
210		if section == ".data.rel.ro" {
211			// In a normal build, this is an indication of a
212			// problem but any references from the module to this
213			// section will result in a relocation and thus will
214			// break the integrity check. ASAN can generate these
215			// sections and so we will likely have to work around
216			// that in the future.
217			return nil, errors.New(".data.rel.ro section found in module")
218		}
219
220		sectionType, ok := sectionType(section)
221		if !ok {
222			// Unknown sections are permitted in order to be robust
223			// to different compiler modes.
224			d.writeNode(statement)
225			break
226		}
227
228		switch sectionType {
229		case ".rodata", ".text":
230			// Move .rodata to .text so it may be accessed without
231			// a relocation. GCC with -fmerge-constants will place
232			// strings into separate sections, so we move all
233			// sections named like .rodata. Also move .text.startup
234			// so the self-test function is also in the module.
235			d.writeCommentedNode(statement)
236			d.output.WriteString(".text\n")
237
238		case ".data":
239			// See above about .data
240			return nil, errors.New(".data section found in module")
241
242		case ".init_array", ".fini_array", ".ctors", ".dtors":
243			// init_array/ctors/dtors contains function
244			// pointers to constructor/destructor
245			// functions. These contain relocations, but
246			// they're in a different section anyway.
247			d.writeNode(statement)
248			break
249
250		case ".debug", ".note":
251			d.writeNode(statement)
252			break
253
254		case ".bss":
255			d.writeNode(statement)
256			return d.handleBSS(statement)
257		}
258
259	default:
260		d.writeNode(statement)
261	}
262
263	return statement, nil
264}
265
266func (d *delocation) processSymbolExpr(expr *node32, b *strings.Builder) bool {
267	changed := false
268	assertNodeType(expr, ruleSymbolExpr)
269
270	for expr != nil {
271		atom := expr.up
272		assertNodeType(atom, ruleSymbolAtom)
273
274		for term := atom.up; term != nil; term = skipWS(term.next) {
275			if term.pegRule == ruleSymbolExpr {
276				changed = d.processSymbolExpr(term, b) || changed
277				continue
278			}
279
280			if term.pegRule != ruleLocalSymbol {
281				b.WriteString(d.contents(term))
282				continue
283			}
284
285			oldSymbol := d.contents(term)
286			newSymbol := d.mapLocalSymbol(oldSymbol)
287			if newSymbol != oldSymbol {
288				changed = true
289			}
290
291			b.WriteString(newSymbol)
292		}
293
294		next := skipWS(atom.next)
295		if next == nil {
296			break
297		}
298		assertNodeType(next, ruleSymbolOperator)
299		b.WriteString(d.contents(next))
300		next = skipWS(next.next)
301		assertNodeType(next, ruleSymbolExpr)
302		expr = next
303	}
304	return changed
305}
306
307func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) {
308	// The symbols within directives need to be mapped so that local
309	// symbols in two different .s inputs don't collide.
310	changed := false
311	assertNodeType(directive, ruleLabelContainingDirectiveName)
312	name := d.contents(directive)
313
314	node := directive.next
315	assertNodeType(node, ruleWS)
316
317	node = node.next
318	assertNodeType(node, ruleSymbolArgs)
319
320	var args []string
321	for node = skipWS(node.up); node != nil; node = skipWS(node.next) {
322		assertNodeType(node, ruleSymbolArg)
323		arg := node.up
324		assertNodeType(arg, ruleSymbolExpr)
325
326		var b strings.Builder
327		changed = d.processSymbolExpr(arg, &b) || changed
328
329		args = append(args, b.String())
330	}
331
332	if !changed {
333		d.writeNode(statement)
334	} else {
335		d.writeCommentedNode(statement)
336		d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
337	}
338
339	return statement, nil
340}
341
342func (d *delocation) processLabel(statement, label *node32) (*node32, error) {
343	symbol := d.contents(label)
344
345	switch label.pegRule {
346	case ruleLocalLabel:
347		d.output.WriteString(symbol + ":\n")
348	case ruleLocalSymbol:
349		// symbols need to be mapped so that local symbols from two
350		// different .s inputs don't collide.
351		d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n")
352	case ruleSymbolName:
353		d.output.WriteString(localTargetName(symbol) + ":\n")
354		d.writeNode(statement)
355	default:
356		return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule])
357	}
358
359	return statement, nil
360}
361
362// instructionArgs collects all the arguments to an instruction.
363func instructionArgs(node *node32) (argNodes []*node32) {
364	for node = skipWS(node); node != nil; node = skipWS(node.next) {
365		assertNodeType(node, ruleInstructionArg)
366		argNodes = append(argNodes, node.up)
367	}
368
369	return argNodes
370}
371
372// Aarch64 support
373
// gotHelperName returns the label of the synthesised helper function that
// returns the address of |symbol| from the GOT.
func gotHelperName(symbol string) string {
	const helperPrefix = ".Lboringssl_loadgot_"
	return helperPrefix + symbol
}
379
380// loadAarch64Address emits instructions to put the address of |symbol|
381// (optionally adjusted by |offsetStr|) into |targetReg|.
382func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) {
383	// There are two paths here: either the symbol is known to be local in which
384	// case adr is used to get the address (within 1MiB), or a GOT reference is
385	// really needed in which case the code needs to jump to a helper function.
386	//
387	// A helper function is needed because using code appears to be the only way
388	// to load a GOT value. On other platforms we have ".quad foo@GOT" outside of
389	// the module, but on Aarch64 that results in a "COPY" relocation and linker
390	// comments suggest it's a weird hack. So, for each GOT symbol needed, we emit
391	// a function outside of the module that returns the address from the GOT in
392	// x0.
393
394	d.writeCommentedNode(statement)
395
396	_, isKnown := d.symbols[symbol]
397	isLocal := strings.HasPrefix(symbol, ".L")
398	if isKnown || isLocal || isSynthesized(symbol) {
399		if isLocal {
400			symbol = d.mapLocalSymbol(symbol)
401		} else if isKnown {
402			symbol = localTargetName(symbol)
403		}
404
405		d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n")
406
407		return statement, nil
408	}
409
410	if len(offsetStr) != 0 {
411		panic("non-zero offset for helper-based reference")
412	}
413
414	var helperFunc string
415	if symbol == "OPENSSL_armcap_P" {
416		helperFunc = ".LOPENSSL_armcap_P_addr"
417	} else {
418		// GOT helpers also dereference the GOT entry, thus the subsequent ldr
419		// instruction, which would normally do the dereferencing, needs to be
420		// dropped. GOT helpers have to include the dereference because the
421		// assembler doesn't support ":got_lo12:foo" offsets except in an ldr
422		// instruction.
423		d.gotExternalsNeeded[symbol] = struct{}{}
424		helperFunc = gotHelperName(symbol)
425	}
426
427	// Clear the red-zone. I can't find a definitive answer about whether Linux
428	// Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a
429	// 128-byte one. Thus conservatively clear a 128-byte red-zone.
430	d.output.WriteString("\tsub sp, sp, 128\n")
431
432	// Save x0 (which will be stomped by the return value) and the link register
433	// to the stack. Then save the program counter into the link register and
434	// jump to the helper function.
435	d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n")
436	d.output.WriteString("\tbl " + helperFunc + "\n")
437
438	if targetReg == "x0" {
439		// If the target happens to be x0 then restore the link register from the
440		// stack and send the saved value of x0 to the zero register.
441		d.output.WriteString("\tldp xzr, lr, [sp], #16\n")
442	} else {
443		// Otherwise move the result into place and restore registers.
444		d.output.WriteString("\tmov " + targetReg + ", x0\n")
445		d.output.WriteString("\tldp x0, lr, [sp], #16\n")
446	}
447
448	// Revert the red-zone adjustment.
449	d.output.WriteString("\tadd sp, sp, 128\n")
450
451	return statement, nil
452}
453
454func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) {
455	assertNodeType(instruction, ruleInstructionName)
456	instructionName := d.contents(instruction)
457
458	argNodes := instructionArgs(instruction.next)
459
460	switch instructionName {
461	case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg":
462		// These functions are special because they take a condition-code name as
463		// an argument and that looks like a symbol reference.
464		d.writeNode(statement)
465		return statement, nil
466
467	case "mrs":
468		// Functions that take special register names also look like a symbol
469		// reference to the parser.
470		d.writeNode(statement)
471		return statement, nil
472
473	case "adrp":
474		// adrp always generates a relocation, even when the target symbol is in the
475		// same segment, because the page-offset of the code isn't known until link
476		// time. Thus adrp instructions are turned into either adr instructions
477		// (limiting the module to 1MiB offsets) or calls to helper functions, both of
478		// which load the full address. Later instructions, which add the low 12 bits
479		// of offset, are tweaked to remove the offset since it's already included.
480		// Loads of GOT symbols are slightly more complex because it's not possible to
481		// avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr
482		// instruction, which would normally do the dereferencing, is dropped
483		// completely. (Or turned into a mov if it targets a different register.)
484		assertNodeType(argNodes[0], ruleRegisterOrConstant)
485		targetReg := d.contents(argNodes[0])
486		if !strings.HasPrefix(targetReg, "x") {
487			panic("adrp targetting register " + targetReg + ", which has the wrong size")
488		}
489
490		var symbol, offset string
491		switch argNodes[1].pegRule {
492		case ruleGOTSymbolOffset:
493			symbol = d.contents(argNodes[1].up)
494		case ruleMemoryRef:
495			assertNodeType(argNodes[1].up, ruleSymbolRef)
496			node, empty := d.gatherOffsets(argNodes[1].up.up, "")
497			if len(empty) != 0 {
498				panic("prefix offsets found for adrp")
499			}
500			symbol = d.contents(node)
501			_, offset = d.gatherOffsets(node.next, "")
502		default:
503			panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule])
504		}
505
506		return d.loadAarch64Address(statement, targetReg, symbol, offset)
507	}
508
509	var args []string
510	changed := false
511
512	for _, arg := range argNodes {
513		fullArg := arg
514
515		switch arg.pegRule {
516		case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak:
517			args = append(args, d.contents(fullArg))
518
519		case ruleGOTSymbolOffset:
520			// These should only be arguments to adrp and thus unreachable.
521			panic("unreachable")
522
523		case ruleMemoryRef:
524			ref := arg.up
525
526			switch ref.pegRule {
527			case ruleSymbolRef:
528				// This is a branch. Either the target needs to be written to a local
529				// version of the symbol to ensure that no relocations are emitted, or
530				// it needs to jump to a redirector function.
531				symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up)
532				changed = didChange
533
534				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
535					symbol = localTargetName(symbol)
536					changed = true
537				} else if !symbolIsLocal && !isSynthesized(symbol) {
538					redirector := redirectorName(symbol)
539					d.redirectors[symbol] = redirector
540					symbol = redirector
541					changed = true
542				} else if didChange && symbolIsLocal && len(offset) > 0 {
543					// didChange is set when the inputFile index is not 0; which is the index of the
544					// first file copied to the output, which is the generated assembly of bcm.c.
545					// In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index)
546					// in order to ensure they don't collide. `index` gets incremented per file.
547					// If there is offset after the symbol, append the `offset`.
548					symbol = symbol + offset
549				}
550
551				args = append(args, symbol)
552
553			case ruleARMBaseIndexScale:
554				parts := ref.up
555				assertNodeType(parts, ruleARMRegister)
556				baseAddrReg := d.contents(parts)
557				parts = skipWS(parts.next)
558
559				// Only two forms need special handling. First there's memory references
560				// like "[x*, :got_lo12:foo]". The base register here will have been the
561				// target of an adrp instruction to load the page address, but the adrp
562				// will have turned into loading the full address *and dereferencing it*,
563				// above. Thus this instruction needs to be dropped otherwise we'll be
564				// dereferencing twice.
565				//
566				// Second there are forms like "[x*, :lo12:foo]" where the code has used
567				// adrp to load the page address into x*. That adrp will have been turned
568				// into loading the full address so just the offset needs to be dropped.
569
570				if parts != nil {
571					if parts.pegRule == ruleARMGOTLow12 {
572						if instructionName != "ldr" {
573							panic("Symbol reference outside of ldr instruction")
574						}
575
576						if skipWS(parts.next) != nil || parts.up.next != nil {
577							panic("can't handle tweak or post-increment with symbol references")
578						}
579
580						// The GOT helper already dereferenced the entry so, at most, just a mov
581						// is needed to put things in the right register.
582						d.writeCommentedNode(statement)
583						if baseAddrReg != args[0] {
584							d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n")
585						}
586						return statement, nil
587					} else if parts.pegRule == ruleLow12BitsSymbolRef {
588						if instructionName != "ldr" {
589							panic("Symbol reference outside of ldr instruction")
590						}
591
592						if skipWS(parts.next) != nil || parts.up.next != nil {
593							panic("can't handle tweak or post-increment with symbol references")
594						}
595
596						// Suppress the offset; adrp loaded the full address.
597						args = append(args, "["+baseAddrReg+"]")
598						changed = true
599						continue
600					}
601				}
602
603				args = append(args, d.contents(fullArg))
604
605			case ruleLow12BitsSymbolRef:
606				// These are the second instruction in a pair:
607				//   adrp x0, symbol           // Load the page address into x0
608				//   add x1, x0, :lo12:symbol  // Adds the page offset.
609				//
610				// The adrp instruction will have been turned into a sequence that loads
611				// the full address, above, thus the offset is turned into zero. If that
612				// results in the instruction being a nop, then it is deleted.
613				if instructionName != "add" {
614					panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName))
615				}
616
617				if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") {
618					panic("address arithmetic with incorrectly sized register")
619				}
620
621				if args[0] == args[1] {
622					d.writeCommentedNode(statement)
623					return statement, nil
624				}
625
626				args = append(args, "#0")
627				changed = true
628
629			default:
630				panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule]))
631			}
632
633		default:
634			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
635		}
636	}
637
638	if changed {
639		d.writeCommentedNode(statement)
640		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
641		d.output.WriteString(replacement)
642	} else {
643		d.writeNode(statement)
644	}
645
646	return statement, nil
647}
648
649func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
650	for symRef != nil && symRef.pegRule == ruleOffset {
651		offset := d.contents(symRef)
652		if offset[0] != '+' && offset[0] != '-' {
653			offset = "+" + offset
654		}
655		offsets = offsets + offset
656		symRef = symRef.next
657	}
658	return symRef, offsets
659}
660
661func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
662	if memRef.pegRule != ruleSymbolRef {
663		return "", "", "", false, false, memRef
664	}
665
666	symRef := memRef.up
667	nextRef = memRef.next
668
669	// (Offset* '+')?
670	symRef, offset = d.gatherOffsets(symRef, offset)
671
672	// (LocalSymbol / SymbolName)
673	symbol = d.contents(symRef)
674	if symRef.pegRule == ruleLocalSymbol {
675		symbolIsLocal = true
676		mapped := d.mapLocalSymbol(symbol)
677		if mapped != symbol {
678			symbol = mapped
679			didChange = true
680		}
681	}
682	symRef = symRef.next
683
684	// Offset*
685	symRef, offset = d.gatherOffsets(symRef, offset)
686
687	// ('@' Section / Offset*)?
688	if symRef != nil {
689		assertNodeType(symRef, ruleSection)
690		section = d.contents(symRef)
691		symRef = symRef.next
692
693		symRef, offset = d.gatherOffsets(symRef, offset)
694	}
695
696	if symRef != nil {
697		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
698	}
699
700	return
701}
702
703/* Intel */
704
// instructionType is the coarse category that classifyInstruction assigns to
// an Intel instruction.
type instructionType int

const (
	// instrPush is a single-argument push.
	instrPush instructionType = iota
	// instrMove is a two-argument move.
	instrMove
	// instrTransformingMove is essentially a move, but the data is
	// transformed in some way on the way through.
	instrTransformingMove
	// instrJump is a single-argument call or (conditional) jump.
	instrJump
	// instrConditionalMove is a two-argument conditional move.
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for
	// example a 2-operand bitwise operation.
	instrCombine
	// instrMemoryVectorCombine is similar to instrCombine, but the source
	// register must be a memory reference and the destination register
	// must be a vector register.
	instrMemoryVectorCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrCompare takes two arguments and writes outputs to the flags
	// register.
	instrCompare
	// instrOther covers everything not classified above.
	instrOther
)
728
729func classifyInstruction(instr string, args []*node32) instructionType {
730	switch instr {
731	case "push", "pushq":
732		if len(args) == 1 {
733			return instrPush
734		}
735
736	case "mov", "movq", "vmovq", "movsd", "vmovsd":
737		if len(args) == 2 {
738			return instrMove
739		}
740
741	case "cmovneq", "cmoveq":
742		if len(args) == 2 {
743			return instrConditionalMove
744		}
745
746	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
747		if len(args) == 1 {
748			return instrJump
749		}
750
751	case "orq", "andq", "xorq":
752		if len(args) == 2 {
753			return instrCombine
754		}
755
756	case "cmpq":
757		if len(args) == 2 {
758			return instrCompare
759		}
760
761	case "sarxq", "shlxq", "shrxq":
762		if len(args) == 3 {
763			return instrThreeArg
764		}
765
766	case "vpbroadcastq":
767		if len(args) == 2 {
768			return instrTransformingMove
769		}
770
771	case "movlps", "movhps":
772		if len(args) == 2 {
773			return instrMemoryVectorCombine
774		}
775	}
776
777	return instrOther
778}
779
780func push(w stringWriter) wrapperFunc {
781	return func(k func()) {
782		w.WriteString("\tpushq %rax\n")
783		k()
784		w.WriteString("\txchg %rax, (%rsp)\n")
785	}
786}
787
788func compare(w stringWriter, instr, a, b string) wrapperFunc {
789	return func(k func()) {
790		k()
791		w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b))
792	}
793}
794
795func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
796	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}
797
798	return func(k func()) {
799		if !redzoneCleared {
800			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
801		}
802		w.WriteString("\tpushf\n")
803		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
804		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
805		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
806		w.WriteString("\tpopf\n")
807		if !redzoneCleared {
808			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
809		}
810	}
811}
812
813func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
814	return func(k func()) {
815		if !redzoneCleared {
816			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
817			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
818		}
819		w.WriteString("\tpushfq\n")
820		k()
821		w.WriteString("\tpopfq\n")
822	}
823}
824
825func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
826	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}
827
828	var reg string
829NextCandidate:
830	for _, candidate := range candidates {
831		for _, avoid := range avoidRegs {
832			if candidate == avoid {
833				continue NextCandidate
834			}
835		}
836
837		reg = candidate
838		break
839	}
840
841	if len(reg) == 0 {
842		panic("too many excluded registers")
843	}
844
845	return func(k func()) {
846		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
847		w.WriteString("\tpushq " + reg + "\n")
848		k()
849		w.WriteString("\tpopq " + reg + "\n")
850		w.WriteString("\tleaq 128(%rsp), %rsp\n")
851	}, reg
852}
853
854func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
855	return func(k func()) {
856		k()
857		prefix := ""
858		if isAVX {
859			prefix = "v"
860		}
861		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
862	}
863}
864
865func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
866	return func(k func()) {
867		k()
868		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
869	}
870}
871
872func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
873	return func(k func()) {
874		k()
875		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
876	}
877}
878
879func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
880	return func(k func()) {
881		k()
882		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
883	}
884}
885
886func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
887	return func(k func()) {
888		k()
889		// These instructions can only read from memory, so push
890		// tempReg and read from the stack. Note we assume the red zone
891		// was previously cleared by saveRegister().
892		w.WriteString("\tpushq " + source + "\n")
893		w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n")
894		w.WriteString("\tleaq 8(%rsp), %rsp\n")
895	}
896}
897
// isValidLEATarget reports whether |reg| can be the destination of an lea,
// i.e. whether it is not an %xmm, %ymm or %zmm vector register.
func isValidLEATarget(reg string) bool {
	for _, vectorPrefix := range [...]string{"%xmm", "%ymm", "%zmm"} {
		if strings.HasPrefix(reg, vectorPrefix) {
			return false
		}
	}
	return true
}
901
902func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
903	var invertedCondition string
904
905	switch instr {
906	case "cmoveq":
907		invertedCondition = "ne"
908	case "cmovneq":
909		invertedCondition = "e"
910	default:
911		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
912	}
913
914	return func(k func()) {
915		w.WriteString("\tj" + invertedCondition + " 999f\n")
916		k()
917		w.WriteString("999:\n")
918	}
919}
920
921func (d *delocation) isRIPRelative(node *node32) bool {
922	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
923}
924
925func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
926	var prefix string
927	if instruction.pegRule == ruleInstructionPrefix {
928		prefix = d.contents(instruction)
929		instruction = skipWS(instruction.next)
930	}
931
932	assertNodeType(instruction, ruleInstructionName)
933	instructionName := d.contents(instruction)
934
935	argNodes := instructionArgs(instruction.next)
936
937	var wrappers wrapperStack
938	var args []string
939	changed := false
940
941Args:
942	for i, arg := range argNodes {
943		fullArg := arg
944		isIndirect := false
945
946		if arg.pegRule == ruleIndirectionIndicator {
947			arg = arg.next
948			isIndirect = true
949		}
950
951		switch arg.pegRule {
952		case ruleRegisterOrConstant, ruleLocalLabelRef:
953			args = append(args, d.contents(fullArg))
954
955		case ruleMemoryRef:
956			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
957			changed = didChange
958
959			if symbol == "OPENSSL_ia32cap_P" && section == "" {
960				if instructionName != "leaq" {
961					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
962				}
963
964				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
965					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
966				}
967
968				target := argNodes[1]
969				assertNodeType(target, ruleRegisterOrConstant)
970				reg := d.contents(target)
971
972				if !strings.HasPrefix(reg, "%r") {
973					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
974				}
975
976				changed = true
977
978				// Flag-altering instructions (i.e. addq) are going to be used so the
979				// flags need to be preserved.
980				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))
981
982				wrappers = append(wrappers, func(k func()) {
983					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
984					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
985				})
986
987				break Args
988			}
989
990			switch section {
991			case "":
992				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
993					symbol = localTargetName(symbol)
994					changed = true
995				}
996
997			case "PLT":
998				if classifyInstruction(instructionName, argNodes) != instrJump {
999					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
1000				}
1001
1002				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1003					symbol = localTargetName(symbol)
1004					changed = true
1005				} else if !symbolIsLocal && !isSynthesized(symbol) {
1006					// Unknown symbol via PLT is an
1007					// out-call from the module, e.g.
1008					// memcpy.
1009					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
1010					symbol = redirectorName(symbol)
1011				}
1012
1013				changed = true
1014
1015			case "GOTPCREL":
1016				if len(offset) > 0 {
1017					return nil, errors.New("loading from GOT with offset is unsupported")
1018				}
1019				if !d.isRIPRelative(memRef) {
1020					return nil, errors.New("GOT access must be IP-relative")
1021				}
1022
1023				useGOT := false
1024				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1025					symbol = localTargetName(symbol)
1026					changed = true
1027				} else if !isSynthesized(symbol) {
1028					useGOT = true
1029				}
1030
1031				classification := classifyInstruction(instructionName, argNodes)
1032				if classification != instrThreeArg && classification != instrCompare && i != 0 {
1033					return nil, errors.New("GOT access must be source operand")
1034				}
1035
1036				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
1037				var targetReg string
1038				var redzoneCleared bool
1039				switch classification {
1040				case instrPush:
1041					wrappers = append(wrappers, push(d.output))
1042					targetReg = "%rax"
1043				case instrConditionalMove:
1044					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
1045					fallthrough
1046				case instrMove:
1047					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1048					targetReg = d.contents(argNodes[1])
1049				case instrCompare:
1050					otherSource := d.contents(argNodes[i^1])
1051					saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource})
1052					redzoneCleared = true
1053					wrappers = append(wrappers, saveRegWrapper)
1054					if i == 0 {
1055						wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource))
1056					} else {
1057						wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg))
1058					}
1059					targetReg = tempReg
1060				case instrTransformingMove:
1061					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1062					targetReg = d.contents(argNodes[1])
1063					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg))
1064					if isValidLEATarget(targetReg) {
1065						return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.")
1066					}
1067				case instrCombine:
1068					targetReg = d.contents(argNodes[1])
1069					if !isValidLEATarget(targetReg) {
1070						return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers")
1071					}
1072					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg})
1073					redzoneCleared = true
1074					wrappers = append(wrappers, saveRegWrapper)
1075
1076					wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg))
1077					targetReg = tempReg
1078				case instrMemoryVectorCombine:
1079					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1080					targetReg = d.contents(argNodes[1])
1081					if isValidLEATarget(targetReg) {
1082						return nil, errors.New("target register must be an XMM register")
1083					}
1084					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1085					wrappers = append(wrappers, saveRegWrapper)
1086					redzoneCleared = true
1087					wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg))
1088					targetReg = tempReg
1089				case instrThreeArg:
1090					if n := len(argNodes); n != 3 {
1091						return nil, fmt.Errorf("three-argument instruction has %d arguments", n)
1092					}
1093					if i != 0 && i != 1 {
1094						return nil, errors.New("GOT access must be from source operand")
1095					}
1096					targetReg = d.contents(argNodes[2])
1097
1098					otherSource := d.contents(argNodes[1])
1099					if i == 1 {
1100						otherSource = d.contents(argNodes[0])
1101					}
1102
1103					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource})
1104					redzoneCleared = true
1105					wrappers = append(wrappers, saveRegWrapper)
1106
1107					if i == 0 {
1108						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg))
1109					} else {
1110						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg))
1111					}
1112					targetReg = tempReg
1113				default:
1114					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
1115				}
1116
1117				if !isValidLEATarget(targetReg) {
1118					// Sometimes the compiler will load from the GOT to an
1119					// XMM register, which is not a valid target of an LEA
1120					// instruction.
1121					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1122					wrappers = append(wrappers, saveRegWrapper)
1123					isAVX := strings.HasPrefix(instructionName, "v")
1124					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg))
1125					targetReg = tempReg
1126					if redzoneCleared {
1127						return nil, fmt.Errorf("internal error: Red Zone was already cleared")
1128					}
1129					redzoneCleared = true
1130				}
1131
1132				if symbol == "OPENSSL_ia32cap_P" {
1133					// Flag-altering instructions (i.e. addq) are going to be used so the
1134					// flags need to be preserved.
1135					wrappers = append(wrappers, saveFlags(d.output, redzoneCleared))
1136					wrappers = append(wrappers, func(k func()) {
1137						d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n")
1138						d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n")
1139					})
1140				} else if useGOT {
1141					wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared))
1142				} else {
1143					wrappers = append(wrappers, func(k func()) {
1144						d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg))
1145					})
1146				}
1147				changed = true
1148				break Args
1149
1150			default:
1151				return nil, fmt.Errorf("Unknown section type %q", section)
1152			}
1153
1154			if !changed && len(section) > 0 {
1155				panic("section was not handled")
1156			}
1157			section = ""
1158
1159			argStr := ""
1160			if isIndirect {
1161				argStr += "*"
1162			}
1163			argStr += symbol
1164			argStr += offset
1165
1166			for ; memRef != nil; memRef = memRef.next {
1167				argStr += d.contents(memRef)
1168			}
1169
1170			args = append(args, argStr)
1171
1172		case ruleGOTAddress:
1173			if instructionName != "leaq" {
1174				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ used outside of lea")
1175			}
1176			if i != 0 || len(argNodes) != 2 {
1177				return nil, fmt.Errorf("Load of _GLOBAL_OFFSET_TABLE_ address didn't have expected form")
1178			}
1179			d.gotDeltaNeeded = true
1180			changed = true
1181			targetReg := d.contents(argNodes[1])
1182			args = append(args, ".Lboringssl_got_delta(%rip)")
1183			wrappers = append(wrappers, func(k func()) {
1184				k()
1185				d.output.WriteString(fmt.Sprintf("\taddq .Lboringssl_got_delta(%%rip), %s\n", targetReg))
1186			})
1187
1188		case ruleGOTLocation:
1189			if instructionName != "movabsq" {
1190				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq")
1191			}
1192			if i != 0 || len(argNodes) != 2 {
1193				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form")
1194			}
1195
1196			d.gotDeltaNeeded = true
1197			changed = true
1198			instructionName = "movq"
1199			assertNodeType(arg.up, ruleLocalSymbol)
1200			baseSymbol := d.mapLocalSymbol(d.contents(arg.up))
1201			targetReg := d.contents(argNodes[1])
1202			args = append(args, ".Lboringssl_got_delta(%rip)")
1203			wrappers = append(wrappers, func(k func()) {
1204				k()
1205				d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg))
1206			})
1207
1208		case ruleGOTSymbolOffset:
1209			if instructionName != "movabsq" {
1210				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq")
1211			}
1212			if i != 0 || len(argNodes) != 2 {
1213				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form")
1214			}
1215
1216			assertNodeType(arg.up, ruleSymbolName)
1217			symbol := d.contents(arg.up)
1218			if strings.HasPrefix(symbol, ".L") {
1219				symbol = d.mapLocalSymbol(symbol)
1220			}
1221			targetReg := d.contents(argNodes[1])
1222
1223			var prefix string
1224			isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF")
1225			if isGOTOFF {
1226				prefix = "gotoff"
1227				d.gotOffOffsetsNeeded[symbol] = struct{}{}
1228			} else {
1229				prefix = "got"
1230				d.gotOffsetsNeeded[symbol] = struct{}{}
1231			}
1232			changed = true
1233
1234			wrappers = append(wrappers, func(k func()) {
1235				// Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time
1236				// of writing) emits 64-bit relocations anyway, so the following four bytes
1237				// get stomped. Thus we use 64-bit offsets.
1238				d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg))
1239			})
1240
1241		default:
1242			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
1243		}
1244	}
1245
1246	if changed {
1247		d.writeCommentedNode(statement)
1248		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
1249		if len(prefix) != 0 {
1250			replacement = "\t" + prefix + replacement
1251		}
1252		wrappers.do(func() {
1253			d.output.WriteString(replacement)
1254		})
1255	} else {
1256		d.writeNode(statement)
1257	}
1258
1259	return statement, nil
1260}
1261
1262func (d *delocation) handleBSS(statement *node32) (*node32, error) {
1263	lastStatement := statement
1264	for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next {
1265		node := skipWS(statement.up)
1266		if node == nil {
1267			d.writeNode(statement)
1268			continue
1269		}
1270
1271		switch node.pegRule {
1272		case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective:
1273			d.writeNode(statement)
1274
1275		case ruleDirective:
1276			directive := node.up
1277			assertNodeType(directive, ruleDirectiveName)
1278			directiveName := d.contents(directive)
1279			if directiveName == "text" || directiveName == "section" || directiveName == "data" {
1280				return lastStatement, nil
1281			}
1282			d.writeNode(statement)
1283
1284		case ruleLabel:
1285			label := node.up
1286			d.writeNode(statement)
1287
1288			if label.pegRule != ruleLocalSymbol {
1289				symbol := d.contents(label)
1290				localSymbol := localTargetName(symbol)
1291				d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol))
1292
1293				d.bssAccessorsNeeded[symbol] = localSymbol
1294			}
1295
1296		case ruleLabelContainingDirective:
1297			var err error
1298			statement, err = d.processLabelContainingDirective(statement, node.up)
1299			if err != nil {
1300				return nil, err
1301			}
1302
1303		default:
1304			return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement))
1305		}
1306	}
1307
1308	return lastStatement, nil
1309}
1310
1311func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) {
1312	w.WriteString(".p2align 2\n")
1313	w.WriteString(".hidden " + funcName + "\n")
1314	w.WriteString(".type " + funcName + ", @function\n")
1315	w.WriteString(funcName + ":\n")
1316	w.WriteString(".cfi_startproc\n")
1317	// We insert a landing pad (`bti c` instruction) unconditionally at the beginning of
1318	// every generated function so that they can be called indirectly (with `blr` or
1319	// `br x16/x17`). The instruction is encoded in the HINT space as `hint #34` and is
1320	// a no-op on machines or program states not supporting BTI (Branch Target Identification).
1321	// None of the generated function bodies call other functions (with bl or blr), so we only
1322	// insert a landing pad instead of signing and validating $lr with `paciasp` and `autiasp`.
1323	// Normally we would also generate a .note.gnu.property section to annotate the assembly
1324	// file as BTI-compatible, but if the input assembly files are BTI-compatible, they should
1325	// already have those sections so there is no need to add an extra one ourselves.
1326	w.WriteString("\thint #34 // bti c\n")
1327	writeContents(w)
1328	w.WriteString(".cfi_endproc\n")
1329	w.WriteString(".size " + funcName + ", .-" + funcName + "\n")
1330}
1331
1332func transform(w stringWriter, inputs []inputFile) error {
1333	// symbols contains all defined symbols.
1334	symbols := make(map[string]struct{})
1335	// fileNumbers is the set of IDs seen in .file directives.
1336	fileNumbers := make(map[int]struct{})
1337	// maxObservedFileNumber contains the largest seen file number in a
1338	// .file directive. Zero is not a valid number.
1339	maxObservedFileNumber := 0
1340	// fileDirectivesContainMD5 is true if the compiler is outputting MD5
1341	// checksums in .file directives. If it does so, then this script needs
1342	// to match that behaviour otherwise warnings result.
1343	fileDirectivesContainMD5 := false
1344
1345	// OPENSSL_ia32cap_get will be synthesized by this script.
1346	symbols["OPENSSL_ia32cap_get"] = struct{}{}
1347
1348	for _, input := range inputs {
1349		forEachPath(input.ast.up, func(node *node32) {
1350			symbol := input.contents[node.begin:node.end]
1351			if _, ok := symbols[symbol]; ok {
1352				panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path))
1353			}
1354			symbols[symbol] = struct{}{}
1355		}, ruleStatement, ruleLabel, ruleSymbolName)
1356
1357		forEachPath(input.ast.up, func(node *node32) {
1358			assertNodeType(node, ruleLocationDirective)
1359			directive := input.contents[node.begin:node.end]
1360			if !strings.HasPrefix(directive, ".file") {
1361				return
1362			}
1363			parts := strings.Fields(directive)
1364			if len(parts) == 2 {
1365				// This is a .file directive with just a
1366				// filename. Clang appears to generate just one
1367				// of these at the beginning of the output for
1368				// the compilation unit. Ignore it.
1369				return
1370			}
1371			fileNo, err := strconv.Atoi(parts[1])
1372			if err != nil {
1373				panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive))
1374			}
1375
1376			if _, ok := fileNumbers[fileNo]; ok {
1377				panic(fmt.Sprintf("Duplicate file number %d observed", fileNo))
1378			}
1379			fileNumbers[fileNo] = struct{}{}
1380
1381			if fileNo > maxObservedFileNumber {
1382				maxObservedFileNumber = fileNo
1383			}
1384
1385			for _, token := range parts[2:] {
1386				if token == "md5" {
1387					fileDirectivesContainMD5 = true
1388				}
1389			}
1390		}, ruleStatement, ruleLocationDirective)
1391	}
1392
1393	processor := x86_64
1394	if len(inputs) > 0 {
1395		processor = detectProcessor(inputs[0])
1396	}
1397
1398	commentIndicator := "#"
1399	if processor == aarch64 {
1400		commentIndicator = "//"
1401	}
1402
1403	d := &delocation{
1404		symbols:             symbols,
1405		processor:           processor,
1406		commentIndicator:    commentIndicator,
1407		output:              w,
1408		redirectors:         make(map[string]string),
1409		bssAccessorsNeeded:  make(map[string]string),
1410		gotExternalsNeeded:  make(map[string]struct{}),
1411		gotOffsetsNeeded:    make(map[string]struct{}),
1412		gotOffOffsetsNeeded: make(map[string]struct{}),
1413	}
1414
1415	w.WriteString(".text\n")
1416	var fileTrailing string
1417	if fileDirectivesContainMD5 {
1418		fileTrailing = " md5 0x00000000000000000000000000000000"
1419	}
1420	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing))
1421	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1))
1422	w.WriteString("BORINGSSL_bcm_text_start:\n")
1423
1424	for _, input := range inputs {
1425		if err := d.processInput(input); err != nil {
1426			return err
1427		}
1428	}
1429
1430	w.WriteString(".text\n")
1431	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1))
1432	w.WriteString("BORINGSSL_bcm_text_end:\n")
1433
1434	// Emit redirector functions. Each is a single jump instruction.
1435	var redirectorNames []string
1436	for name := range d.redirectors {
1437		redirectorNames = append(redirectorNames, name)
1438	}
1439	sort.Strings(redirectorNames)
1440
1441	for _, name := range redirectorNames {
1442		redirector := d.redirectors[name]
1443		switch d.processor {
1444		case aarch64:
1445			writeAarch64Function(w, redirector, func(w stringWriter) {
1446				w.WriteString("\tb " + name + "\n")
1447			})
1448
1449		case x86_64:
1450			w.WriteString(".type " + redirector + ", @function\n")
1451			w.WriteString(redirector + ":\n")
1452			w.WriteString("\tjmp\t" + name + "\n")
1453		}
1454	}
1455
1456	var accessorNames []string
1457	for accessor := range d.bssAccessorsNeeded {
1458		accessorNames = append(accessorNames, accessor)
1459	}
1460	sort.Strings(accessorNames)
1461
1462	// Emit BSS accessor functions. Each is a single LEA followed by RET.
1463	for _, name := range accessorNames {
1464		funcName := accessorName(name)
1465		target := d.bssAccessorsNeeded[name]
1466
1467		switch d.processor {
1468		case x86_64:
1469			w.WriteString(".type " + funcName + ", @function\n")
1470			w.WriteString(funcName + ":\n")
1471			w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n")
1472
1473		case aarch64:
1474			writeAarch64Function(w, funcName, func(w stringWriter) {
1475				w.WriteString("\tadrp x0, " + target + "\n")
1476				w.WriteString("\tadd x0, x0, :lo12:" + target + "\n")
1477				w.WriteString("\tret\n")
1478			})
1479		}
1480	}
1481
1482	switch d.processor {
1483	case aarch64:
1484		externalNames := sortedSet(d.gotExternalsNeeded)
1485		for _, symbol := range externalNames {
1486			writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) {
1487				w.WriteString("\tadrp x0, :got:" + symbol + "\n")
1488				w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n")
1489				w.WriteString("\tret\n")
1490			})
1491		}
1492
1493		writeAarch64Function(w, ".LOPENSSL_armcap_P_addr", func(w stringWriter) {
1494			w.WriteString("\tadrp x0, OPENSSL_armcap_P\n")
1495			w.WriteString("\tadd x0, x0, :lo12:OPENSSL_armcap_P\n")
1496			w.WriteString("\tret\n")
1497		})
1498
1499	case x86_64:
1500		externalNames := sortedSet(d.gotExternalsNeeded)
1501		for _, name := range externalNames {
1502			parts := strings.SplitN(name, "@", 2)
1503			symbol, section := parts[0], parts[1]
1504			w.WriteString(".type " + symbol + "_" + section + "_external, @object\n")
1505			w.WriteString(".size " + symbol + "_" + section + "_external, 8\n")
1506			w.WriteString(symbol + "_" + section + "_external:\n")
1507			// Ideally this would be .quad foo@GOTPCREL, but clang's
1508			// assembler cannot emit a 64-bit GOTPCREL relocation. Instead,
1509			// we manually sign-extend the value, knowing that the GOT is
1510			// always at the end, thus foo@GOTPCREL has a positive value.
1511			w.WriteString("\t.long " + symbol + "@" + section + "\n")
1512			w.WriteString("\t.long 0\n")
1513		}
1514
1515		w.WriteString(".type OPENSSL_ia32cap_get, @function\n")
1516		w.WriteString(".globl OPENSSL_ia32cap_get\n")
1517		w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n")
1518		w.WriteString("OPENSSL_ia32cap_get:\n")
1519		w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n")
1520		w.WriteString("\tret\n")
1521
1522		w.WriteString(".extern OPENSSL_ia32cap_P\n")
1523		w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n")
1524		w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n")
1525		w.WriteString("OPENSSL_ia32cap_addr_delta:\n")
1526		w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n")
1527
1528		if d.gotDeltaNeeded {
1529			w.WriteString(".Lboringssl_got_delta:\n")
1530			w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n")
1531		}
1532
1533		for _, name := range sortedSet(d.gotOffsetsNeeded) {
1534			w.WriteString(".Lboringssl_got_" + name + ":\n")
1535			w.WriteString("\t.quad " + name + "@GOT\n")
1536		}
1537		for _, name := range sortedSet(d.gotOffOffsetsNeeded) {
1538			w.WriteString(".Lboringssl_gotoff_" + name + ":\n")
1539			w.WriteString("\t.quad " + name + "@GOTOFF\n")
1540		}
1541	}
1542
1543	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
1544	w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n")
1545	w.WriteString("BORINGSSL_bcm_text_hash:\n")
1546	for _, b := range fipscommon.UninitHashValue {
1547		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
1548	}
1549
1550	return nil
1551}
1552
// preprocess runs the file at path through the C preprocessor. cppCommand
// holds the program to run followed by its arguments; path is appended as the
// final argument. The command's standard output is returned and its standard
// error is passed through to this process's standard error.
func preprocess(cppCommand []string, path string) ([]byte, error) {
	argv := make([]string, 0, len(cppCommand)+1)
	argv = append(append(argv, cppCommand...), path)

	var stdout bytes.Buffer
	cmd := exec.Command(argv[0], argv[1:]...)
	cmd.Stdout = &stdout
	cmd.Stderr = os.Stderr

	err := cmd.Run()
	if err != nil {
		return nil, err
	}
	return stdout.Bytes(), nil
}
1570
1571func parseInputs(inputs []inputFile, cppCommand []string) error {
1572	for i, input := range inputs {
1573		var contents string
1574
1575		if input.isArchive {
1576			arFile, err := os.Open(input.path)
1577			if err != nil {
1578				return err
1579			}
1580			defer arFile.Close()
1581
1582			ar, err := ar.ParseAR(arFile)
1583			if err != nil {
1584				return err
1585			}
1586
1587			if len(ar) != 1 {
1588				return fmt.Errorf("expected one file in archive, but found %d", len(ar))
1589			}
1590
1591			for _, c := range ar {
1592				contents = string(c)
1593			}
1594		} else {
1595			var inBytes []byte
1596			var err error
1597
1598			if len(cppCommand) > 0 {
1599				inBytes, err = preprocess(cppCommand, input.path)
1600			} else {
1601				inBytes, err = os.ReadFile(input.path)
1602			}
1603			if err != nil {
1604				return err
1605			}
1606
1607			contents = string(inBytes)
1608		}
1609
1610		asm := Asm{Buffer: contents, Pretty: true}
1611		asm.Init()
1612		if err := asm.Parse(); err != nil {
1613			return fmt.Errorf("error while parsing %q: %s", input.path, err)
1614		}
1615		ast := asm.AST()
1616
1617		inputs[i].contents = contents
1618		inputs[i].ast = ast
1619	}
1620
1621	return nil
1622}
1623
// includePathFromHeaderFilePath returns an include directory path based on the
// path of a specific header file. It walks up the path and assumes that the
// include files are rooted in a directory called "openssl".
//
// The result is the part of path that precedes the "openssl" element,
// including its trailing separator. If "openssl" is the first element of a
// relative path then "." is returned, so that callers never receive an empty
// directory. An error is returned if no element of path is named "openssl".
func includePathFromHeaderFilePath(path string) (string, error) {
	dir := path
	for {
		var file string
		dir, file = filepath.Split(dir)

		if file == "openssl" {
			if len(dir) == 0 {
				// An empty string would turn into a bare "-I" flag that
				// consumes whichever argument follows it, so name the
				// current directory explicitly.
				return ".", nil
			}
			return dir, nil
		}

		if len(dir) == 0 {
			break
		}
		// Drop the trailing separator so that the next Split yields the
		// parent element.
		dir = dir[:len(dir)-1]
	}

	return "", fmt.Errorf("failed to find 'openssl' path element in header file path %q", path)
}
1645
1646func main() {
1647	// The .a file, if given, is expected to be an archive of textual
1648	// assembly sources. That's odd, but CMake really wants to create
1649	// archive files so it's the only way that we can make it work.
1650	arInput := flag.String("a", "", "Path to a .a file containing assembly sources")
1651	outFile := flag.String("o", "", "Path to output assembly")
1652	ccPath := flag.String("cc", "", "Path to the C compiler for preprocessing inputs")
1653	ccFlags := flag.String("cc-flags", "", "Flags for the C compiler when preprocessing")
1654
1655	flag.Parse()
1656
1657	if len(*outFile) == 0 {
1658		fmt.Fprintf(os.Stderr, "Must give argument to -o.\n")
1659		os.Exit(1)
1660	}
1661
1662	var inputs []inputFile
1663	if len(*arInput) > 0 {
1664		inputs = append(inputs, inputFile{
1665			path:      *arInput,
1666			index:     0,
1667			isArchive: true,
1668		})
1669	}
1670
1671	includePaths := make(map[string]struct{})
1672
1673	for i, path := range flag.Args() {
1674		if len(path) == 0 {
1675			continue
1676		}
1677
1678		// Header files are not processed but their path is remembered
1679		// and passed as -I arguments when invoking the preprocessor.
1680		if strings.HasSuffix(path, ".h") {
1681			dir, err := includePathFromHeaderFilePath(path)
1682			if err != nil {
1683				fmt.Fprintf(os.Stderr, "%s\n", err)
1684				os.Exit(1)
1685			}
1686			includePaths[dir] = struct{}{}
1687			continue
1688		}
1689
1690		inputs = append(inputs, inputFile{
1691			path:  path,
1692			index: i + 1,
1693		})
1694	}
1695
1696	var cppCommand []string
1697	if len(*ccPath) > 0 {
1698		cppCommand = append(cppCommand, *ccPath)
1699		cppCommand = append(cppCommand, strings.Fields(*ccFlags)...)
1700		// Some of ccFlags might be superfluous when running the
1701		// preprocessor, but we don't want the compiler complaining that
1702		// "argument unused during compilation".
1703		cppCommand = append(cppCommand, "-Wno-unused-command-line-argument")
1704
1705		for includePath := range includePaths {
1706			cppCommand = append(cppCommand, "-I"+includePath)
1707		}
1708
1709		// -E requests only preprocessing.
1710		cppCommand = append(cppCommand, "-E")
1711	}
1712
1713	if err := parseInputs(inputs, cppCommand); err != nil {
1714		fmt.Fprintf(os.Stderr, "%s\n", err)
1715		os.Exit(1)
1716	}
1717
1718	out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
1719	if err != nil {
1720		panic(err)
1721	}
1722	defer out.Close()
1723
1724	if err := transform(out, inputs); err != nil {
1725		fmt.Fprintf(os.Stderr, "%s\n", err)
1726		os.Exit(1)
1727	}
1728}
1729
1730func forEachPath(node *node32, cb func(*node32), rules ...pegRule) {
1731	if node == nil {
1732		return
1733	}
1734
1735	if len(rules) == 0 {
1736		cb(node)
1737		return
1738	}
1739
1740	rule := rules[0]
1741	childRules := rules[1:]
1742
1743	for ; node != nil; node = node.next {
1744		if node.pegRule != rule {
1745			continue
1746		}
1747
1748		if len(childRules) == 0 {
1749			cb(node)
1750		} else {
1751			forEachPath(node.up, cb, childRules...)
1752		}
1753	}
1754}
1755
1756func skipNodes(node *node32, ruleToSkip pegRule) *node32 {
1757	for ; node != nil && node.pegRule == ruleToSkip; node = node.next {
1758	}
1759	return node
1760}
1761
1762func skipWS(node *node32) *node32 {
1763	return skipNodes(node, ruleWS)
1764}
1765
1766func assertNodeType(node *node32, expected pegRule) {
1767	if rule := node.pegRule; rule != expected {
1768		panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected]))
1769	}
1770}
1771
// wrapperFunc surrounds some output with additional output. It is passed a
// continuation which it may call to produce the output being wrapped.
type wrapperFunc func(func())

// wrapperStack is an ordered list of wrappers. The first element is the
// outermost wrapper.
type wrapperStack []wrapperFunc

// do runs the wrappers from outermost to innermost, with baseCase as the
// innermost continuation. Wrappers are removed from the stack as they are
// run. If a wrapper does not call its continuation then neither the remaining
// wrappers nor baseCase are run.
func (w *wrapperStack) do(baseCase func()) {
	pending := *w
	if len(pending) == 0 {
		baseCase()
		return
	}

	outermost := pending[0]
	*w = pending[1:]
	outermost(func() {
		w.do(baseCase)
	})
}
1786
// localTargetName returns the local label that stands in for the global
// symbol called name: name wrapped in ".L" and "_local_target".
func localTargetName(name string) string {
	const (
		prefix = ".L"
		suffix = "_local_target"
	)
	return prefix + name + suffix
}
1792
// isSynthesized reports whether symbol has one of the recognised forms: a
// name ending in "_bss_get", the name "OPENSSL_ia32cap_get", or a name
// starting with "BORINGSSL_bcm_text_".
func isSynthesized(symbol string) bool {
	switch {
	case symbol == "OPENSSL_ia32cap_get":
		return true
	case strings.HasSuffix(symbol, "_bss_get"):
		return true
	default:
		return strings.HasPrefix(symbol, "BORINGSSL_bcm_text_")
	}
}
1798
// redirectorName returns the name of the redirector function for symbol,
// which is symbol prefixed with "bcm_redirector_".
func redirectorName(symbol string) string {
	const prefix = "bcm_redirector_"
	return prefix + symbol
}
1802
// sectionType returns the type of a section, i.e. the section name up to but
// not including its second dot: a section called ".text.foo" is a ".text"
// section. Any type starting with ".debug_" is reported as ".debug". The
// second result is false if section is empty or does not start with a dot.
func sectionType(section string) (string, bool) {
	if !strings.HasPrefix(section, ".") {
		return "", false
	}

	kind := section
	// Search past the leading dot, hence the +1 when slicing.
	if dot := strings.IndexByte(section[1:], '.'); dot >= 0 {
		kind = section[:dot+1]
	}

	if strings.HasPrefix(kind, ".debug_") {
		kind = ".debug"
	}
	return kind, true
}
1821
// accessorName returns the name of the accessor function for the BSS symbol
// called name, which is name followed by "_bss_get".
func accessorName(name string) string {
	const suffix = "_bss_get"
	return name + suffix
}
1827
1828func (d *delocation) mapLocalSymbol(symbol string) string {
1829	if d.currentInput.index == 0 {
1830		return symbol
1831	}
1832	return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index)
1833}
1834
1835func detectProcessor(input inputFile) processorType {
1836	for statement := input.ast.up; statement != nil; statement = statement.next {
1837		node := skipNodes(statement.up, ruleWS)
1838		if node == nil || node.pegRule != ruleInstruction {
1839			continue
1840		}
1841
1842		instruction := node.up
1843		instructionName := input.contents[instruction.begin:instruction.end]
1844
1845		switch instructionName {
1846		case "movq", "call", "leaq":
1847			return x86_64
1848		case "str", "bl", "ldr", "st1":
1849			return aarch64
1850		}
1851	}
1852
1853	panic("processed entire input and didn't recognise any instructions.")
1854}
1855
// sortedSet returns the keys of m in ascending order. The result is always
// non-nil, even when m is empty.
func sortedSet(m map[string]struct{}) []string {
	keys := make([]string, len(m))
	next := 0
	for key := range m {
		keys[next] = key
		next++
	}
	sort.Strings(keys)
	return keys
}
1864