#!/usr/bin/env python3

"""
strip_asm.py - Cleanup ASM output for the specified file
"""

import os
import re
import sys
from argparse import ArgumentParser


def find_used_labels(asm):
    """
    Return the set of local labels that are targets of a jump instruction.

    A line counts when it starts (after optional whitespace) with a mnemonic
    of the form ``j<letters>`` followed by an operand beginning with ``.L``.
    Each returned name includes the leading ``.L``.
    """
    label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    matches = (label_re.match(line) for line in asm.splitlines())
    return {".L%s" % m.group(1) for m in matches if m}


def normalize_labels(asm):
    """
    Rewrite local labels spelled ``L<name>`` as ``.L<name>``.

    Labels are discovered from their declarations (``L<name>:`` or
    ``.L<name>:`` at the start of a line). Every label declared without the
    leading dot gets one added, both at its declaration and wherever the name
    appears as a whitespace-delimited token. Labels that already carry the
    dot are left untouched.
    """
    label_decl = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    decls = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m:
            decls.add(m.group(0))

    # Decide per label rather than from one arbitrary set element, so the
    # result does not depend on set iteration order. Sorting keeps the
    # substitution order deterministic as well.
    undotted = sorted(d for d in decls if not d.startswith("."))
    for ld in undotted:
        # The trailing "$" alternative also covers a reference that is the
        # very last token of the input (no trailing newline).
        asm = re.sub(
            r"(^|\s+)" + re.escape(ld) + r"(?=:|\s|$)", "\\1." + ld, asm
        )
    return asm


def transform_labels(asm):
    """
    Normalize local labels, then drop declarations of unused ones.

    A ``.L<name>:`` declaration line is removed when no jump instruction
    targets that label. All other lines are kept, each terminated by a
    newline in the returned text.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    kept = []
    for line in asm.splitlines():
        m = label_decl.match(line)
        if not m or m.group(0) in used_decls:
            kept.append(line + "\n")
    return "".join(kept)


def is_identifier(tk):
    """
    Return True if ``tk`` is a non-empty token that starts with a letter or
    underscore and otherwise contains only alphanumerics and underscores.
    """
    if not tk:
        return False
    first = tk[0]
    if not first.isalpha() and first != "_":
        return False
    return all(c.isalnum() or c == "_" for c in tk[1:])


def process_identifiers(line):
    """
    process_identifiers - process all identifiers and modify them to have
    consistent names across all platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.
    """
    # The capturing group makes re.split keep the identifier-like tokens, so
    # joining the parts back together reproduces the rest of the line.
    parts = re.split(r"([a-zA-Z0-9_]+)", line)
    new_parts = []
    for tk in parts:
        if is_identifier(tk):
            if tk.startswith("__Z"):
                # "__Z..." -> "_Z...": drop only the extra underscore.
                tk = tk[1:]
            elif (
                tk.startswith("_")
                and len(tk) > 1
                and tk[1].isalpha()
                and tk[1] != "Z"
            ):
                # "_name" -> "name"; tokens starting with "_Z" are kept.
                tk = tk[1:]
        new_parts.append(tk)
    return "".join(new_parts)


def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    Returns the cleaned text: unused local labels removed, lines matching a
    discard pattern dropped, "@GOTPCREL" removed, identifiers passed through
    process_identifiers, and an empty line inserted before every function
    label except when it is the first line of the output.
    """
    asm = transform_labels(asm)

    # TODO: Add more things we want to remove
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(
            r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"
        ),  # global directive
        re.compile(
            r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"
        ),
    ]
    # Patterns listed here override a discard match; currently none.
    keep_regexes: list[re.Pattern] = []
    fn_label_def = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")

    out = []
    for line in asm.splitlines():
        # Remove Mach-O attribute
        line = line.replace("@GOTPCREL", "")
        add_line = not any(reg.match(line) for reg in discard_regexes)
        if any(reg.match(line) for reg in keep_regexes):
            add_line = True
        if not add_line:
            continue
        # Separate functions with an empty line, but never lead with one.
        if fn_label_def.match(line) and out:
            out.append("\n")
        out.append(process_identifiers(line))
        out.append("\n")
    return "".join(out)


def main():
    """
    Command-line entry point: read the input assembly file, strip it with
    process_asm, and write the result to the output file. Prints an error
    and exits with status 1 if the input file does not exist.
    """
    parser = ArgumentParser(description="generate a stripped assembly file")
    parser.add_argument(
        "input",
        metavar="input",
        type=str,
        nargs=1,
        help="An input assembly file",
    )
    parser.add_argument(
        "out", metavar="output", type=str, nargs=1, help="The output file"
    )
    # Unrecognized arguments are tolerated and ignored.
    args, _ = parser.parse_known_args()
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        print("ERROR: input file '%s' does not exist" % input_path)
        sys.exit(1)

    with open(input_path, "r") as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, "w") as f:
        f.write(new_contents)


if __name__ == "__main__":
    main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;