diff options
Diffstat (limited to 'tools/strip_asm.py')
-rwxr-xr-x | tools/strip_asm.py | 118 |
1 files changed, 65 insertions, 53 deletions
"""
strip_asm.py - Cleanup ASM output for the specified file
"""

import os
import re
import sys
from argparse import ArgumentParser

# Jump instruction (mnemonic starting with "j") whose operand is a .L label.
_JUMP_TO_LABEL_RE = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")

# Local label declaration at the start of a line, with or without the
# leading dot.
_ANY_LABEL_DECL_RE = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")

# Local label declaration in normalized (dotted) form.
_DOTTED_LABEL_DECL_RE = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")

# A label that starts a function; a blank line is emitted before each one.
_FN_LABEL_DEF_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")

# TODO: Add more things we want to remove
_DISCARD_REGEXES = (
    re.compile(r"\s+\..*$"),  # directive
    re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
    re.compile(r"\s*#.*$"),  # comment line
    re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
    re.compile(
        r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"
    ),
)

# Patterns that force a line to be kept even if a discard pattern matched.
# Currently empty; kept as an extension point.
_KEEP_REGEXES = ()


def find_used_labels(asm):
    """Return the set of ``.L`` labels that are the target of a jump.

    A line counts when it matches optional whitespace, a mnemonic starting
    with ``j``, whitespace, and then a ``.L``-prefixed label. Each returned
    element includes the ``.L`` prefix.
    """
    matches = (_JUMP_TO_LABEL_RE.match(line) for line in asm.splitlines())
    return {".L%s" % m.group(1) for m in matches if m}


def normalize_labels(asm):
    """Rewrite undotted local labels (``Lfoo``) to dotted form (``.Lfoo``).

    Declarations are collected from lines that start with ``L...:`` or
    ``.L...:``. Every declaration lacking the leading dot is then prefixed
    with one wherever it appears preceded by whitespace (or the start of
    the text) and followed by ``:`` or whitespace. Labels that are already
    dotted are left untouched, so the result does not depend on the
    iteration order of the collected declarations.
    """
    decls = set()
    for line in asm.splitlines():
        m = _ANY_LABEL_DECL_RE.match(line)
        if m:
            decls.add(m.group(0))

    # Sorted only to make the substitution order deterministic.
    for ld in sorted(d for d in decls if not d.startswith(".")):
        pattern = r"(^|\s+)" + re.escape(ld) + r"(?=:|\s)"
        asm = re.sub(pattern, "\\1." + ld, asm)
    return asm


def transform_labels(asm):
    """Normalize local labels and drop declarations no jump refers to.

    Returns the remaining lines, each terminated by a newline.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    kept = []
    for line in asm.splitlines():
        m = _DOTTED_LABEL_DECL_RE.match(line)
        if not m or m.group(0) in used_decls:
            kept.append(line)
    return "".join(line + "\n" for line in kept)


def is_identifier(tk):
    """Return True if ``tk`` is non-empty, starts with a letter or ``_``,
    and otherwise consists only of alphanumerics and ``_``."""
    if not tk:
        return False
    first = tk[0]
    if not (first.isalpha() or first == "_"):
        return False
    return all(c.isalnum() or c == "_" for c in tk[1:])


def process_identifiers(line):
    """
    process_identifiers - process all identifiers and modify them to have
    consistent names across all platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.
    """
    pieces = []
    for tk in re.split(r"([a-zA-Z0-9_]+)", line):
        if is_identifier(tk):
            if tk.startswith("__Z"):
                tk = tk[1:]
            elif (
                tk.startswith("_")
                and len(tk) > 1
                and tk[1].isalpha()
                and tk[1] != "Z"
            ):
                tk = tk[1:]
        pieces.append(tk)
    return "".join(pieces)


def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    Labels are first passed through ``transform_labels``. Each remaining
    line has ``@GOTPCREL`` removed, is dropped if it matches a discard
    pattern (unless a keep pattern also matches), and otherwise has its
    identifiers normalized. A blank line is inserted before every function
    label except when nothing has been emitted yet.
    """
    out_lines = []
    for line in transform_labels(asm).splitlines():
        # Remove Mach-O attribute
        line = line.replace("@GOTPCREL", "")
        add_line = not any(reg.match(line) for reg in _DISCARD_REGEXES)
        if any(reg.match(line) for reg in _KEEP_REGEXES):
            add_line = True
        if not add_line:
            continue
        if _FN_LABEL_DEF_RE.match(line) and out_lines:
            out_lines.append("")
        out_lines.append(process_identifiers(line))
    return "".join(line + "\n" for line in out_lines)


def main():
    """Read the input assembly file, strip it, and write the output file.

    Exits with status 1 after printing an error if the input file does not
    exist. Unrecognized command-line arguments are ignored.
    """
    parser = ArgumentParser(description="generate a stripped assembly file")
    parser.add_argument(
        "input",
        metavar="input",
        type=str,
        nargs=1,
        help="An input assembly file",
    )
    parser.add_argument(
        "out", metavar="output", type=str, nargs=1, help="The output file"
    )
    args, _unknown_args = parser.parse_known_args()
    # Named *_path so the builtin input() is not shadowed.
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        print("ERROR: input file '%s' does not exist" % input_path)
        sys.exit(1)

    with open(input_path, "r") as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, "w") as f:
        f.write(new_contents)


if __name__ == "__main__":
    main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4