#!/usr/bin/env python3
"""Expand the global variables described by an ELF file's DWARF data.

The script reads the DWARF information of an ELF file, resolves the type of
every top-level ``DW_TAG_variable`` and prints one ``address<TAB>name`` line
per variable, structure/union member and array element.
"""
import argparse
import os
import sys
from pprint import pprint

from dwex import formats
from elftools.dwarf.locationlists import LocationParser, LocationExpr
from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp, DW_OP_opcode2name
from dwex.dwarfone import DWARFExprParserV1

SCRIPT_VERSION = '0.2.0'

# Runtime switches; main() fills them in from the command line.
configuration = {
    'include_file_name': False,
    'print_debug_info': False,
    'all_members': False,
    'no_array_expand': False,
}

# DIE tags that truly_resolve_type() knows how to describe.
supported_types = [
    'DW_TAG_base_type',
    'DW_TAG_structure_type',
    'DW_TAG_array_type',
    'DW_TAG_union_type',
    'DW_TAG_enumeration_type',
    'DW_TAG_pointer_type',
]


def eprint(*args, **kwargs):
    """print() to stderr, but only when debug output is enabled."""
    if configuration['print_debug_info']:
        print(*args, file=sys.stderr, **kwargs)


def epprint(*args, **kwargs):
    """pprint() to stderr, but only when debug output is enabled."""
    if configuration['print_debug_info']:
        pprint(*args, stream=sys.stderr, **kwargs)


class DWARFParseError(Exception):
    """The file was opened, but its DWARF data could not be parsed."""

    def __init__(self, exc, di):
        super().__init__("DWARF parsing error: " + format(exc))
        self.dwarfinfo = di


def decorate_die(die, i):
    """Attach the sibling index and an empty children cache to a DIE."""
    die._i = i
    die._children = None
    return die


def load_children(parent_die):
    """Load and cache the child DIEs of ``parent_die`` if not done yet.

    DIEs that were never passed through decorate_die() (for example type
    DIEs reached via an attribute reference) have no ``_children`` attribute
    at all, hence the getattr() with a default.
    """
    if getattr(parent_die, "_children", None) is None:
        try:
            parent_die._children = [
                decorate_die(die, i)
                for i, die in enumerate(parent_die.iter_children())
            ]
        except KeyError:
            # Catching #1516
            # Diagnostics go to stderr so they do not mix with the
            # address/name listing written to stdout.
            print("This executable file is corrupt or incompatible.",
                  file=sys.stderr)
            parent_die._children = []


def safe_DIE_name(die, default=''):
    """Return the DIE's name, its tag in debug mode, or ``default``."""
    if 'DW_AT_name' in die.attributes:
        return die.attributes['DW_AT_name'].value.decode('utf-8', errors='ignore')
    if configuration['print_debug_info']:
        return die.tag
    return default


class Bear():
    """Collects the global variables of an ELF file and flattens them.

    Attributes:
        myVariables: one dictionary per compile unit with the keys ``name``
            (compile unit name) and ``children`` (list of variable entries).
            A variable entry may contain ``name``, ``address`` (hex string),
            ``type``, ``size_byte``, ``number_of_elements`` and ``children``.
        flat_list: flat entries produced by flatten_type().
        top_dies: the top DIE of every compile unit.
    """

    def __init__(self, filename):
        """Read ``filename`` and resolve every top-level variable.

        Exits the process with status 1 when the DWARF data cannot be read.

        Raises:
            DWARFParseError: when an assertion fails while the compile units
                are being enumerated.
        """
        # Result containers exist before any early return, so that
        # flatten_type()/pretty_print() work on a file without compile units.
        self.myVariables = []
        self.flat_list = []
        self.top_dies = []
        self.filename = filename
        self.dwarfinfo = None

        di = formats.read_dwarf(filename, self.resolve_arch)
        if not di:  # Covers both False and None
            print("Something went wrong", file=sys.stderr)
            sys.exit(1)

        # Some degree of graceful handling of wrong format
        try:
            # Some cached top level stuff
            # Notably, iter_CUs doesn't cache
            di._ranges = None  # Loaded on first use

            def decorate_cu(cu, i):
                cu._i = i
                cu._lineprogram = None
                cu._exprparser = None
                return cu

            # We'll need them first thing, might as well load here
            di._unsorted_CUs = [decorate_cu(cu, i)
                                for i, cu in enumerate(di.iter_CUs())]
            if not di._unsorted_CUs:
                # Weird, but saw it once - debug sections present, but no CUs
                return
            # For quick CU search by offset within the info section,
            # regardless of sorting
            di._CU_offsets = [cu.cu_offset for cu in di._unsorted_CUs]
            di._CUs = list(di._unsorted_CUs)
            di._locparser = None  # Created on first use
            self.dwarfinfo = di
        except AssertionError as ass:  # Covers exceptions during parsing
            raise DWARFParseError(ass, di) from ass

        # Top DIEs only contain compile units
        self.top_dies = [decorate_die(cu.get_top_DIE(), i)
                         for i, cu in enumerate(di._CUs)]
        for top_die in self.top_dies:
            load_children(top_die)
            self.myVariables.append({
                'name': safe_DIE_name(top_die, '?'),
                'children': [self._describe_variable(child_die)
                             for child_die in top_die._children
                             if child_die.tag == 'DW_TAG_variable'],
            })

    def _describe_variable(self, var_die):
        """Build the entry (name, address, type information) of one variable."""
        # Name should be on every element, if not set something so it can be
        # printed
        entry = {'name': safe_DIE_name(var_die, '?')}
        if 'DW_AT_location' in var_die.attributes:
            address = self._location_operand(
                var_die, var_die.attributes['DW_AT_location'])
            if address is not None:
                entry['address'] = hex(address)
        if 'DW_AT_type' in var_die.attributes:
            self.truly_resolve_type(
                entry, var_die.get_DIE_from_attribute('DW_AT_type'))
        return entry

    def _location_operand(self, die, attr, allow_constant=False):
        """Return the first operand of the first operation of a location.

        Returns None when the attribute is a location list, when the
        expression is empty, or when its first operation has no integer
        operand. With ``allow_constant`` a plain integer attribute value is
        returned as is; DWARF 4 and later may encode a member offset that way.
        """
        if not LocationParser.attribute_has_location(attr, die.cu['version']):
            if allow_constant and isinstance(attr.value, int):
                return attr.value
            return None
        location = self.parse_location(die, attr)
        if not isinstance(location, LocationExpr):
            # A location list has no single expression to evaluate
            return None
        ops = self.dump_expr(die, location.loc_expr)
        if not ops or not ops[0].args:
            return None
        operand = ops[0].args[0]
        return operand if isinstance(operand, int) else None

    @staticmethod
    def _member_location_attr(die):
        """Return the member location attribute of ``die`` or None."""
        for name in ('DW_AT_member_location', 'DW_AT_data_member_location'):
            if name in die.attributes:
                return die.attributes[name]
        return None

    def _resolve_members(self, entry, die_type, use_offsets):
        """Describe the members of a structure or union type.

        With ``use_offsets`` (structures) every member address is the parent
        address plus the member offset; otherwise (unions) every member
        shares the parent address.
        """
        load_children(die_type)
        members = []
        for child_die in die_type._children:
            if child_die.tag != 'DW_TAG_member' and not configuration['all_members']:
                continue
            if 'DW_AT_type' in child_die.attributes:
                typ_die = child_die.get_DIE_from_attribute('DW_AT_type')
            elif child_die.tag in supported_types:
                typ_die = child_die
            else:
                eprint('Child DIE with no type information')
                epprint(child_die)
                continue

            child_entry = {'name': safe_DIE_name(child_die, '?')}
            if 'address' in entry:
                location = self._member_location_attr(child_die) if use_offsets else None
                if location is None:
                    # No offset information: the member sits at the parent address
                    child_entry['address'] = entry['address']
                else:
                    offset = self._location_operand(child_die, location,
                                                    allow_constant=True)
                    if offset is not None:
                        child_entry['address'] = hex(int(entry['address'], 16) + offset)
            self.truly_resolve_type(child_entry, typ_die)
            members.append(child_entry)
        return members

    def truly_resolve_type(self, entry, die_type):
        """Fill ``entry`` with the type information found in ``die_type``.

        Sets ``type`` and, where available, ``size_byte``,
        ``number_of_elements`` and ``children``. Recurses into structure and
        union members and into array element types.
        """
        if die_type.tag == 'DW_TAG_volatile_type' and 'DW_AT_type' in die_type.attributes:
            die_type = die_type.get_DIE_from_attribute('DW_AT_type')

        entry['type'] = safe_DIE_name(die_type, '?')

        member_location = self._member_location_attr(die_type)
        if member_location is not None and 'address' in entry:
            offset = self._location_operand(die_type, member_location,
                                            allow_constant=True)
            if offset is not None:
                entry['address'] = hex(int(entry['address'], 16) + offset)

        if 'DW_AT_type' in die_type.attributes and die_type.tag not in supported_types:
            # Check if the type is a redefinition of a supported type
            candidate = die_type
            while 'DW_AT_type' in candidate.attributes:
                candidate = candidate.get_DIE_from_attribute('DW_AT_type')
                if candidate.tag in supported_types:
                    die_type = candidate
                    break

        if 'DW_AT_byte_size' in die_type.attributes:
            entry['size_byte'] = die_type.attributes['DW_AT_byte_size'].value

        if die_type.tag == 'DW_TAG_base_type':
            real_type_name = safe_DIE_name(die_type, '?')
            if real_type_name != '?' and real_type_name != entry['type']:
                entry['type'] = '{name} ({real})'.format(name=entry['type'],
                                                         real=real_type_name)
        elif die_type.tag == 'DW_TAG_structure_type':
            entry['children'] = self._resolve_members(entry, die_type, use_offsets=True)
        elif die_type.tag == 'DW_TAG_array_type':
            # The element type provides type name, element size and children
            self.truly_resolve_type(entry, die_type.get_DIE_from_attribute('DW_AT_type'))
            load_children(die_type)
            # Only the first child (first dimension) is looked at; it may be
            # missing, or its bound may not be a plain integer.
            if die_type._children:
                bounds = die_type._children[0].attributes
                if 'DW_AT_upper_bound' in bounds:
                    upper_bound = bounds['DW_AT_upper_bound'].value
                    if isinstance(upper_bound, int):
                        entry['number_of_elements'] = upper_bound + 1
        elif die_type.tag == 'DW_TAG_union_type':
            entry['children'] = self._resolve_members(entry, die_type, use_offsets=False)
        elif die_type.tag == 'DW_TAG_enumeration_type':
            entry['type'] = safe_DIE_name(die_type, 'ENUM')
        elif die_type.tag == 'DW_TAG_pointer_type':
            entry['type'] = safe_DIE_name(die_type, 'POINTER')
        else:
            eprint("Unsupported type:", die_type.tag)

    def flatten_type(self, parent=None):
        """Flatten every collected variable into ``flat_list``.

        ``parent`` is unused; it is kept for interface compatibility.
        """
        for CU in self.myVariables:
            for child in CU['children']:
                if configuration["include_file_name"]:
                    self.pettanko(child, CU['name'], ":")
                else:
                    self.pettanko(child)

    @staticmethod
    def _shifted(entry, delta):
        """Return a copy of ``entry`` with all addresses moved by ``delta``."""
        if not delta:
            return entry
        moved = dict(entry)
        if 'address' in moved:
            moved['address'] = hex(int(moved['address'], 16) + delta)
        if 'children' in moved:
            moved['children'] = [Bear._shifted(kid, delta)
                                 for kid in moved['children']]
        return moved

    def pettanko(self, entry : dict, a_parent_name : str = '', separator : str = '.'):
        """Append ``entry``, its members and its array elements to ``flat_list``.

        Args:
            entry: a variable or member entry built by truly_resolve_type().
            a_parent_name: name prefix; empty for top-level entries.
            separator: text placed between the prefix and the entry name.
        """
        flat_entry = dict()
        if a_parent_name:
            flat_entry['name'] = '{parent}{separator}{child}'.format(
                parent=a_parent_name, separator=separator, child=entry['name'])
        else:
            flat_entry['name'] = entry['name']
        if 'address' in entry:
            flat_entry['address'] = entry['address']
        if 'size_byte' in entry:
            flat_entry['size_byte'] = entry['size_byte']
            if 'number_of_elements' in entry:
                # For arrays 'size_byte' is the element size; report the total
                flat_entry['size_byte'] = flat_entry['size_byte'] * entry['number_of_elements']
        if 'type' in entry:
            flat_entry['type'] = entry['type']
        self.flat_list.append(flat_entry)

        children = entry.get('children', ())
        if 'number_of_elements' not in entry:
            for kid in children:
                self.pettanko(kid, flat_entry['name'])
            return

        count = entry['number_of_elements']
        step = 1
        if configuration['no_array_expand']:
            # Only the first and the last element. A step of 0 is rejected by
            # range(), which a one-element array would otherwise produce.
            step = max(1, count - 1)
        for index in range(0, count, step):
            element = flat_entry.copy()
            element['name'] = '{name}[{index}]'.format(name=flat_entry['name'], index=index)
            shift = 0
            if 'size_byte' in entry:
                # An element has the element size, not the size of the array
                element['size_byte'] = entry['size_byte']
                shift = entry['size_byte'] * index
                if 'address' in flat_entry:
                    element['address'] = hex(int(flat_entry['address'], 16) + shift)
            self.flat_list.append(element)
            # Member addresses were resolved for element 0; move them to
            # the element that is being expanded.
            for kid in children:
                self.pettanko(self._shifted(kid, shift), element['name'])

    def pretty_print(self):
        """Print ``address<TAB>name`` for every entry of ``flat_list``."""
        for entry in self.flat_list:
            address = ''
            if 'address' in entry:
                address = entry['address']
            if not address and entry['name'] == '?':
                # Nothing useful to show
                continue
            print('{address}\t{variable_name}'.format(address=address,
                                                      variable_name=entry['name']))

    def parse_location(self, die, attr):
        """Parse a location attribute with the (lazily created) LocationParser."""
        di = die.dwarfinfo
        if di._locparser is None:
            di._locparser = LocationParser(di.location_lists())
        return di._locparser.parse_from_attribute(attr, die.cu['version'], die=die)

    def dump_expr(self, die, expr):
        """Parse the expression blob ``expr`` and return the parser's result.

        The expression parser is created once per compile unit; DWARF
        version 1 units use DWARFExprParserV1.
        """
        if die.cu._exprparser is None:
            if die.cu['version'] > 1:
                die.cu._exprparser = DWARFExprParser(die.cu.structs)
            else:
                die.cu._exprparser = DWARFExprParserV1(die.cu.structs)
        return die.cu._exprparser.parse_expr(expr)

    def resolve_arch(self, arches):
        """Callback handed to formats.read_dwarf(); always returns None."""
        print("resolve_arch: Unsupported feature", file=sys.stderr)
        return None


def main():
    """Command-line entry point: parse arguments and print the variables."""
    from dwex.patch import monkeypatch
    monkeypatch()

    parser = argparse.ArgumentParser(
        description='Expands symbols (global variables) types.',
        epilog='Data displayed by this script are informative only!'
    )
    parser.add_argument('elf_file', metavar='file', type=str, nargs='?',
                        help='ELF file to try to extract symbols')
    parser.add_argument('-f', '--include-file-name', dest='include_file_name',
                        action='store_true',
                        help='Display filename at the beginning')
    parser.add_argument('-d', '--print-debug-info', dest='print_debug_info',
                        action='store_true',
                        help='Print debug infor when parsing DWARF')
    parser.add_argument('-a', '--all-members', dest='all_members',
                        action='store_true',
                        help='Print information of all children')
    parser.add_argument('-e', '--no-array-expand', dest='no_array_expand',
                        action='store_true',
                        help='Print only first and last array elements')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {version}'.format(version=SCRIPT_VERSION))
    args = parser.parse_args()

    configuration['include_file_name'] = args.include_file_name
    configuration['print_debug_info'] = args.print_debug_info
    configuration['all_members'] = args.all_members
    configuration['no_array_expand'] = args.no_array_expand

    if not args.elf_file:
        parser.print_help()
        sys.exit(1)

    if not os.path.exists(args.elf_file):
        # Always report this; eprint() would stay silent without -d
        print('File {elf_file} does not exist!'.format(elf_file=args.elf_file),
              file=sys.stderr)
        sys.exit(1)

    bear = Bear(args.elf_file)
    bear.flatten_type()
    bear.pretty_print()


if __name__ == "__main__":
    main()