#!/bin/python
# ============================================================
# elf_symbols - dwarf symbols browser
# Copyright (C) 2023-2025 Juraj Oravec
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
# ============================================================
"""Expand the types of variables found in the DWARF data of an ELF file.

Every variable is flattened into one line per member / array element and
printed as ``address<TAB>size-or-bitmask<TAB>name``.
"""

import os
import sys
import argparse
import math

from dwex import formats
from elftools.dwarf.locationlists import LocationParser, LocationExpr
from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp, DW_OP_opcode2name
from dwex.dwarfone import DWARFExprParserV1

from pprint import pprint


SCRIPT_VERSION = '0.3.0'


# Runtime options; main() overwrites these from the command line.
configuration = {
    'include_file_name': False,
    'print_debug_info': False,
    'all_members': False,
    'no_array_expand': False,
    'array_element_limit': 0xffffffffffffffff,
    'address_normalization': True,
    'display_mangled_names': True,
}

# DIE tags that truly_resolve_type() knows how to describe.
supported_types = [
    'DW_TAG_base_type',
    'DW_TAG_structure_type',
    'DW_TAG_array_type',
    'DW_TAG_union_type',
    'DW_TAG_enumeration_type',
    'DW_TAG_pointer_type',
]


def eprint(*args, **kwargs):
    """print() to stderr, but only when debug output is enabled."""
    if configuration['print_debug_info']:
        print(*args, file=sys.stderr, **kwargs)


def epprint(*args, **kwargs):
    """pprint() to stderr, but only when debug output is enabled."""
    if configuration['print_debug_info']:
        pprint(*args, stream=sys.stderr, **kwargs)


class DWARFParseError(Exception):
    """ "Opened, could not parse" """

    def __init__(self, exc, di):
        super().__init__("DWARF parsing error: " + format(exc))
        # Keep the partially parsed DWARF info around for the caller.
        self.dwarfinfo = di


# Some additional data for every DIE
def decorate_die(die, i):
    """Attach the sibling index and an empty child cache to *die* and return it."""
    die._i = i
    die._children = None
    return die


def load_children(parent_die):
    """Load and cache the child DIEs of *parent_die* in ``parent_die._children``.

    Assumes the check if the DIE has children has been already performed.
    A KeyError raised while iterating the children is reported on stdout and
    the DIE is treated as having no children.
    """
    if not hasattr(parent_die, "_children") or parent_die._children is None:
        # TODO: wait cursor here. It may cause disk I/O
        try:
            parent_die._children = [decorate_die(die, i)
                                    for (i, die) in enumerate(parent_die.iter_children())]
        except KeyError:
            # Catching #1516 from original project "DWARF Explorer" https://github.com/sevaa/dwex/
            print("This executable file is corrupt or incompatible.")
            parent_die._children = []


def safe_DIE_name(die, default=''):
    """Return a printable name for *die*.

    Uses DW_AT_name when present; otherwise the DIE tag (debug mode) or
    *default*.  When mangled names are enabled and the DIE carries
    DW_AT_linkage_name, the result is ``'<name> - <mangled name>'``.
    """
    if 'DW_AT_name' in die.attributes:
        name = die.attributes['DW_AT_name'].value.decode('utf-8', errors='ignore')
    elif configuration['print_debug_info']:
        name = die.tag
    else:
        name = default

    if configuration['display_mangled_names'] and 'DW_AT_linkage_name' in die.attributes:
        name = '{short_name} - {mangled_name}'.format(
            short_name=name,
            mangled_name=die.attributes['DW_AT_linkage_name'].value.decode('utf-8', errors='ignore')
        )

    return name


class Bear():
    """Loads the DWARF info of one file and collects its variables.

    Results are kept in:
      myVariables     - one dict per compile unit: {'name', 'children'}
      specifications  - name -> {'address', 'CU_name'} of DW_AT_specification variables
      valid_addresses - variable name -> last non-zero address seen for it
      flat_list       - flattened entries produced by flatten_type()
    """

    def __init__(self, filename):
        """Read *filename* and resolve every variable of every compile unit.

        Exits the process with status 1 when no DWARF info can be read.
        Raises DWARFParseError when parsing trips an AssertionError.
        """
        # A list containing variables in a dictionary
        # Description of used fields:
        #   name: variable name
        #   type: text description of the type
        #   size_byte: size of the variable in bytes
        #   size_bit: size of the variable in bits
        #   bit_offset: Variable offset in the given byte / from variable default address
        #   address: absolute address of the variable
        #   children: a dictionary of child elements
        #
        # Initialised before anything can return early, so that a file
        # without compile units still yields a usable (empty) object.
        self.myVariables = list()
        self.specifications = dict()
        self.valid_addresses = dict()
        self.flat_list = []
        self.top_dies = []
        self.filename = filename

        di = formats.read_dwarf(filename, self.resolve_arch)
        if not di:  # Covers both False and None
            print("Something went wrong")
            sys.exit(1)
        self.dwarfinfo = di

        # Some degree of graceful handling of wrong format
        try:
            # Some cached top level stuff
            # Notably, iter_CUs doesn't cache
            di._ranges = None  # Loaded on first use

            def decorate_cu(cu, i):
                cu._i = i
                cu._lineprogram = None
                cu._exprparser = None
                return cu

            # We'll need them first thing, might as well load here
            di._unsorted_CUs = [decorate_cu(cu, i) for (i, cu) in enumerate(di.iter_CUs())]
            if not di._unsorted_CUs:
                return  # Weird, but saw it once - debug sections present, but no CUs

            # For quick CU search by offset within the info section, regardless of sorting
            di._CU_offsets = [cu.cu_offset for cu in di._unsorted_CUs]
            di._CUs = list(di._unsorted_CUs)

            di._locparser = None  # Created on first use
        except AssertionError as ass:  # Covers exceptions during parsing
            raise DWARFParseError(ass, di) from ass

        self.top_dies = [decorate_die(CU.get_top_DIE(), i) for (i, CU) in enumerate(di._CUs)]

        # Specifications must be known before variables are resolved,
        # resolve_address() looks names up in them.
        self.load_specifications()

        for top_die in self.top_dies:
            # top dies only contain Compile Units
            CU_name = safe_DIE_name(top_die, '?')

            # Preload children
            load_children(top_die)

            children_dies = list()
            for child_die in top_die._children:
                if child_die.tag == 'DW_TAG_variable' and 'DW_AT_specification' not in child_die.attributes:
                    entry = {
                        # Name should be on every element, if not set something so it can be printed
                        'name': safe_DIE_name(child_die, '?'),
                        'CU_name': CU_name
                    }

                    self.resolve_address(child_die, entry, entry)
                    if entry['name'] != '?' and entry['address']:
                        self.valid_addresses[entry['name']] = entry['address']

                    if 'DW_AT_type' in child_die.attributes:
                        typ_die = child_die.get_DIE_from_attribute('DW_AT_type')
                        self.truly_resolve_type(entry, typ_die)

                    children_dies.append(entry)
                elif child_die.tag == 'DW_TAG_subprogram':
                    children_dies.extend(self.load_subprogram(child_die, CU_name))

            self.myVariables.append({
                'name': CU_name,
                'children': children_dies
            })

    def load_subprogram(self, die, CU_name):
        """Collect variable entries declared inside a subprogram or lexical block.

        Nested DW_TAG_lexical_block children are searched recursively.
        Returns a list of entry dicts.
        """
        # Preload children
        load_children(die)

        children_dies = list()
        for child_die in die._children:
            if child_die.tag == 'DW_TAG_variable' and 'DW_AT_specification' not in child_die.attributes:
                entry = {
                    # Name should be on every element, if not set something so it can be printed
                    'name': safe_DIE_name(child_die, '?'),
                    'CU_name': CU_name
                }

                self.resolve_address(child_die, entry, entry)

                # Check for static variable
                # NOTE(review): only negative operands are rejected here;
                # presumably those are frame-relative locals - confirm.
                if entry['address'] < 0:
                    continue

                if entry['name'] != '?' and entry['address']:
                    self.valid_addresses[entry['name']] = entry['address']

                if 'DW_AT_type' in child_die.attributes:
                    typ_die = child_die.get_DIE_from_attribute('DW_AT_type')
                    self.truly_resolve_type(entry, typ_die)

                children_dies.append(entry)
            elif child_die.tag == 'DW_TAG_lexical_block':
                children_dies.extend(self.load_subprogram(child_die, CU_name))

        return children_dies

    def load_specifications(self):
        """Record the address of every top-level variable carrying DW_AT_specification.

        The result is stored in self.specifications, keyed by the name of the
        DIE the specification refers to.
        """
        for top_die in self.top_dies:
            CU_name = safe_DIE_name(top_die, '?')

            # preload children
            load_children(top_die)

            for child_die in top_die._children:
                if child_die.tag != 'DW_TAG_variable':
                    continue
                if 'DW_AT_specification' not in child_die.attributes:
                    continue

                entry = {
                    'name': safe_DIE_name(child_die, '?')
                }
                self.resolve_address(child_die, entry, entry)

                specs_for_die = child_die.get_DIE_from_attribute('DW_AT_specification')
                name = safe_DIE_name(specs_for_die, '?')
                self.specifications[name] = {
                    'address': entry['address'],
                    'CU_name': CU_name
                }

    def _specification_address(self, entry):
        """Return the recorded specification address for *entry*, or 0.

        A specification only applies when it was found in the same compile
        unit as the entry.  Entries without 'CU_name' (struct members, the
        temporary entries of load_specifications) never match.
        """
        spec = self.specifications[entry['name']]
        if spec['CU_name'] == entry.get('CU_name'):
            return spec['address']
        return 0

    def _location_offset(self, die, attr):
        """Return the first operand of the first operation of a location expression.

        Returns None when *attr* does not resolve to a single expression
        (e.g. a location list), when the expression is empty, or when its
        first operation has no integer operand.
        """
        location = self.parse_location(die, attr)
        expr = getattr(location, 'loc_expr', None)
        if expr is None:
            return None

        operations = self.dump_expr(die, expr)
        if not operations:
            return None

        op_args = getattr(operations[0], 'args', None)
        if not op_args or not isinstance(op_args[0], int):
            return None
        return op_args[0]

    def resolve_address(self, die, entry_source, entry_dest):
        """Compute entry_dest['address'] for *die*.

        The base address is the matching specification address if there is
        one, otherwise entry_source['address'] (if set), otherwise 0.  The
        offset found in the DIE's member/data-member/location attribute is
        added on top.  entry_dest['address'] is always set.

        Returns True when a specification or a location attribute
        contributed to the address, False otherwise.
        """
        l_result = False
        at_name = ""
        base_address = 0
        address_spec = 0

        if 'DW_AT_member_location' in die.attributes:
            at_name = 'DW_AT_member_location'
        elif 'DW_AT_data_member_location' in die.attributes:
            at_name = 'DW_AT_data_member_location'
        elif 'DW_AT_location' in die.attributes:
            at_name = 'DW_AT_location'

        if entry_dest['name'] in self.specifications:
            address_spec = self._specification_address(entry_dest)
        elif entry_source['name'] in self.specifications:
            address_spec = self._specification_address(entry_source)

        if address_spec:
            base_address = address_spec
            l_result = True

        if 'address' in entry_source and not base_address:
            base_address = entry_source['address']

        entry_dest['address'] = base_address

        if at_name:
            attribute = die.attributes[at_name]
            version = die.cu['version']
            if LocationParser.attribute_has_location(attribute, version):
                offset = self._location_offset(die, attribute)
                if offset is None:
                    # Location lists and operand-less expressions cannot be
                    # turned into a single address; keep the base address.
                    eprint("Unsupported location information")
                    eprint(at_name, version)
                else:
                    entry_dest['address'] = base_address + offset
                    l_result = True
            elif LocationParser._attribute_is_constant(attribute, version):
                entry_dest['address'] = base_address + attribute.value
                l_result = True
            else:
                eprint("Unsupported location information")
                eprint(at_name, version)

        return l_result

    def resolve_bit_size(self, die, entry):
        """Fill 'size_bit' / 'bit_offset' of a bit-field *entry* from *die*.

        When a bit offset is present, whole bytes of it are moved into
        entry['address'] so that the remaining 'bit_offset' is below 8.
        """
        if 'DW_AT_bit_size' in die.attributes:
            entry['size_bit'] = die.attributes['DW_AT_bit_size'].value

        if 'DW_AT_data_bit_offset' in die.attributes:
            entry['bit_offset'] = die.attributes['DW_AT_data_bit_offset'].value
        elif 'DW_AT_bit_offset' in die.attributes:
            entry['bit_offset'] = die.attributes['DW_AT_bit_offset'].value

        if 'bit_offset' not in entry:
            return

        if entry['bit_offset'] & 0x100:
            # NOTE(review): bit 8 looks like a producer-specific flag marking
            # the low byte as the final offset - confirm.
            entry['bit_offset'] = (entry['bit_offset'] & 0xFF)
        else:
            # Mirror the offset inside the storage unit: the index of the
            # last bit of the unit minus the offset minus the field width.
            bit_size = (math.ceil((entry['bit_offset'] + 1) / 8) * 8) - 1
            bit_size_offset = (math.ceil(entry['size_bit'] / 8) * 8) - 1
            bit_size = max(bit_size, bit_size_offset)
            entry['bit_offset'] = bit_size - entry['bit_offset'] - (entry['size_bit'] - 1)

        byte_offset = math.floor(entry['bit_offset'] / 8)
        entry['address'] = entry['address'] + byte_offset
        entry['bit_offset'] = entry['bit_offset'] - byte_offset * 8

    def _member_type_dies(self, die_type):
        """Yield (member_die, type_die) for each usable child of a struct/union DIE.

        Children that are not DW_TAG_member are skipped unless 'all_members'
        is enabled; children without usable type information are reported
        through the debug output and skipped.
        """
        load_children(die_type)
        for child_die in die_type._children:
            if child_die.tag != 'DW_TAG_member' and not configuration['all_members']:
                continue

            if 'DW_AT_type' in child_die.attributes:
                yield child_die, child_die.get_DIE_from_attribute('DW_AT_type')
            elif child_die.tag in supported_types:
                yield child_die, child_die
            else:
                eprint('Child DIE with no type information')
                epprint(child_die)

    def truly_resolve_type(self, entry, die_type):
        """Describe *die_type* in *entry* ('type', 'size_byte', 'children', ...).

        Typedef-like chains are followed down to the first supported type.
        Structures and unions get a 'children' list, arrays get
        'number_of_elements' plus the description of their element type.
        """
        if die_type.tag == 'DW_TAG_volatile_type':
            die_type = die_type.get_DIE_from_attribute('DW_AT_type')

        entry['type'] = safe_DIE_name(die_type, '?')

        self.resolve_address(die_type, entry, entry)

        if 'DW_AT_type' in die_type.attributes and die_type.tag not in supported_types:
            # Check if the type is a redefinition of a base type
            die_type_test = die_type
            while 'DW_AT_type' in die_type_test.attributes:
                die_type_test = die_type_test.get_DIE_from_attribute('DW_AT_type')
                if die_type_test.tag in supported_types:
                    die_type = die_type_test
                    break

        if 'DW_AT_byte_size' in die_type.attributes:
            entry['size_byte'] = die_type.attributes['DW_AT_byte_size'].value

        if die_type.tag == 'DW_TAG_base_type':
            real_type_name = safe_DIE_name(die_type, '?')
            if real_type_name != '?' and real_type_name != entry['type']:
                entry['type'] = '{name} ({real})'.format(name=entry['type'], real=real_type_name)
        elif die_type.tag == 'DW_TAG_structure_type':
            child_dies = []
            last_member_address = entry['address']
            last_member_size_byte = 0
            for child_die, typ_die in self._member_type_dies(die_type):
                child_entry = dict()
                child_entry['name'] = safe_DIE_name(child_die, '?')

                self.resolve_address(child_die, entry, child_entry)
                self.resolve_bit_size(child_die, child_entry)
                self.truly_resolve_type(child_entry, typ_die)

                if child_entry['address'] != entry['address']:
                    last_member_address = child_entry['address']
                    if 'size_byte' in child_entry:
                        last_member_size_byte = child_entry['size_byte']
                else:
                    # The member resolved to the struct's own address: place
                    # it right behind the last member that had its own one.
                    child_entry['address'] = last_member_address + last_member_size_byte

                child_dies.append(child_entry)
            entry['children'] = child_dies
        elif die_type.tag == 'DW_TAG_array_type':
            self.truly_resolve_type(entry, die_type.get_DIE_from_attribute('DW_AT_type'))
            load_children(die_type)
            entry['number_of_elements'] = 0
            # Only the first child (first dimension) is looked at.  An array
            # type without children keeps zero elements.
            if die_type._children and 'DW_AT_upper_bound' in die_type._children[0].attributes:
                upper_bound = die_type._children[0].attributes['DW_AT_upper_bound'].value
                # A bound that is not a plain integer cannot be compared or
                # expanded; leave the element count at zero.
                if isinstance(upper_bound, int) and upper_bound < configuration['array_element_limit']:
                    entry['number_of_elements'] = upper_bound + 1
        elif die_type.tag == 'DW_TAG_union_type':
            child_entries = []
            for child_die, typ_die in self._member_type_dies(die_type):
                child_entry = dict()
                child_entry['name'] = safe_DIE_name(child_die, '?')
                # Every union member starts at the union's own address.
                if 'address' in entry:
                    child_entry['address'] = entry['address']

                self.resolve_bit_size(child_die, child_entry)
                self.truly_resolve_type(child_entry, typ_die)

                child_entries.append(child_entry)
            entry['children'] = child_entries
        elif die_type.tag == 'DW_TAG_enumeration_type':
            entry['type'] = safe_DIE_name(die_type, 'ENUM')
        elif die_type.tag == 'DW_TAG_pointer_type':
            entry['type'] = safe_DIE_name(die_type, 'POINTER')
        else:
            eprint("Unsupported type:", die_type.tag)

    def normalize_by_address(self):
        """Keep only named variables whose address equals the one in valid_addresses.

        Compile units are kept even when all of their variables are dropped.
        """
        validVariables = list()
        for CU in self.myVariables:
            child_entries = [
                child for child in CU['children']
                if child['name'] != '?'
                and child['name'] in self.valid_addresses
                and child['address'] == self.valid_addresses[child['name']]
            ]
            validVariables.append({
                'name': CU['name'],
                'children': child_entries
            })

        self.myVariables = validVariables

    def flatten_type(self, parent=None):
        """Flatten every collected variable into self.flat_list.

        *parent* is unused and only kept for interface compatibility.
        """
        for CU in self.myVariables:
            for child in CU['children']:
                if configuration["include_file_name"]:
                    self.pettanko(child, CU['name'], ":")
                else:
                    self.pettanko(child)

    def pettanko(self, entry: dict, a_parent_name: str = '', separator: str = '.', address_offset: int = 0):
        """Append *entry* and, recursively, its members/elements to self.flat_list.

        a_parent_name  - prefix joined to the entry name with *separator*
        separator      - separator used between parent and child names
        address_offset - added to the address stored in *entry*; used to
                         shift the members of array elements other than [0]
        """
        flat_entry = dict()
        if a_parent_name:
            flat_entry['name'] = '{parent}{separator}{child}'.format(
                parent=a_parent_name, separator=separator, child=entry['name'])
        else:
            flat_entry['name'] = entry['name']

        is_array = 'number_of_elements' in entry

        if 'address' in entry:
            flat_entry['address'] = entry['address'] + address_offset
        if 'size_byte' in entry:
            flat_entry['size_byte'] = entry['size_byte']
            if is_array:
                # For arrays the flat entry reports the size of the whole array.
                flat_entry['size_byte'] = flat_entry['size_byte'] * entry['number_of_elements']
        for key in ('size_bit', 'bit_offset', 'type'):
            if key in entry:
                flat_entry[key] = entry[key]

        self.flat_list.append(flat_entry)

        if not is_array:
            for kid in entry.get('children', ()):
                self.pettanko(entry=kid, a_parent_name=flat_entry['name'],
                              separator=separator, address_offset=address_offset)
            return

        element_count = entry['number_of_elements']
        step = 1
        if configuration['no_array_expand']:
            # Only the first and the last element are listed.
            step = (element_count - 1) if element_count > 1 else element_count

        if step == 0 or element_count == 0:
            return

        for index in range(0, element_count, step):
            kid = flat_entry.copy()
            kid['name'] = '{name}[{index}]'.format(name=flat_entry['name'], index=index)
            if 'size_byte' in entry:
                kid['size_byte'] = entry['size_byte']

            # Offset handed to the members of this element.
            size_offset = address_offset
            if ('address' in flat_entry) and ('size_byte' in entry):
                element_shift = entry['size_byte'] * index
                size_offset = address_offset + element_shift
                # flat_entry['address'] already contains address_offset, so
                # only the shift of this element is added here.
                kid['address'] = flat_entry['address'] + element_shift

            self.pettanko(entry=kid, separator=separator, address_offset=0)
            kid_parent = self.flat_list[-1]

            for kid_of_kid in entry.get('children', ()):
                self.pettanko(entry=kid_of_kid, a_parent_name=kid_parent['name'],
                              separator=separator, address_offset=size_offset)

    def pretty_print(self):
        """Print every flat entry as ``address<TAB>size-or-&bitmask<TAB>name``.

        Entries named '?' without an address are skipped.
        """
        for entry in self.flat_list:
            address = entry.get('address', 0)

            if not address and entry['name'] == '?':
                continue

            if 'size_bit' in entry and 'bit_offset' in entry:
                # size_bit consecutive one-bits, shifted to the field position.
                bit_mask = (1 << entry['size_bit']) - 1
                if entry['bit_offset'] >= 0:
                    bit_mask = bit_mask << entry['bit_offset']
                else:
                    eprint("Illegal offset {offset} for variable {variable}".format(
                        offset=entry['bit_offset'],
                        variable=entry['name']
                    ))
                    address = address + int(((entry['bit_offset'] + 1) / 8) - 1)

                print('{address}\t&{bit_mask}\t{variable_name}'.format(
                    address=hex(address),
                    variable_name=entry['name'],
                    bit_mask=hex(bit_mask)
                ))
            else:
                print('{address}\t{size_byte}\t{variable_name}'.format(
                    address=hex(address),
                    variable_name=entry['name'],
                    size_byte=entry.get('size_byte', 1)
                ))

    def parse_location(self, die, attr):
        """Parse location attribute *attr* of *die* with a lazily created LocationParser."""
        di = die.dwarfinfo
        if di._locparser is None:
            di._locparser = LocationParser(di.location_lists())
        return di._locparser.parse_from_attribute(attr, die.cu['version'], die=die)

    # Expr is an expression blob
    def dump_expr(self, die, expr):
        """Parse expression blob *expr* with the per-CU (lazily created) expression parser.

        Returns whatever the parser's parse_expr() returns.
        """
        if die.cu._exprparser is None:
            if die.cu['version'] > 1:
                die.cu._exprparser = DWARFExprParser(die.cu.structs)
            else:
                die.cu._exprparser = DWARFExprParserV1(die.cu.structs)

        # Challenge: for nested expressions, args is a list with a list of commands
        # For those, the format is: op {op arg, arg; op arg, arg}
        # Can't just check for iterable, str is iterable too
        return die.cu._exprparser.parse_expr(expr)

    def resolve_arch(self, arches):
        """Callback handed to formats.read_dwarf(); architecture selection is not supported."""
        print("resolve_arch: Unsupported feature")
        return None


def main():
    """Parse the command line, load the file and print its variables."""
    from dwex.patch import monkeypatch
    monkeypatch()

    parser = argparse.ArgumentParser(
        description='Expands symbols (global variables) types.',
        epilog='Data displayed by this script are informative only!'
    )

    parser.add_argument('elf_file',
                        metavar='file',
                        type=str,
                        nargs='?',
                        help='ELF file to try to extract symbols')
    parser.add_argument('-f', '--include-file-name',
                        dest='include_file_name',
                        action='store_true',
                        help='Display filename at the beginning')
    parser.add_argument('-d', '--print-debug-info',
                        dest='print_debug_info',
                        action='store_true',
                        help='Print debug infor when parsing DWARF')
    parser.add_argument('-a', '--all-members',
                        dest='all_members',
                        action='store_true',
                        help='Print information of all children')
    parser.add_argument('-e', '--no-array-expand',
                        dest='no_array_expand',
                        action='store_true',
                        help='Print only first and last array elements')
    parser.add_argument('--array-element-limit',
                        dest='array_element_limit',
                        action='store',
                        default=hex(configuration['array_element_limit']),
                        help='Maximum hexadecimal number of valid elements in an array')
    parser.add_argument('-n', '--disable-address-normalization',
                        dest='address_normalization',
                        action='store_false',
                        help='Disable variable normalization by address, hides duplicate entries.')
    parser.add_argument('--no-mangled-names',
                        dest='display_mangled_names',
                        action='store_false',
                        help='Hide mangled names. By default mangled names are shown after the normal names')
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s {version}'.format(version=SCRIPT_VERSION))

    args = parser.parse_args()

    configuration['include_file_name'] = args.include_file_name
    configuration['print_debug_info'] = args.print_debug_info
    configuration['all_members'] = args.all_members
    configuration['no_array_expand'] = args.no_array_expand
    try:
        configuration['array_element_limit'] = int(args.array_element_limit, 16)
    except ValueError:
        # Report a malformed value the argparse way instead of a traceback.
        parser.error('--array-element-limit expects a hexadecimal number')
    configuration['address_normalization'] = args.address_normalization
    configuration['display_mangled_names'] = args.display_mangled_names

    if not args.elf_file:
        parser.print_help()
        sys.exit(1)

    if not os.path.exists(args.elf_file):
        # Always report this; eprint() would stay silent without -d.
        print('File {elf_file} does not exist!'.format(elf_file=args.elf_file), file=sys.stderr)
        sys.exit(1)

    bear = Bear(args.elf_file)

    if configuration['address_normalization']:
        bear.normalize_by_address()
    bear.flatten_type()
    bear.pretty_print()


if __name__ == "__main__":
    main()