elf_symbols/main.py
Juraj Oravec 72e983b773
Change LICENSE to GPLv3
Signed-off-by: Juraj Oravec <jurajoravec@mailo.com>
2025-05-24 15:25:45 +02:00

631 lines
25 KiB
Python

#!/bin/python
# ============================================================
# elf_symbols - dwarf symbols browser
# Copyright (C) 2023-2025 Juraj Oravec <jurajoravec@mailo.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# ============================================================
import os
import sys
import argparse
import math
from dwex import formats
from elftools.dwarf.locationlists import LocationParser, LocationExpr
from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp, DW_OP_opcode2name
from dwex.dwarfone import DWARFExprParserV1
from pprint import pprint
# Version string reported by the ``--version`` command-line switch.
SCRIPT_VERSION = '0.3.0'

# Run-time options. The values below are the defaults; main() overwrites
# every key from the parsed command line before any work is done.
configuration = {
    # Prefix flattened variable names with the compile unit name.
    'include_file_name': False,
    # Enables eprint()/epprint() output on stderr.
    'print_debug_info': False,
    # Also walk struct/union children that are not DW_TAG_member.
    'all_members': False,
    # Emit only the first and the last element of every array.
    'no_array_expand': False,
    # Upper bounds at or above this value are treated as "no elements".
    'array_element_limit': 0xffffffffffffffff,
    # Keep only entries whose address matches the recorded one per name.
    'address_normalization': True,
    # Append the DW_AT_linkage_name to the displayed name.
    'display_mangled_names': True,
}

# DIE tags at which the walk through DW_AT_type references stops.
supported_types = [
    'DW_TAG_base_type',
    'DW_TAG_structure_type',
    'DW_TAG_array_type',
    'DW_TAG_union_type',
    'DW_TAG_enumeration_type',
    'DW_TAG_pointer_type',
]
def eprint(*args, **kwargs):
    """Print to stderr, but only when debug output is switched on.

    Takes the same arguments as the built-in ``print`` (except ``file``,
    which is fixed to ``sys.stderr``). Does nothing and returns ``None``
    while ``configuration['print_debug_info']`` is falsy.
    """
    if not configuration['print_debug_info']:
        return
    print(*args, file=sys.stderr, **kwargs)
def epprint(*args, **kwargs):
    """Pretty-print to stderr, but only when debug output is switched on.

    Takes the same arguments as ``pprint.pprint`` (except ``stream``, which
    is fixed to ``sys.stderr``). Does nothing and returns ``None`` while
    ``configuration['print_debug_info']`` is falsy.
    """
    if not configuration['print_debug_info']:
        return
    pprint(*args, stream=sys.stderr, **kwargs)
class DWARFParseError(Exception):
    """Signals that a file was opened but its DWARF data could not be parsed.

    The exception message is ``"DWARF parsing error: "`` followed by the
    formatted original exception. The object passed as ``di`` is kept on the
    ``dwarfinfo`` attribute so the handler can still inspect it.
    """

    def __init__(self, exc, di):
        message = "DWARF parsing error: {}".format(exc)
        super().__init__(message)
        self.dwarfinfo = di
# Bookkeeping attributes attached to every DIE handled by this module
def decorate_die(die, i):
    """Tag ``die`` with its position and an empty child cache.

    Sets ``die._i`` to ``i`` and ``die._children`` to ``None`` (meaning
    "children not loaded yet", see ``load_children``), then returns the very
    same object so the call can be used inside a comprehension.
    """
    die._i, die._children = i, None
    return die
def load_children(parent_die):
    """Fill ``parent_die._children`` with its decorated child DIEs, once.

    Nothing happens when ``_children`` already holds a value other than
    ``None``. Otherwise the children are read through
    ``parent_die.iter_children()``, each one is passed through
    ``decorate_die`` together with its index, and the resulting list is
    cached on the parent. The caller is expected to have checked that the
    DIE has children at all.
    """
    if getattr(parent_die, "_children", None) is not None:
        # Already cached by an earlier call
        return

    # TODO: wait cursor here. It may cause disk I/O
    try:
        parent_die._children = [
            decorate_die(child, index)
            for index, child in enumerate(parent_die.iter_children())
        ]
    except KeyError:
        # Catching #1516 from original project "DWARF Explorer" https://github.com/sevaa/dwex/
        print("This executable file is corrupt or incompatible.")
        parent_die._children = []
def safe_DIE_name(die, default=''):
    """Return a printable name for ``die``.

    The name is taken from ``DW_AT_name`` (decoded as UTF-8, undecodable
    bytes dropped). Without that attribute the DIE tag is used when debug
    output is enabled, otherwise ``default``. When mangled names are enabled
    and the DIE carries ``DW_AT_linkage_name``, the result has the form
    ``"<name> - <mangled name>"``.
    """
    attributes = die.attributes

    if 'DW_AT_name' in attributes:
        name = attributes['DW_AT_name'].value.decode('utf-8', errors='ignore')
    elif configuration['print_debug_info']:
        name = die.tag
    else:
        name = default

    if configuration['display_mangled_names'] and 'DW_AT_linkage_name' in attributes:
        mangled = attributes['DW_AT_linkage_name'].value.decode('utf-8', errors='ignore')
        name = '{short_name} - {mangled_name}'.format(short_name=name,
                                                      mangled_name=mangled)
    return name
class Bear():
    """Collect the variables described by a file's DWARF data and list them.

    The constructor reads the DWARF information of one file and builds
    ``myVariables``: one dictionary per compile unit with the keys ``name``
    and ``children``. Every child is a dictionary that may carry:

        name:               variable name
        CU_name:            name of the compile unit (top level entries only)
        type:               text description of the type
        size_byte:          size of the variable in bytes
        size_bit:           size of the variable in bits
        bit_offset:         offset of the variable inside its byte
        address:            absolute address of the variable
        number_of_elements: element count (array types only)
        children:           list of member entries (struct / union types)

    ``flatten_type`` turns that tree into ``flat_list`` and ``pretty_print``
    writes ``flat_list`` to stdout.
    """

    def __init__(self, filename):
        """Load ``filename`` and collect its variables.

        Exits the process with status 1 when ``formats.read_dwarf`` returns
        nothing usable.

        Raises:
            DWARFParseError: when an ``AssertionError`` is raised while the
                compile units are being loaded.
        """
        # Create the result containers first: the "no compile units" path
        # below returns early and the object must still be usable afterwards.
        self.myVariables = list()
        self.specifications = dict()
        self.valid_addresses = dict()
        self.flat_list = []
        self.top_dies = []
        self.dwarfinfo = None
        self.filename = filename

        di = formats.read_dwarf(filename, self.resolve_arch)
        if not di:  # Covers both False and None
            print("Something went wrong")
            sys.exit(1)

        # Some degree of graceful handling of wrong format
        try:
            # Some cached top level stuff
            # Notably, iter_CUs doesn't cache
            di._ranges = None  # Loaded on first use

            def decorate_cu(cu, i):
                cu._i = i
                cu._lineprogram = None
                cu._exprparser = None
                return cu

            # We'll need them first thing, might as well load here
            di._unsorted_CUs = [decorate_cu(cu, i) for (i, cu) in enumerate(di.iter_CUs())]
            self.dwarfinfo = di
            if not di._unsorted_CUs:
                # Weird, but saw it once - debug sections present, but no CUs
                return
            # For quick CU search by offset within the info section, regardless of sorting
            di._CU_offsets = [cu.cu_offset for cu in di._unsorted_CUs]
            di._CUs = list(di._unsorted_CUs)
            di._locparser = None  # Created on first use
        except AssertionError as ass:  # Covers exceptions during parsing
            raise DWARFParseError(ass, di) from ass

        self.top_dies = [decorate_die(CU.get_top_DIE(), i) for (i, CU) in enumerate(di._CUs)]
        # Specifications have to be known before any address is resolved
        self.load_specifications()

        for top_die in self.top_dies:
            # top dies only contain Compile Units
            CU_name = safe_DIE_name(top_die, '?')
            # Preload children
            load_children(top_die)
            children_dies = list()
            for child_die in top_die._children:
                if child_die.tag == 'DW_TAG_variable' and 'DW_AT_specification' not in child_die.attributes:
                    children_dies.append(self._variable_entry(child_die, CU_name))
                elif child_die.tag == 'DW_TAG_subprogram':
                    children_dies.extend(self.load_subprogram(child_die, CU_name))
            self.myVariables.append({
                'name': CU_name,
                'children': children_dies
            })

    def _variable_entry(self, die, CU_name, skip_negative_address=False):
        """Build the entry for one DW_TAG_variable DIE.

        Resolves the address, records it in ``valid_addresses`` when both
        the name and the address are usable, and resolves the type when the
        DIE has one. Returns ``None`` instead of an entry when
        ``skip_negative_address`` is set and the resolved address is
        negative.
        """
        entry = {
            # Name should be on every element, if not set something so it can be printed
            'name': safe_DIE_name(die, '?'),
            'CU_name': CU_name
        }
        self.resolve_address(die, entry, entry)
        if skip_negative_address and entry['address'] < 0:
            return None
        if entry['name'] != '?' and entry['address']:
            self.valid_addresses[entry['name']] = entry['address']
        if 'DW_AT_type' in die.attributes:
            self.truly_resolve_type(entry, die.get_DIE_from_attribute('DW_AT_type'))
        return entry

    def load_subprogram(self, die, CU_name):
        """Return the entries of the variables declared inside ``die``.

        ``die`` is a subprogram or a lexical block; nested lexical blocks
        are searched recursively. Variables whose resolved address is
        negative are left out (the original code labels this the check for
        static variables).
        """
        # Preload children
        load_children(die)
        children_dies = list()
        for child_die in die._children:
            if child_die.tag == 'DW_TAG_variable' and 'DW_AT_specification' not in child_die.attributes:
                entry = self._variable_entry(child_die, CU_name, skip_negative_address=True)
                if entry is not None:
                    children_dies.append(entry)
            elif child_die.tag == 'DW_TAG_lexical_block':
                children_dies.extend(self.load_subprogram(child_die, CU_name))
        return children_dies

    def load_specifications(self):
        """Record the address of every top level variable with DW_AT_specification.

        The result is stored in ``self.specifications``, keyed by the name
        of the DIE the specification refers to; each value holds the
        resolved ``address`` and the ``CU_name`` it was found in.
        """
        for top_die in self.top_dies:
            CU_name = safe_DIE_name(top_die, '?')
            # preload children
            load_children(top_die)
            for child_die in top_die._children:
                if child_die.tag != 'DW_TAG_variable':
                    continue
                if 'DW_AT_specification' not in child_die.attributes:
                    continue
                entry = {
                    'name': safe_DIE_name(child_die, '?')
                }
                self.resolve_address(child_die, entry, entry)
                specs_for_die = child_die.get_DIE_from_attribute('DW_AT_specification')
                name = safe_DIE_name(specs_for_die, '?')
                self.specifications[name] = {
                    'address': entry['address'],
                    'CU_name': CU_name
                }

    def resolve_address(self, die, entry_source, entry_dest):
        """Compute ``entry_dest['address']`` for ``die``.

        The base address is the address of a matching specification (same
        name and same compile unit) or, failing that,
        ``entry_source['address']`` when present, else 0. The value of the
        DIE's location attribute, if it has one, is added to that base.

        Returns:
            True when a specification or a location attribute contributed
            to the address, False otherwise.
        """
        attributes = die.attributes
        at_name = ""
        for candidate in ('DW_AT_member_location',
                          'DW_AT_data_member_location',
                          'DW_AT_location'):
            if candidate in attributes:
                at_name = candidate
                break

        # The destination entry is consulted first; the source entry only
        # when the destination name has no specification at all.
        address_spec = 0
        for candidate_entry in (entry_dest, entry_source):
            spec = self.specifications.get(candidate_entry['name'])
            if spec is None:
                continue
            # Member and specification entries carry no 'CU_name'; they can
            # never match, which must not be an error.
            if spec['CU_name'] == candidate_entry.get('CU_name'):
                address_spec = spec['address']
            break

        l_result = False
        base_address = 0
        if address_spec:
            base_address = address_spec
            l_result = True
        if 'address' in entry_source and not base_address:
            base_address = entry_source['address']
        entry_dest['address'] = base_address

        if not at_name:
            return l_result

        attribute = attributes[at_name]
        version = die.cu['version']
        if LocationParser.attribute_has_location(attribute, version):
            ll = self.parse_location(die, attribute)
            # Only a plain expression whose first operation has an operand
            # can be turned into an address; location lists and operand-less
            # operations are reported instead of crashing.
            lloc = self.dump_expr(die, ll.loc_expr) if isinstance(ll, LocationExpr) else None
            if lloc and lloc[0].args:
                entry_dest['address'] = base_address + lloc[0].args[0]
                l_result = True
            else:
                eprint("Unsupported location information")
                eprint(at_name, version)
        elif LocationParser._attribute_is_constant(attribute, version):
            entry_dest['address'] = base_address + attribute.value
            l_result = True
        else:
            eprint("Unsupported location information")
            eprint(at_name, version)
        return l_result

    def resolve_bit_size(self, die, entry):
        """Copy the bit-field description of ``die`` into ``entry``.

        Sets ``size_bit`` and ``bit_offset`` when the matching attributes
        exist. A present bit offset is then rewritten so that whole bytes
        are moved into ``entry['address']`` and ``bit_offset`` ends up in
        the range 0..7.
        """
        attributes = die.attributes
        if 'DW_AT_bit_size' in attributes:
            entry['size_bit'] = attributes['DW_AT_bit_size'].value
        if 'DW_AT_data_bit_offset' in attributes:
            entry['bit_offset'] = attributes['DW_AT_data_bit_offset'].value
        elif 'DW_AT_bit_offset' in attributes:
            entry['bit_offset'] = attributes['DW_AT_bit_offset'].value

        if 'bit_offset' not in entry:
            return

        if entry['bit_offset'] & 0x100:
            # NOTE(review): bit 8 is treated as a flag and stripped; its
            # origin is not visible in this file - confirm with the author.
            entry['bit_offset'] = (entry['bit_offset'] & 0xFF)
        else:
            # Mirror the offset inside the smallest whole number of bytes
            # that holds both the offset and the field itself.
            bit_size = (math.ceil((entry['bit_offset'] + 1) / 8) * 8) - 1
            bit_size_offset = (math.ceil(entry['size_bit'] / 8) * 8) - 1
            bit_size = max(bit_size, bit_size_offset)
            entry['bit_offset'] = bit_size - entry['bit_offset'] - (entry['size_bit'] - 1)

        # Move whole bytes from the bit offset into the address
        byte_offset = entry['bit_offset'] // 8
        entry['address'] = entry['address'] + byte_offset
        entry['bit_offset'] = entry['bit_offset'] - byte_offset * 8

    def _member_type_die(self, child_die):
        """Return the DIE describing the type of a struct/union child.

        Returns ``None`` when the child has to be skipped: it is not a
        DW_TAG_member (and ``all_members`` is off) or it carries no usable
        type information.
        """
        if child_die.tag != 'DW_TAG_member' and not configuration['all_members']:
            return None
        if 'DW_AT_type' in child_die.attributes:
            return child_die.get_DIE_from_attribute('DW_AT_type')
        if child_die.tag in supported_types:
            return child_die
        eprint('Child DIE with no type information')
        epprint(child_die)
        return None

    def truly_resolve_type(self, entry, die_type):
        """Describe ``die_type`` inside ``entry``.

        Sets ``type`` and, when available, ``size_byte``. Structures and
        unions get a ``children`` list with one entry per member, arrays get
        ``number_of_elements`` on top of the description of their element
        type. Chains of DW_AT_type references are followed until a tag from
        ``supported_types`` is reached.
        """
        if die_type.tag == 'DW_TAG_volatile_type':
            die_type = die_type.get_DIE_from_attribute('DW_AT_type')
        entry['type'] = safe_DIE_name(die_type, '?')
        self.resolve_address(die_type, entry, entry)

        if 'DW_AT_type' in die_type.attributes and die_type.tag not in supported_types:
            # Check if the type is a redefinition of a base type
            die_type_test = die_type
            while 'DW_AT_type' in die_type_test.attributes:
                die_type_test = die_type_test.get_DIE_from_attribute('DW_AT_type')
                if die_type_test.tag in supported_types:
                    die_type = die_type_test
                    break

        if 'DW_AT_byte_size' in die_type.attributes:
            entry['size_byte'] = die_type.attributes['DW_AT_byte_size'].value

        tag = die_type.tag
        if tag == 'DW_TAG_base_type':
            real_type_name = safe_DIE_name(die_type, '?')
            if real_type_name != '?' and real_type_name != entry['type']:
                entry['type'] = '{name} ({real})'.format(name=entry['type'],
                                                         real=real_type_name)
        elif tag == 'DW_TAG_structure_type':
            load_children(die_type)
            child_dies = []
            last_member_address = entry['address']
            last_member_size_byte = 0
            for child_die in die_type._children:
                typ_die = self._member_type_die(child_die)
                if typ_die is None:
                    continue
                child_entry = {'name': safe_DIE_name(child_die, '?')}
                self.resolve_address(child_die, entry, child_entry)
                self.resolve_bit_size(child_die, child_entry)
                self.truly_resolve_type(child_entry, typ_die)
                if child_entry['address'] != entry['address']:
                    last_member_address = child_entry['address']
                    if 'size_byte' in child_entry:
                        last_member_size_byte = child_entry['size_byte']
                else:
                    # Member still sits on the struct base: place it right
                    # behind the last member that had its own address.
                    child_entry['address'] = last_member_address + last_member_size_byte
                child_dies.append(child_entry)
            entry['children'] = child_dies
        elif tag == 'DW_TAG_array_type':
            # Describe the element type first, then add the element count
            self.truly_resolve_type(entry, die_type.get_DIE_from_attribute('DW_AT_type'))
            load_children(die_type)
            entry['number_of_elements'] = 0
            # Only the first child is consulted; an array DIE without
            # children or with a non-integer bound keeps zero elements.
            subranges = die_type._children
            if subranges and 'DW_AT_upper_bound' in subranges[0].attributes:
                upper_bound = subranges[0].attributes['DW_AT_upper_bound'].value
                if isinstance(upper_bound, int) and upper_bound < configuration['array_element_limit']:
                    entry['number_of_elements'] = upper_bound + 1
        elif tag == 'DW_TAG_union_type':
            load_children(die_type)
            child_entries = []
            for child_die in die_type._children:
                typ_die = self._member_type_die(child_die)
                if typ_die is None:
                    continue
                child_entry = {'name': safe_DIE_name(child_die, '?')}
                # Every union member starts at the address of the union
                if 'address' in entry:
                    child_entry['address'] = entry['address']
                self.resolve_bit_size(child_die, child_entry)
                self.truly_resolve_type(child_entry, typ_die)
                child_entries.append(child_entry)
            entry['children'] = child_entries
        elif tag == 'DW_TAG_enumeration_type':
            entry['type'] = safe_DIE_name(die_type, 'ENUM')
        elif tag == 'DW_TAG_pointer_type':
            entry['type'] = safe_DIE_name(die_type, 'POINTER')
        else:
            eprint("Unsupported type:", die_type.tag)

    def normalize_by_address(self):
        """Drop entries whose address differs from the one recorded for their name.

        An entry survives only when its name is not ``'?'``, the name is
        present in ``valid_addresses`` and the entry's address equals the
        recorded address. ``myVariables`` is replaced by the filtered list.
        """
        validVariables = list()
        for CU in self.myVariables:
            child_entries = [
                child for child in CU['children']
                if child['name'] != '?'
                and child['name'] in self.valid_addresses
                and child['address'] == self.valid_addresses[child['name']]
            ]
            validVariables.append({
                'name': CU['name'],
                'children': child_entries
            })
        self.myVariables = validVariables

    def flatten_type(self, parent=None):
        """Flatten every entry of ``myVariables`` into ``flat_list``.

        With ``include_file_name`` enabled the compile unit name becomes
        the parent name and ``":"`` the separator. ``parent`` is unused.
        """
        for CU in self.myVariables:
            for child in CU['children']:
                if configuration["include_file_name"]:
                    self.pettanko(child, CU['name'], ":")
                else:
                    self.pettanko(child)

    def pettanko(self, entry: dict, a_parent_name: str = '', separator: str = '.', address_offset: int = 0):
        """Append ``entry`` and everything below it to ``flat_list``.

        Args:
            entry: tree entry as built by the constructor.
            a_parent_name: name prefix; empty for top level entries.
            separator: text placed between parent and child name.
            address_offset: value added to every address below this call;
                used to shift the members of array elements past index 0.

        Array entries are listed once as a whole (size multiplied by the
        element count) and then once per element as ``name[index]``,
        followed by the members of that element. With ``no_array_expand``
        only the first and the last element are listed.
        """
        flat_entry = dict()
        if a_parent_name:
            flat_entry['name'] = '{parent}{separator}{child}'.format(parent=a_parent_name,
                                                                     separator=separator,
                                                                     child=entry['name'])
        else:
            flat_entry['name'] = entry['name']
        if 'address' in entry:
            flat_entry['address'] = entry['address'] + address_offset
        if 'size_byte' in entry:
            flat_entry['size_byte'] = entry['size_byte']
            if 'number_of_elements' in entry:
                flat_entry['size_byte'] = flat_entry['size_byte'] * entry['number_of_elements']
        for key in ('size_bit', 'bit_offset', 'type'):
            if key in entry:
                flat_entry[key] = entry[key]
        self.flat_list.append(flat_entry)

        members = entry.get('children', ())

        if 'number_of_elements' not in entry:
            for kid in members:
                self.pettanko(entry=kid, a_parent_name=flat_entry['name'],
                              separator=separator, address_offset=address_offset)
            return

        count = entry['number_of_elements']
        if count <= 0:
            return
        # Step from the first straight to the last element when arrays are
        # not to be expanded.
        step = count - 1 if (configuration['no_array_expand'] and count > 1) else 1

        for index in range(0, count, step):
            kid = flat_entry.copy()
            kid['name'] = '{name}[{index}]'.format(name=flat_entry['name'], index=index)
            if 'size_byte' in entry:
                kid['size_byte'] = entry['size_byte']
            size_offset = address_offset
            if ('address' in flat_entry) and ('size_byte' in entry):
                stride = entry['size_byte'] * index
                size_offset = address_offset + stride
                # flat_entry['address'] already contains address_offset, so
                # only the stride is added here.
                kid['address'] = flat_entry['address'] + stride
            self.pettanko(entry=kid, separator=separator, address_offset=0)
            kid_parent = self.flat_list[-1]
            for kid_of_kid in members:
                self.pettanko(entry=kid_of_kid, a_parent_name=kid_parent['name'],
                              separator=separator, address_offset=size_offset)

    def pretty_print(self):
        """Write ``flat_list`` to stdout, one tab separated line per entry.

        Bit-fields are printed as ``address  &bit_mask  name``, everything
        else as ``address  size_in_bytes  name`` (size 1 when unknown).
        Entries named ``'?'`` without an address are skipped.
        """
        for entry in self.flat_list:
            address = entry.get('address', 0)
            if not address and entry['name'] == '?':
                continue

            if 'size_bit' in entry and 'bit_offset' in entry:
                bit_offset = entry['bit_offset']
                bit_mask = (1 << entry['size_bit']) - 1
                if bit_offset >= 0:
                    bit_mask = bit_mask << bit_offset
                else:
                    # The message has to be formatted before it is handed
                    # to eprint(), which returns None.
                    eprint("Illegal offset {offset} for variable {variable}".format(
                        offset=bit_offset,
                        variable=entry['name']
                    ))
                    address = address + int(((bit_offset + 1) / 8) - 1)
                print('{address}\t&{bit_mask}\t{variable_name}'.format(
                    address=hex(address),
                    variable_name=entry['name'],
                    bit_mask=hex(bit_mask)
                ))
            else:
                print('{address}\t{size_byte}\t{variable_name}'.format(
                    address=hex(address),
                    variable_name=entry['name'],
                    size_byte=entry.get('size_byte', 1)
                ))

    def parse_location(self, die, attr):
        """Parse the location attribute ``attr`` of ``die``.

        The ``LocationParser`` is created on first use and cached on the
        DWARF info object of the DIE.
        """
        di = die.dwarfinfo
        if di._locparser is None:
            di._locparser = LocationParser(di.location_lists())
        return di._locparser.parse_from_attribute(attr, die.cu['version'], die=die)

    # Expr is an expression blob
    def dump_expr(self, die, expr):
        """Parse the expression blob ``expr`` and return the parser's result.

        The expression parser is created on first use and cached on the
        compile unit of ``die``; DWARF version 1 units get the dwex V1
        parser.
        """
        if die.cu._exprparser is None:
            if die.cu['version'] > 1:
                die.cu._exprparser = DWARFExprParser(die.cu.structs)
            else:
                die.cu._exprparser = DWARFExprParserV1(die.cu.structs)
        # Challenge: for nested expressions, args is a list with a list of commands
        # For those, the format is: op {op arg, arg; op arg, arg}
        # Can't just check for iterable, str is iterable too
        return die.cu._exprparser.parse_expr(expr)

    def resolve_arch(self, arches):
        """Callback handed to ``formats.read_dwarf``; choosing an architecture is not supported."""
        print("resolve_arch: Unsupported feature")
        return None
def main():
    """Command-line entry point.

    Parses the arguments, copies them into ``configuration``, loads the
    given file into a ``Bear`` and prints the flattened variable list.

    Exits with status 1 when no file is given or the file does not exist,
    and with status 2 (through ``ArgumentParser.error``) when
    ``--array-element-limit`` is not a hexadecimal number.
    """
    from dwex.patch import monkeypatch
    monkeypatch()

    parser = argparse.ArgumentParser(
        description='Expands symbols (global variables) types.',
        epilog='Data displayed by this script are informative only!'
    )
    parser.add_argument('elf_file', metavar='file', type=str, nargs='?',
                        help='ELF file to try to extract symbols')
    parser.add_argument('-f', '--include-file-name', dest='include_file_name', action='store_true',
                        help='Display filename at the beginning')
    parser.add_argument('-d', '--print-debug-info', dest='print_debug_info', action='store_true',
                        help='Print debug infor when parsing DWARF')
    parser.add_argument('-a', '--all-members', dest='all_members', action='store_true',
                        help='Print information of all children')
    parser.add_argument('-e', '--no-array-expand', dest='no_array_expand', action='store_true',
                        help='Print only first and last array elements')
    parser.add_argument('--array-element-limit', dest='array_element_limit', action='store',
                        default=hex(configuration['array_element_limit']),
                        help='Maximum hexadecimal number of valid elements in an array')
    parser.add_argument('-n', '--disable-address-normalization', dest='address_normalization',
                        action='store_false',
                        help='Disable variable normalization by address, hides duplicate entries.')
    parser.add_argument('--no-mangled-names', dest='display_mangled_names', action='store_false',
                        help='Hide mangled names. By default mangled names are shown after the normal names')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {version}'.format(version=SCRIPT_VERSION))
    args = parser.parse_args()

    # Report a malformed limit as a usage error instead of a traceback
    try:
        array_element_limit = int(args.array_element_limit, 16)
    except ValueError:
        parser.error('argument --array-element-limit: invalid hexadecimal value: {value!r}'.format(
            value=args.array_element_limit))

    configuration['include_file_name'] = args.include_file_name
    configuration['print_debug_info'] = args.print_debug_info
    configuration['all_members'] = args.all_members
    configuration['no_array_expand'] = args.no_array_expand
    configuration['array_element_limit'] = array_element_limit
    configuration['address_normalization'] = args.address_normalization
    configuration['display_mangled_names'] = args.display_mangled_names

    if not args.elf_file:
        parser.print_help()
        sys.exit(1)
    if not os.path.exists(args.elf_file):
        # Always shown: eprint() would stay silent unless -d was given,
        # leaving the user with a bare exit status.
        print('File {elf_file} does not exist!'.format(elf_file=args.elf_file),
              file=sys.stderr)
        sys.exit(1)

    bear = Bear(args.elf_file)
    if configuration['address_normalization']:
        bear.normalize_by_address()
    bear.flatten_type()
    bear.pretty_print()
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()