elf_symbols/main.py

376 lines
15 KiB
Python
Raw Normal View History

#!/bin/python
import os
import sys
import math
import argparse
from dwex import formats
from elftools.dwarf.locationlists import LocationParser, LocationExpr
from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp, DW_OP_opcode2name
from dwex.dwarfone import DWARFExprParserV1
from pprint import pprint
# Version string reported by the --version command-line flag.
SCRIPT_VERSION = '0.1.0'

# Runtime switches; main() overwrites both from the parsed command line.
configuration = dict(
    include_file_name=False,
    print_debug_info=False,
)

# DIE tags the type resolver knows how to describe.
supported_types = [
    'DW_TAG_base_type',
    'DW_TAG_structure_type',
    'DW_TAG_array_type',
    'DW_TAG_union_type',
    'DW_TAG_enumeration_type',
]
def eprint(*args, **kwargs):
    """Forward to ``print`` on stderr, but only when debug output is enabled.

    Nothing is written unless ``configuration['print_debug_info']`` is truthy.
    """
    if not configuration['print_debug_info']:
        return
    print(*args, file=sys.stderr, **kwargs)
def epprint(*args, **kwargs):
    """Forward to ``pprint`` on stderr, but only when debug output is enabled.

    Nothing is written unless ``configuration['print_debug_info']`` is truthy.
    """
    if not configuration['print_debug_info']:
        return
    pprint(*args, stream=sys.stderr, **kwargs)
class DWARFParseError(Exception):
    """The file was opened, but its DWARF data could not be parsed.

    The message is "DWARF parsing error: " followed by the formatted
    original exception; the object passed as ``di`` is kept on the
    ``dwarfinfo`` attribute.
    """

    def __init__(self, exc, di):
        message = "DWARF parsing error: " + format(exc)
        super().__init__(message)
        self.dwarfinfo = di
# Some additional data for every DIE
def decorate_die(die, i):
    """Tag ``die`` with its index and an empty child cache, then return it.

    ``die._i`` receives ``i``; ``die._children`` is reset to ``None``, the
    value ``load_children`` treats as "not loaded yet".
    """
    die._children = None
    die._i = i
    return die
def load_children(parent_die):
    """Fill ``parent_die._children`` on first use; later calls do nothing.

    The caller is expected to have checked already that the DIE has children.
    Each child is passed through ``decorate_die`` together with its position.
    If iterating the children raises ``KeyError``, a notice is printed and the
    DIE is left with an empty child list.
    """
    if getattr(parent_die, "_children", None) is not None:
        return  # already cached

    # TODO: wait cursor here. It may cause disk I/O
    loaded = []
    try:
        for position, child in enumerate(parent_die.iter_children()):
            loaded.append(decorate_die(child, position))
    except KeyError:
        # Catching #1516
        print("This executable file is corrupt or incompatible.")
        loaded = []
    parent_die._children = loaded
def safe_DIE_name(die, default=''):
    """Return a printable name for ``die``.

    The ``DW_AT_name`` attribute wins when present; its bytes are decoded as
    UTF-8 with undecodable bytes dropped.  Otherwise the DIE's tag is returned
    when debug output is enabled, and ``default`` when it is not.
    """
    if 'DW_AT_name' in die.attributes:
        raw_name = die.attributes['DW_AT_name'].value
        return raw_name.decode('utf-8', errors='ignore')
    if configuration['print_debug_info']:
        return die.tag
    return default
class Bear():
    """Collect the variables described by a file's DWARF information.

    Construction reads the DWARF data and stores one record per compile unit
    in ``myVariables`` (``{'name': <CU name>, 'children': [<variable>, ...]}``).
    ``flatten_type`` expands those records into ``flat_list`` and
    ``pretty_print`` writes that list to stdout.

    A variable/member record is a dictionary.  Apart from ``name`` every key
    is optional:
        name: variable or member name ('?' when the DIE has none)
        type: textual description of the type
        size_byte: size in bytes (of ONE element when the record is an array)
        address: absolute address, as a ``hex()`` string
        number_of_elements: total element count, arrays only
        children: list of member records (structures and unions)
    """

    # Tags that merely rename or qualify another type; resolution looks
    # through them.  Pointer and reference tags are deliberately absent: a
    # pointer variable stores an address, not the pointee, and following it
    # would both list bogus members and recurse forever on self-referential
    # structures (e.g. a linked-list node).
    _TRANSPARENT_TAGS = frozenset((
        'DW_TAG_typedef',
        'DW_TAG_const_type',
        'DW_TAG_volatile_type',
        'DW_TAG_restrict_type',
        'DW_TAG_atomic_type',
    ))

    def __init__(self, filename):
        """Read ``filename`` and gather its compile-unit level variables.

        Exits the process with status 1 when ``formats.read_dwarf`` returns
        nothing.  A file that has debug sections but no compile units yields
        an instance with empty ``myVariables`` and ``flat_list``.

        Raises:
            DWARFParseError: an ``AssertionError`` was raised while the
                compile units were being enumerated.
        """
        # Assigned up front so that every exit path below leaves a usable
        # object behind (flatten_type/pretty_print then simply see no data).
        self.filename = filename
        self.dwarfinfo = None
        self.top_dies = []
        self.myVariables = []
        self.flat_list = []

        di = formats.read_dwarf(filename, self.resolve_arch)
        if not di:  # Covers both False and None
            print("Something went wrong")
            sys.exit(1)
        self.dwarfinfo = di

        # Some degree of graceful handling of wrong format
        try:
            # Some cached top level stuff
            # Notably, iter_CUs doesn't cache
            di._ranges = None  # Loaded on first use
            # We'll need them first thing, might as well load here
            di._unsorted_CUs = [self._decorate_cu(cu, i)
                                for (i, cu) in enumerate(di.iter_CUs())]
            if not di._unsorted_CUs:
                # Weird, but saw it once - debug sections present, but no CUs
                return
            # For quick CU search by offset within the info section,
            # regardless of sorting
            di._CU_offsets = [cu.cu_offset for cu in di._unsorted_CUs]
            di._CUs = list(di._unsorted_CUs)
            di._locparser = None  # Created on first use
        except AssertionError as ass:  # Covers exceptions during parsing
            raise DWARFParseError(ass, di) from ass

        self.top_dies = [decorate_die(cu.get_top_DIE(), i)
                         for (i, cu) in enumerate(di._CUs)]
        for top_die in self.top_dies:
            # Top DIEs are the compile units; their direct children are
            # scanned for variables.
            load_children(top_die)
            self.myVariables.append({
                'name': safe_DIE_name(top_die, '?'),
                'children': [self._describe_variable(child_die)
                             for child_die in top_die._children
                             if child_die.tag == 'DW_TAG_variable'],
            })

    @staticmethod
    def _decorate_cu(cu, i):
        """Attach the per-CU cache slots used by this module and return ``cu``."""
        cu._i = i
        cu._lineprogram = None
        cu._exprparser = None
        return cu

    def _describe_variable(self, var_die):
        """Build the record (name, address, type information) for one variable DIE."""
        # Name should be on every element, if not set something so it can be printed
        entry = {'name': safe_DIE_name(var_die, '?')}
        address = self._static_address(var_die)
        if address is not None:
            entry['address'] = hex(address)
        if 'DW_AT_type' in var_die.attributes:
            self.truly_resolve_type(entry, var_die.get_DIE_from_attribute('DW_AT_type'))
        return entry

    def _static_address(self, die):
        """Return the first argument of the first op of ``die``'s location, or None.

        None is returned when the DIE has no usable ``DW_AT_location``, when
        the location is a location list rather than a single expression, or
        when the expression is empty / its first op carries no integer
        argument.
        """
        if 'DW_AT_location' not in die.attributes:
            return None
        attr = die.attributes['DW_AT_location']
        if not LocationParser.attribute_has_location(attr, die.cu['version']):
            return None
        location = self.parse_location(die, attr)
        if not isinstance(location, LocationExpr):
            # Location lists have no single address to report.
            eprint('Location list skipped for', safe_DIE_name(die, '?'))
            return None
        ops = self.dump_expr(die, location.loc_expr)
        if not ops or not ops[0].args:
            return None
        first_arg = ops[0].args[0]
        return first_arg if isinstance(first_arg, int) else None

    def truly_resolve_type(self, entry, die_type):
        """Fill ``entry`` with what ``die_type`` says about type, size and members.

        ``entry`` is updated in place: ``type`` always, ``size_byte`` when the
        resolved DIE has ``DW_AT_byte_size``, ``children`` for structures and
        unions, ``number_of_elements`` for arrays whose length is known.  When
        ``entry`` already has an ``address``, member records get theirs
        derived from it.
        """
        if die_type.tag == 'DW_TAG_volatile_type' and 'DW_AT_type' in die_type.attributes:
            die_type = die_type.get_DIE_from_attribute('DW_AT_type')
        entry['type'] = safe_DIE_name(die_type, '?')

        if 'DW_AT_data_member_location' in die_type.attributes:
            location = die_type.attributes['DW_AT_data_member_location'].value
            if isinstance(location, int):  # may also be an expression block
                entry['offset'] = location * 8
                if 'address' in entry:
                    entry['address'] = hex(int(entry['address'], 16) + location)

        # Look through typedefs and qualifiers to the type they stand for.
        if die_type.tag in self._TRANSPARENT_TAGS:
            probe = die_type
            while probe.tag in self._TRANSPARENT_TAGS and 'DW_AT_type' in probe.attributes:
                probe = probe.get_DIE_from_attribute('DW_AT_type')
            if probe.tag not in self._TRANSPARENT_TAGS:
                die_type = probe

        if 'DW_AT_byte_size' in die_type.attributes:
            entry['size_byte'] = die_type.attributes['DW_AT_byte_size'].value

        tag = die_type.tag
        if tag == 'DW_TAG_base_type':
            real_type_name = safe_DIE_name(die_type, '?')
            if real_type_name != '?' and real_type_name != entry['type']:
                entry['type'] = '{name} ({real})'.format(name=entry['type'],
                                                         real=real_type_name)
        elif tag == 'DW_TAG_structure_type':
            entry['children'] = self._resolve_structure(entry, die_type)
        elif tag == 'DW_TAG_array_type':
            # The element type is resolved into the same entry, so for arrays
            # 'type', 'size_byte' and 'children' describe a single element.
            if 'DW_AT_type' in die_type.attributes:
                self.truly_resolve_type(entry, die_type.get_DIE_from_attribute('DW_AT_type'))
            element_count = self._array_length(die_type)
            if element_count is not None:
                entry['number_of_elements'] = element_count
        elif tag == 'DW_TAG_union_type':
            entry['children'] = self._resolve_union(entry, die_type)
        elif tag == 'DW_TAG_enumeration_type':
            entry['type'] = safe_DIE_name(die_type, 'ENUM')
        else:
            eprint("Unsupported type:", die_type.tag)

    @staticmethod
    def _member_type_die(child_die):
        """Return the DIE describing ``child_die``'s type, or None if there is none."""
        if 'DW_AT_type' in child_die.attributes:
            return child_die.get_DIE_from_attribute('DW_AT_type')
        if child_die.tag in supported_types:
            return child_die
        eprint('Child DIE with no type information')
        epprint(child_die)
        return None

    def _resolve_structure(self, entry, struct_die):
        """Return the member records of a structure, placed relative to ``entry``."""
        load_children(struct_die)
        members = []
        running_offset = 0  # fallback placement when a member states no offset
        for child_die in struct_die._children:
            typ_die = self._member_type_die(child_die)
            if typ_die is None:
                continue
            attrs = child_die.attributes
            is_bitfield = 'DW_AT_data_bit_offset' in attrs

            # Prefer the offset recorded in the debug info: summing member
            # sizes ignores the padding the compiler inserts for alignment.
            member_offset = running_offset
            explicit = (attrs['DW_AT_data_member_location'].value
                        if 'DW_AT_data_member_location' in attrs else None)
            if isinstance(explicit, int):
                member_offset = explicit
            elif is_bitfield:
                # DW_AT_data_bit_offset counts bits from the start of the
                # structure, so it is not added to the running offset.
                member_offset = attrs['DW_AT_data_bit_offset'].value // 8

            child_entry = {'name': safe_DIE_name(child_die, '?')}
            if 'address' in entry:
                child_entry['address'] = hex(int(entry['address'], 16) + member_offset)
            self.truly_resolve_type(child_entry, typ_die)

            running_offset = member_offset
            if not is_bitfield and 'size_byte' in child_entry:
                # Bit-fields share storage, so they do not advance the offset.
                running_offset += (child_entry['size_byte']
                                   * child_entry.get('number_of_elements', 1))
            members.append(child_entry)
        return members

    def _resolve_union(self, entry, union_die):
        """Return the member records of a union; all share ``entry``'s address."""
        load_children(union_die)
        members = []
        for child_die in union_die._children:
            typ_die = self._member_type_die(child_die)
            if typ_die is None:
                continue
            child_entry = {'name': safe_DIE_name(child_die, '?')}
            if 'address' in entry:
                child_entry['address'] = entry['address']
            self.truly_resolve_type(child_entry, typ_die)
            members.append(child_entry)
        return members

    @staticmethod
    def _array_length(array_die):
        """Return the total element count of an array DIE, or None if unknown.

        The counts of all ``DW_TAG_subrange_type`` children are multiplied, so
        a multi-dimensional array is reported with one flat element index.
        """
        load_children(array_die)
        total = None
        for dimension in array_die._children:
            if dimension.tag != 'DW_TAG_subrange_type':
                continue
            attrs = dimension.attributes
            if 'DW_AT_count' in attrs:
                length = attrs['DW_AT_count'].value
            elif 'DW_AT_upper_bound' in attrs:
                # NOTE(review): assumes a lower bound of 0 (C-style arrays).
                length = attrs['DW_AT_upper_bound'].value
                if isinstance(length, int):
                    length += 1
            else:
                return None  # e.g. flexible array member: length not recorded
            if not isinstance(length, int):
                return None  # bound given as an expression or reference
            total = length if total is None else total * length
        return total

    def flatten_type(self, parent=None):
        """Expand every record of ``myVariables`` into ``flat_list``.

        With ``configuration['include_file_name']`` set, names are prefixed
        with the compile unit name and ':'.  ``parent`` is accepted but not
        used.
        """
        for CU in self.myVariables:
            for child in CU['children']:
                if configuration["include_file_name"]:
                    self.pettanko(child, CU['name'], ":")
                else:
                    self.pettanko(child)

    def pettanko(self, entry : dict, a_parent_name : str = '', separator : str = '.',
                 address_offset : int = 0):
        """Append ``entry``, its members and its array elements to ``flat_list``.

        Args:
            entry: record as produced by ``truly_resolve_type``.
            a_parent_name: when non-empty, prepended to the name with
                ``separator`` in between.
            separator: text placed between the parent name and the entry name.
            address_offset: bytes added to every address emitted for ``entry``
                and its members; used to move the members of array element
                ``i`` away from element 0, whose addresses the records carry.
        """
        flat_entry = dict()
        if a_parent_name:
            flat_entry['name'] = '{parent}{separator}{child}'.format(parent=a_parent_name,
                                                                     separator=separator,
                                                                     child=entry['name'])
        else:
            flat_entry['name'] = entry['name']
        if 'address' in entry:
            if address_offset:
                flat_entry['address'] = hex(int(entry['address'], 16) + address_offset)
            else:
                flat_entry['address'] = entry['address']
        if 'size_byte' in entry:
            flat_entry['size_byte'] = entry['size_byte']
            if 'number_of_elements' in entry:
                flat_entry['size_byte'] = flat_entry['size_byte'] * entry['number_of_elements']
        if 'type' in entry:
            flat_entry['type'] = entry['type']
        self.flat_list.append(flat_entry)

        members = entry.get('children', ())
        for kid in members:
            self.pettanko(kid, flat_entry['name'], address_offset=address_offset)

        if 'number_of_elements' not in entry:
            return
        if 'address' not in flat_entry or 'size_byte' not in entry:
            # Element addresses cannot be computed without a base and a stride.
            return
        base_address = int(flat_entry['address'], 16)
        element_size = entry['size_byte']
        for index in range(entry['number_of_elements']):
            element_offset = element_size * index
            element = dict(flat_entry)
            element['name'] = '{name}[{index}]'.format(name=flat_entry['name'], index=index)
            element['address'] = hex(base_address + element_offset)
            element['size_byte'] = element_size
            self.flat_list.append(element)
            for kid in members:
                # Member records hold element 0's addresses; shift them.
                self.pettanko(kid, element['name'],
                              address_offset=address_offset + element_offset)

    def pretty_print(self):
        """Print ``flat_list`` as ``<address><TAB><name>`` lines.

        Entries that have neither an address nor a real name ('?') are skipped.
        """
        for entry in self.flat_list:
            address = entry.get('address', '')
            if not address and entry['name'] == '?':
                continue
            print('{address}\t{variable_name}'.format(address=address,
                                                      variable_name=entry['name']))

    def parse_location(self, die, attr):
        """Parse location attribute ``attr`` of ``die``.

        The ``LocationParser`` is created on first use and cached on the
        DIE's ``dwarfinfo`` object.
        """
        di = die.dwarfinfo
        if di._locparser is None:
            di._locparser = LocationParser(di.location_lists())
        return di._locparser.parse_from_attribute(attr, die.cu['version'], die=die)

    def dump_expr(self, die, expr):
        """Parse the expression blob ``expr`` with the parser cached on ``die.cu``.

        Returns whatever the parser's ``parse_expr`` returns; this class reads
        ``.args`` from the first element of that result.  ``DWARFExprParserV1``
        is used for CUs whose version is 1, ``DWARFExprParser`` otherwise.
        """
        if die.cu._exprparser is None:
            die.cu._exprparser = DWARFExprParser(die.cu.structs) if die.cu['version'] > 1 else DWARFExprParserV1(die.cu.structs)
        return die.cu._exprparser.parse_expr(expr)

    def resolve_arch(self, arches):
        """Callback handed to ``formats.read_dwarf``; always declines.

        NOTE(review): presumably invoked to pick one architecture out of
        ``arches`` for multi-architecture files - confirm against dwex.
        """
        print("resolve_arch: Unsupported feature")
        return None
def main():
    """Command-line entry point: list the variables found in an ELF file.

    Parses the command line, stores the two flags in ``configuration``, then
    builds a ``Bear`` for the file, flattens it and prints the result.
    Exits with status 1 when no file is given or the file does not exist.
    """
    # NOTE(review): presumably patches the DWARF library in place before it
    # is used - confirm against dwex.patch.
    from dwex.patch import monkeypatch
    monkeypatch()

    parser = argparse.ArgumentParser(
        description='Expands symbols (global variables) types.',
        epilog='Data displayed by this script are informative only!'
    )
    parser.add_argument('elf_file', metavar='file', type=str, nargs='?',
                        help='ELF file to try to extract symbols')
    parser.add_argument('--include-file-name', dest='include_file_name', action='store_true',
                        help='Display filename at the beginning')
    parser.add_argument('--print-debug-info', dest='print_debug_info', action='store_true',
                        help='Print debug infor when parsing DWARF')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {version}'.format(version=SCRIPT_VERSION))
    args = parser.parse_args()

    configuration['include_file_name'] = args.include_file_name
    configuration['print_debug_info'] = args.print_debug_info

    if not args.elf_file:
        parser.print_help()
        sys.exit(1)
    if not os.path.exists(args.elf_file):
        # Printed unconditionally: eprint() is silent unless debug output is
        # enabled, which made the script exit 1 without any explanation.
        print('File {elf_file} does not exist!'.format(elf_file=args.elf_file),
              file=sys.stderr)
        sys.exit(1)

    bear = Bear(args.elf_file)
    bear.flatten_type()
    bear.pretty_print()


if __name__ == "__main__":
    main()