elf_symbols/main.py
Juraj Oravec 3c6b845948
WIP: Add a lot of code
Signed-off-by: Juraj Oravec <jurajoravec@mailo.com>
2024-04-14 11:44:05 +02:00

378 lines
14 KiB
Python

#!/bin/python
import sys
from bear import formats
from elftools.dwarf.locationlists import LocationParser, LocationExpr
from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp, DW_OP_opcode2name
from bear.dwarfone import DWARFExprParserV1
from pprint import pprint
# Module-wide switches read by Bear.flatten_type().
configuration = dict(
    # When true, flattened variable names are prefixed with the name of the
    # compile unit they were found in.
    include_file_name=False,
)
def eprint(*args, **kwargs):
    """Behave like print(), but write to standard error.

    All positional and keyword arguments are forwarded to print() unchanged.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
class DWARFParseError(Exception):
    """Signals the "opened, could not parse" case.

    The message is built from the original exception; the DWARF info object
    that was being processed is kept in ``dwarfinfo``.
    """

    def __init__(self, exc, di):
        super().__init__("DWARF parsing error: {}".format(exc))
        self.dwarfinfo = di
def decorate_die(die, i):
    """Attach bookkeeping attributes to a DIE and hand the same object back.

    ``_i`` records the index passed by the caller; ``_children`` starts as
    None, which load_children() treats as "not loaded yet".
    """
    die._i, die._children = i, None
    return die
def load_children(parent_die):
    """Populate ``parent_die._children`` once and reuse it afterwards.

    Nothing happens when a child list is already cached. Otherwise every
    DIE yielded by ``parent_die.iter_children()`` is decorated with its index
    and stored. If reading the children raises KeyError, a notice is printed
    and an empty list is cached instead.

    Assumes the caller has already checked that the DIE has children.
    """
    if getattr(parent_die, "_children", None) is not None:
        return

    # TODO: wait cursor here. It may cause disk I/O
    try:
        loaded = []
        for index, child in enumerate(parent_die.iter_children()):
            loaded.append(decorate_die(child, index))
    except KeyError:
        # Catching #1516
        print("This executable file is corrupt or incompatible with the current version of Bear.")
        loaded = []
    parent_die._children = loaded
def safe_DIE_name(die, default = ''):
    """Return the DIE's DW_AT_name as text, or ``default`` when it has none.

    The raw value is decoded as UTF-8; undecodable bytes are dropped.
    """
    if 'DW_AT_name' not in die.attributes:
        return default
    raw_name = die.attributes['DW_AT_name'].value
    return raw_name.decode('utf-8', errors='ignore')
class Bear():
    """Collect the variables described by the DWARF info of an executable.

    Construction reads the DWARF data of ``filename`` and fills
    ``myVariables`` with one dictionary per compile unit:

        name:     name of the compile unit's top DIE ('?' when unnamed)
        children: one dictionary per DW_TAG_variable directly below it

    Fields of a variable / type dictionary:

        name:     DIE name ('?' when unnamed)
        address:  hex string taken from the variable's location expression
        type:     for a variable, the dictionary built by resolve_type();
                  inside such a dictionary, the base type's name
        offset:   offset inside the parent, in bits
        size_bit: size in bits
        children: member / element dictionaries of a struct, union or array
    """

    # Tags at which following DW_AT_type stops because the DIE can be
    # described directly.
    _RESOLVED_TYPE_TAGS = (
        'DW_TAG_base_type',
        'DW_TAG_structure_type',
        'DW_TAG_array_type',
        'DW_TAG_union_type',
    )

    # Pointer-like tags. Following DW_AT_type through one of these would
    # describe the pointee instead of the pointer, and a struct that holds a
    # pointer to its own type would be expanded without end.
    _POINTER_TYPE_TAGS = (
        'DW_TAG_pointer_type',
        'DW_TAG_reference_type',
        'DW_TAG_rvalue_reference_type',
        'DW_TAG_ptr_to_member_type',
    )

    def __init__(self, filename):
        """Read the DWARF info of ``filename`` and collect its variables.

        Exits the process with status 1 when no DWARF info could be read.
        Raises DWARFParseError when enumerating the compile units trips an
        assertion. When the file has debug sections but no compile units,
        the instance is left with an empty ``myVariables``.
        """
        # Set first so that every exit path leaves a usable instance.
        self.filename = filename
        self.dwarfinfo = None
        self.myVariables = []
        self.top_dies = []

        di = formats.read_dwarf(filename, self.resolve_arch)
        if not di:  # Covers both False and None
            print("Something went wrong")
            sys.exit(1)

        def decorate_cu(cu, i):
            cu._i = i
            cu._lineprogram = None
            cu._exprparser = None  # Created on first use by dump_expr()
            return cu

        # Some degree of graceful handling of wrong format
        try:
            # Some cached top level stuff
            # Notably, iter_CUs doesn't cache
            di._ranges = None  # Loaded on first use
            # We'll need them first thing, might as well load here
            di._unsorted_CUs = [decorate_cu(cu, i) for (i, cu) in enumerate(di.iter_CUs())]
            if not di._unsorted_CUs:
                return  # Weird, but saw it once - debug sections present, but no CUs
            # For quick CU search by offset within the info section, regardless of sorting
            di._CU_offsets = [cu.cu_offset for cu in di._unsorted_CUs]
            di._CUs = list(di._unsorted_CUs)
            di._locparser = None  # Created on first use
            self.dwarfinfo = di
        except AssertionError as ass:  # Covers exceptions during parsing
            raise DWARFParseError(ass, di) from ass

        # Top DIEs only contain compile units
        self.top_dies = [decorate_die(cu.get_top_DIE(), i) for (i, cu) in enumerate(di._CUs)]
        for top_die in self.top_dies:
            load_children(top_die)
            self.myVariables.append({
                'name': safe_DIE_name(top_die, '?'),
                'children': [
                    self._describe_variable(child_die)
                    for child_die in top_die._children
                    if child_die.tag == 'DW_TAG_variable'
                ],
            })
        pprint(self.myVariables)

    def _describe_variable(self, var_die):
        """Build the dictionary for one DW_TAG_variable DIE."""
        entry = {
            # Name should be on every element, if not set something so it can be printed
            'name': safe_DIE_name(var_die, '?')
        }
        address = self._static_address(var_die)
        if address is not None:
            entry['address'] = address
        if 'DW_AT_type' in var_die.attributes:
            entry['type'] = self.resolve_type(var_die.get_DIE_from_attribute('DW_AT_type'))
        return entry

    def _static_address(self, var_die):
        """Return the first operand of the variable's location expression as a hex string.

        Returns None when the variable has no usable location: no
        DW_AT_location, a location list instead of a single expression, an
        empty expression, or a first operation without an integer operand.
        """
        if 'DW_AT_location' not in var_die.attributes:
            return None
        attr = var_die.attributes['DW_AT_location']
        if not LocationParser.attribute_has_location(attr, var_die.cu['version']):
            return None
        location = self.parse_location(var_die, attr)
        if not isinstance(location, LocationExpr):
            return None
        ops = self.dump_expr(var_die, location.loc_expr)
        if not ops or not ops[0].args:
            return None
        operand = ops[0].args[0]
        if not isinstance(operand, int):
            return None
        return hex(operand)

    def resolve_type(self, die_type):
        """Describe a type (or struct/union member) DIE as a dictionary.

        The entry's name comes from ``die_type`` itself (after stepping over
        a leading volatile qualifier). DW_AT_type is then followed until a
        base, struct, union, array or pointer-like type is reached, and that
        DIE decides the rest of the entry: 'type' for a base type,
        'children' for aggregates, and 'size_bit' when a byte size is
        present. Anything else is reported on stderr as unsupported.
        """
        if die_type.tag == 'DW_TAG_volatile_type' and 'DW_AT_type' in die_type.attributes:
            die_type = die_type.get_DIE_from_attribute('DW_AT_type')

        entry = {
            # Name should be on every element, if not set something so it can be printed
            'name': safe_DIE_name(die_type, '?')
        }

        if 'DW_AT_data_member_location' in die_type.attributes:
            member_location = die_type.attributes['DW_AT_data_member_location'].value
            # Only a plain constant can be scaled to bits; a block-form
            # location expression is left out rather than multiplied.
            if isinstance(member_location, int):
                entry['offset'] = member_location * 8

        # Check if the type is a redefinition of a directly describable type
        stop_tags = self._RESOLVED_TYPE_TAGS + self._POINTER_TYPE_TAGS
        cursor = die_type
        while cursor.tag not in stop_tags and 'DW_AT_type' in cursor.attributes:
            cursor = cursor.get_DIE_from_attribute('DW_AT_type')
        if cursor.tag in stop_tags:
            die_type = cursor

        tag = die_type.tag
        if tag == 'DW_TAG_base_type':
            entry['type'] = safe_DIE_name(die_type, '?')
        elif tag in ('DW_TAG_structure_type', 'DW_TAG_union_type'):
            load_children(die_type)
            entry['children'] = [self.resolve_type(member) for member in die_type._children]
        elif tag == 'DW_TAG_array_type':
            entry['children'] = self._expand_array(die_type)
        else:
            # Pointer-like types end up here as well.
            eprint("Unsupported type:", tag)

        if 'DW_AT_byte_size' in die_type.attributes:
            entry['size_bit'] = die_type.attributes['DW_AT_byte_size'].value * 8

        return entry

    def _expand_array(self, array_die):
        """Return one copy of the element description per array element.

        Each copy gets an 'offset' of index * element size (in bits) when the
        element size is known. Only the first dimension is expanded.
        """
        element = self.resolve_type(array_die.get_DIE_from_attribute('DW_AT_type'))
        load_children(array_die)
        stride = element.get('size_bit')

        elements = []
        for index in range(self._array_length(array_die)):
            # Shallow copy: nested 'children' lists are shared between elements.
            item = element.copy()
            if stride is not None:
                item['offset'] = stride * index
            elements.append(item)
        return elements

    @staticmethod
    def _array_length(array_die):
        """Return the element count given by the array DIE's first child.

        Uses DW_AT_upper_bound + 1, or DW_AT_count when there is no upper
        bound. Returns 0 when there is no child or no integer bound.
        """
        if not array_die._children:
            return 0
        bounds = array_die._children[0].attributes
        if 'DW_AT_upper_bound' in bounds:
            upper_bound = bounds['DW_AT_upper_bound'].value
            length = upper_bound + 1 if isinstance(upper_bound, int) else 0
        elif 'DW_AT_count' in bounds:
            count = bounds['DW_AT_count'].value
            length = count if isinstance(count, int) else 0
        else:
            length = 0
        return max(length, 0)

    def flatten_type(self, parent=None):
        """Flatten every collected variable, compile unit by compile unit.

        The returned list holds, for each compile unit, its name followed by
        one flatten_child() result (a list of dictionaries) per variable.
        Variable names are prefixed with the compile unit name when
        ``configuration["include_file_name"]`` is set.

        ``parent`` is accepted but not used.
        """
        flattened = []

        # Iterate over CUs
        #   - name - filename
        #   - children - variables
        for cu in self.myVariables:
            flattened.append(cu['name'])
            prefix = cu['name'] if configuration["include_file_name"] else ''
            for variable in cu['children']:
                flattened.append(self.flatten_child(variable, prefix))

        return flattened

    def flatten_child(self, child, name='', address=0):
        """Unroll one variable or member description into its leaves.

        Returns a list of ``{'name': ..., 'address': ...}`` dictionaries, one
        per leaf. Names are joined with '.' starting from ``name``. A truthy
        ``address`` is used as is; otherwise the entry's own 'address' is
        used, or None when it has none.

        NOTE(review): members inherit the address handed down from their
        parent; the 'offset' recorded by resolve_type() is not applied yet.
        """
        if name:
            full_name = '{parent}.{child}'.format(parent=name, child=child['name'])
        else:
            full_name = child['name']
        var = {
            'name': full_name,
            'address': address if address else child.get('address'),
        }

        # A variable keeps its members below 'type'; members and array
        # elements keep theirs directly in 'children'.
        members = child.get('children')
        type_info = child.get('type')
        if members is None and isinstance(type_info, dict):
            members = type_info.get('children')

        if not members:
            return [var]

        kids = []
        for kid in members:
            kids.extend(self.flatten_child(kid, var['name'], var['address']))
        return kids

    def pretty_print(self):
        """Visit every collected variable and return the compile unit names.

        Each variable is handed to pretty_child(), which does nothing yet.
        """
        names = []

        # Iterate over CUs
        #   - name - filename
        #   - children - variables
        for cu in self.myVariables:
            names.append(cu['name'])
            for child in cu['children']:
                self.pretty_child(child)

        return names

    def pretty_child(self, child, prefix='', address=0):
        """Placeholder for printing one variable; currently does nothing."""
        # TODO: not implemented yet.
        return None

    def print_top_DIE(self, die):
        """Print the name of a named variable DIE, followed by its type's name."""
        if die.tag != 'DW_TAG_variable':
            return
        name = safe_DIE_name(die)
        if not name:
            return

        typ_name = ''
        if 'DW_AT_type' in die.attributes:
            typ = die.get_DIE_from_attribute('DW_AT_type')
            typ_name = safe_DIE_name(typ)

        print('{name} {typ_name}'.format(name=name, typ_name=typ_name))

    def print_DIE(self, die, prefix=''):
        """Debugging aid: print a DIE and, recursively, the type it refers to.

        ``prefix`` carries the name accumulated so far. Type DIEs are only
        handled when a prefix is present.
        """
        name = ''

        if die.tag == 'DW_TAG_variable':
            name = safe_DIE_name(die)
            if name and 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                if 'DW_AT_location' in die.attributes:
                    # The parsed location is not used yet.
                    self.parse_location(die, die.attributes['DW_AT_location'])
                self.print_DIE(typ, name)
                return
        elif die.tag == 'DW_TAG_compile_unit':
            name = safe_DIE_name(die, '.')
        elif prefix and die.tag == 'DW_TAG_base_type':
            name = safe_DIE_name(die)
        elif prefix and die.tag == 'DW_TAG_const_type':
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                self.print_DIE(typ, prefix)
                return
        elif prefix and die.tag == 'DW_TAG_array_type':
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                name = prefix + '[]'
                self.print_DIE(typ, name)
                load_children(die)
                if die._children:
                    for child in die._children:
                        print(child)
                        self.print_DIE(child, name)
                return
        elif prefix and die.tag == 'DW_TAG_volatile_type':
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                self.print_DIE(typ, prefix)
                return
        elif prefix and die.tag == 'DW_TAG_typedef':
            print(die.attributes["DW_AT_name"].value)
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                name = prefix + '[]'
                self.print_DIE(typ, name)
                load_children(die)
                if die._children:
                    for child in die._children:
                        print(child)
                        self.print_DIE(child, name)
        elif prefix and die.tag == 'DW_TAG_enumeration_type':
            print ("mylittlepony")
            print(die)
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                name = prefix
                self.print_DIE(typ, name)
                load_children(die)
                # Kept inside the check so ``typ`` is always bound here.
                print (typ)
        elif prefix:
            print (prefix)
            print(die)

        if name:
            if prefix:
                print (prefix, name)
            else:
                print (name)

    def parse_location(self, die, attr):
        """Parse a location attribute of ``die``.

        The LocationParser is created on first use and cached on the DWARF
        info object. Returns whatever ``parse_from_attribute`` returns.
        """
        di = die.dwarfinfo
        if di._locparser is None:
            di._locparser = LocationParser(di.location_lists())
        return di._locparser.parse_from_attribute(attr, die.cu['version'], die = die)

    def dump_expr(self, die, expr):
        """Parse the expression blob ``expr`` with the compile unit's parser.

        The parser is created on first use and cached on the compile unit:
        DWARFExprParserV1 for DWARF version 1, DWARFExprParser otherwise.
        Returns the parser's result; __init__ indexes it and reads ``.args``
        of the first operation.
        """
        if die.cu._exprparser is None:
            die.cu._exprparser = DWARFExprParser(die.cu.structs) if die.cu['version'] > 1 else DWARFExprParserV1(die.cu.structs)

        # Challenge: for nested expressions, args is a list with a list of commands
        # For those, the format is: op {op arg, arg; op arg, arg}
        # Can't just check for iterable, str is iterable too
        return die.cu._exprparser.parse_expr(expr)

    def resolve_arch(self, arches):
        """Callback handed to formats.read_dwarf(); choosing an architecture is unsupported."""
        print("resolve_arch: Unsupported feature")
        return None
def main():
    """Load an ELF file, flatten its variables and pretty-print the result.

    The file to inspect is taken from the first command-line argument; when
    none is given, the previously hard-coded playground binary is used.
    """
    from bear.patch import monkeypatch
    monkeypatch()

    default_path = "/home/juraj/projects/Playground_C/build/playground_c"
    elf_path = sys.argv[1] if len(sys.argv) > 1 else default_path

    bear = Bear(elf_path)
    flattened = bear.flatten_type()
    pprint(flattened)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()