2024-04-14 11:44:05 +02:00
#!/bin/python
2024-05-11 14:40:38 +02:00
import os
2024-04-14 11:44:05 +02:00
import sys
2024-05-10 23:34:00 +02:00
import math
2024-05-11 14:40:38 +02:00
import argparse
2024-05-11 18:42:36 +02:00
from dwex import formats
2024-04-14 11:44:05 +02:00
from elftools . dwarf . locationlists import LocationParser , LocationExpr
from elftools . dwarf . dwarf_expr import DWARFExprParser , DWARFExprOp , DW_OP_opcode2name
2024-05-11 18:42:36 +02:00
from dwex . dwarfone import DWARFExprParserV1
2024-04-14 11:44:05 +02:00
2024-05-11 14:40:38 +02:00
# Version string substituted into the text shown by the ``--version`` option.
SCRIPT_VERSION = ' 0.1.0 '

# Run-time options of the script; main() overwrites the value below with the
# parsed command-line flag, and Bear.flatten_type() reads it.
configuration = {' include_file_name ': False}
def eprint(*args, **kwargs):
    """Print to standard error.

    Takes the same positional and keyword arguments as the built-in
    ``print``; ``file`` is supplied here, so callers must not pass it.
    """
    error_stream = sys.stderr
    print(*args, file=error_stream, **kwargs)
class DWARFParseError(Exception):
    """The file was opened, but its DWARF data could not be parsed.

    Attributes:
        dwarfinfo: the DWARF info object handed in by the code that raised.
    """

    def __init__(self, exc, di):
        message = " DWARF parsing error: " + format(exc)
        super().__init__(message)
        self.dwarfinfo = di
# Some additional data for every DIE
def decorate_die(die, i):
    """Attach bookkeeping attributes to *die* and return the same object.

    ``_i`` records the index passed by the caller; ``_children`` starts as
    None and is filled on demand by load_children().
    """
    die._children = None
    die._i = i
    return die
2024-05-10 23:34:00 +02:00
2024-04-14 11:44:05 +02:00
def load_children(parent_die):
    """Load and cache the child DIEs of *parent_die*, if not done already.

    The children are stored, decorated with their index, in
    ``parent_die._children``. Assumes the caller has already checked that the
    DIE has children. Returns None; the result is the side effect on
    *parent_die*.

    A KeyError raised while iterating the children is reported on stdout and
    the DIE is treated as having no children.
    """
    # The attribute is looked up by its real name so that a previously filled
    # cache is honoured; a DIE that was never decorated counts as "not loaded".
    if getattr(parent_die, '_children', None) is not None:
        return

    # TODO: wait cursor here. It may cause disk I/O
    try:
        parent_die._children = [
            decorate_die(die, i)
            for i, die in enumerate(parent_die.iter_children())
        ]
    except KeyError:
        # Catching #1516 (issue reference kept from the original code):
        # the file is corrupt or uses something the parser cannot handle.
        print(" This executable file is corrupt or incompatible. ")
        parent_die._children = []
2024-05-10 23:34:00 +02:00
def safe_DIE_name(die, default=' '):
    """Return the DW_AT_name of *die* as text, or *default* if it has none.

    Byte values are decoded as UTF-8 and undecodable bytes are dropped, so a
    malformed name never raises. A value that is not ``bytes`` is converted
    with ``str``.
    """
    if 'DW_AT_name' not in die.attributes:
        return default
    raw_name = die.attributes['DW_AT_name'].value
    if isinstance(raw_name, bytes):
        return raw_name.decode('utf-8', errors='ignore')
    return str(raw_name)
class Bear():
    """Collect the global variables described by a file's DWARF data.

    Construction reads the DWARF info through ``formats.read_dwarf``, visits
    the ``DW_TAG_variable`` DIEs directly below every compile unit and stores
    one description dict per variable in ``myVariables`` (grouped per compile
    unit). ``flatten_type`` turns that tree into ``flat_list`` and
    ``pretty_print`` writes the flat list to stdout.

    Description dicts use the following keys. The key strings are kept
    exactly as the existing code spells them, surrounding spaces included:
      ' name '                variable / member name
      ' type '                textual description of the type
      ' size_byte '           DW_AT_byte_size of the resolved type
      ' address '             address, as a hex string
      ' offset '              set only if the type DIE itself carries
                              DW_AT_data_member_location
      ' number_of_elements '  element count of an array
      ' children '            list of description dicts of the members
    """

    # Tags at which the walk through typedef/qualifier DIEs stops.
    # Pointers are included so that a pointer is never replaced by its pointee.
    _TERMINAL_TAGS = (
        'DW_TAG_base_type',
        'DW_TAG_structure_type',
        'DW_TAG_array_type',
        'DW_TAG_union_type',
        'DW_TAG_pointer_type',
    )

    def __init__(self, filename):
        """Read *filename* and collect its top-level variables.

        Exits the process with status 1 when ``formats.read_dwarf`` returns a
        false value.

        Raises:
            DWARFParseError: an AssertionError occurred while the compile
                units were being enumerated.
        """
        # Result containers exist before anything can bail out, so that
        # flatten_type() / pretty_print() also work when no CU is found.
        self.filename = filename
        self.myVariables = []
        self.flat_list = []
        self.top_dies = []

        di = formats.read_dwarf(filename, self.resolve_arch)
        if not di:  # Covers both False and None
            print(" Something went wrong ")
            sys.exit(1)
        self.dwarfinfo = di

        def decorate_cu(cu, i):
            cu._i = i
            cu._lineprogram = None
            cu._exprparser = None  # Created on first use by dump_expr()
            return cu

        # Some degree of graceful handling of wrong format
        try:
            # Some cached top level stuff
            # Notably, iter_CUs doesn't cache
            di._ranges = None  # Loaded on first use
            # We'll need them first thing, might as well load here
            di._unsorted_CUs = [
                decorate_cu(cu, i) for i, cu in enumerate(di.iter_CUs())
            ]
        except AssertionError as ass:  # Covers exceptions during parsing
            raise DWARFParseError(ass, di) from ass

        if not di._unsorted_CUs:
            # Weird, but saw it once - debug sections present, but no CUs
            return

        # For quick CU search by offset within the info section, regardless of sorting
        di._CU_offsets = [cu.cu_offset for cu in di._unsorted_CUs]
        di._CUs = list(di._unsorted_CUs)
        di._locparser = None  # Created on first use

        # top dies only contain Compile Units
        self.top_dies = [
            decorate_die(cu.get_top_DIE(), i) for i, cu in enumerate(di._CUs)
        ]
        for top_die in self.top_dies:
            load_children(top_die)  # Preload children
            self.myVariables.append({
                ' name ': safe_DIE_name(top_die, ' ? '),
                ' children ': [
                    self._describe_variable(child_die)
                    for child_die in top_die._children
                    if child_die.tag == 'DW_TAG_variable'
                ],
            })

    def _describe_variable(self, var_die):
        """Build the description dict of one DW_TAG_variable DIE."""
        # Name should be on every element, if not set something so it can be printed
        entry = {' name ': safe_DIE_name(var_die, ' ? ')}
        address = self._static_address(var_die)
        if address is not None:
            entry[' address '] = hex(address)
        if 'DW_AT_type' in var_die.attributes:
            self.truly_resolve_type(
                entry, var_die.get_DIE_from_attribute('DW_AT_type'))
        return entry

    def _static_address(self, var_die):
        """Return the first argument of the first location op, or None.

        None is returned when the DIE has no usable DW_AT_location, when the
        location is a location list rather than a single expression, or when
        the expression has no integer argument to report.
        """
        if 'DW_AT_location' not in var_die.attributes:
            return None
        attr = var_die.attributes['DW_AT_location']
        if not LocationParser.attribute_has_location(attr, var_die.cu['version']):
            return None
        location = self.parse_location(var_die, attr)
        if not isinstance(location, LocationExpr):
            # A location list has no single expression to evaluate.
            return None
        ops = self.dump_expr(var_die, location.loc_expr)
        if not ops or not ops[0].args:
            return None
        first_arg = ops[0].args[0]
        return first_arg if isinstance(first_arg, int) else None

    @staticmethod
    def _member_byte_offset(member_die):
        """Return the byte offset the DIE states for a member, or None.

        DW_AT_data_bit_offset counts bits from the start of the containing
        structure, so the byte holding the first bit is ``bits // 8``.
        DW_AT_data_member_location is used when it is a plain integer; other
        encodings (NOTE(review): presumably DWARF 2 location expressions)
        yield None and the caller falls back to its running offset.
        """
        attrs = member_die.attributes
        if 'DW_AT_data_bit_offset' in attrs:
            bits = attrs['DW_AT_data_bit_offset'].value
            if isinstance(bits, int):
                return bits // 8
        if 'DW_AT_data_member_location' in attrs:
            location = attrs['DW_AT_data_member_location'].value
            if isinstance(location, int):
                return location
        return None

    @staticmethod
    def _array_element_count(array_die):
        """Return the total element count of an array DIE, or None if unknown.

        Every DW_TAG_subrange_type child contributes one dimension, taken
        from DW_AT_count or from DW_AT_upper_bound + 1; the dimensions are
        multiplied. None is returned when there is no subrange child or a
        bound is missing or not a plain integer.
        """
        total = None
        for dimension in array_die._children:
            if dimension.tag != 'DW_TAG_subrange_type':
                continue
            attrs = dimension.attributes
            if 'DW_AT_count' in attrs:
                extent = attrs['DW_AT_count'].value
            elif 'DW_AT_upper_bound' in attrs:
                extent = attrs['DW_AT_upper_bound'].value
                if isinstance(extent, int):
                    extent += 1
            else:
                return None
            if not isinstance(extent, int) or extent < 0:
                return None
            total = extent if total is None else total * extent
        return total

    def truly_resolve_type(self, entry, die_type):
        """Fill *entry* with the type information found at *die_type*.

        *entry* is modified in place: ' type ' is always set, ' size_byte ',
        ' children ' and ' number_of_elements ' are added where they apply.
        Typedef/qualifier chains are followed until a base, structure, array,
        union or pointer type is reached. Structure and union members are
        resolved recursively; their addresses are derived from
        ``entry[' address ']`` when that is present. Tags that are not
        handled are reported on stderr.
        """
        if (die_type.tag == 'DW_TAG_volatile_type'
                and 'DW_AT_type' in die_type.attributes):
            die_type = die_type.get_DIE_from_attribute('DW_AT_type')

        entry[' type '] = safe_DIE_name(die_type, ' ? ')

        # NOTE(review): kept from the original code; this attribute is
        # normally found on member DIEs rather than on type DIEs - confirm
        # whether this branch is ever taken.
        if 'DW_AT_data_member_location' in die_type.attributes:
            member_location = die_type.attributes['DW_AT_data_member_location'].value
            entry[' offset '] = member_location * 8
            if ' address ' in entry:
                entry[' address '] = hex(int(entry[' address '], 16) + member_location)

        if ('DW_AT_type' in die_type.attributes
                and die_type.tag not in self._TERMINAL_TAGS):
            # Check if the type is a redefinition of a base type
            probe = die_type
            while 'DW_AT_type' in probe.attributes:
                probe = probe.get_DIE_from_attribute('DW_AT_type')
                if probe.tag in self._TERMINAL_TAGS:
                    die_type = probe
                    break

        if 'DW_AT_byte_size' in die_type.attributes:
            entry[' size_byte '] = die_type.attributes['DW_AT_byte_size'].value

        tag = die_type.tag
        if tag == 'DW_TAG_base_type':
            real_type_name = safe_DIE_name(die_type, ' ? ')
            if real_type_name != ' ? ' and real_type_name != entry[' type ']:
                entry[' type '] = ' {name} ( {real} ) '.format(
                    name=entry[' type '], real=real_type_name)
        elif tag == 'DW_TAG_structure_type':
            load_children(die_type)
            members = []
            running_offset = 0
            for member_die in die_type._children:
                if 'DW_AT_type' not in member_die.attributes:
                    continue
                member_entry = {' name ': safe_DIE_name(member_die, ' ? ')}
                # Prefer the offset stated by the DIE; it accounts for padding
                # and bit-fields. The running offset is only a fallback.
                stated_offset = self._member_byte_offset(member_die)
                if stated_offset is not None:
                    running_offset = stated_offset
                if ' address ' in entry:
                    member_entry[' address '] = hex(
                        int(entry[' address '], 16) + running_offset)
                self.truly_resolve_type(
                    member_entry, member_die.get_DIE_from_attribute('DW_AT_type'))
                if (' size_byte ' in member_entry
                        and 'DW_AT_data_bit_offset' not in member_die.attributes):
                    # ' size_byte ' of an array member is the element size.
                    running_offset += (member_entry[' size_byte ']
                                       * member_entry.get(' number_of_elements ', 1))
                members.append(member_entry)
            entry[' children '] = members
        elif tag == 'DW_TAG_array_type':
            # The element type is resolved into the same entry, so
            # ' size_byte ' ends up being the size of one element.
            self.truly_resolve_type(
                entry, die_type.get_DIE_from_attribute('DW_AT_type'))
            load_children(die_type)
            element_count = self._array_element_count(die_type)
            if element_count is not None:
                entry[' number_of_elements '] = element_count
        elif tag == 'DW_TAG_union_type':
            load_children(die_type)
            members = []
            for member_die in die_type._children:
                if 'DW_AT_type' not in member_die.attributes:
                    continue
                member_entry = {' name ': safe_DIE_name(member_die, ' ? ')}
                if ' address ' in entry:
                    # Every union member starts at the union's own address.
                    member_entry[' address '] = entry[' address ']
                self.truly_resolve_type(
                    member_entry, member_die.get_DIE_from_attribute('DW_AT_type'))
                members.append(member_entry)
            entry[' children '] = members
        elif tag == 'DW_TAG_pointer_type':
            # A pointer is a leaf: the pointee does not live at the pointer's
            # address, so it must not be expanded here.
            pass
        else:
            eprint(" Unsupported type: ", die_type.tag)

    def flatten_type(self, parent=None):
        """Flatten every collected variable into ``flat_list``.

        With the ' include_file_name ' option set, names are prefixed with
        the name of their compile unit. *parent* is accepted but not used.
        Entries are appended; ``flat_list`` is not cleared first.
        """
        with_file_name = configuration[" include_file_name "]
        for compile_unit in self.myVariables:
            for variable in compile_unit[' children ']:
                if with_file_name:
                    self.pettanko(variable, compile_unit[' name '], " : ")
                else:
                    self.pettanko(variable)

    def _shift_addresses(self, entry, delta):
        """Return a copy of *entry* with all addresses moved by *delta* bytes.

        The copy covers the entry and, recursively, its ' children '; the
        original dicts are left untouched.
        """
        shifted = dict(entry)
        if ' address ' in shifted:
            shifted[' address '] = hex(int(shifted[' address '], 16) + delta)
        if ' children ' in shifted:
            shifted[' children '] = [
                self._shift_addresses(child, delta)
                for child in shifted[' children ']
            ]
        return shifted

    def pettanko(self, entry: dict, a_parent_name: str = ' ', separator: str = ' . '):
        """Append *entry* and everything below it to ``flat_list``.

        Args:
            entry: description dict as produced by truly_resolve_type().
            a_parent_name: name to prefix; a blank (empty or whitespace-only)
                value means the entry has no parent.
            separator: text placed between parent and child name.

        For an array the entry itself is listed with its total size, then its
        members (if any) under the array name, then one entry per element
        followed by that element's members at the element's own addresses.
        Element addresses are only produced when both the array address and
        the element size are known.
        """
        flat_entry = dict()
        if a_parent_name and a_parent_name.strip():
            flat_entry[' name '] = ' {parent} {separator} {child} '.format(
                parent=a_parent_name, separator=separator, child=entry[' name '])
        else:
            flat_entry[' name '] = entry[' name ']

        if ' address ' in entry:
            flat_entry[' address '] = entry[' address ']

        if ' size_byte ' in entry:
            flat_entry[' size_byte '] = entry[' size_byte ']
            if ' number_of_elements ' in entry:
                flat_entry[' size_byte '] = (
                    flat_entry[' size_byte '] * entry[' number_of_elements '])

        if ' type ' in entry:
            flat_entry[' type '] = entry[' type ']

        self.flat_list.append(flat_entry)

        members = entry.get(' children ', [])
        for member in members:
            self.pettanko(member, flat_entry[' name '])

        if ' number_of_elements ' not in entry:
            return

        element_size = entry.get(' size_byte ')
        base_address = None
        if ' address ' in flat_entry and element_size is not None:
            base_address = int(flat_entry[' address '], 16)

        for index in range(0, entry[' number_of_elements ']):
            element = flat_entry.copy()
            element[' name '] = ' {name} [ {index} ] '.format(
                name=flat_entry[' name '], index=index)
            if element_size is not None:
                # One element, not the whole array.
                element[' size_byte '] = element_size
            if base_address is not None:
                element[' address '] = hex(base_address + element_size * index)
            else:
                element.pop(' address ', None)

            self.pettanko(element)
            element_name = self.flat_list[-1][' name ']
            for member in members:
                if base_address is not None:
                    # Member addresses were computed for element 0.
                    member = self._shift_addresses(member, element_size * index)
                self.pettanko(member, element_name)

    def pretty_print(self):
        """Print one "address <TAB> name" line per flat entry to stdout.

        Entries that have neither an address nor a real name (name equal to
        the ' ? ' placeholder) are skipped.
        """
        for entry in self.flat_list:
            has_address = ' address ' in entry
            if not has_address and entry[' name '] == ' ? ':
                continue
            address = entry[' address '] if has_address else ' '
            print(' {address} \t {variable_name} '.format(
                address=address, variable_name=entry[' name ']))

    def parse_location(self, die, attr):
        """Parse the location attribute *attr* of *die*.

        The LocationParser is created on first use and cached on the DWARF
        info object. Returns whatever ``parse_from_attribute`` returns.
        """
        di = die.dwarfinfo
        if di._locparser is None:
            di._locparser = LocationParser(di.location_lists())
        return di._locparser.parse_from_attribute(attr, die.cu['version'], die=die)

    def dump_expr(self, die, expr):
        """Parse the expression blob *expr* in the context of *die*'s CU.

        The expression parser is created on first use and cached on the CU;
        CUs with DWARF version 1 get DWARFExprParserV1. Returns the result of
        ``parse_expr`` unchanged.
        """
        cu = die.cu
        if cu._exprparser is None:
            if cu['version'] > 1:
                cu._exprparser = DWARFExprParser(cu.structs)
            else:
                cu._exprparser = DWARFExprParserV1(cu.structs)
        return cu._exprparser.parse_expr(expr)

    def resolve_arch(self, arches):
        """Callback handed to ``formats.read_dwarf``; always declines.

        Prints a notice and returns None. *arches* is not inspected.
        """
        print(" resolve_arch: Unsupported feature ")
        return None
def main():
    """Command-line entry point.

    Applies the dwex monkeypatch, parses the command line, stores the
    ``--include-file-name`` flag in ``configuration`` and prints the
    flattened global variables of the given ELF file. Exits with status 1
    when no file is given or the file does not exist.
    """
    from dwex.patch import monkeypatch
    monkeypatch()

    parser = argparse.ArgumentParser(
        description=' Expands symbols (global variables) types. ',
        epilog=' Data displayed by this script are informative only! ',
    )
    # Argument names, nargs and actions must be spelled exactly: argparse
    # derives the attribute name from them and rejects unknown values.
    parser.add_argument('elf_file', metavar=' file ', type=str, nargs='?',
                        help=' ELF file to try to extract symbols ')
    parser.add_argument('--include-file-name', dest='include_file_name',
                        action='store_true',
                        help=' Display filename at the beginning ')
    parser.add_argument('--version', action='version',
                        version=' %(prog)s {version} '.format(version=SCRIPT_VERSION))
    args = parser.parse_args()

    configuration[' include_file_name '] = args.include_file_name

    if not args.elf_file:
        parser.print_help()
        sys.exit(1)

    if not os.path.exists(args.elf_file):
        eprint(' File {elf_file} does not exist! '.format(elf_file=args.elf_file))
        sys.exit(1)

    bear = Bear(args.elf_file)
    bear.flatten_type()
    bear.pretty_print()
2024-04-14 11:44:05 +02:00
# Run the command-line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()