WIP: Add a lot of code

Signed-off-by: Juraj Oravec <jurajoravec@mailo.com>
This commit is contained in:
Juraj Oravec 2024-04-14 11:44:05 +02:00
parent 7449c692c9
commit 3c6b845948
Signed by: SGOrava
GPG Key ID: 13660A3F1D9F093B
5 changed files with 2798 additions and 4 deletions

289
bear/dwarfone.py Normal file
View File

@ -0,0 +1,289 @@
# Support for DWARF v1.1 in a way that will be more or less compatible with pyelftools
from io import BytesIO
from collections import OrderedDict, namedtuple
from bisect import bisect_left
from elftools.dwarf.dwarfinfo import DwarfConfig, DebugSectionDescriptor
from elftools.dwarf.die import AttributeValue
from elftools.dwarf.structs import DWARFStructs
from elftools.common.utils import struct_parse, bytelist2string
from elftools.dwarf.enums import ENUM_DW_TAG, ENUM_DW_AT, ENUM_DW_FORM
from elftools.construct import CString
from elftools.dwarf.lineprogram import LineProgramEntry, LineState
from elftools.dwarf.dwarf_expr import DWARFExprOp
# Minimal header records; callers index them the way they would the
# corresponding pyelftools objects.
LineTableHeader = namedtuple('LineTableHeader', 'version file_entry')
CUv1Header = namedtuple('CUv1Header', 'version unit_length debug_abbrev_offset address_size')

# Numeric code -> symbolic name, inverted from the pyelftools enum tables.
TAG_reverse = {code: name for name, code in ENUM_DW_TAG.items()}
ATTR_reverse = {code: name for name, code in ENUM_DW_AT.items()}
FORM_reverse = {code: name for name, code in ENUM_DW_FORM.items()}

# Location expression opcodes understood by DWARFExprParserV1.
# DW_OP_deref and DW_OP_deref4 share the value 0x06; in the inverted table
# below the later entry wins, so 0x06 is reported as DW_OP_deref4.
DW_OP_name2opcode = {
    'DW_OP_reg': 0x01,
    'DW_OP_basereg': 0x02,
    'DW_OP_addr': 0x03,
    'DW_OP_const': 0x04,
    'DW_OP_deref2': 0x05,
    'DW_OP_deref': 0x06,
    'DW_OP_deref4': 0x06,
    'DW_OP_add': 0x07,
    'DW_OP_user_0x80': 0x80,  # Extension op, not sure what's the deal with that
}
DW_OP_opcode2name = {opcode: name for name, opcode in DW_OP_name2opcode.items()}
class DIEV1(object):
    """One debugging information entry parsed from a DWARF v1 stream.

    The constructor reads the entry that starts at the stream's current
    position: a 32-bit size, then (when the size leaves room for it) a 16-bit
    tag followed by attribute records until the entry's size is used up.
    Attribute and method names mirror the pyelftools DIE object.
    """

    def __init__(self, stm, cu, di):
        """Parse the entry at the current position of `stm`.

        stm -- seekable binary stream positioned at the start of the entry
        cu  -- owning compile unit object (may be None)
        di  -- DWARF info object; its `structs` supplies the field parsers

        Raises ValueError for an unknown tag or attribute code.
        """
        self.cu = cu
        self.dwarfinfo = di
        self.stream = stm
        self.offset = stm.tell()
        self.attributes = OrderedDict()
        self.tag = None
        self.has_children = None
        self.abbrev_code = None
        self.size = 0
        # Null DIE terminator. It can be used to obtain offset range occupied
        # by this DIE including its whole subtree.
        self._terminator = None
        self._parent = None

        structs = self.dwarfinfo.structs
        self.size = struct_parse(structs.Dwarf_uint32(''), stm)

        # Entries shorter than 8 bytes carry no tag and are treated as padding
        if self.size < 8:
            self.tag = 'DW_TAG_padding'
            self.has_children = False
            return

        tag_code = struct_parse(structs.Dwarf_uint16(''), stm)
        if tag_code not in TAG_reverse:
            raise ValueError("%d not a known tag" % (tag_code))
        self.tag = TAG_reverse[tag_code]

        if self.tag == 'DW_TAG_null':  # TAG_padding in DWARF1 spec
            # No attributes, just advance the stream past the rest of the entry
            # (4 bytes of size and 2 bytes of tag are already consumed)
            stm.seek(self.size-6, 1)
            self.has_children = False
            return

        end_of_die = self.offset + self.size
        while stm.tell() < end_of_die:
            attr_offset = self.stream.tell()
            packed = struct_parse(structs.Dwarf_uint16(''), stm)
            # Low 4 bits select the form, the remaining bits the attribute
            form = FORM_reverse[packed & 0xf]
            attr = packed >> 4
            if attr in ATTR_reverse:
                name = ATTR_reverse[attr]
            elif 0x200 <= attr <= 0x3ff:  # DW_AT_MIPS represented as 0x204???
                name = 'DW_AT_user_0x%x' % attr
            else:
                raise ValueError("%d not a known attribute" % (attr))

            raw_value = struct_parse(structs.Dwarf_dw_form[form], stm)
            self.attributes[name] = AttributeValue(
                name=name,
                form=form,
                value=raw_value,
                raw_value=raw_value,
                offset=attr_offset)

        # The entry counts as a parent when its sibling starts at least 8
        # bytes past its own end
        self.has_children = self.attributes['DW_AT_sibling'].value >= end_of_die + 8

    def get_parent(self):
        """Return the parent entry recorded during iteration, or None."""
        return self._parent

    def is_null(self):
        """Tell whether this entry is padding (tag 'DW_TAG_padding')."""
        return self.tag == 'DW_TAG_padding'

    def iter_children(self):
        """Iterate over the child entries, delegating to the owning CU."""
        return self.cu.iter_children(self)

    def sibling(self):
        """Return the value of this entry's DW_AT_sibling attribute."""
        return self.attributes['DW_AT_sibling'].value
class CompileUnitV1(object):
    """A DWARF v1 compile unit: a top DIE plus a cache of DIEs by offset.

    The interface follows the pyelftools CompileUnit closely enough for the
    callers in this project (header lookup via `cu[name]`, `get_top_DIE`,
    `iter_DIEs`, `iter_children`).
    """

    def __init__(self, di, top_die):
        """Create the unit.

        di      -- owning DWARF info object (provides `structs`,
                   `DIE_at_offset` and `section_size`)
        top_die -- the already parsed top-level DIE of this unit
        """
        self.dwarfinfo = di
        self.structs = di.structs
        self.header = CUv1Header(version = 1, unit_length = None, debug_abbrev_offset = None, address_size = 4)
        # The unit starts where its top DIE starts. Setting it here keeps
        # iter_DIEs() usable even if the creator does not assign cu_offset.
        self.cu_offset = top_die.offset
        # Parallel lists kept sorted by offset: the DIEs and their offsets
        self._dielist = [top_die]
        self._diemap = [top_die.offset]

    def get_top_DIE(self):
        """Return the top-level DIE of this unit."""
        return self._dielist[0]

    def __getitem__(self, name):
        """Return the header field called `name` (e.g. cu['version'])."""
        return self.header._asdict()[name]

    # Caches
    def DIE_at_offset(self, offset):
        """Return the DIE at `offset`, parsing and caching it on first use."""
        i = bisect_left(self._diemap, offset)
        if i < len(self._diemap) and offset == self._diemap[i]:
            die = self._dielist[i]
        else:
            die = self.dwarfinfo.DIE_at_offset(offset, self)
            self._dielist.insert(i, die)
            self._diemap.insert(i, offset)
        return die

    # pyelftools' iter_DIEs sets parent on discovered DIEs, we should too
    def iter_DIEs(self):
        """Yield every non-padding DIE of the unit, in stream order.

        The walk covers offsets from the unit start up to the top DIE's
        sibling offset and records a parent on each DIE that has none yet.
        """
        offset = self.cu_offset
        parent = None
        parent_stack = list()
        end_offset = self.get_top_DIE().attributes['DW_AT_sibling'].value
        while offset < end_offset:
            die = self.DIE_at_offset(offset)

            if die._parent is None:
                die._parent = parent

            if not die.is_null():
                yield die
                offset += die.size
                if offset != die.sibling():  # Start of a subtree
                    parent_stack.append(parent)
                    parent = die
            else:  # padding - end of a sibling chain
                parent = parent_stack.pop()
                offset += die.size

    def iter_children(self, parent_die):
        """Yield the direct children of `parent_die`.

        Starts right after the parent and hops from sibling to sibling until
        a padding DIE or the end of the section is reached.
        """
        offset = parent_die.offset + parent_die.size
        while offset < self.dwarfinfo.section_size:
            die = self.DIE_at_offset(offset)

            if die._parent is None:
                die._parent = parent_die

            if die.is_null():
                break

            yield die
            offset = die.sibling()
class LineTableV1(object):
    """Line number table of one DWARF v1 compile unit.

    Exposes `header` and `get_entries()` in the shape callers expect from a
    pyelftools line program.
    """

    def __init__(self, stm, structs, len, pc):
        """Remember where the table lives; decoding is deferred.

        stm     -- stream positioned at the first line entry
        structs -- struct parsers used to read the entry fields
        len     -- table length as read from the table header
        pc      -- base address the per-entry deltas are added to
        """
        self.stm = stm
        self.structs = structs
        self.len = len
        self.pc = pc
        # The stream may be repositioned by other users before get_entries()
        # runs, so the table start is captured now.
        self._start = stm.tell()
        self._decoded_entries = None
        self.header = LineTableHeader(1, (None))

    def get_entries(self):
        """Return the decoded entries as a list of LineProgramEntry objects.

        The table is decoded on the first call and cached. Each record is a
        32-bit line, a 16-bit column (0xffff meaning "no column") and a 32-bit
        address delta; a record with line 0 ends the table.
        """
        if self._decoded_entries is None:
            stm = self.stm
            structs = self.structs
            stm.seek(self._start, 0)
            # NOTE(review): `len` is used as read from the header; confirm
            # whether it also counts the header fields themselves.
            end_offset = self._start + self.len
            entries = []
            pc = self.pc
            # Follow the real stream position so the loop also ends when the
            # table has no terminating line-0 record.
            while stm.tell() < end_offset:
                line = struct_parse(structs.Dwarf_uint32(''), stm)
                col = struct_parse(structs.Dwarf_uint16(''), stm)
                pc_delta = struct_parse(structs.Dwarf_uint32(''), stm)
                if line == 0:
                    break
                state = LineState(True)
                state.file = 0
                state.line = line
                state.column = col if col != 0xffff else None
                state.address = pc
                entries.append(LineProgramEntry(0, False, [], state))
                pc += pc_delta
            self._decoded_entries = entries
        return self._decoded_entries
class DWARFExprParserV1(object):
    """Parser for DWARF v1 location expressions.

    Produces the same DWARFExprOp records as the pyelftools expression parser.
    """

    def __init__(self, structs):
        """structs -- struct parsers; supplies the target address field."""
        self.structs = structs

    def parse_expr(self, expr):
        """Parse an expression blob into a list of DWARFExprOp.

        expr -- the expression as a list of byte values

        Opcodes up to 4 and opcode 0x80 are followed by one target-address
        sized operand; every other opcode has no operands. Unknown opcodes
        get the name 'OP:0x..'.
        """
        stm = BytesIO(bytelist2string(expr))
        parsed = []
        while True:
            # Remember where this op starts, so `offset` points at the opcode
            # itself rather than past its operands (as pyelftools does).
            op_offset = stm.tell()
            # Get the next opcode from the stream. If nothing is left in the
            # stream, we're done.
            byte = stm.read(1)
            if len(byte) == 0:
                break

            # Decode the opcode and its name.
            op = ord(byte)
            op_name = DW_OP_opcode2name.get(op, 'OP:0x%x' % op)

            if op <= 4 or op == 0x80:
                args = [struct_parse(self.structs.Dwarf_target_addr(''), stm),]
            else:
                args = []

            parsed.append(DWARFExprOp(op=op, op_name=op_name, args=args, offset=op_offset))

        return parsed
class DWARFInfoV1(object):
    """DWARF v1 debug information of an ELF file.

    Reads the `.debug` section (and `.line`, when present) into memory and
    offers the subset of the pyelftools DWARFInfo interface used by callers.
    """

    def __init__(self, elffile):
        """elffile -- ELF file object that has a `.debug` section."""
        section = elffile.get_section_by_name(".debug")
        section_data = section.data()
        self.section_size = len(section_data)
        self.stm = BytesIO(section_data)

        # Always defined, so line_program_for_CU() can tell that the file has
        # no line information instead of failing on a missing attribute.
        lsection = elffile.get_section_by_name(".line")
        self.linestream = BytesIO(lsection.data()) if lsection else None

        self.config = DwarfConfig(
            little_endian = elffile.little_endian,
            default_address_size = elffile.elfclass // 8,
            machine_arch = elffile.get_machine_arch()
        )

        self.structs = DWARFStructs(
            little_endian = self.config.little_endian,
            dwarf_format = 32,
            address_size = self.config.default_address_size)

    def iter_CUs(self):
        """Yield a CompileUnitV1 for each top-level DIE of the section.

        Top-level DIEs are chained through DW_AT_sibling; iteration stops at
        the first padding DIE or at the end of the section.
        """
        offset = 0
        while offset < self.section_size:
            die = self.DIE_at_offset(offset, None)
            if die.tag == 'DW_TAG_padding':
                break
            if die.cu is None:
                die.cu = cu = CompileUnitV1(self, die)
                cu.cu_offset = offset
            yield die.cu
            offset = die.attributes['DW_AT_sibling'].value

    # Does not cache
    def DIE_at_offset(self, offset, cu):
        """Parse and return the DIE at `offset`, attached to `cu`."""
        self.stm.seek(offset, 0)
        return DIEV1(self.stm, cu, self)

    def location_lists(self):
        """Return None; this class provides no location lists."""
        return None

    def line_program_for_CU(self, cu):
        """Return the LineTableV1 of `cu`, or None when it has none.

        None is returned when the top DIE has no DW_AT_stmt_list or when the
        file has no `.line` section.
        """
        top_DIE = cu.get_top_DIE()
        if self.linestream is None or 'DW_AT_stmt_list' not in top_DIE.attributes:
            return None

        stm = self.linestream
        stm.seek(top_DIE.attributes['DW_AT_stmt_list'].value, 0)
        structs = self.structs
        # Table header: length followed by the base address
        table_len = struct_parse(structs.Dwarf_uint32(''), stm)
        pc = struct_parse(structs.Dwarf_target_addr(''), stm)
        return LineTableV1(stm, structs, table_len, pc)
def parse_dwarf1(elffile):
    """Build the DWARF v1 info object for `elffile` and return it."""
    dwarf_info = DWARFInfoV1(elffile)
    return dwarf_info

221
bear/formats.py Normal file
View File

@ -0,0 +1,221 @@
import io
from os import path, listdir
from elftools.dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig
# This doesn't depend on Qt
# The dependency on filebytes only lives here
# Format codes: 0 = ELF, 1 = MACHO, 2 = PE
def read_pe(filename):
    """Load DWARF info from a PE executable.

    Returns a DWARFInfo augmented with `_format` (2) and `_start_address`
    (the image base), or None when the file has no `.debug_info` section.
    """
    from filebytes.pe import PE, IMAGE_FILE_MACHINE

    pefile = PE(filename)

    # Collect the debug sections by name.
    # Section's real size might be padded - see https://github.com/sashs/filebytes/issues/28
    data = {}
    for section in pefile.sections:
        if not section.name.startswith('.debug'):
            continue
        virtual_size = section.header.PhysicalAddress_or_VirtualSize
        raw_size = section.header.SizeOfRawData
        if virtual_size == 0:
            size = raw_size
        else:
            size = min((raw_size, virtual_size))
        data[section.name] = DebugSectionDescriptor(
            io.BytesIO(section.bytes), section.name, None, size, 0)

    if '.debug_info' not in data:
        return None

    machine = pefile.imageNtHeaders.header.FileHeader.Machine
    # There are also some exotic architectures...
    is64 = machine in (IMAGE_FILE_MACHINE.AMD64, IMAGE_FILE_MACHINE.ARM64, IMAGE_FILE_MACHINE.IA64)

    config = DwarfConfig(
        little_endian = True,
        default_address_size = 8 if is64 else 4,
        machine_arch = IMAGE_FILE_MACHINE[machine].name
    )
    di = DWARFInfo(
        config = config,
        debug_info_sec = data['.debug_info'],
        debug_aranges_sec = data.get('.debug_aranges'),
        debug_abbrev_sec = data.get('.debug_abbrev'),
        debug_frame_sec = data.get('.debug_frame'),
        eh_frame_sec = None,  # Haven't seen one in the wild so far
        debug_str_sec = data.get('.debug_str'),
        debug_loc_sec = data.get('.debug_loc'),
        debug_ranges_sec = data.get('.debug_ranges'),
        debug_line_sec = data.get('.debug_line'),
        debug_pubtypes_sec = data.get('.debug_pubtypes'),
        debug_pubnames_sec = data.get('.debug_pubnames'),
        debug_addr_sec = data.get('.debug_addr'),
        debug_str_offsets_sec = data.get('.debug_str_offsets'),
        debug_line_str_sec = data.get('.debug_line_str'),
        debug_loclists_sec = data.get('.debug_loclists'),
        debug_rnglists_sec = data.get('.debug_rnglists'),
        debug_sup_sec = data.get('.debug_sup'),
        gnu_debugaltlink_sec = data.get('.gnu_debugaltlink')
    )
    di._format = 2
    di._start_address = pefile.imageNtHeaders.header.OptionalHeader.ImageBase
    return di
# Arch + flavor where flavor matters
def make_macho_arch_name(macho):
    """Return the CPU type name of a Mach-O file or slice.

    For ARM and ARM64 with a nonzero CPU subtype, the subtype name is
    appended to the CPU type name.
    """
    from filebytes.mach_o import CpuType, CpuSubTypeARM, CpuSubTypeARM64

    header = macho.machHeader.header
    cpu_type = header.cputype
    cpu_subtype = header.cpusubtype

    suffix = ''
    if cpu_subtype != 0 and cpu_type == CpuType.ARM:
        suffix = CpuSubTypeARM[cpu_subtype].name
    elif cpu_subtype != 0 and cpu_type == CpuType.ARM64:
        suffix = CpuSubTypeARM64[cpu_subtype].name

    return CpuType[cpu_type].name + suffix
# For debugging purposes only - dump individual debug related sections in a Mach-O file/slice as files
def macho_save_sections(filename, macho):
    """Write every `__debug*` section of `macho` to its own file.

    Files are named `<filename>.<arch>.<section name>`; existing files are
    left untouched.
    """
    from filebytes.mach_o import LC

    arch = make_macho_arch_name(macho)
    for cmd in macho.loadCommands:
        if cmd.header.cmd not in (LC.SEGMENT, LC.SEGMENT_64):
            continue
        for section in cmd.sections:
            if not section.name.startswith('__debug'):
                continue
            sec_file = ".".join((filename, arch, section.name))
            if path.exists(sec_file):
                continue
            with open(sec_file, 'wb') as out:
                out.write(section.bytes)
# resolve_arch takes a list of architecture descriptions, and returns
# the desired index, or None if the user has cancelled
def read_macho(filename, resolve_arch, friendly_filename):
    """Load DWARF info from a Mach-O file, or from one slice of a fat binary.

    filename          -- path of the Mach-O file
    resolve_arch      -- callable taking the list of slice names of a fat
                         binary; returns the chosen index, or None to cancel
    friendly_filename -- display name; only used by the commented-out
                         section dump below

    Returns a DWARFInfo augmented with `_format` (1), `_fat_arch` and
    `_start_address`; None when there is no `__debug_info` section; False
    when the user cancelled the slice selection.
    """
    from filebytes.mach_o import MachO, CpuType, TypeFlags, LC

    fat_arch = None
    macho = MachO(filename)
    if macho.isFat:
        slices = [make_macho_arch_name(arch_slice) for arch_slice in macho.fatArches]
        arch_no = resolve_arch(slices)
        if arch_no is None:  # User cancellation
            return False
        fat_arch = slices[arch_no]
        macho = macho.fatArches[arch_no]

    # We proceed with macho being a arch-specific file, or a slice within a fat binary
    segment_cmds = (LC.SEGMENT, LC.SEGMENT_64)
    data = {
        section.name: DebugSectionDescriptor(io.BytesIO(section.bytes), section.name, None, len(section.bytes), 0)
        for cmd in macho.loadCommands
        if cmd.header.cmd in segment_cmds
        for section in cmd.sections
        if section.name.startswith('__debug')
    }

    #macho_save_sections(friendly_filename, macho)

    if '__debug_info' not in data:
        return None

    cpu = macho.machHeader.header.cputype
    di = DWARFInfo(
        config = DwarfConfig(
            little_endian=True,
            default_address_size = 8 if (cpu & TypeFlags.ABI64) != 0 else 4,
            machine_arch = make_macho_arch_name(macho)
        ),
        debug_info_sec = data['__debug_info'],
        debug_aranges_sec = data.get('__debug_aranges'),
        debug_abbrev_sec = data['__debug_abbrev'],
        debug_frame_sec = data.get('__debug_frame'),
        eh_frame_sec = None,  # Haven't seen those in Mach-O
        debug_str_sec = data['__debug_str'],
        debug_loc_sec = data.get('__debug_loc'),
        debug_ranges_sec = data.get('__debug_ranges'),
        debug_line_sec = data.get('__debug_line'),
        debug_pubtypes_sec = data.get('__debug_pubtypes'),  #__debug_gnu_pubt?
        debug_pubnames_sec = data.get('__debug_pubnames'),  #__debug_gnu_pubn?
        debug_addr_sec = data.get('__debug_addr'),
        # Mach-O section names hold at most 16 bytes, so the full name cannot
        # occur as stored; presumably the truncated spelling is what shows up
        # - verify against a DWARF 5 Mach-O binary.
        debug_str_offsets_sec = data.get('__debug_str_offsets', data.get('__debug_str_offs')),
        debug_line_str_sec = data.get('__debug_line_str'),
        debug_loclists_sec = data.get('__debug_loclists'),
        debug_rnglists_sec = data.get('__debug_rnglists'),
        debug_sup_sec = data.get('__debug_sup'),
        gnu_debugaltlink_sec = data.get('__gnu_debugaltlink')
    )
    di._format = 1
    di._fat_arch = fat_arch
    text_cmd = next((cmd for cmd in macho.loadCommands if cmd.header.cmd in segment_cmds and cmd.name == "__TEXT"), False)
    di._start_address = text_cmd.header.vmaddr if text_cmd else 0
    return di
# UI agnostic - resolve_arch might be interactive
# Returns slightly augmented DWARFInfo
# Or None if not a DWARF containing file (or unrecognized)
# Or False if user has cancelled
# Or throws an exception
# resolve_arch is for Mach-O fat binaries - see read_macho()
def read_dwarf(filename, resolve_arch):
    """Open `filename` and return its DWARF info.

    filename     -- path of a PE, ELF or Mach-O file, or of a `.dSYM`,
                    `.app` or `.framework` bundle directory
    resolve_arch -- slice chooser for Mach-O fat binaries, see read_macho()

    Returns the (slightly augmented) DWARF info object; None when the file is
    not recognized or has no DWARF; False when the user cancelled the slice
    selection. Parser exceptions propagate to the caller.

    For an ELF file that yields DWARF info, the file is left open on purpose;
    in every other case it is closed before returning.
    """
    if path.isfile(filename):  # On MacOS, opening dSYM bundles as is would be right
        file = None
        try:  # For ELF, the file is to remain open
            file = open(filename, 'rb')
            signature = file.read(4)

            if signature[0:2] == b'MZ':  # DOS header - this might be a PE. Don't verify the PE header, just feed it to the parser
                return read_pe(filename)
            elif signature == b'\x7FELF':  # It's an ELF
                from elftools.elf.elffile import ELFFile
                file.seek(0)
                elffile = ELFFile(file)

                # Retrieve the preferred loading address
                load_segment = next((seg for seg in elffile.iter_segments() if seg.header.p_type == 'PT_LOAD'), None)
                start_address = load_segment.header.p_vaddr if load_segment else 0
                di = None
                if elffile.has_dwarf_info():
                    di = elffile.get_dwarf_info()
                elif elffile.get_section_by_name(".debug"):
                    from .dwarfone import parse_dwarf1
                    di = parse_dwarf1(elffile)

                if di:
                    di._format = 0
                    di._start_address = start_address
                    # Only now give up the handle: the file stays open for
                    # the returned object. If parsing raised, or the ELF has
                    # no DWARF, the finally clause below still closes it.
                    file = None
                return di
            elif signature in (b'\xCA\xFE\xBA\xBE', b'\xFE\xED\xFA\xCE', b'\xFE\xED\xFA\xCF', b'\xCE\xFA\xED\xFE', b'\xCF\xFA\xED\xFE'):
                if signature == b'\xCA\xFE\xBA\xBE' and int.from_bytes(file.read(4), 'big') >= 0x20:
                    # Java .class files also have CAFEBABE, check the fat binary arch count
                    return None
                # Mach-O fat binary, or 32/64-bit Mach-O in big/little-endian format
                return read_macho(filename, resolve_arch, filename)
        finally:
            if file:
                file.close()
    elif path.isdir(filename):
        # Is it a dSYM bundle?
        nameparts = path.basename(filename).split('.')
        dwarf_dir = path.join(filename, 'Contents', 'Resources', 'DWARF')
        if nameparts[-1] == 'dSYM' and path.exists(dwarf_dir):
            files = listdir(dwarf_dir)
            if len(files) > 0:
                # When are there multiple DWARF files in a dSYM bundle?
                # TODO: let the user choose?
                dsym_file_path = path.join(dwarf_dir, files[0])
                return read_macho(dsym_file_path, resolve_arch, filename)
        # Is it an app bundle? appname.app
        if len(nameparts) > 1 and nameparts[-1] in ('app', 'framework'):
            app_file = path.join(filename, '.'.join(nameparts[0:-1]))
            if path.exists(app_file):
                return read_macho(app_file, resolve_arch, filename)

        # Any other bundle formats we should be aware of?
    return None
def get_debug_sections(di):
    """Return the debug sections present on `di`.

    The result maps a display name (e.g. 'info', 'eh_frame') to the section
    object stored on `di` under the matching `*_sec` attribute. Attributes
    that are missing or empty are left out.
    """
    section_names = {name: "debug_%s_sec" % name
        for name in
        ('info', 'aranges', 'abbrev', 'frame',
        'str', 'loc', 'ranges', 'line', 'addr',
        'str_offsets', 'line_str', 'pubtypes',
        'pubnames', 'loclists', 'rnglists', 'sup')}
    section_names['eh_frame'] = 'eh_frame_sec'
    section_names['gnu_debugaltlink'] = 'gnu_debugaltlink_sec'

    # Display name to section object. getattr with a default tolerates info
    # objects that do not carry every field.
    sections = {}
    for display_name, field_name in section_names.items():
        section = getattr(di, field_name, None)
        if section:
            sections[display_name] = section
    return sections

27
bear/patch.py Normal file
View File

@ -0,0 +1,27 @@
import elftools.dwarf.structs
from elftools.construct.macros import Array
import elftools.dwarf.locationlists
from elftools.common.exceptions import DWARFError
import elftools.dwarf.enums
# Fixes to pyelftools that are not in the released version yet
# Not sure about form_indirect, no binaries.
def monkeypatch():
    """Apply fixes to pyelftools that are not in the released version yet.

    Three patches are installed: a parser for DW_FORM_data16, a
    version-aware LocationListsPair.get_location_list_at_offset, and three
    GNU attribute codes added to ENUM_DW_AT. Calling this more than once is
    safe.
    """
    # Not sure about DW_FORM_indirect - need a test binary

    # This patches DW_FORM_data16
    structs_cls = elftools.dwarf.structs.DWARFStructs
    # Wrap only once: on a second call the "base" would be rebound to the
    # wrapper itself, and the wrapper would then call itself forever.
    if not hasattr(structs_cls, '_create_dw_form_base'):
        def _create_dw_form_ex(self):
            self._create_dw_form_base()
            self.Dwarf_dw_form['DW_FORM_data16'] = Array(16, self.Dwarf_uint8(''))

        structs_cls._create_dw_form_base = structs_cls._create_dw_form
        structs_cls._create_dw_form = _create_dw_form_ex

    def get_location_list_at_offset_ex(self, offset, die=None):
        # The DIE is needed to learn the CU version, which selects the section
        if die is None:
            raise DWARFError("For this binary, \"die\" needs to be provided")
        section = self._loclists if die.cu.header.version >= 5 else self._loc
        return section.get_location_list_at_offset(offset, die)

    elftools.dwarf.locationlists.LocationListsPair.get_location_list_at_offset = get_location_list_at_offset_ex

    elftools.dwarf.enums.ENUM_DW_AT["DW_AT_GNU_dwo_name"] = 0x2130
    elftools.dwarf.enums.ENUM_DW_AT["DW_AT_GNU_ranges_base"] = 0x2132
    elftools.dwarf.enums.ENUM_DW_AT["DW_AT_GNU_addr_base"] = 0x2133

File diff suppressed because it is too large Load Diff

377
main.py Normal file
View File

@ -0,0 +1,377 @@
#!/bin/python
import sys
from bear import formats
from elftools.dwarf.locationlists import LocationParser, LocationExpr
from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp, DW_OP_opcode2name
from bear.dwarfone import DWARFExprParserV1
from pprint import pprint
# Runtime options. "include_file_name": when true, Bear.flatten_type()
# passes the compile unit name as the name prefix of each variable.
configuration = dict(
    include_file_name=False,
)
def eprint(*args, **kwargs):
    """Print to standard error; arguments are passed on to print()."""
    err_stream = sys.stderr
    print(*args, file=err_stream, **kwargs)
class DWARFParseError(Exception):
    """Raised when a file was opened but its DWARF could not be parsed.

    The info object that was being processed is kept in `dwarfinfo`.
    """

    def __init__(self, exc, di):
        message = "DWARF parsing error: " + format(exc)
        super().__init__(message)
        self.dwarfinfo = di
# Some additional data for every DIE
def decorate_die(die, i):
    """Attach the index `i` and an empty child cache to `die`; return `die`."""
    die._i, die._children = i, None
    return die
def load_children(parent_die):
    """Load and cache the child DIEs of `parent_die`, if not done yet.

    The children end up in `parent_die._children`, each decorated with its
    index. Assumes the caller has already checked that the DIE has children.
    """
    if getattr(parent_die, "_children", None) is not None:
        return  # Already cached

    # TODO: wait cursor here. It may cause disk I/O
    try:
        kids = []
        for index, kid in enumerate(parent_die.iter_children()):
            kids.append(decorate_die(kid, index))
        parent_die._children = kids
    except KeyError:
        # Catching #1516: report the problem and continue without children
        print("This executable file is corrupt or incompatible with the current version of Bear.")
        parent_die._children = []
def safe_DIE_name(die, default = ''):
    """Return the DW_AT_name of `die` as text, or `default` if it has none.

    The name bytes are decoded as UTF-8; undecodable bytes are dropped.
    """
    if 'DW_AT_name' not in die.attributes:
        return default
    raw_name = die.attributes['DW_AT_name'].value
    return raw_name.decode('utf-8', errors='ignore')
class Bear():
    """Collects the variables described by the DWARF info of a file.

    After construction, `myVariables` holds one dictionary per compile unit:
        name:     name of the compile unit
        children: list of variable dictionaries, each with
            name:    variable name
            address: hex string taken from the location expression, when
                     one could be extracted
            type:    type description built by resolve_type()
    """

    def __init__(self, filename):
        """Read `filename` and build `myVariables`.

        Prints a message and exits with status 1 when the file yields no
        DWARF info. Raises DWARFParseError when loading the compile units
        fails with an AssertionError.
        """
        # Defined up front so the instance stays usable even when the file
        # turns out to have no compile units.
        self.filename = filename
        self.dwarfinfo = None
        self.myVariables = []
        self.top_dies = []

        di = formats.read_dwarf(filename, self.resolve_arch)
        if not di:  # Covers both False and None
            print("Something went wrong")
            sys.exit(1)

        # Some degree of graceful handling of wrong format
        try:
            # Some cached top level stuff
            # Notably, iter_CUs doesn't cache
            di._ranges = None  # Loaded on first use

            def decorate_cu(cu, i):
                cu._i = i
                cu._lineprogram = None
                cu._exprparser = None
                return cu

            # We'll need them first thing, might as well load here
            di._unsorted_CUs = [decorate_cu(cu, i) for (i, cu) in enumerate(di.iter_CUs())]
            if not di._unsorted_CUs:
                return  # Weird, but saw it once - debug sections present, but no CUs
            # For quick CU search by offset within the info section, regardless of sorting
            di._CU_offsets = [cu.cu_offset for cu in di._unsorted_CUs]
            di._CUs = list(di._unsorted_CUs)
            di._locparser = None  # Created on first use
            self.dwarfinfo = di
        except AssertionError as ass:  # Covers exceptions during parsing
            raise DWARFParseError(ass, di) from ass

        self.top_dies = [decorate_die(CU.get_top_DIE(), i) for (i, CU) in enumerate(di._CUs)]
        for top_die in self.top_dies:
            # Top DIEs only contain compile units; preload their children
            load_children(top_die)
            children_dies = [self._describe_variable(child_die)
                             for child_die in top_die._children
                             if child_die.tag == 'DW_TAG_variable']
            self.myVariables.append({
                'name': safe_DIE_name(top_die, '?'),
                'children': children_dies
            })

        pprint(self.myVariables)

    def _describe_variable(self, var_die):
        """Build the dictionary describing one DW_TAG_variable DIE."""
        entry = {
            # Name should be on every element, if not set something so it can be printed
            'name': safe_DIE_name(var_die, '?')
        }
        address = self._variable_address(var_die)
        if address is not None:
            entry['address'] = address
        if 'DW_AT_type' in var_die.attributes:
            typ_die = var_die.get_DIE_from_attribute('DW_AT_type')
            entry['type'] = self.resolve_type(typ_die)
        return entry

    def _variable_address(self, var_die):
        """Return the first operand of the variable's location as a hex string.

        Returns None when the DIE has no usable location: no DW_AT_location,
        a location list instead of a single expression, an empty expression,
        or a first operation without an integer operand.
        """
        if 'DW_AT_location' not in var_die.attributes:
            return None
        attr = var_die.attributes['DW_AT_location']
        if not LocationParser.attribute_has_location(attr, var_die.cu['version']):
            return None
        ll = self.parse_location(var_die, attr)
        if not isinstance(ll, LocationExpr):
            return None  # A location list has no single expression
        ops = self.dump_expr(var_die, ll.loc_expr)
        if not ops or not ops[0].args or not isinstance(ops[0].args[0], int):
            return None
        return hex(ops[0].args[0])

    def resolve_type(self, die_type):
        """Describe the type (or member) DIE `die_type` as a dictionary.

        Keys of the result:
            name:     DW_AT_name of the DIE passed in, '?' if it has none
            offset:   DW_AT_data_member_location times 8, when present
            type:     name of the base type, for base types
            children: member descriptions (structures, unions) or one entry
                      per element (arrays)
            size_bit: DW_AT_byte_size times 8, when present
        Tags that are not handled are reported on standard error.
        """
        if die_type.tag == 'DW_TAG_volatile_type':
            die_type = die_type.get_DIE_from_attribute('DW_AT_type')

        entry = {
            # Name should be on every element, if not set something so it can be printed
            'name': safe_DIE_name(die_type, '?')
        }

        if 'DW_AT_data_member_location' in die_type.attributes:
            entry['offset'] = die_type.attributes['DW_AT_data_member_location'].value * 8

        if 'DW_AT_type' in die_type.attributes and die_type.tag not in ['DW_TAG_base_type', 'DW_TAG_structure_type', 'DW_TAG_array_type']:
            # Check if the type is a redefinition of a base type: follow the
            # DW_AT_type chain until a type that is described directly
            die_type_test = die_type
            while 'DW_AT_type' in die_type_test.attributes:
                die_type_test = die_type_test.get_DIE_from_attribute('DW_AT_type')
                if die_type_test.tag in ['DW_TAG_base_type', 'DW_TAG_structure_type', 'DW_TAG_array_type', 'DW_TAG_union_type']:
                    die_type = die_type_test
                    break

        if die_type.tag == 'DW_TAG_base_type':
            entry['type'] = safe_DIE_name(die_type, '?')
        elif die_type.tag in ("DW_TAG_structure_type", 'DW_TAG_union_type'):
            load_children(die_type)
            entry['children'] = [self.resolve_type(child_die) for child_die in die_type._children]
        elif die_type.tag == "DW_TAG_array_type":
            array_type = self.resolve_type(die_type.get_DIE_from_attribute('DW_AT_type'))
            load_children(die_type)

            # The element count comes from the first child's upper bound.
            # Without an integer bound no elements are listed.
            upper_bound = None
            if die_type._children:
                bound_attr = die_type._children[0].attributes.get('DW_AT_upper_bound')
                if bound_attr is not None and isinstance(bound_attr.value, int):
                    upper_bound = bound_attr.value

            child_entries = []
            if upper_bound is not None:
                for child in range(0, upper_bound + 1):
                    child_entry = array_type.copy()
                    # The element offset is only known when the element size is
                    if 'size_bit' in array_type:
                        child_entry['offset'] = array_type['size_bit'] * child
                    child_entries.append(child_entry)
            entry['children'] = child_entries
        else:
            eprint("Unsupported type:", die_type.tag)

        if 'DW_AT_byte_size' in die_type.attributes:
            entry['size_bit'] = die_type.attributes['DW_AT_byte_size'].value * 8

        return entry

    def flatten_type(self, parent=None):
        """Flatten `myVariables` into one list.

        For every compile unit the list receives the unit's name (a string),
        followed by one item per variable: the list returned by
        flatten_child(). When configuration["include_file_name"] is set, the
        unit name is used as the name prefix of its variables.

        parent -- not used
        """
        vars = []

        # Iterate over CUs
        # - name - filename
        # - children - variables
        for CU in self.myVariables:
            vars.append(CU['name'])
            for child in CU['children']:
                if configuration["include_file_name"]:
                    vars.append(self.flatten_child(child, CU['name']))
                else:
                    vars.append(self.flatten_child(child))

        return vars

    def flatten_child(self, child, name='', address=0):
        """Flatten one variable or member description into a list.

        child   -- dictionary as built for a variable or by resolve_type()
        name    -- name prefix; the result name is '<name>.<child name>'
        address -- address inherited from the parent; when falsy, the
                   child's own 'address' is used (None if it has none)

        Returns a list of {'name', 'address'} dictionaries: the entry itself
        first, then every entry found under its 'children' and under the
        'children' of its 'type' dictionary.
        NOTE(review): nested entries inherit the parent's address unchanged;
        their 'offset' is not added to it.
        """
        if name:
            full_name = '{parent}.{child}'.format(parent=name, child=child['name'])
        else:
            full_name = child['name']
        own_address = address if address else child.get('address')

        flat = [{'name': full_name, 'address': own_address}]

        for kid in child.get('children', ()):
            flat.extend(self.flatten_child(kid, full_name, own_address))

        # A variable keeps its type description under 'type'; for base types
        # 'type' is just the type name, which has nothing to descend into.
        nested_type = child.get('type')
        if isinstance(nested_type, dict):
            for kid in nested_type.get('children', ()):
                flat.extend(self.flatten_child(kid, full_name, own_address))

        return flat

    def pretty_print(self):
        """Return the compile unit names; per-variable output is not written yet."""
        vars = []

        # Iterate over CUs
        # - name - filename
        # - children - variables
        for CU in self.myVariables:
            vars.append(CU['name'])
            for child in CU['children']:
                self.pretty_child(child)

        return vars

    def pretty_child(self, child, prefix='', address=0):
        """Placeholder: currently does nothing and returns None."""
        return None

    def print_top_DIE(self, die):
        """Print a named DW_TAG_variable DIE, preceded by its type description."""
        if die.tag == 'DW_TAG_variable':
            name = safe_DIE_name(die)
            if name:
                typ_name = ''
                if 'DW_AT_type' in die.attributes:
                    typ = die.get_DIE_from_attribute('DW_AT_type')
                    # This class has no describe_type(); resolve_type() is
                    # its method for describing a type DIE.
                    print(self.resolve_type(typ))
                print('{name} {typ_name}'.format(name=name, typ_name=typ_name))

    def print_DIE(self, die, prefix=''):
        """Debugging aid: print `die`, following type references.

        prefix -- name accumulated so far; type DIEs are only handled when
                  it is non-empty
        """
        name = ''

        if die.tag == 'DW_TAG_variable':
            name = safe_DIE_name(die)
            if name and 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                if 'DW_AT_location' in die.attributes:
                    # The result is not used yet
                    self.parse_location(die, die.attributes['DW_AT_location'])
                self.print_DIE(typ, name)
                return
        elif die.tag == 'DW_TAG_compile_unit':
            name = safe_DIE_name(die, '.')
        elif prefix and die.tag == 'DW_TAG_base_type':
            name = safe_DIE_name(die)
        elif prefix and die.tag == 'DW_TAG_const_type':
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                self.print_DIE(typ, prefix)
                return
        elif prefix and die.tag == 'DW_TAG_array_type':
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                name = prefix + '[]'
                self.print_DIE(typ, name)
                load_children(die)
                if die._children:
                    for child in die._children:
                        print(child)
                        self.print_DIE(child, name)
                return
        elif prefix and die.tag == 'DW_TAG_volatile_type':
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                self.print_DIE(typ, prefix)
                return
        elif prefix and die.tag == 'DW_TAG_typedef':
            print(die.attributes["DW_AT_name"].value)
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                name = prefix + '[]'
                self.print_DIE(typ, name)
                load_children(die)
                if die._children:
                    for child in die._children:
                        print(child)
                        self.print_DIE(child, name)
        elif prefix and die.tag == 'DW_TAG_enumeration_type':
            print ("mylittlepony")
            print(die)
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                name = prefix
                self.print_DIE(typ, name)
                load_children(die)
                print (typ)
        elif prefix:
            print (prefix)
            print(die)

        if name:
            if prefix:
                print (prefix, name)
            else:
                print (name)

    def parse_location(self, die, attr):
        """Parse the location attribute `attr` of `die`.

        The LocationParser is created on first use and kept on the DWARF
        info object.
        """
        di = die.dwarfinfo
        if di._locparser is None:
            di._locparser = LocationParser(di.location_lists())
        return di._locparser.parse_from_attribute(attr, die.cu['version'], die = die)

    def dump_expr(self, die, expr):
        """Parse the expression blob `expr` and return the list of parsed ops.

        The parser is created per compile unit on first use: the pyelftools
        parser for CU versions above 1, DWARFExprParserV1 otherwise.
        """
        if die.cu._exprparser is None:
            die.cu._exprparser = DWARFExprParser(die.cu.structs) if die.cu['version'] > 1 else DWARFExprParserV1(die.cu.structs)

        # Challenge: for nested expressions, args is a list with a list of commands
        return die.cu._exprparser.parse_expr(expr)

    def resolve_arch(self, arches):
        """Slice chooser for fat binaries: unsupported, always returns None."""
        print("resolve_arch: Unsupported feature")
        return None
def main(filename=None):
    """Patch pyelftools, load a file and print its flattened variables.

    filename -- file to analyze. When omitted, the first command line
                argument is used; without one, the built-in default path.
    """
    from bear.patch import monkeypatch
    monkeypatch()

    if filename is None:
        if len(sys.argv) > 1:
            filename = sys.argv[1]
        else:
            filename = "/home/juraj/projects/Playground_C/build/playground_c"

    bear = Bear(filename)
    flat_vars = bear.flatten_type()
    pprint(flat_vars)

    #bear = Bear("main.elf")
    # bear = Bear("LED_Cube.elf")
    # bear = Bear("serialplay")


if __name__ == "__main__":
    main()