WIP: Add a lot of code
Signed-off-by: Juraj Oravec <jurajoravec@mailo.com>
This commit is contained in:
parent
7449c692c9
commit
3c6b845948
289
bear/dwarfone.py
Normal file
289
bear/dwarfone.py
Normal file
@ -0,0 +1,289 @@
|
||||
# Support for DWARF v1.1 in a way that will be more or less compatible with pyelftools
|
||||
|
||||
from io import BytesIO
|
||||
from collections import OrderedDict, namedtuple
|
||||
from bisect import bisect_left
|
||||
from elftools.dwarf.dwarfinfo import DwarfConfig, DebugSectionDescriptor
|
||||
from elftools.dwarf.die import AttributeValue
|
||||
from elftools.dwarf.structs import DWARFStructs
|
||||
from elftools.common.utils import struct_parse, bytelist2string
|
||||
from elftools.dwarf.enums import ENUM_DW_TAG, ENUM_DW_AT, ENUM_DW_FORM
|
||||
from elftools.construct import CString
|
||||
from elftools.dwarf.lineprogram import LineProgramEntry, LineState
|
||||
from elftools.dwarf.dwarf_expr import DWARFExprOp
|
||||
|
||||
# Minimal header records mirroring the fields callers read off pyelftools headers.
LineTableHeader = namedtuple('LineTableHeader', ['version', 'file_entry'])
CUv1Header = namedtuple(
    'CUv1Header',
    ['version', 'unit_length', 'debug_abbrev_offset', 'address_size'])

# Numeric code -> symbolic name lookups, inverted from the pyelftools enums.
TAG_reverse = {code: name for name, code in ENUM_DW_TAG.items()}
ATTR_reverse = {code: name for name, code in ENUM_DW_AT.items()}
FORM_reverse = {code: name for name, code in ENUM_DW_FORM.items()}

# Location-expression opcodes understood by the v1 expression parser.
DW_OP_name2opcode = {
    'DW_OP_reg': 0x01,
    'DW_OP_basereg': 0x02,
    'DW_OP_addr': 0x03,
    'DW_OP_const': 0x04,
    'DW_OP_deref2': 0x05,
    'DW_OP_deref': 0x06,
    'DW_OP_deref4': 0x06,
    'DW_OP_add': 0x07,
    'DW_OP_user_0x80': 0x80,  # Extension op, not sure what's the deal with that
}

# Two names share opcode 0x06; the later one (DW_OP_deref4) wins in this map.
DW_OP_opcode2name = {opcode: name for name, opcode in DW_OP_name2opcode.items()}
|
||||
|
||||
class DIEV1(object):
    """One DWARF v1 debugging information entry, parsed eagerly from a stream.

    Exposes the attributes this package reads off a DIE: ``tag``,
    ``attributes``, ``offset``, ``size``, ``has_children``, ``cu`` and
    ``dwarfinfo``.
    """

    def __init__(self, stm, cu, di):
        """Parse the entry starting at the current position of ``stm``.

        stm: seekable binary stream positioned on the entry's 4-byte length.
        cu:  owning compile unit, or None when it is not known yet.
        di:  object whose ``structs`` attribute supplies the parsers.

        Raises ValueError for an unknown tag or attribute code, and KeyError
        for an unknown form code or a regular entry without DW_AT_sibling.
        """
        self.cu = cu
        self.dwarfinfo = di
        self.stream = stm
        self.offset = stm.tell()
        self.attributes = OrderedDict()
        self.tag = None
        self.has_children = None
        self.abbrev_code = None
        self.size = 0
        # Null DIE terminator. It can be used to obtain offset range occupied
        # by this DIE including its whole subtree.
        self._terminator = None
        self._parent = None

        structs = self.dwarfinfo.structs
        self.size = struct_parse(structs.Dwarf_uint32(''), stm)

        # Entries shorter than 8 bytes carry no tag: treat them as padding.
        if self.size < 8:
            self.tag = 'DW_TAG_padding'
            self.has_children = False
            return

        tag_code = struct_parse(structs.Dwarf_uint16(''), stm)
        if tag_code not in TAG_reverse:
            raise ValueError("%d not a known tag" % (tag_code))
        self.tag = TAG_reverse[tag_code]

        if self.tag == 'DW_TAG_null':  # TAG_padding in DWARF1 spec
            # No attributes; skip the rest (length and tag already consumed).
            stm.seek(self.size - 6, 1)
            self.has_children = False
            return

        end_of_entry = self.offset + self.size
        while stm.tell() < end_of_entry:
            attr_offset = self.stream.tell()
            packed = struct_parse(structs.Dwarf_uint16(''), stm)
            # Low 4 bits hold the form code, the remaining bits the attribute.
            form = FORM_reverse[packed & 0xf]
            attr = packed >> 4
            if attr in ATTR_reverse:
                name = ATTR_reverse[attr]
            elif 0x200 <= attr <= 0x3ff:  # DW_AT_MIPS represented as 0x204???
                name = 'DW_AT_user_0x%x' % attr
            else:
                raise ValueError("%d not a known attribute" % (attr))

            raw_value = struct_parse(structs.Dwarf_dw_form[form], stm)
            self.attributes[name] = AttributeValue(
                name=name,
                form=form,
                value=raw_value,
                raw_value=raw_value,
                offset=attr_offset)

        sibling_offset = self.attributes['DW_AT_sibling'].value
        self.has_children = sibling_offset >= self.offset + self.size + 8

    def get_parent(self):
        """Return the parent DIE recorded during iteration (may be None)."""
        return self._parent

    def is_null(self):
        """Return True when this entry is short padding (size below 8)."""
        return self.tag == 'DW_TAG_padding'

    def iter_children(self):
        """Iterate child DIEs by delegating to the owning compile unit."""
        return self.cu.iter_children(self)

    def sibling(self):
        """Return the value of this entry's DW_AT_sibling attribute."""
        return self.attributes['DW_AT_sibling'].value
|
||||
|
||||
class CompileUnitV1(object):
    """A DWARF v1 compile unit: a top DIE plus an offset-sorted DIE cache."""

    def __init__(self, di, top_die):
        """Create the unit.

        di:      owning info object; must provide ``structs``,
                 ``section_size`` and ``DIE_at_offset(offset, cu)``.
        top_die: the compile unit's root DIE.
        """
        self.dwarfinfo = di
        self.structs = di.structs
        self.header = CUv1Header(version=1, unit_length=None,
                                 debug_abbrev_offset=None, address_size=4)
        # Parallel lists kept sorted by offset: _diemap[i] is the offset of
        # _dielist[i], so bisect can be used for lookups.
        self._dielist = [top_die]
        self._diemap = [top_die.offset]
        # iter_DIEs() starts here; set it so the unit works without an
        # external assignment.
        self.cu_offset = top_die.offset

    def get_top_DIE(self):
        """Return the DIE stored first in the cache (the unit's top DIE)."""
        return self._dielist[0]

    def __getitem__(self, name):
        """Return a header field by name, e.g. ``cu['version']``."""
        return self.header._asdict()[name]

    # Caches
    def DIE_at_offset(self, offset):
        """Return the DIE at ``offset``, parsing and caching it on first use."""
        i = bisect_left(self._diemap, offset)
        if i < len(self._diemap) and offset == self._diemap[i]:
            return self._dielist[i]
        die = self.dwarfinfo.DIE_at_offset(offset, self)
        self._dielist.insert(i, die)
        self._diemap.insert(i, offset)
        return die

    # pyelftools' iter_DIEs sets parent on discovered DIEs, we should too
    def iter_DIEs(self):
        """Yield every non-null DIE of the unit in stream order.

        Each DIE that has no parent recorded yet gets one assigned.
        """
        offset = self.cu_offset
        parent = None
        parent_stack = []
        end_offset = self.get_top_DIE().attributes['DW_AT_sibling'].value
        while offset < end_offset:
            die = self.DIE_at_offset(offset)

            if die._parent is None:
                die._parent = parent

            if not die.is_null():
                yield die
                offset += die.size
                if offset != die.sibling():  # Start of a subtree
                    parent_stack.append(parent)
                    parent = die
            else:  # padding - end of a sibling chain
                # Stray padding at the top level must not abort the walk.
                parent = parent_stack.pop() if parent_stack else None
                offset += die.size

    def iter_children(self, parent_die):
        """Yield the direct children of ``parent_die``.

        Starts right after the parent and follows DW_AT_sibling links until
        a null entry or the end of the section is reached.
        """
        offset = parent_die.offset + parent_die.size
        while offset < self.dwarfinfo.section_size:
            die = self.DIE_at_offset(offset)

            if die._parent is None:
                die._parent = parent_die

            if die.is_null():
                break
            yield die
            offset = die.sibling()
|
||||
|
||||
class LineTableV1(object):
    """Line number table of one DWARF v1 compile unit, decoded lazily."""

    def __init__(self, stm, structs, len, pc):
        """Remember where and how to decode the table.

        stm:     stream positioned on the first table entry.
        structs: parser factory (provides Dwarf_uint16/Dwarf_uint32).
        len:     table length in bytes as read from the table header.
        pc:      base address as read from the table header.
        """
        self.stm = stm
        self.structs = structs
        self.len = len
        self.pc = pc
        self._decoded_entries = None
        self.header = LineTableHeader(1, None)

    def get_entries(self):
        """Return the decoded entries as a list of LineProgramEntry.

        Decoding happens on the first call and the result is cached. Parsing
        stops at an entry whose line number is 0, or when the byte range given
        by the header length is exhausted.
        """
        if self._decoded_entries is None:
            stm = self.stm
            offset = stm.tell()
            # NOTE(review): the header length may already include the 8 header
            # bytes consumed by the caller - confirm against the DWARF 1.1 spec.
            end_offset = offset + self.len
            structs = self.structs
            entries = []
            pc = self.pc
            while offset < end_offset:
                line = struct_parse(structs.Dwarf_uint32(''), stm)
                col = struct_parse(structs.Dwarf_uint16(''), stm)
                pc_delta = struct_parse(structs.Dwarf_uint32(''), stm)
                if line == 0:
                    break
                state = LineState(True)
                state.file = 0
                state.line = line
                state.column = col if col != 0xffff else None
                state.address = pc
                entries.append(LineProgramEntry(0, False, [], state))
                # NOTE(review): deltas are accumulated here; the spec may
                # define them relative to the base address - TODO confirm.
                pc += pc_delta
                # Advance the loop bound; without this the length check above
                # could never terminate the loop.
                offset = stm.tell()
            self._decoded_entries = entries
        return self._decoded_entries
|
||||
|
||||
class DWARFExprParserV1(object):
    """Parser for DWARF v1 location expressions.

    Returns the same DWARFExprOp records as the pyelftools expression parser.
    """

    def __init__(self, structs):
        """structs: parser factory providing ``Dwarf_target_addr``."""
        self.structs = structs

    def parse_expr(self, expr):
        """Parse ``expr`` (a list of byte values) into a list of DWARFExprOp.

        Opcodes 0x00-0x04 and 0x80 carry one address-sized operand; every
        other opcode carries none. Unknown opcodes get the name ``OP:0x..``.
        """
        stm = BytesIO(bytelist2string(expr))
        parsed = []

        while True:
            # Record where this op starts, as pyelftools does, before
            # consuming anything.
            op_offset = stm.tell()
            # Get the next opcode from the stream. If nothing is left in the
            # stream, we're done.
            byte = stm.read(1)
            if len(byte) == 0:
                break

            # Decode the opcode and its name.
            op = ord(byte)
            op_name = DW_OP_opcode2name.get(op, 'OP:0x%x' % op)

            if op <= 4 or op == 0x80:
                args = [struct_parse(self.structs.Dwarf_target_addr(''), stm)]
            else:
                args = []

            parsed.append(DWARFExprOp(op=op, op_name=op_name, args=args, offset=op_offset))

        return parsed
|
||||
|
||||
class DWARFInfoV1(object):
    """DWARF v1 counterpart of pyelftools' DWARFInfo.

    Reads the ``.debug`` section (and ``.line`` when present) of an ELF file.
    """

    def __init__(self, elffile):
        """elffile: an ELFFile that contains a ``.debug`` section."""
        section = elffile.get_section_by_name(".debug")
        section_data = section.data()
        self.section_size = len(section_data)
        self.stm = BytesIO(section_data)

        # Always define linestream so binaries without a .line section do not
        # fail with AttributeError in line_program_for_CU().
        lsection = elffile.get_section_by_name(".line")
        self.linestream = BytesIO(lsection.data()) if lsection else None

        self.config = DwarfConfig(
            little_endian=elffile.little_endian,
            default_address_size=elffile.elfclass // 8,
            machine_arch=elffile.get_machine_arch()
        )

        self.structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=32,
            address_size=self.config.default_address_size)

    def iter_CUs(self):
        """Yield a CompileUnitV1 for each top-level DIE in the section.

        Top-level DIEs are chained through DW_AT_sibling; iteration stops at
        the first padding entry or at the end of the section.
        """
        offset = 0
        while offset < self.section_size:
            die = self.DIE_at_offset(offset, None)
            if die.tag == 'DW_TAG_padding':
                break
            if die.cu is None:
                die.cu = cu = CompileUnitV1(self, die)
                cu.cu_offset = offset
            yield die.cu
            offset = die.attributes['DW_AT_sibling'].value

    # Does not cache
    def DIE_at_offset(self, offset, cu):
        """Parse and return a fresh DIEV1 at ``offset``, owned by ``cu``."""
        self.stm.seek(offset, 0)
        return DIEV1(self.stm, cu, self)

    def location_lists(self):
        """Return None: location lists are not provided for v1."""
        return None

    def line_program_for_CU(self, cu):
        """Return the LineTableV1 for ``cu``, or None if it has none.

        None is returned when the top DIE has no DW_AT_stmt_list attribute or
        the file has no ``.line`` section.
        """
        top_DIE = cu.get_top_DIE()
        if self.linestream is None or 'DW_AT_stmt_list' not in top_DIE.attributes:
            return None

        stm = self.linestream
        stm.seek(top_DIE.attributes['DW_AT_stmt_list'].value, 0)
        structs = self.structs
        # Table header: 4-byte length followed by the base address.
        table_len = struct_parse(structs.Dwarf_uint32(''), stm)
        pc = struct_parse(structs.Dwarf_target_addr(''), stm)
        return LineTableV1(stm, structs, table_len, pc)
|
||||
|
||||
def parse_dwarf1(elffile):
    """Build and return a DWARFInfoV1 for the given ELF file."""
    info = DWARFInfoV1(elffile)
    return info
|
221
bear/formats.py
Normal file
221
bear/formats.py
Normal file
@ -0,0 +1,221 @@
|
||||
import io
|
||||
from os import path, listdir
|
||||
from elftools.dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig
|
||||
# This doesn't depend on Qt
|
||||
# The dependency on filebytes only lives here
|
||||
# Format codes: 0 = ELF, 1 = MACHO, 2 = PE
|
||||
|
||||
def read_pe(filename):
    """Load DWARF info from a PE file.

    Returns a DWARFInfo with ``_format`` set to 2 and ``_start_address`` set
    to the image base, or None when the file has no ``.debug_info`` section.
    """
    from filebytes.pe import PE, IMAGE_FILE_MACHINE

    pefile = PE(filename)

    data = {}
    for section in pefile.sections:
        if not section.name.startswith('.debug'):
            continue
        # Section's real size might be padded - see https://github.com/sashs/filebytes/issues/28
        virtual_size = section.header.PhysicalAddress_or_VirtualSize
        raw_size = section.header.SizeOfRawData
        size = raw_size if virtual_size == 0 else min((raw_size, virtual_size))
        data[section.name] = DebugSectionDescriptor(
            io.BytesIO(section.bytes), section.name, None, size, 0)

    if '.debug_info' not in data:
        return None

    machine = pefile.imageNtHeaders.header.FileHeader.Machine
    # There are also some exotic architectures...
    is64 = machine in (IMAGE_FILE_MACHINE.AMD64, IMAGE_FILE_MACHINE.ARM64, IMAGE_FILE_MACHINE.IA64)

    # Sections that may legitimately be absent are passed as None.
    optional = ('aranges', 'abbrev', 'frame', 'str', 'loc', 'ranges', 'line',
                'pubtypes', 'pubnames', 'addr', 'str_offsets', 'line_str',
                'loclists', 'rnglists', 'sup')
    optional_sections = {'debug_%s_sec' % short: data.get('.debug_' + short)
                         for short in optional}

    config = DwarfConfig(
        little_endian=True,
        default_address_size=8 if is64 else 4,
        machine_arch=IMAGE_FILE_MACHINE[machine].name
    )
    di = DWARFInfo(
        config=config,
        debug_info_sec=data['.debug_info'],
        eh_frame_sec=None,  # Haven't seen one in the wild so far
        gnu_debugaltlink_sec=data.get('.gnu_debugaltlink'),
        **optional_sections
    )
    di._format = 2
    di._start_address = pefile.imageNtHeaders.header.OptionalHeader.ImageBase
    return di
|
||||
|
||||
# Arch + flavor where flavor matters
|
||||
def make_macho_arch_name(macho):
    """Return the CPU type name of a Mach-O image, with the ARM/ARM64
    subtype name appended when the subtype is nonzero."""
    from filebytes.mach_o import CpuType, CpuSubTypeARM, CpuSubTypeARM64

    header = macho.machHeader.header
    cpu = header.cputype
    subtype = header.cpusubtype

    suffix = ''
    if subtype != 0 and cpu == CpuType.ARM:
        suffix = CpuSubTypeARM[subtype].name
    elif subtype != 0 and cpu == CpuType.ARM64:
        suffix = CpuSubTypeARM64[subtype].name

    return CpuType[cpu].name + suffix
|
||||
|
||||
# For debugging purposes only - dump individual debug related sections in a Mach-O file/slice as files
|
||||
def macho_save_sections(filename, macho):
    """Debug helper: write each ``__debug*`` section of a Mach-O image to
    ``<filename>.<arch>.<section>``, skipping files that already exist."""
    from filebytes.mach_o import LC

    arch = make_macho_arch_name(macho)
    segment_kinds = (LC.SEGMENT, LC.SEGMENT_64)
    for cmd in macho.loadCommands:
        if cmd.header.cmd not in segment_kinds:
            continue
        for section in cmd.sections:
            if not section.name.startswith('__debug'):
                continue
            sec_file = ".".join((filename, arch, section.name))
            if path.exists(sec_file):
                continue
            with open(sec_file, 'wb') as out:
                out.write(section.bytes)
|
||||
|
||||
|
||||
# resolve_arch takes a list of architecture descriptions, and returns
|
||||
# the desired index, or None if the user has cancelled
|
||||
def read_macho(filename, resolve_arch, friendly_filename):
    """Load DWARF info from a Mach-O file or from one slice of a fat binary.

    filename:          path of the Mach-O file to parse.
    resolve_arch:      callable taking a list of architecture names and
                       returning the chosen index, or None on cancellation.
    friendly_filename: user-facing name; only used by the commented-out
                       section dump.

    Returns a DWARFInfo with ``_format`` set to 1, ``_fat_arch`` and
    ``_start_address`` attached; None when there is no ``__debug_info``
    section; False when the user cancelled the slice choice.
    """
    from filebytes.mach_o import MachO, CpuType, TypeFlags, LC
    fat_arch = None
    macho = MachO(filename)
    if macho.isFat:
        slices = [make_macho_arch_name(fat_slice) for fat_slice in macho.fatArches]
        arch_no = resolve_arch(slices)
        if arch_no is None:  # User cancellation
            return False
        fat_arch = slices[arch_no]
        macho = macho.fatArches[arch_no]

    # We proceed with macho being a arch-specific file, or a slice within a fat binary
    segment_kinds = (LC.SEGMENT, LC.SEGMENT_64)
    data = {
        section.name: DebugSectionDescriptor(io.BytesIO(section.bytes), section.name, None, len(section.bytes), 0)
        for cmd in macho.loadCommands
        if cmd.header.cmd in segment_kinds
        for section in cmd.sections
        if section.name.startswith('__debug')
    }

    #macho_save_sections(friendly_filename, macho)

    if '__debug_info' not in data:
        return None

    def section(*names):
        # First section present under any of the candidate names, else None.
        for name in names:
            if name in data:
                return data[name]
        return None

    cpu = macho.machHeader.header.cputype
    di = DWARFInfo(
        config=DwarfConfig(
            little_endian=True,
            default_address_size=8 if (cpu & TypeFlags.ABI64) != 0 else 4,
            machine_arch=make_macho_arch_name(macho)
        ),
        debug_info_sec=data['__debug_info'],
        debug_aranges_sec=section('__debug_aranges'),
        debug_abbrev_sec=data['__debug_abbrev'],
        debug_frame_sec=section('__debug_frame'),
        eh_frame_sec=None,  # Haven't seen those in Mach-O
        # A binary may carry no string section; pass None rather than fail.
        debug_str_sec=section('__debug_str'),
        debug_loc_sec=section('__debug_loc'),
        debug_ranges_sec=section('__debug_ranges'),
        debug_line_sec=section('__debug_line'),
        debug_pubtypes_sec=section('__debug_pubtypes'),  #__debug_gnu_pubt?
        debug_pubnames_sec=section('__debug_pubnames'),  #__debug_gnu_pubn?
        debug_addr_sec=section('__debug_addr'),
        # Mach-O section names hold at most 16 characters, so the long name
        # shows up truncated.
        debug_str_offsets_sec=section('__debug_str_offsets', '__debug_str_offs'),
        debug_line_str_sec=section('__debug_line_str'),
        debug_loclists_sec=section('__debug_loclists'),
        debug_rnglists_sec=section('__debug_rnglists'),
        debug_sup_sec=section('__debug_sup'),
        # Only "__debug*" sections are collected above, so this stays None.
        gnu_debugaltlink_sec=section('__gnu_debugaltlink')
    )
    di._format = 1
    di._fat_arch = fat_arch
    text_cmd = next((cmd for cmd in macho.loadCommands
                     if cmd.header.cmd in segment_kinds and cmd.name == "__TEXT"), False)
    di._start_address = text_cmd.header.vmaddr if text_cmd else 0
    return di
|
||||
|
||||
# UI agnostic - resolve_arch might be interactive
|
||||
# Returns slightly augmented DWARFInfo
|
||||
# Or None if not a DWARF containing file (or unrecognized)
|
||||
# Or False if user has cancelled
|
||||
# Or throws an exception
|
||||
# resolve_arch is for Mach-O fat binaries - see read_macho()
|
||||
def read_dwarf(filename, resolve_arch):
    """Detect the container format of ``filename`` and load its DWARF info.

    filename:     path to an ELF / PE / Mach-O file, or to a ``.dSYM``,
                  ``.app`` or ``.framework`` bundle directory.
    resolve_arch: callback for Mach-O fat binaries - see read_macho().

    Returns a slightly augmented DWARFInfo, None if the file has no DWARF
    (or is not recognized), or False if the user cancelled. Parser
    exceptions propagate. For ELF the file stays open only when an info
    object is returned; in every other case it is closed before returning.
    """
    if path.isfile(filename):  # On MacOS, opening dSYM bundles as is would be right
        file = open(filename, 'rb')
        keep_open = False
        try:
            signature = file.read(4)

            if signature[0:2] == b'MZ':  # DOS header - this might be a PE. Don't verify the PE header, just feed it to the parser
                return read_pe(filename)

            if signature == b'\x7FELF':  # It's an ELF
                from elftools.elf.elffile import ELFFile
                file.seek(0)
                elffile = ELFFile(file)
                # Retrieve the preferred loading address
                load_segment = next((seg for seg in elffile.iter_segments() if seg.header.p_type == 'PT_LOAD'), None)
                start_address = load_segment.header.p_vaddr if load_segment else 0
                di = None
                if elffile.has_dwarf_info():
                    di = elffile.get_dwarf_info()
                elif elffile.get_section_by_name(".debug"):
                    from .dwarfone import parse_dwarf1
                    di = parse_dwarf1(elffile)

                if di:
                    di._format = 0
                    di._start_address = start_address
                    # The info object reads from the file lazily; hand the
                    # open handle over only once parsing has succeeded.
                    keep_open = True
                return di

            if signature in (b'\xCA\xFE\xBA\xBE', b'\xFE\xED\xFA\xCE', b'\xFE\xED\xFA\xCF', b'\xCE\xFA\xED\xFE', b'\xCF\xFA\xED\xFE'):
                if signature == b'\xCA\xFE\xBA\xBE' and int.from_bytes(file.read(4), 'big') >= 0x20:
                    # Java .class files also have CAFEBABE, check the fat binary arch count
                    return None
                # Mach-O fat binary, or 32/64-bit Mach-O in big/little-endian format
                return read_macho(filename, resolve_arch, filename)
        finally:
            if not keep_open:
                file.close()
    elif path.isdir(filename):
        # Is it a dSYM bundle?
        nameparts = path.basename(filename).split('.')
        dwarf_dir = path.join(filename, 'Contents', 'Resources', 'DWARF')
        if nameparts[-1] == 'dSYM' and path.exists(dwarf_dir):
            files = listdir(dwarf_dir)
            if len(files) > 0:
                # When are there multiple DWARF files in a dSYM bundle?
                # TODO: let the user choose?
                dsym_file_path = path.join(dwarf_dir, files[0])
                return read_macho(dsym_file_path, resolve_arch, filename)
        # Is it an app bundle? appname.app
        if len(nameparts) > 1 and nameparts[-1] in ('app', 'framework'):
            app_file = path.join(filename, '.'.join(nameparts[0:-1]))
            if path.exists(app_file):
                return read_macho(app_file, resolve_arch, filename)

        # Any other bundle formats we should be aware of?
    return None
|
||||
|
||||
def get_debug_sections(di):
    """Map display names to the debug section objects present on ``di``.

    The key is the short section name ('info', 'line', 'eh_frame', ...).
    Sections that are missing, None or otherwise falsy are left out, so info
    objects that do not define every field are handled too.
    """
    section_names = {name: "debug_%s_sec" % name
                     for name in
                     ('info', 'aranges', 'abbrev', 'frame',
                      'str', 'loc', 'ranges', 'line', 'addr',
                      'str_offsets', 'line_str', 'pubtypes',
                      'pubnames', 'loclists', 'rnglists', 'sup')}
    section_names['eh_frame'] = 'eh_frame_sec'
    section_names['gnu_debugaltlink'] = 'gnu_debugaltlink_sec'

    # Display name to section object
    found = {}
    for display_name, field_name in section_names.items():
        section = getattr(di, field_name, None)
        if section:
            found[display_name] = section
    return found
|
27
bear/patch.py
Normal file
27
bear/patch.py
Normal file
@ -0,0 +1,27 @@
|
||||
import elftools.dwarf.structs
|
||||
from elftools.construct.macros import Array
|
||||
import elftools.dwarf.locationlists
|
||||
from elftools.common.exceptions import DWARFError
|
||||
import elftools.dwarf.enums
|
||||
|
||||
# Fixes to pyelftools that are not in the released version yet
|
||||
# Not sure about form_indirect, no binaries.
|
||||
def monkeypatch():
    """Apply fixes to pyelftools that are not in the released version yet.

    Safe to call more than once: the DW_FORM patch is applied only if it is
    not already in place.
    """
    # Not sure about DW_FORM_indirect - need a test binary
    # This patches DW_FORM_data16
    structs_cls = elftools.dwarf.structs.DWARFStructs
    if not hasattr(structs_cls, '_create_dw_form_base'):
        def _create_dw_form_ex(self):
            self._create_dw_form_base()
            self.Dwarf_dw_form['DW_FORM_data16'] = Array(16, self.Dwarf_uint8(''))

        # Keep the original under a new name; rewrapping on a second call
        # would make the wrapper call itself forever.
        structs_cls._create_dw_form_base = structs_cls._create_dw_form
        structs_cls._create_dw_form = _create_dw_form_ex

    def get_location_list_at_offset_ex(self, offset, die=None):
        if die is None:
            raise DWARFError("For this binary, \"die\" needs to be provided")
        # Pick the section from the version of the CU that owns the DIE.
        section = self._loclists if die.cu.header.version >= 5 else self._loc
        return section.get_location_list_at_offset(offset, die)

    elftools.dwarf.locationlists.LocationListsPair.get_location_list_at_offset = get_location_list_at_offset_ex

    elftools.dwarf.enums.ENUM_DW_AT["DW_AT_GNU_dwo_name"] = 0x2130
    elftools.dwarf.enums.ENUM_DW_AT["DW_AT_GNU_ranges_base"] = 0x2132
    elftools.dwarf.enums.ENUM_DW_AT["DW_AT_GNU_addr_base"] = 0x2133
|
1888
elf_symbols.py
1888
elf_symbols.py
File diff suppressed because it is too large
Load Diff
377
main.py
Normal file
377
main.py
Normal file
@ -0,0 +1,377 @@
|
||||
#!/bin/python
|
||||
|
||||
import sys
|
||||
from bear import formats
|
||||
from elftools.dwarf.locationlists import LocationParser, LocationExpr
|
||||
from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp, DW_OP_opcode2name
|
||||
from bear.dwarfone import DWARFExprParserV1
|
||||
|
||||
from pprint import pprint
|
||||
|
||||
|
||||
# Output options. "include_file_name": prefix flattened variable names with
# the name of the compile unit they come from.
configuration = dict(include_file_name=False)
|
||||
|
||||
|
||||
def eprint(*args, **kwargs):
    """print() wrapper that writes to standard error."""
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
|
||||
|
||||
|
||||
class DWARFParseError(Exception):
    """ "Opened, could not parse" """

    def __init__(self, exc, di):
        """exc: the underlying error; di: the info object being parsed."""
        message = "DWARF parsing error: " + format(exc)
        super().__init__(message)
        self.dwarfinfo = di
|
||||
|
||||
|
||||
# Some additional data for every DIE
|
||||
def decorate_die(die, i):
    """Attach bookkeeping fields to ``die`` and return it.

    ``_i`` is set to ``i`` and ``_children`` is reset to None (not loaded).
    """
    for field, value in (('_i', i), ('_children', None)):
        setattr(die, field, value)
    return die
|
||||
|
||||
def load_children(parent_die):
    """Load and cache the child DIEs of ``parent_die`` in ``_children``.

    Does nothing when the children are already cached. Assumes the caller
    has already checked that the DIE has children. If iterating raises
    KeyError (catching #1516), a message is printed and the cache is set to
    an empty list.
    """
    if getattr(parent_die, "_children", None) is not None:
        return

    # TODO: wait cursor here. It may cause disk I/O
    try:
        loaded = []
        for index, die in enumerate(parent_die.iter_children()):
            loaded.append(decorate_die(die, index))
        parent_die._children = loaded
    except KeyError:
        print("This executable file is corrupt or incompatible with the current version of Bear.")
        parent_die._children = []
|
||||
|
||||
|
||||
|
||||
def safe_DIE_name(die, default = ''):
    """Return the DIE's DW_AT_name decoded as UTF-8 (undecodable bytes are
    dropped), or ``default`` when the DIE has no name attribute."""
    if 'DW_AT_name' not in die.attributes:
        return default
    raw_name = die.attributes['DW_AT_name'].value
    return raw_name.decode('utf-8', errors='ignore')
|
||||
|
||||
|
||||
class Bear():
    """Collects the top-level variables described by a file's DWARF data.

    After construction ``myVariables`` holds one dictionary per compile unit:
        name:     compile unit name
        children: list of variable dictionaries with the fields
            name:    variable name
            address: hex string of the first operand of the location
                     expression (only when one could be decoded)
            type:    type description as built by resolve_type()
    """

    def __init__(self, filename):
        """Load ``filename`` and collect its variables.

        Prints a message and exits the process with status 1 when no DWARF
        info can be loaded. Raises DWARFParseError when parsing the compile
        units fails an assertion.
        """
        di = formats.read_dwarf(filename, self.resolve_arch)
        if not di:  # Covers both False and None
            print("Something went wrong")
            sys.exit(1)

        # Set every attribute up front so the early return below still
        # leaves a usable (empty) instance.
        self.dwarfinfo = di
        self.filename = filename
        self.myVariables = []
        self.top_dies = []

        # Some degree of graceful handling of wrong format
        try:
            # Some cached top level stuff
            # Notably, iter_CUs doesn't cache
            di._ranges = None  # Loaded on first use
            # We'll need them first thing, might as well load here
            di._unsorted_CUs = [self._decorate_cu(cu, i) for (i, cu) in enumerate(di.iter_CUs())]
            if not di._unsorted_CUs:
                return  # Weird, but saw it once - debug sections present, but no CUs
            # For quick CU search by offset within the info section, regardless of sorting
            di._CU_offsets = [cu.cu_offset for cu in di._unsorted_CUs]
            di._CUs = list(di._unsorted_CUs)
            di._locparser = None  # Created on first use
        except AssertionError as ass:  # Covers exceptions during parsing
            raise DWARFParseError(ass, di) from ass

        self.top_dies = [decorate_die(CU.get_top_DIE(), i) for (i, CU) in enumerate(di._CUs)]

        for top_die in self.top_dies:
            # top dies only contain Compile Units
            # Preload children
            load_children(top_die)

            children_dies = [self._describe_variable(child_die)
                             for child_die in top_die._children
                             if child_die.tag == 'DW_TAG_variable']

            self.myVariables.append({
                'name': safe_DIE_name(top_die, '?'),
                'children': children_dies
            })

        pprint(self.myVariables)

    @staticmethod
    def _decorate_cu(cu, i):
        """Attach the per-CU cache fields used by this class and return cu."""
        cu._i = i
        cu._lineprogram = None
        cu._exprparser = None
        return cu

    def _describe_variable(self, var_die):
        """Build the dictionary describing one DW_TAG_variable DIE."""
        entry = {
            # Name should be on every element, if not set something so it can be printed
            'name': safe_DIE_name(var_die, '?')
        }

        if 'DW_AT_location' in var_die.attributes:
            loc_attr = var_die.attributes['DW_AT_location']
            if LocationParser.attribute_has_location(loc_attr, var_die.cu['version']):
                ll = self.parse_location(var_die, loc_attr)
                # Only a single expression has loc_expr; location lists are
                # skipped.
                if isinstance(ll, LocationExpr):
                    ops = self.dump_expr(var_die, ll.loc_expr)
                    if ops and ops[0].args:
                        entry['address'] = hex(ops[0].args[0])

        if 'DW_AT_type' in var_die.attributes:
            typ_die = var_die.get_DIE_from_attribute('DW_AT_type')
            entry['type'] = self.resolve_type(typ_die)

        return entry

    def resolve_type(self, die_type):
        """Return a dictionary describing ``die_type``.

        Possible keys: name, offset (bits, for struct members), type (base
        type name), children (members or array elements), size_bit.
        Unsupported tags are reported on stderr.
        """
        if die_type.tag == 'DW_TAG_volatile_type' and 'DW_AT_type' in die_type.attributes:
            die_type = die_type.get_DIE_from_attribute('DW_AT_type')

        entry = {
            # Name should be on every element, if not set something so it can be printed
            'name': safe_DIE_name(die_type, '?')
        }

        if 'DW_AT_data_member_location' in die_type.attributes:
            member_loc = die_type.attributes['DW_AT_data_member_location'].value
            # The value may also be an expression (a list); multiplying that
            # would only repeat the list, so the offset is omitted then.
            if isinstance(member_loc, int):
                entry['offset'] = member_loc * 8

        if 'DW_AT_type' in die_type.attributes and die_type.tag not in ['DW_TAG_base_type', 'DW_TAG_structure_type', 'DW_TAG_array_type']:
            # Check if the type is a redefinition of a base type
            die_type_test = die_type
            while 'DW_AT_type' in die_type_test.attributes:
                die_type_test = die_type_test.get_DIE_from_attribute('DW_AT_type')
                if die_type_test.tag in ['DW_TAG_base_type', 'DW_TAG_structure_type', 'DW_TAG_array_type', 'DW_TAG_union_type']:
                    die_type = die_type_test
                    break

        if die_type.tag == 'DW_TAG_base_type':
            entry['type'] = safe_DIE_name(die_type, '?')
        elif die_type.tag in ('DW_TAG_structure_type', 'DW_TAG_union_type'):
            load_children(die_type)
            entry['children'] = [self.resolve_type(child_die) for child_die in die_type._children]
        elif die_type.tag == "DW_TAG_array_type":
            array_type = self.resolve_type(die_type.get_DIE_from_attribute('DW_AT_type'))
            load_children(die_type)
            # The first child is expected to carry the upper bound; without
            # it the element count is unknown and no elements are listed.
            bound_attr = None
            if die_type._children:
                bound_attr = die_type._children[0].attributes.get('DW_AT_upper_bound')
            children_num = bound_attr.value if bound_attr is not None else -1
            element_bits = array_type.get('size_bit')
            child_entries = []
            for index in range(0, children_num + 1):
                child_entry = array_type.copy()
                if element_bits is not None:
                    child_entry['offset'] = element_bits * index
                child_entries.append(child_entry)
            entry['children'] = child_entries
        else:
            eprint("Unsupported type:", die_type.tag)

        if 'DW_AT_byte_size' in die_type.attributes:
            entry['size_bit'] = die_type.attributes['DW_AT_byte_size'].value * 8

        return entry

    def flatten_type(self, parent=None):
        """Return the collected variables in flattened form.

        For every compile unit the result holds its name followed by one
        flatten_child() result per variable. ``parent`` is unused.
        """
        flattened = []
        # Iterate over CUs
        # - name - filename
        # - children - variables
        for CU in self.myVariables:
            flattened.append(CU['name'])

            for child in CU['children']:
                if configuration["include_file_name"]:
                    flattened.append(self.flatten_child(child, CU['name']))
                else:
                    flattened.append(self.flatten_child(child))
        return flattened

    def flatten_child(self, child, name='', address=0):
        """Flatten one variable or type entry into a list of leaf dicts.

        child:   entry dictionary (see the class docstring / resolve_type()).
        name:    dotted name of the enclosing entry; '' for none.
        address: address inherited from the enclosing entry; a falsy value
                 means "use the entry's own address".

        Each leaf is ``{'name': dotted name, 'address': address}``; address
        is None when neither source provides one.
        """
        if name:
            full_name = '{parent}.{child}'.format(parent=name, child=child['name'])
        else:
            full_name = child['name']
        base = address if address else child.get('address')

        nested = list(child.get('children', ()))
        type_info = child.get('type')
        if isinstance(type_info, dict):
            # A variable entry keeps its type tree under 'type'; base type
            # entries store a plain name there instead.
            nested.extend(type_info.get('children', ()))

        if not nested:
            return [{'name': full_name, 'address': base}]

        # TODO: member offsets are not applied to the address yet; every
        # leaf currently reports the address of its top-level variable.
        kids = []
        for kid in nested:
            kids.extend(self.flatten_child(kid, full_name, base))
        return kids

    def pretty_print(self):
        """Unfinished: returns the compile unit names only."""
        names = []
        # Iterate over CUs
        # - name - filename
        # - children - variables
        for CU in self.myVariables:
            names.append(CU['name'])

            for child in CU['children']:
                self.pretty_child(child)
        return names

    def pretty_child(self, child, prefix='', address=0):
        """Unfinished placeholder; does nothing and returns None."""
        return None

    def print_top_DIE(self, die):
        """Print the resolved type and the name of a named variable DIE."""
        if die.tag == 'DW_TAG_variable':
            name = safe_DIE_name(die)
            if name:
                typ_name = ''
                if 'DW_AT_type' in die.attributes:
                    typ = die.get_DIE_from_attribute('DW_AT_type')
                    print(self.resolve_type(typ))
                print('{name} {typ_name}'.format(name=name, typ_name=typ_name))

    def print_DIE(self, die, prefix=''):
        """Debug helper: recursively print a DIE and the types it refers to."""
        name = ''
        if die.tag == 'DW_TAG_variable':
            name = safe_DIE_name(die)
            if name and 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                if 'DW_AT_location' in die.attributes:
                    ll = self.parse_location(die, die.attributes['DW_AT_location'])

                self.print_DIE(typ, name)
                return
        elif die.tag == 'DW_TAG_compile_unit':
            name = safe_DIE_name(die, '.')
        elif prefix and die.tag == 'DW_TAG_base_type':
            name = safe_DIE_name(die)
        elif prefix and die.tag == 'DW_TAG_const_type':
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                self.print_DIE(typ, prefix)
                return
        elif prefix and die.tag == 'DW_TAG_array_type':
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                name = prefix + '[]'
                self.print_DIE(typ, name)
                load_children(die)

                if die._children:
                    for child in die._children:
                        print(child)
                        self.print_DIE(child, name)

                return
        elif prefix and die.tag == 'DW_TAG_volatile_type':
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                self.print_DIE(typ, prefix)
                return
        elif prefix and die.tag == 'DW_TAG_typedef':
            print(die.attributes["DW_AT_name"].value)
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                name = prefix + '[]'
                self.print_DIE(typ, name)
                load_children(die)

                if die._children:
                    for child in die._children:
                        print(child)
                        self.print_DIE(child, name)
        elif prefix and die.tag == 'DW_TAG_enumeration_type':
            print ("mylittlepony")
            print(die)
            if 'DW_AT_type' in die.attributes:
                typ = die.get_DIE_from_attribute('DW_AT_type')
                name = prefix
                self.print_DIE(typ, name)
                load_children(die)
                print (typ)
        elif prefix:
            print (prefix)
            print(die)

        if name:
            if prefix:
                print (prefix, name)
            else:
                print (name)

    def parse_location(self, die, attr):
        """Parse a location attribute using a parser cached on the info object."""
        di = die.dwarfinfo
        if di._locparser is None:
            di._locparser = LocationParser(di.location_lists())
        return di._locparser.parse_from_attribute(attr, die.cu['version'], die = die)

    def dump_expr(self, die, expr):
        """Parse an expression blob and return the parser's list of ops.

        The parser is created on first use and cached on the DIE's CU; the
        v1 parser is chosen for CUs whose version is 1 or lower.
        """
        if die.cu._exprparser is None:
            die.cu._exprparser = DWARFExprParser(die.cu.structs) if die.cu['version'] > 1 else DWARFExprParserV1(die.cu.structs)

        return die.cu._exprparser.parse_expr(expr)

    def resolve_arch(self, arches):
        """Fat binary slice choice is unsupported: report it and return None."""
        print("resolve_arch: Unsupported feature")
        return None
|
||||
|
||||
|
||||
def main():
    """Entry point: load a binary and pretty-print its flattened variables.

    The binary is taken from the first command line argument; without one
    the development default path is used.
    """
    from bear.patch import monkeypatch
    monkeypatch()

    default_binary = "/home/juraj/projects/Playground_C/build/playground_c"
    binary = sys.argv[1] if len(sys.argv) > 1 else default_binary

    bear = Bear(binary)
    flattened = bear.flatten_type()
    pprint(flattened)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
Reference in New Issue
Block a user