elf_symbols/dwex/dwarfone.py
Juraj Oravec bd70b22a7a
Rename folder bear to dwex
Signed-off-by: Juraj Oravec <jurajoravec@mailo.com>
2024-05-11 18:42:36 +02:00

290 lines
10 KiB
Python

# Support for DWARF v1.1 in a way that will be more or less compatible with pyelftools
from io import BytesIO
from collections import OrderedDict, namedtuple
from bisect import bisect_left
from elftools.dwarf.dwarfinfo import DwarfConfig, DebugSectionDescriptor
from elftools.dwarf.die import AttributeValue
from elftools.dwarf.structs import DWARFStructs
from elftools.common.utils import struct_parse, bytelist2string
from elftools.dwarf.enums import ENUM_DW_TAG, ENUM_DW_AT, ENUM_DW_FORM
from elftools.construct import CString
from elftools.dwarf.lineprogram import LineProgramEntry, LineState
from elftools.dwarf.dwarf_expr import DWARFExprOp
# Lightweight header records: LineTableHeader is attached to LineTableV1
# instances, CUv1Header to CompileUnitV1 instances.
LineTableHeader = namedtuple('LineTableHeader', ['version', 'file_entry'])
CUv1Header = namedtuple(
    'CUv1Header',
    ['version', 'unit_length', 'debug_abbrev_offset', 'address_size'])
# Inverted copies of the pyelftools enum tables: numeric code -> symbolic name.
TAG_reverse = {code: name for name, code in ENUM_DW_TAG.items()}
ATTR_reverse = {code: name for name, code in ENUM_DW_AT.items()}
FORM_reverse = {code: name for name, code in ENUM_DW_FORM.items()}
# Location expression opcodes known to the V1 expression parser, by name.
DW_OP_name2opcode = {
    'DW_OP_reg': 0x01,
    'DW_OP_basereg': 0x02,
    'DW_OP_addr': 0x03,
    'DW_OP_const': 0x04,
    'DW_OP_deref2': 0x05,
    'DW_OP_deref': 0x06,
    'DW_OP_deref4': 0x06,
    'DW_OP_add': 0x07,
    'DW_OP_user_0x80': 0x80,  # Extension op, not sure what's the deal with that
}
# Inverse lookup. 0x06 appears twice above; the later entry (DW_OP_deref4)
# is the one that survives the inversion.
DW_OP_opcode2name = {opcode: name for name, opcode in DW_OP_name2opcode.items()}
class DIEV1(object):
    """One debugging information entry parsed from a DWARF v1 stream.

    Instances expose ``tag``, ``attributes``, ``offset``, ``size`` and
    ``has_children``, plus a handful of navigation helpers.
    """

    def __init__(self, stm, cu, di):
        """Parse a single entry from the current position of *stm*.

        stm -- seekable binary stream positioned at the entry's length field
        cu  -- owning compile unit object, stored as given (may be None)
        di  -- DWARF info object whose ``structs`` provide the field parsers

        Raises ValueError when the tag code or an attribute code is unknown.
        """
        start = stm.tell()

        self.cu = cu
        self.dwarfinfo = di
        self.stream = stm
        self.offset = start
        self.attributes = OrderedDict()
        self.tag = None
        self.has_children = None
        self.abbrev_code = None
        self.size = 0
        # Null DIE terminator. It can be used to obtain offset range occupied
        # by this DIE including its whole subtree.
        self._terminator = None
        self._parent = None

        structs = di.structs
        # Every entry starts with its own total length as a 32-bit integer.
        self.size = struct_parse(structs.Dwarf_uint32(''), stm)

        if self.size < 8:
            # Too short for a tag plus attributes: reported as padding.
            self.tag = 'DW_TAG_padding'
            self.has_children = False
            return

        tag_code = struct_parse(structs.Dwarf_uint16(''), stm)
        if tag_code not in TAG_reverse:
            raise ValueError("%d not a known tag" % (tag_code))
        self.tag = TAG_reverse[tag_code]

        if self.tag == 'DW_TAG_null':  # TAG_padding in DWARF1 spec
            # No attributes; skip the rest of the entry. The 4-byte length
            # and the 2-byte tag (6 bytes) have already been consumed.
            stm.seek(self.size - 6, 1)
            self.has_children = False
            return

        end = start + self.size
        while stm.tell() < end:
            attr_start = stm.tell()
            packed = struct_parse(structs.Dwarf_uint16(''), stm)
            # The low 4 bits select the form, the remaining bits the attribute.
            form_name = FORM_reverse[packed & 0xf]
            attr_code = packed >> 4

            if attr_code in ATTR_reverse:
                attr_name = ATTR_reverse[attr_code]
            elif 0x200 <= attr_code <= 0x3ff:  # DW_AT_MIPS represented as 0x204???
                attr_name = 'DW_AT_user_0x%x' % attr_code
            else:
                raise ValueError("%d not a known attribute" % (attr_code))

            parsed = struct_parse(structs.Dwarf_dw_form[form_name], stm)
            self.attributes[attr_name] = AttributeValue(
                name=attr_name,
                form=form_name,
                value=parsed,
                raw_value=parsed,
                offset=attr_start)

        # Children are assumed when the sibling offset leaves room for at
        # least 8 more bytes after this entry.
        sibling_offset = self.attributes['DW_AT_sibling'].value
        self.has_children = sibling_offset >= end + 8

    def get_parent(self):
        """Return the parent DIE recorded so far, or None."""
        return self._parent

    def is_null(self):
        """Tell whether this entry was classified as padding."""
        return 'DW_TAG_padding' == self.tag

    def iter_children(self):
        """Delegate child iteration to the owning compile unit."""
        owner = self.cu
        return owner.iter_children(self)

    def sibling(self):
        """Return the value of this entry's DW_AT_sibling attribute."""
        sibling_attr = self.attributes['DW_AT_sibling']
        return sibling_attr.value
class CompileUnitV1(object):
    """Compile unit wrapper for DWARF v1, keyed on its top DIE.

    Keeps a cache of the DIEs already parsed for this unit in two parallel
    lists: ``_diemap`` (sorted offsets) and ``_dielist`` (matching DIEs).
    """

    def __init__(self, di, top_die):
        """Create a unit around *top_die*.

        di      -- DWARF info object; provides ``structs`` and DIE parsing
        top_die -- the already parsed top-level DIE of this unit
        """
        self.dwarfinfo = di
        self.structs = di.structs
        self.header = CUv1Header(version = 1, unit_length = None, debug_abbrev_offset = None, address_size = 4)
        # iter_DIEs() starts at cu_offset. Default it to the top DIE's offset
        # so the unit is usable even when the creator does not assign it.
        self.cu_offset = top_die.offset
        self._dielist = [top_die]
        self._diemap = [top_die.offset]

    def get_top_DIE(self):
        """Return the top-level DIE of this unit."""
        return self._dielist[0]

    def __getitem__(self, name):
        """Look up a header field by name, e.g. ``cu['address_size']``."""
        return self.header._asdict()[name]

    # Caches
    def DIE_at_offset(self, offset):
        """Return the DIE at *offset*, parsing and caching it on first use."""
        i = bisect_left(self._diemap, offset)
        if i < len(self._diemap) and offset == self._diemap[i]:
            die = self._dielist[i]
        else:
            die = self.dwarfinfo.DIE_at_offset(offset, self)
            # Insert at the bisect position so both lists stay sorted.
            self._dielist.insert(i, die)
            self._diemap.insert(i, offset)
        return die

    # pyelftools' iter_DIEs sets parent on discovered DIEs, we should too
    def iter_DIEs(self):
        """Yield every non-padding DIE of the unit in stream order.

        Walks from ``cu_offset`` up to the top DIE's sibling offset and
        assigns ``_parent`` on each DIE that does not have one yet.
        """
        offset = self.cu_offset
        parent = None
        parent_stack = list()
        end_offset = self.get_top_DIE().attributes['DW_AT_sibling'].value
        while offset < end_offset:
            die = self.DIE_at_offset(offset)

            if die._parent is None:
                die._parent = parent

            if not die.is_null():
                yield die
                offset += die.size
                if offset != die.sibling(): # Start of a subtree
                    parent_stack.append(parent)
                    parent = die
            else: # padding - end of a sibling chain
                parent = parent_stack.pop()
                offset += die.size

    def iter_children(self, parent_die):
        """Yield the direct children of *parent_die*.

        Yields nothing for a DIE whose ``has_children`` is false. Without
        that check the walk would begin at the DIE's next sibling and report
        the siblings as children.
        """
        if not parent_die.has_children:
            return

        offset = parent_die.offset + parent_die.size
        while offset < self.dwarfinfo.section_size:
            die = self.DIE_at_offset(offset)

            if die._parent is None:
                die._parent = parent_die

            if not die.is_null():
                yield die
                # Troubleshooting #1497
                # NOTE(review): these locals are otherwise unused; presumably
                # kept so they are visible when die.sibling() fails - confirm.
                tag = die.tag
                attr = die.attributes
                off = die.offset
                size = die.size
                has_children = die.has_children
                offset = die.sibling()
            else:
                # Padding entry marks the end of the sibling chain.
                break
class LineTableV1(object):
    """Lazily decoded DWARF v1 line table for one compile unit."""

    def __init__(self, stm, structs, len, pc):
        """Remember where and how to decode the table.

        stm     -- stream positioned at the first line entry
        structs -- provider of the integer field parsers
        len     -- length value read from the table header
        pc      -- base address read from the table header
        """
        self.stm = stm
        self.structs = structs
        self.len = len
        self.pc = pc
        self._decoded_entries = None
        self.header = LineTableHeader(1, (None))

    def get_entries(self):
        """Return the list of decoded line entries, decoding on first call.

        Reading starts at the stream's current position. Each record is a
        32-bit line, a 16-bit column and a 32-bit address delta; a record
        with line 0 ends the table.
        """
        if self._decoded_entries is not None:
            return self._decoded_entries

        stream = self.stm
        fields = self.structs
        start = stream.tell()
        limit = start + self.len
        address = self.pc
        decoded = []
        # `start` is not advanced inside the loop, so once entered the loop
        # only finishes on the line == 0 terminator below.
        while start < limit:
            line_no = struct_parse(fields.Dwarf_uint32(''), stream)
            column = struct_parse(fields.Dwarf_uint16(''), stream)
            delta = struct_parse(fields.Dwarf_uint32(''), stream)
            if line_no == 0:
                break

            row = LineState(True)
            row.file = 0
            row.line = line_no
            # A column of 0xffff is stored as None.
            if column != 0xffff:
                row.column = column
            else:
                row.column = None
            row.address = address
            decoded.append(LineProgramEntry(0, False, [], row))
            address += delta

        self._decoded_entries = decoded
        return self._decoded_entries
class DWARFExprParserV1(object):
    """Parser for DWARF v1 location expressions.

    Produces the same ``DWARFExprOp`` records as the pyelftools expression
    parser.
    """

    def __init__(self, structs):
        """structs -- provider of the target-address field parser."""
        self.structs = structs

    def parse_expr(self, expr):
        """Parse *expr* (a sequence of byte values) into a list of ops.

        Each op carries its opcode, its name (``OP:0x..`` when the opcode is
        not in the V1 table), its argument list and the offset at which the
        op starts within the expression.
        """
        stm = BytesIO(bytelist2string(expr))
        parsed = []
        while True:
            # Record where this op begins *before* consuming it, so that
            # `offset` points at the opcode byte rather than past the op.
            offset = stm.tell()

            # Get the next opcode from the stream. If nothing is left in the
            # stream, we're done.
            byte = stm.read(1)
            if len(byte) == 0:
                break

            # Decode the opcode and its name.
            op = ord(byte)
            op_name = DW_OP_opcode2name.get(op, 'OP:0x%x' % op)

            # Opcodes up to 4 and the 0x80 extension take one address-sized
            # argument; the rest take none.
            if op <= 4 or op == 0x80:
                args = [struct_parse(self.structs.Dwarf_target_addr(''), stm),]
            else:
                args = []

            parsed.append(DWARFExprOp(op=op, op_name=op_name, args=args, offset=offset))

        return parsed
class DWARFInfoV1(object):
    """Entry point for DWARF v1 data held in the ``.debug`` section.

    Also reads the ``.line`` section when the file has one.
    """

    def __init__(self, elffile):
        """Load the debug sections of *elffile* into in-memory streams.

        elffile -- ELF file object offering ``get_section_by_name``,
                   ``little_endian``, ``elfclass`` and ``get_machine_arch``
        """
        section = elffile.get_section_by_name(".debug")
        section_data = section.data()
        self.section_size = len(section_data)
        self.stm = BytesIO(section_data)

        # The line section is optional. Always define the attribute so that
        # line_program_for_CU() can test for its absence.
        lsection = elffile.get_section_by_name(".line")
        self.linestream = BytesIO(lsection.data()) if lsection else None

        self.config = DwarfConfig(
            little_endian = elffile.little_endian,
            default_address_size = elffile.elfclass // 8,
            machine_arch = elffile.get_machine_arch()
        )

        self.structs = DWARFStructs(
            little_endian = self.config.little_endian,
            dwarf_format = 32,
            address_size = self.config.default_address_size)

    def iter_CUs(self):
        """Yield a compile unit for each top-level DIE in the section.

        Follows the DW_AT_sibling offsets of the top-level DIEs and stops at
        the first padding entry or at the end of the section.
        """
        offset = 0
        while offset < self.section_size:
            die = self.DIE_at_offset(offset, None)
            if die.tag != 'DW_TAG_padding':
                if die.cu is None:
                    die.cu = cu = CompileUnitV1(self, die)
                    cu.cu_offset = offset
                yield die.cu
                offset = die.attributes['DW_AT_sibling'].value
            else:
                break

    # Does not cache
    def DIE_at_offset(self, offset, cu):
        """Parse and return a fresh DIE located at *offset* for unit *cu*."""
        self.stm.seek(offset, 0)
        return DIEV1(self.stm, cu, self)

    def location_lists(self):
        """Always None; this class does not provide location lists."""
        return None

    def line_program_for_CU(self, cu):
        """Return the line table for *cu*, or None when unavailable.

        None is returned when the file has no line section or when the
        unit's top DIE carries no DW_AT_stmt_list attribute.
        """
        top_DIE = cu.get_top_DIE()
        if self.linestream is None or 'DW_AT_stmt_list' not in top_DIE.attributes:
            return None

        stm = self.linestream
        stm.seek(top_DIE.attributes['DW_AT_stmt_list'].value, 0)
        structs = self.structs
        # Table header: a 32-bit length followed by the base address.
        table_len = struct_parse(structs.Dwarf_uint32(''), stm)
        pc = struct_parse(structs.Dwarf_target_addr(''), stm)
        return LineTableV1(stm, structs, table_len, pc)
def parse_dwarf1(elffile):
    """Build and return the DWARF v1 info object for *elffile*."""
    dwarf_info = DWARFInfoV1(elffile)
    return dwarf_info