elf_symbols/dwex/formats.py
Juraj Oravec bd70b22a7a
Rename folder bear to dwex
Signed-off-by: Juraj Oravec <jurajoravec@mailo.com>
2024-05-11 18:42:36 +02:00

222 lines
9.8 KiB
Python

import io
from os import path, listdir
from elftools.dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig
# This doesn't depend on Qt
# The dependency on filebytes only lives here
# Format codes: 0 = ELF, 1 = MACHO, 2 = PE
def read_pe(filename):
    """Load DWARF debug information from a PE executable.

    Every section whose name starts with ``.debug`` is wrapped in a
    DebugSectionDescriptor and handed to pyelftools' DWARFInfo.

    Returns:
        A DWARFInfo with ``_format`` set to 2 and ``_start_address`` set to
        the image base from the optional header, or None when the file has
        no ``.debug_info`` section.
    """
    from filebytes.pe import PE, IMAGE_FILE_MACHINE

    pefile = PE(filename)

    # Section's real size might be padded - see https://github.com/sashs/filebytes/issues/28
    # so use the smaller of the raw and virtual sizes unless the virtual size is 0.
    data = {}
    for section in pefile.sections:
        sec_name = section.name
        if not sec_name.startswith('.debug'):
            continue
        virtual_size = section.header.PhysicalAddress_or_VirtualSize
        raw_size = section.header.SizeOfRawData
        if virtual_size == 0:
            sec_size = raw_size
        else:
            sec_size = min(raw_size, virtual_size)
        data[sec_name] = DebugSectionDescriptor(
            io.BytesIO(section.bytes), sec_name, None, sec_size, 0)

    if '.debug_info' not in data:
        return None

    machine = pefile.imageNtHeaders.header.FileHeader.Machine
    # There are also some exotic architectures...
    machines_64bit = (IMAGE_FILE_MACHINE.AMD64,
                      IMAGE_FILE_MACHINE.ARM64,
                      IMAGE_FILE_MACHINE.IA64)
    address_size = 8 if machine in machines_64bit else 4

    dwarf_config = DwarfConfig(
        little_endian=True,
        default_address_size=address_size,
        machine_arch=IMAGE_FILE_MACHINE[machine].name)

    # Optional sections: the keyword is debug_<name>_sec, the PE section is .debug_<name>
    optional_names = ('aranges', 'abbrev', 'frame', 'str', 'loc', 'ranges',
                      'line', 'pubtypes', 'pubnames', 'addr', 'str_offsets',
                      'line_str', 'loclists', 'rnglists', 'sup')
    section_args = {}
    for short_name in optional_names:
        section_args['debug_%s_sec' % short_name] = data.get('.debug_' + short_name)

    di = DWARFInfo(
        config=dwarf_config,
        debug_info_sec=data['.debug_info'],
        eh_frame_sec=None,  # Haven't seen one in the wild so far
        gnu_debugaltlink_sec=data.get('.gnu_debugaltlink'),
        **section_args)
    di._format = 2
    di._start_address = pefile.imageNtHeaders.header.OptionalHeader.ImageBase
    return di
# Arch + flavor where flavor matters
def make_macho_arch_name(macho):
    """Build an architecture name for a Mach-O file or fat binary slice.

    The result is the CPU type name; for ARM and ARM64 with a nonzero CPU
    subtype, the subtype name is appended.
    """
    from filebytes.mach_o import CpuType, CpuSubTypeARM, CpuSubTypeARM64

    header = macho.machHeader.header
    cpu_type = header.cputype
    cpu_subtype = header.cpusubtype

    if cpu_subtype == 0:
        suffix = ''
    elif cpu_type == CpuType.ARM:
        suffix = CpuSubTypeARM[cpu_subtype].name
    elif cpu_type == CpuType.ARM64:
        suffix = CpuSubTypeARM64[cpu_subtype].name
    else:
        # The subtype only matters for the ARM family
        suffix = ''
    return CpuType[cpu_type].name + suffix
# For debugging purposes only - dump individual debug related sections in a Mach-O file/slice as files
def macho_save_sections(filename, macho):
    """Debugging aid: write each ``__debug*`` section of a Mach-O file/slice to disk.

    Each section goes to ``<filename>.<arch>.<section name>``; files that
    already exist are left untouched.
    """
    from filebytes.mach_o import LC

    arch = make_macho_arch_name(macho)
    segment_commands = (LC.SEGMENT, LC.SEGMENT_64)
    for cmd in macho.loadCommands:
        if cmd.header.cmd not in segment_commands:
            continue
        for section in cmd.sections:
            if not section.name.startswith('__debug'):
                continue
            target = ".".join([filename, arch, section.name])
            if path.exists(target):
                continue  # Never overwrite an earlier dump
            with open(target, 'wb') as out:
                out.write(section.bytes)
# resolve_arch takes a list of architecture descriptions, and returns
# the desired index, or None if the user has cancelled
def read_macho(filename, resolve_arch, friendly_filename):
    """Load DWARF debug information from a Mach-O file or one slice of a fat binary.

    Args:
        filename: Path of the Mach-O file to parse.
        resolve_arch: Callable that takes the list of slice architecture
            names of a fat binary and returns the index of the desired
            slice, or None if the user has cancelled. Only called for fat
            binaries.
        friendly_filename: Only used by the disabled section dump
            debugging aid below.

    Returns:
        A DWARFInfo with ``_format`` set to 1, ``_fat_arch`` set to the
        chosen slice name (None for thin files) and ``_start_address`` set
        to the vmaddr of the ``__TEXT`` segment (0 if there is none);
        None if there is no ``__debug_info`` section;
        False if the user cancelled the slice choice.

    Raises:
        KeyError: if ``__debug_info`` is present but ``__debug_abbrev`` or
            ``__debug_str`` is not.
    """
    from filebytes.mach_o import MachO, TypeFlags, LC

    fat_arch = None
    macho = MachO(filename)
    if macho.isFat:
        slices = [make_macho_arch_name(fat_slice) for fat_slice in macho.fatArches]
        arch_no = resolve_arch(slices)
        if arch_no is None:  # User cancellation
            return False
        fat_arch = slices[arch_no]
        macho = macho.fatArches[arch_no]

    # We proceed with macho being an arch-specific file, or a slice within a fat binary
    segment_commands = (LC.SEGMENT, LC.SEGMENT_64)
    data = {
        section.name: DebugSectionDescriptor(io.BytesIO(section.bytes), section.name, None, len(section.bytes), 0)
        for cmd in macho.loadCommands
        if cmd.header.cmd in segment_commands
        for section in cmd.sections
        if section.name.startswith('__debug')
    }

    #macho_save_sections(friendly_filename, macho)

    if '__debug_info' not in data:
        return None

    def find_section(name):
        # The Mach-O section header stores the name in a 16-byte field, so
        # longer DWARF section names (e.g. __debug_str_offsets) show up
        # truncated. Try the full name first, then the truncated one.
        return data.get(name, data.get(name[:16]))

    cpu = macho.machHeader.header.cputype
    di = DWARFInfo(
        config=DwarfConfig(
            # NOTE(review): endianness is hard-coded to little-endian here;
            # confirm whether big-endian Mach-O files need to be supported.
            little_endian=True,
            default_address_size=8 if (cpu & TypeFlags.ABI64) != 0 else 4,
            machine_arch=make_macho_arch_name(macho)
        ),
        debug_info_sec=data['__debug_info'],
        debug_aranges_sec=find_section('__debug_aranges'),
        debug_abbrev_sec=data['__debug_abbrev'],
        debug_frame_sec=find_section('__debug_frame'),
        eh_frame_sec=None,  # Haven't seen those in Mach-O
        debug_str_sec=data['__debug_str'],
        debug_loc_sec=find_section('__debug_loc'),
        debug_ranges_sec=find_section('__debug_ranges'),
        debug_line_sec=find_section('__debug_line'),
        debug_pubtypes_sec=find_section('__debug_pubtypes'),  # __debug_gnu_pubt?
        debug_pubnames_sec=find_section('__debug_pubnames'),  # __debug_gnu_pubn?
        debug_addr_sec=find_section('__debug_addr'),
        debug_str_offsets_sec=find_section('__debug_str_offsets'),
        debug_line_str_sec=find_section('__debug_line_str'),
        debug_loclists_sec=find_section('__debug_loclists'),
        debug_rnglists_sec=find_section('__debug_rnglists'),
        debug_sup_sec=find_section('__debug_sup'),
        # Only __debug* sections are collected above, so this lookup yields
        # None; it is kept so the argument list mirrors the other readers.
        gnu_debugaltlink_sec=find_section('__gnu_debugaltlink')
    )
    di._format = 1
    di._fat_arch = fat_arch
    text_cmd = next((cmd for cmd in macho.loadCommands
                     if cmd.header.cmd in segment_commands and cmd.name == "__TEXT"), False)
    di._start_address = text_cmd.header.vmaddr if text_cmd else 0
    return di
# UI agnostic - resolve_arch might be interactive
# Returns slightly augmented DWARFInfo
# Or None if not a DWARF containing file (or unrecognized)
# Or False if user has cancelled
# Or throws an exception
# resolve_arch is for Mach-O fat binaries - see read_macho()
def read_dwarf(filename, resolve_arch):
    """Open a file or bundle and return its DWARF information.

    The format is detected from the first four bytes of the file: ``MZ``
    is handed to read_pe(), an ELF signature is parsed with pyelftools, and
    the Mach-O / fat binary signatures are handed to read_macho().
    Directories are checked for the dSYM, .app and .framework bundle
    layouts.

    Args:
        filename: Path to a file or to a bundle directory.
        resolve_arch: Passed through to read_macho() for fat binaries;
            it might be interactive.

    Returns:
        A slightly augmented DWARFInfo (``_format``, ``_start_address``);
        None if the file contains no DWARF or is not recognized;
        False if the user has cancelled.

    Raises:
        Whatever opening the file or the underlying parsers raise.
    """
    if path.isfile(filename):  # On MacOS, opening dSYM bundles as is would be right
        file = None
        try:
            file = open(filename, 'rb')
            signature = file.read(4)

            if signature[0:2] == b'MZ':
                # DOS header - this might be a PE. Don't verify the PE header, just feed it to the parser
                return read_pe(filename)
            elif signature == b'\x7FELF':  # It's an ELF
                from elftools.elf.elffile import ELFFile
                file.seek(0)
                elffile = ELFFile(file)

                # Retrieve the preferred loading address
                load_segment = next((seg for seg in elffile.iter_segments() if seg.header.p_type == 'PT_LOAD'), None)
                start_address = load_segment.header.p_vaddr if load_segment else 0

                di = None
                if elffile.has_dwarf_info():
                    di = elffile.get_dwarf_info()
                elif elffile.get_section_by_name(".debug"):
                    from .dwarfone import parse_dwarf1
                    di = parse_dwarf1(elffile)

                if di:
                    di._format = 0
                    di._start_address = start_address
                    # For ELF, the file is to remain open for as long as the
                    # DWARFInfo is in use. Ownership is released only here,
                    # so that a parser exception or an ELF without DWARF
                    # still gets the handle closed by the finally clause.
                    file = None
                return di
            elif signature in (b'\xCA\xFE\xBA\xBE', b'\xFE\xED\xFA\xCE', b'\xFE\xED\xFA\xCF', b'\xCE\xFA\xED\xFE', b'\xCF\xFA\xED\xFE'):
                if signature == b'\xCA\xFE\xBA\xBE' and int.from_bytes(file.read(4), 'big') >= 0x20:
                    # Java .class files also have CAFEBABE, check the fat binary arch count
                    return None
                # Mach-O fat binary, or 32/64-bit Mach-O in big/little-endian format
                return read_macho(filename, resolve_arch, filename)
        finally:
            if file:
                file.close()
    elif path.isdir(filename):
        # Is it a dSYM bundle?
        nameparts = path.basename(filename).split('.')
        dwarf_dir = path.join(filename, 'Contents', 'Resources', 'DWARF')
        if nameparts[-1] == 'dSYM' and path.exists(dwarf_dir):
            files = listdir(dwarf_dir)
            if len(files) > 0:
                # When are there multiple DWARF files in a dSYM bundle?
                # TODO: let the user choose?
                dsym_file_path = path.join(dwarf_dir, files[0])
                return read_macho(dsym_file_path, resolve_arch, filename)
        # Is it an app bundle? appname.app
        if len(nameparts) > 1 and nameparts[-1] in ('app', 'framework'):
            app_file = path.join(filename, '.'.join(nameparts[0:-1]))
            if path.exists(app_file):
                return read_macho(app_file, resolve_arch, filename)
        # Any other bundle formats we should be aware of?
    return None
def get_debug_sections(di):
    """Return the debug sections present in a DWARFInfo, keyed by display name.

    Args:
        di: Object carrying the ``debug_*_sec``, ``eh_frame_sec`` and
            ``gnu_debugaltlink_sec`` attributes.

    Returns:
        A dict mapping display name (e.g. ``'info'``, ``'eh_frame'``) to the
        section object, with only the sections whose attribute is set and
        truthy. Attributes that are absent on ``di`` are treated as not
        present.
    """
    section_names = {name: "debug_%s_sec" % name
                     for name in
                     ('info', 'aranges', 'abbrev', 'frame',
                      'str', 'loc', 'ranges', 'line', 'addr',
                      'str_offsets', 'line_str', 'pubtypes',
                      'pubnames', 'loclists', 'rnglists', 'sup')}
    # These two attributes don't follow the debug_<name>_sec pattern
    section_names['eh_frame'] = 'eh_frame_sec'
    section_names['gnu_debugaltlink'] = 'gnu_debugaltlink_sec'

    # Display name to section object
    present = {}
    for display_name, field_name in section_names.items():
        section = getattr(di, field_name, None)
        if section:
            present[display_name] = section
    return present