import io
from os import path, listdir

from elftools.dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig

# This doesn't depend on Qt
# The dependency on filebytes only lives here
# Format codes: 0 = ELF, 1 = MACHO, 2 = PE


def read_pe(filename):
    """Load DWARF debug information from a PE file.

    Args:
        filename: path of the file, handed to the filebytes PE parser.

    Returns:
        A DWARFInfo augmented with ``_format = 2`` and ``_start_address``
        (the ImageBase from the optional header), or None if the file has
        no ``.debug_info`` section.
    """
    from filebytes.pe import PE, IMAGE_FILE_MACHINE

    pefile = PE(filename)

    # '.gnu_debugaltlink' does not share the '.debug' prefix, so it has to be
    # admitted explicitly - otherwise the lookup further down never matches.
    wanted_prefixes = ('.debug', '.gnu_debugaltlink')

    data = {}
    for section in pefile.sections:
        name = section.name
        if not name.startswith(wanted_prefixes):
            continue
        # Section's real size might be padded - see https://github.com/sashs/filebytes/issues/28
        virtual_size = section.header.PhysicalAddress_or_VirtualSize
        raw_size = section.header.SizeOfRawData
        size = raw_size if virtual_size == 0 else min(raw_size, virtual_size)
        data[name] = DebugSectionDescriptor(io.BytesIO(section.bytes), name, None, size, 0)

    if '.debug_info' not in data:
        return None

    machine = pefile.imageNtHeaders.header.FileHeader.Machine
    # There are also some exotic architectures...
    is64 = machine in (IMAGE_FILE_MACHINE.AMD64, IMAGE_FILE_MACHINE.ARM64, IMAGE_FILE_MACHINE.IA64)

    di = DWARFInfo(
        config=DwarfConfig(
            little_endian=True,
            default_address_size=8 if is64 else 4,
            machine_arch=IMAGE_FILE_MACHINE[machine].name
        ),
        debug_info_sec=data['.debug_info'],
        debug_aranges_sec=data.get('.debug_aranges'),
        debug_abbrev_sec=data.get('.debug_abbrev'),
        debug_frame_sec=data.get('.debug_frame'),
        eh_frame_sec=None,  # Haven't seen one in the wild so far
        debug_str_sec=data.get('.debug_str'),
        debug_loc_sec=data.get('.debug_loc'),
        debug_ranges_sec=data.get('.debug_ranges'),
        debug_line_sec=data.get('.debug_line'),
        debug_pubtypes_sec=data.get('.debug_pubtypes'),
        debug_pubnames_sec=data.get('.debug_pubnames'),
        debug_addr_sec=data.get('.debug_addr'),
        debug_str_offsets_sec=data.get('.debug_str_offsets'),
        debug_line_str_sec=data.get('.debug_line_str'),
        debug_loclists_sec=data.get('.debug_loclists'),
        debug_rnglists_sec=data.get('.debug_rnglists'),
        debug_sup_sec=data.get('.debug_sup'),
        gnu_debugaltlink_sec=data.get('.gnu_debugaltlink')
    )
    di._format = 2
    di._start_address = pefile.imageNtHeaders.header.OptionalHeader.ImageBase
    return di


# Arch + flavor where flavor matters
def make_macho_arch_name(macho):
    """Build an architecture name for a Mach-O file or fat binary slice.

    The name is the CPU type name; for ARM and ARM64 with a nonzero CPU
    subtype, the subtype name is appended.

    Args:
        macho: a filebytes Mach-O object exposing ``machHeader.header``.

    Returns:
        The architecture name as a string.
    """
    from filebytes.mach_o import CpuType, CpuSubTypeARM, CpuSubTypeARM64

    header = macho.machHeader.header
    cpu = header.cputype
    subtype = header.cpusubtype
    flavor = ''
    if subtype != 0:
        if cpu == CpuType.ARM:
            flavor = CpuSubTypeARM[subtype].name
        elif cpu == CpuType.ARM64:
            flavor = CpuSubTypeARM64[subtype].name
    return CpuType[cpu].name + flavor


# For debugging purposes only - dump individual debug related sections in a Mach-O file/slice as files
def macho_save_sections(filename, macho):
    """Write every ``__debug*`` section of a Mach-O file/slice to its own file.

    Each section goes to ``<filename>.<arch>.<section name>``; files that
    already exist are left untouched.

    Args:
        filename: base name for the dump files.
        macho: a filebytes Mach-O object (arch-specific file or fat slice).
    """
    from filebytes.mach_o import LC

    arch = make_macho_arch_name(macho)
    for cmd in macho.loadCommands:
        if cmd.header.cmd not in (LC.SEGMENT, LC.SEGMENT_64):
            continue
        for section in cmd.sections:
            if not section.name.startswith('__debug'):
                continue
            sec_file = ".".join((filename, arch, section.name))
            if not path.exists(sec_file):
                with open(sec_file, 'wb') as f:
                    f.write(section.bytes)


# resolve_arch takes a list of
# architecture descriptions, and returns
# the desired index, or None if the user has cancelled
def read_macho(filename, resolve_arch, friendly_filename):
    """Load DWARF debug information from a Mach-O file or fat binary.

    Args:
        filename: path of the Mach-O file, handed to the filebytes parser.
        resolve_arch: callable used for fat binaries only. It receives the
            list of slice architecture names and returns the index of the
            slice to load, or None if the user has cancelled.
        friendly_filename: user-facing name of what was opened (e.g. the
            bundle path); only used by the commented-out debug dump below.

    Returns:
        A DWARFInfo augmented with ``_format = 1``, ``_fat_arch`` (the chosen
        slice name, or None for a non-fat file) and ``_start_address`` (the
        vmaddr of the ``__TEXT`` segment, or 0 if there is none);
        None if there is no ``__debug_info`` section;
        False if resolve_arch reported a cancellation.

    Raises:
        KeyError: if ``__debug_info`` is present but ``__debug_abbrev`` is not.
    """
    from filebytes.mach_o import MachO, TypeFlags, LC

    fat_arch = None
    macho = MachO(filename)
    if macho.isFat:
        slices = [make_macho_arch_name(arch) for arch in macho.fatArches]
        arch_no = resolve_arch(slices)
        if arch_no is None:  # User cancellation
            return False
        fat_arch = slices[arch_no]
        macho = macho.fatArches[arch_no]

    # We proceed with macho being a arch-specific file, or a slice within a fat binary
    segment_cmds = [cmd for cmd in macho.loadCommands
                    if cmd.header.cmd in (LC.SEGMENT, LC.SEGMENT_64)]
    data = {
        section.name: DebugSectionDescriptor(io.BytesIO(section.bytes), section.name, None, len(section.bytes), 0)
        for cmd in segment_cmds
        for section in cmd.sections
        if section.name.startswith('__debug')
    }

    #macho_save_sections(friendly_filename, macho)

    if '__debug_info' not in data:
        return None

    cpu = macho.machHeader.header.cputype
    di = DWARFInfo(
        config=DwarfConfig(
            little_endian=True,
            default_address_size=8 if (cpu & TypeFlags.ABI64) != 0 else 4,
            machine_arch=make_macho_arch_name(macho)
        ),
        debug_info_sec=data['__debug_info'],
        debug_aranges_sec=data.get('__debug_aranges'),
        debug_abbrev_sec=data['__debug_abbrev'],
        debug_frame_sec=data.get('__debug_frame'),
        eh_frame_sec=None,  # Haven't seen those in Mach-O
        # Optional: a file whose strings are all stored inline has no string table
        debug_str_sec=data.get('__debug_str'),
        debug_loc_sec=data.get('__debug_loc'),
        debug_ranges_sec=data.get('__debug_ranges'),
        debug_line_sec=data.get('__debug_line'),
        debug_pubtypes_sec=data.get('__debug_pubtypes'),  #__debug_gnu_pubt?
        debug_pubnames_sec=data.get('__debug_pubnames'),  #__debug_gnu_pubn?
        debug_addr_sec=data.get('__debug_addr'),
        # Mach-O section names are capped at 16 characters, so the name is
        # expected in its truncated form; the long form is kept as a fallback.
        debug_str_offsets_sec=data.get('__debug_str_offs') or data.get('__debug_str_offsets'),
        debug_line_str_sec=data.get('__debug_line_str'),
        debug_loclists_sec=data.get('__debug_loclists'),
        debug_rnglists_sec=data.get('__debug_rnglists'),
        debug_sup_sec=data.get('__debug_sup'),
        # NOTE(review): the '__debug' filter above never admits this name,
        # so this is always None - confirm whether Mach-O ever carries it.
        gnu_debugaltlink_sec=data.get('__gnu_debugaltlink')
    )
    di._format = 1
    di._fat_arch = fat_arch
    text_cmd = next((cmd for cmd in segment_cmds if cmd.name == "__TEXT"), None)
    di._start_address = text_cmd.header.vmaddr if text_cmd else 0
    return di


# Mach-O fat binary, or 32/64-bit Mach-O in big/little-endian format
_FAT_MAGIC = b'\xCA\xFE\xBA\xBE'
_MACHO_MAGICS = (_FAT_MAGIC, b'\xFE\xED\xFA\xCE', b'\xFE\xED\xFA\xCF', b'\xCE\xFA\xED\xFE', b'\xCF\xFA\xED\xFE')


# UI agnostic - resolve_arch might be interactive
# Returns slightly augmented DWARFInfo
# Or None if not a DWARF containing file (or unrecognized)
# Or False if user has cancelled
# Or throws an exception
# resolve_arch is for Mach-O fat binaries - see read_macho()
def read_dwarf(filename, resolve_arch):
    """Detect the container format of `filename` and load its DWARF info.

    Regular files are recognized by their first four bytes (PE, ELF, Mach-O).
    Directories are probed as dSYM bundles and as .app/.framework bundles.

    Args:
        filename: path of a file or of a bundle directory.
        resolve_arch: architecture chooser for Mach-O fat binaries, passed
            through to read_macho().

    Returns:
        A DWARFInfo with ``_format`` and ``_start_address`` set (0 = ELF,
        1 = MACHO, 2 = PE); None if the path is not recognized or holds no
        DWARF; False if the user has cancelled the architecture choice.
    """
    if path.isfile(filename):  # On MacOS, opening dSYM bundles as is would be right
        file = open(filename, 'rb')
        # For ELF, the file is to remain open - but only if we actually hand
        # out a DWARFInfo; on every other path it is closed below.
        keep_open = False
        try:
            signature = file.read(4)

            if signature[0:2] == b'MZ':
                # DOS header - this might be a PE. Don't verify the PE header, just feed it to the parser
                return read_pe(filename)

            if signature == b'\x7FELF':  # It's an ELF
                from elftools.elf.elffile import ELFFile
                file.seek(0)
                elffile = ELFFile(file)

                # Retrieve the preferred loading address
                load_segment = next((seg for seg in elffile.iter_segments()
                                     if seg.header.p_type == 'PT_LOAD'), None)
                start_address = load_segment.header.p_vaddr if load_segment else 0

                di = None
                if elffile.has_dwarf_info():
                    di = elffile.get_dwarf_info()
                elif elffile.get_section_by_name(".debug"):
                    from .dwarfone import parse_dwarf1
                    di = parse_dwarf1(elffile)

                if di:
                    di._format = 0
                    di._start_address = start_address
                    keep_open = True
                return di

            if signature in _MACHO_MAGICS:
                if signature == _FAT_MAGIC and int.from_bytes(file.read(4), 'big') >= 0x20:
                    # Java .class files also have CAFEBABE, check the fat binary arch count
                    return None
                return read_macho(filename, resolve_arch, filename)
        finally:
            if not keep_open:
                file.close()
    elif path.isdir(filename):
        # normpath drops a trailing separator, which would make basename empty
        nameparts = path.basename(path.normpath(filename)).split('.')

        # Is it a dSYM bundle?
        if nameparts[-1] == 'dSYM':
            dwarf_dir = path.join(filename, 'Contents', 'Resources', 'DWARF')
            if path.exists(dwarf_dir):
                files = listdir(dwarf_dir)
                if files:
                    # When are there multiple DWARF files in a dSYM bundle?
                    # TODO: let the user choose?
                    dsym_file_path = path.join(dwarf_dir, files[0])
                    return read_macho(dsym_file_path, resolve_arch, filename)

        # Is it an app bundle? appname.app
        if len(nameparts) > 1 and nameparts[-1] in ('app', 'framework'):
            app_file = path.join(filename, '.'.join(nameparts[0:-1]))
            if path.exists(app_file):
                return read_macho(app_file, resolve_arch, filename)

        # Any other bundle formats we should be aware of?
    return None


def get_debug_sections(di):
    """Collect the debug sections that are present on a DWARFInfo.

    Args:
        di: object holding the ``debug_*_sec``, ``eh_frame_sec`` and
            ``gnu_debugaltlink_sec`` attributes.

    Returns:
        A dict mapping display name (e.g. ``'info'``, ``'eh_frame'``) to the
        section object, covering only attributes that exist and are truthy.
    """
    section_names = {name: "debug_%s_sec" % name
                     for name in ('info', 'aranges', 'abbrev', 'frame', 'str', 'loc', 'ranges', 'line',
                                  'addr', 'str_offsets', 'line_str', 'pubtypes', 'pubnames',
                                  'loclists', 'rnglists', 'sup')}
    section_names['eh_frame'] = 'eh_frame_sec'
    section_names['gnu_debugaltlink'] = 'gnu_debugaltlink_sec'

    # Display name to section object
    found = {}
    for display_name, field_name in section_names.items():
        # getattr with a default tolerates objects that lack some of the fields
        section = getattr(di, field_name, None)
        if section:
            found[display_name] = section
    return found