# Copyright 2021 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Library that parses an ELF file for a GNU build-id."""

import argparse
import logging
from pathlib import Path
import sys
from typing import BinaryIO
import elftools  # type: ignore
from elftools.elf import elffile, notes, sections  # type: ignore

_LOG = logging.getLogger('build_id_parser')
_PW_BUILD_ID_SYM_NAME = 'gnu_build_id_begin'


class GnuBuildIdError(Exception):
    """An exception raised when a GNU build ID is malformed."""


def read_build_id_from_section(elf_file: BinaryIO) -> bytes | None:
    """Reads a build ID from a .note.gnu.build-id section.

    Args:
      elf_file: Binary stream of the ELF file to inspect.

    Returns:
      The build ID bytes, or None if the ELF has no .note.gnu.build-id
      section.

    Raises:
      GnuBuildIdError: The section does not contain exactly one note, or the
        note's name or type is not the one expected for a GNU build ID.
    """
    parsed_elf_file = elffile.ELFFile(elf_file)
    build_id_section = parsed_elf_file.get_section_by_name('.note.gnu.build-id')

    if build_id_section is None:
        return None

    section_notes = list(
        notes.iter_notes(
            parsed_elf_file,
            build_id_section['sh_offset'],
            build_id_section['sh_size'],
        )
    )

    # Exactly one note is expected. Report the empty case separately so the
    # error message is not misleading.
    if not section_notes:
        raise GnuBuildIdError('GNU build ID section contains no notes')
    if len(section_notes) > 1:
        raise GnuBuildIdError('GNU build ID section contains multiple notes')

    build_id_note = section_notes[0]
    if build_id_note['n_name'] != 'GNU':
        raise GnuBuildIdError('GNU build ID note name invalid')

    if build_id_note['n_type'] != 'NT_GNU_BUILD_ID':
        raise GnuBuildIdError('GNU build ID note type invalid')

    # The parsed descriptor is fed to bytes.fromhex(), i.e. it is treated as a
    # hex string here.
    return bytes.fromhex(build_id_note['n_desc'])


def _addr_is_in_segment(addr: int, segment) -> bool:
    """Checks if the provided address resides within the provided segment.

    Args:
      addr: Virtual address to test.
      segment: Mapping-like segment header providing 'p_vaddr', 'p_filesz',
        and 'p_memsz'.

    Returns:
      True if addr lies in the file-backed part of the segment, False if it
      lies outside the segment's address range.

    Raises:
      GnuBuildIdError: addr is inside the segment but beyond the bytes stored
        in the file, so there is nothing to read.
    """
    segment_start = segment['p_vaddr']

    # The segment spans [p_vaddr, p_vaddr + p_memsz); p_memsz is a size, not
    # an end address.
    if not segment_start <= addr < segment_start + segment['p_memsz']:
        return False

    # Address references uninitialized memory. Can't read.
    if addr >= segment_start + segment['p_filesz']:
        raise GnuBuildIdError('GNU build ID is runtime-initialized')

    return True


def _read_build_id_from_offset(elf, offset: int) -> bytes:
    """Attempts to read a GNU build ID from an offset in an elf file.

    Args:
      elf: Parsed ELF object providing `structs.Elf_Nhdr` and `stream`.
      offset: File offset at which the note header is expected to start.

    Returns:
      The n_descsz bytes that follow the note name.

    Raises:
      GnuBuildIdError: The note name is not b'GNU\\0'.
    """
    note = elftools.common.utils.struct_parse(
        elf.structs.Elf_Nhdr, elf.stream, stream_pos=offset
    )
    # Position the stream just past the note header before reading the name.
    elf.stream.seek(offset + elf.structs.Elf_Nhdr.sizeof())
    name = elf.stream.read(note['n_namesz'])

    if name != b'GNU\0':
        raise GnuBuildIdError('GNU build ID note name invalid')

    return elf.stream.read(note['n_descsz'])


def read_build_id_from_symbol(elf_file: BinaryIO) -> bytes | None:
    """Reads a GNU build ID using gnu_build_id_begin to locate the data.

    Args:
      elf_file: Binary stream of the ELF file to inspect.

    Returns:
      The build ID bytes, or None if no symbol table defines the symbol or no
      segment contains the symbol's section.

    Raises:
      GnuBuildIdError: The symbol is defined more than once in the matching
        symbol table, is undefined, or the note it points at has an invalid
        name.
    """
    parsed_elf_file = elffile.ELFFile(elf_file)

    # Use the first symbol table that knows about the build ID start symbol.
    matching_syms = None
    for section in parsed_elf_file.iter_sections():
        if not isinstance(section, sections.SymbolTableSection):
            continue
        matching_syms = section.get_symbol_by_name(_PW_BUILD_ID_SYM_NAME)
        if matching_syms is not None:
            break
    if matching_syms is None:
        return None

    if len(matching_syms) != 1:
        raise GnuBuildIdError('Multiple GNU build ID start symbols defined')

    gnu_build_id_sym = matching_syms[0]
    section_number = gnu_build_id_sym['st_shndx']

    if section_number == 'SHN_UNDEF':
        raise GnuBuildIdError('GNU build ID start symbol undefined')

    matching_section = parsed_elf_file.get_section(section_number)

    build_id_start_addr = gnu_build_id_sym['st_value']
    for segment in parsed_elf_file.iter_segments():
        if segment.section_in_segment(matching_section):
            # Translate the symbol's virtual address into a file offset.
            offset = (
                build_id_start_addr - segment['p_vaddr'] + segment['p_offset']
            )
            return _read_build_id_from_offset(parsed_elf_file, offset)

    return None


def read_build_id(elf_file: BinaryIO) -> bytes | None:
    """Reads a GNU build ID from an ELF binary.

    Args:
      elf_file: Binary stream of the ELF file to inspect.

    Returns:
      The build ID bytes, or None if neither lookup strategy finds one.

    Raises:
      GnuBuildIdError: A build ID was located but is malformed.
    """
    # Prefer to read the build ID from a dedicated section.
    maybe_build_id = read_build_id_from_section(elf_file)
    if maybe_build_id is not None:
        return maybe_build_id

    # If there's no dedicated section, try and use symbol information to find
    # the build info.
    return read_build_id_from_symbol(elf_file)


def find_matching_elf(uuid: bytes, search_dir: Path) -> Path | None:
    """Recursively searches a directory for an ELF file with a matching UUID.

    Args:
      uuid: Build ID bytes to look for.
      search_dir: Directory searched recursively for files matching '*.elf'.

    Returns:
      The path of the first candidate whose build ID equals uuid, or None if
      no candidate matches.
    """
    for elf_path in search_dir.glob('**/*.elf'):
        try:
            # Close each candidate promptly so a large search tree does not
            # accumulate open file handles.
            with open(elf_path, 'rb') as elf_file:
                candidate_id = read_build_id(elf_file)
        except GnuBuildIdError:
            # A malformed build ID only disqualifies this candidate.
            continue
        if candidate_id is None:
            continue
        if candidate_id == uuid:
            return elf_path

    return None


def _main(elf_file: BinaryIO) -> int:
    """Logs the build ID of elf_file as hex.

    Args:
      elf_file: Binary stream of the ELF file to inspect.

    Returns:
      0 if a build ID was found and logged, 1 otherwise.
    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    build_id = read_build_id(elf_file)
    if build_id is None:
        _LOG.error('Error: No GNU build ID found.')
        return 1

    _LOG.info(build_id.hex())
    return 0


def _parse_args():
    """Parses command-line arguments."""

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        'elf_file',
        type=argparse.FileType('rb'),
        help='The .elf to parse build info from',
    )

    return parser.parse_args()


if __name__ == '__main__':
    sys.exit(_main(**vars(_parse_args())))