xref: /aosp_15_r20/external/pigweed/pw_build_info/py/pw_build_info/build_id.py (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1# Copyright 2021 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Library that parses an ELF file for a GNU build-id."""
15
16import argparse
17import logging
18from pathlib import Path
19import sys
20from typing import BinaryIO
21import elftools  # type: ignore
22from elftools.elf import elffile, notes, sections  # type: ignore
23
# Module-level logger, registered under the name 'build_id_parser'.
_LOG = logging.getLogger('build_id_parser')
# Name of the symbol looked up to locate the build ID note when the ELF has no
# dedicated .note.gnu.build-id section.
_PW_BUILD_ID_SYM_NAME = 'gnu_build_id_begin'
26
27
class GnuBuildIdError(Exception):
    """Signals that a GNU build ID is malformed or cannot be read."""
30
31
def read_build_id_from_section(elf_file: BinaryIO) -> bytes | None:
    """Reads a build ID from a .note.gnu.build-id section.

    Args:
        elf_file: Binary stream of the ELF file to inspect.

    Returns:
        The build ID bytes, or None if the ELF has no .note.gnu.build-id
        section.

    Raises:
        GnuBuildIdError: The section does not hold exactly one note, or the
            note's name or type is not that of a GNU build ID.
    """
    parsed_elf_file = elffile.ELFFile(elf_file)
    build_id_section = parsed_elf_file.get_section_by_name('.note.gnu.build-id')

    if build_id_section is None:
        return None

    section_notes = list(
        notes.iter_notes(
            parsed_elf_file,
            build_id_section['sh_offset'],
            build_id_section['sh_size'],
        )
    )

    # An empty section is reported separately from the multiple-note case so
    # the error message describes what was actually found.
    if not section_notes:
        raise GnuBuildIdError('GNU build ID section contains no notes')
    if len(section_notes) > 1:
        raise GnuBuildIdError('GNU build ID section contains multiple notes')

    build_id_note = section_notes[0]
    if build_id_note['n_name'] != 'GNU':
        raise GnuBuildIdError('GNU build ID note name invalid')

    if build_id_note['n_type'] != 'NT_GNU_BUILD_ID':
        raise GnuBuildIdError('GNU build ID note type invalid')

    # The note descriptor is consumed as a hex string and converted to bytes.
    return bytes.fromhex(build_id_note['n_desc'])
60
61
def _addr_is_in_segment(addr: int, segment) -> bool:
    """Checks if the provided address resides within the provided segment.

    Args:
        addr: Virtual address to test.
        segment: Program header entry providing p_vaddr, p_filesz and
            p_memsz.

    Returns:
        True if addr lies in the file-backed part of the segment, False if it
        lies outside the segment's memory range.

    Raises:
        GnuBuildIdError: addr is inside the segment but past the bytes that
            are stored in the file, so there is nothing to read.
    """
    segment_start = segment['p_vaddr']

    # p_memsz is a size, not an address: the end of the segment in memory is
    # p_vaddr + p_memsz.
    if not segment_start <= addr < segment_start + segment['p_memsz']:
        return False

    # Address references uninitialized memory. Can't read.
    if addr >= segment_start + segment['p_filesz']:
        raise GnuBuildIdError('GNU build ID is runtime-initialized')

    return True
69
70
def _read_build_id_from_offset(elf, offset: int) -> bytes:
    """Reads a GNU build ID note located at a file offset of an ELF.

    Args:
        elf: Parsed ELF object exposing `structs.Elf_Nhdr` and `stream`.
        offset: File offset at which the note header starts.

    Returns:
        The bytes read for the note descriptor (n_descsz bytes requested).

    Raises:
        GnuBuildIdError: The note name is not b'GNU\\0'.
    """
    header_struct = elf.structs.Elf_Nhdr
    stream = elf.stream
    header = elftools.common.utils.struct_parse(
        header_struct, stream, stream_pos=offset
    )

    # Position the stream on the name field that follows the note header.
    stream.seek(offset + header_struct.sizeof())
    if stream.read(header['n_namesz']) != b'GNU\0':
        raise GnuBuildIdError('GNU build ID note name invalid')

    # The descriptor is read directly after the name.
    return stream.read(header['n_descsz'])
83
84
def read_build_id_from_symbol(elf_file: BinaryIO) -> bytes | None:
    """Reads a GNU build ID using gnu_build_id_begin to locate the data.

    Args:
        elf_file: Binary stream of the ELF file to inspect.

    Returns:
        The build ID bytes, or None if no symbol table contains the symbol or
        no segment contains the section the symbol is defined in.

    Raises:
        GnuBuildIdError: The symbol is defined more than once, is undefined,
            is not tied to a regular section, or the note it points at does
            not carry the GNU name.
    """
    parsed_elf_file = elffile.ELFFile(elf_file)

    # Use the first symbol table that knows about the symbol.
    matching_syms = None
    for section in parsed_elf_file.iter_sections():
        if not isinstance(section, sections.SymbolTableSection):
            continue
        matching_syms = section.get_symbol_by_name(_PW_BUILD_ID_SYM_NAME)
        if matching_syms is not None:
            break
    if matching_syms is None:
        return None

    if len(matching_syms) != 1:
        raise GnuBuildIdError('Multiple GNU build ID start symbols defined')

    gnu_build_id_sym = matching_syms[0]
    section_number = gnu_build_id_sym['st_shndx']

    if section_number == 'SHN_UNDEF':
        raise GnuBuildIdError('GNU build ID start symbol undefined')

    # Reserved indices are reported by name rather than as an integer
    # (SHN_UNDEF above is one such name; SHN_ABS and SHN_COMMON are presumed
    # to be decoded the same way). They do not identify a section, so they
    # must not be passed to get_section().
    if not isinstance(section_number, int):
        raise GnuBuildIdError(
            'GNU build ID start symbol is not defined in a section'
        )

    matching_section = parsed_elf_file.get_section(section_number)

    build_id_start_addr = gnu_build_id_sym['st_value']
    for segment in parsed_elf_file.iter_segments():
        if segment.section_in_segment(matching_section):
            # Translate the symbol's virtual address into a file offset.
            offset = (
                build_id_start_addr - segment['p_vaddr'] + segment['p_offset']
            )
            return _read_build_id_from_offset(parsed_elf_file, offset)

    return None
119
120
def read_build_id(elf_file: BinaryIO) -> bytes | None:
    """Reads a GNU build ID from an ELF binary.

    The dedicated build ID section is tried first; the symbol-based lookup is
    only used when the section lookup yields None.
    """
    build_id = read_build_id_from_section(elf_file)
    if build_id is None:
        # No dedicated section: fall back to locating the note via symbol
        # information.
        build_id = read_build_id_from_symbol(elf_file)
    return build_id
131
132
133def find_matching_elf(uuid: bytes, search_dir: Path) -> Path | None:
134    """Recursively searches a directory for an ELF file with a matching UUID."""
135    elf_file_paths = search_dir.glob('**/*.elf')
136    for elf_file in elf_file_paths:
137        try:
138            candidate_id = read_build_id(open(elf_file, 'rb'))
139        except GnuBuildIdError:
140            continue
141        if candidate_id is None:
142            continue
143        if candidate_id == uuid:
144            return elf_file
145
146    return None
147
148
def _main(elf_file: BinaryIO) -> int:
    """Logs the build ID of an ELF file as hex.

    Returns:
        0 if a build ID was found and logged, 1 otherwise.
    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    found_id = read_build_id(elf_file)
    if found_id is not None:
        _LOG.info(found_id.hex())
        return 0

    _LOG.error('Error: No GNU build ID found.')
    return 1
158
159
def _parse_args():
    """Builds the command-line parser and returns the parsed arguments."""
    # The single positional argument is opened by argparse in binary mode.
    elf_file_options = {
        'type': argparse.FileType('rb'),
        'help': 'The .elf to parse build info from',
    }

    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument('elf_file', **elf_file_options)
    return arg_parser.parse_args()
171
172
# Script entry point: parse the command line, pass the parsed arguments to
# _main() as keyword arguments, and use its return value as the exit status.
if __name__ == '__main__':
    sys.exit(_main(**vars(_parse_args())))
175