1 /* Sniff out modules from ELF headers visible in memory segments.
2 Copyright (C) 2008-2012, 2014, 2015, 2018 Red Hat, Inc.
3 Copyright (C) 2021 Mark J. Wielaard <[email protected]>
4 This file is part of elfutils.
5
6 This file is free software; you can redistribute it and/or modify
7 it under the terms of either
8
9 * the GNU Lesser General Public License as published by the Free
10 Software Foundation; either version 3 of the License, or (at
11 your option) any later version
12
13 or
14
15 * the GNU General Public License as published by the Free
16 Software Foundation; either version 2 of the License, or (at
17 your option) any later version
18
19 or both in parallel, as here.
20
21 elfutils is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
25
26 You should have received copies of the GNU General Public License and
27 the GNU Lesser General Public License along with this program. If
28 not, see <http://www.gnu.org/licenses/>. */
29
30 #include <config.h>
31 #include "libelfP.h" /* For NOTE_ALIGN4 and NOTE_ALIGN8. */
32 #include "libdwflP.h"
33 #include "common.h"
34
35 #include <elf.h>
36 #include <gelf.h>
37 #include <inttypes.h>
38 #include <fcntl.h>
39
40 #include <system.h>
41
42
/* A good size for the initial read from memory, if it's not too costly.
   This more than covers the phdrs and note segment in the average 64-bit
   binary.  Used as the initial value of the buffer size passed to the
   memory callback for the first read.  */

#define INITIAL_READ	1024

/* The ELF data encoding (EI_DATA value) selected by BYTE_ORDER.  Values
   read from an image whose EI_DATA differs from this get byte-swapped.  */
#if BYTE_ORDER == LITTLE_ENDIAN
# define MY_ELFDATA	ELFDATA2LSB
#else
# define MY_ELFDATA	ELFDATA2MSB
#endif
54
/* Build ID collected from a PT_NOTE segment of the in-memory image.  */
struct elf_build_id
{
  void *memory;		/* malloc'd copy of the build ID bytes, or NULL.  */
  size_t len;		/* Number of bytes in MEMORY (0 if none found).  */
  GElf_Addr vaddr;	/* Address of the build ID bytes in the image.  */
};
61
/* Everything read_portion and finish_portion need to fetch a piece of
   the image, either from the initial buffer or via the memory callback.  */
struct read_state
{
  Dwfl *dwfl;				/* Session passed to the callback.  */
  Dwfl_Memory_Callback *memory_callback;	/* Reads/releases memory.  */
  void *memory_callback_arg;		/* Opaque argument for the callback.  */
  void **buffer;			/* Points at the initial-read buffer.  */
  size_t *buffer_available;		/* Points at that buffer's size.  */
};
70
71 /* Return user segment index closest to ADDR but not above it.
72 If NEXT, return the closest to ADDR but not below it. */
73 static int
addr_segndx(Dwfl * dwfl,size_t segment,GElf_Addr addr,bool next)74 addr_segndx (Dwfl *dwfl, size_t segment, GElf_Addr addr, bool next)
75 {
76 int ndx = -1;
77 do
78 {
79 if (dwfl->lookup_segndx[segment] >= 0)
80 ndx = dwfl->lookup_segndx[segment];
81 if (++segment >= dwfl->lookup_elts - 1)
82 return next ? ndx + 1 : ndx;
83 }
84 while (dwfl->lookup_addr[segment] < addr);
85
86 if (next)
87 {
88 while (dwfl->lookup_segndx[segment] < 0)
89 if (++segment >= dwfl->lookup_elts - 1)
90 return ndx + 1;
91 ndx = dwfl->lookup_segndx[segment];
92 }
93
94 return ndx;
95 }
96
/* Tell whether at least SZ bytes lie between PTR and END.  A PTR that
   is not strictly before END never has data, even for SZ == 0.  */

static bool
buf_has_data (const void *ptr, const void *end, size_t sz)
{
  const char *from = ptr;
  const char *limit = end;

  if (from >= limit)
    return false;

  return (size_t) (limit - from) >= sz;
}
104
105 /* Read SZ bytes into *RETP from *PTRP (limited by END) in format EI_DATA.
106 Function comes from src/readelf.c . */
107
108 static bool
buf_read_ulong(unsigned char ei_data,size_t sz,const void ** ptrp,const void * end,uint64_t * retp)109 buf_read_ulong (unsigned char ei_data, size_t sz,
110 const void **ptrp, const void *end, uint64_t *retp)
111 {
112 if (! buf_has_data (*ptrp, end, sz))
113 return false;
114
115 union
116 {
117 uint64_t u64;
118 uint32_t u32;
119 } u;
120
121 memcpy (&u, *ptrp, sz);
122 (*ptrp) += sz;
123
124 if (retp == NULL)
125 return true;
126
127 if (MY_ELFDATA != ei_data)
128 {
129 if (sz == 4)
130 CONVERT (u.u32);
131 else
132 CONVERT (u.u64);
133 }
134 if (sz == 4)
135 *retp = u.u32;
136 else
137 *retp = u.u64;
138 return true;
139 }
140
141 /* Try to find matching entry for module from address MODULE_START to
142 MODULE_END in NT_FILE note located at NOTE_FILE of NOTE_FILE_SIZE
143 bytes in format EI_CLASS and EI_DATA. */
144
145 static const char *
handle_file_note(GElf_Addr module_start,GElf_Addr module_end,unsigned char ei_class,unsigned char ei_data,const void * note_file,size_t note_file_size)146 handle_file_note (GElf_Addr module_start, GElf_Addr module_end,
147 unsigned char ei_class, unsigned char ei_data,
148 const void *note_file, size_t note_file_size)
149 {
150 if (note_file == NULL)
151 return NULL;
152
153 size_t sz;
154 switch (ei_class)
155 {
156 case ELFCLASS32:
157 sz = 4;
158 break;
159 case ELFCLASS64:
160 sz = 8;
161 break;
162 default:
163 return NULL;
164 }
165
166 const void *ptr = note_file;
167 const void *end = note_file + note_file_size;
168 uint64_t count;
169 if (! buf_read_ulong (ei_data, sz, &ptr, end, &count))
170 return NULL;
171 if (! buf_read_ulong (ei_data, sz, &ptr, end, NULL)) // page_size
172 return NULL;
173
174 uint64_t maxcount = (size_t) (end - ptr) / (3 * sz);
175 if (count > maxcount)
176 return NULL;
177
178 /* Where file names are stored. */
179 const char *fptr = ptr + 3 * count * sz;
180
181 ssize_t firstix = -1;
182 ssize_t lastix = -1;
183 for (size_t mix = 0; mix < count; mix++)
184 {
185 uint64_t mstart, mend, moffset;
186 if (! buf_read_ulong (ei_data, sz, &ptr, fptr, &mstart)
187 || ! buf_read_ulong (ei_data, sz, &ptr, fptr, &mend)
188 || ! buf_read_ulong (ei_data, sz, &ptr, fptr, &moffset))
189 return NULL;
190 if (mstart == module_start && moffset == 0)
191 firstix = lastix = mix;
192 if (firstix != -1 && mstart < module_end)
193 lastix = mix;
194 if (mend >= module_end)
195 break;
196 }
197 if (firstix == -1)
198 return NULL;
199
200 const char *retval = NULL;
201 for (ssize_t mix = 0; mix <= lastix; mix++)
202 {
203 const char *fnext = memchr (fptr, 0, (const char *) end - fptr);
204 if (fnext == NULL)
205 return NULL;
206 if (mix == firstix)
207 retval = fptr;
208 if (firstix < mix && mix <= lastix && strcmp (fptr, retval) != 0)
209 return NULL;
210 fptr = fnext + 1;
211 }
212 return retval;
213 }
214
215 /* Return true iff we are certain ELF cannot match BUILD_ID of
216 BUILD_ID_LEN bytes. Pass DISK_FILE_HAS_BUILD_ID as false if it is
217 certain ELF does not contain build-id (it is only a performance hit
218 to pass it always as true). */
219
220 static bool
invalid_elf(Elf * elf,bool disk_file_has_build_id,struct elf_build_id * build_id)221 invalid_elf (Elf *elf, bool disk_file_has_build_id,
222 struct elf_build_id *build_id)
223 {
224 if (! disk_file_has_build_id && build_id->len > 0)
225 {
226 /* Module found in segments with build-id is more reliable
227 than a module found via DT_DEBUG on disk without any
228 build-id. */
229 return true;
230 }
231 if (disk_file_has_build_id && build_id->len > 0)
232 {
233 const void *elf_build_id;
234 ssize_t elf_build_id_len;
235
236 /* If there is a build id in the elf file, check it. */
237 elf_build_id_len = INTUSE(dwelf_elf_gnu_build_id) (elf, &elf_build_id);
238 if (elf_build_id_len > 0)
239 {
240 if (build_id->len != (size_t) elf_build_id_len
241 || memcmp (build_id->memory, elf_build_id, build_id->len) != 0)
242 return true;
243 }
244 }
245 return false;
246 }
247
248 static void
finish_portion(struct read_state * read_state,void ** data,size_t * data_size)249 finish_portion (struct read_state *read_state,
250 void **data, size_t *data_size)
251 {
252 if (*data_size != 0 && *data != NULL)
253 (*read_state->memory_callback) (read_state->dwfl, -1, data, data_size,
254 0, 0, read_state->memory_callback_arg);
255 }
256
257 static inline bool
read_portion(struct read_state * read_state,void ** data,size_t * data_size,GElf_Addr start,size_t segment,GElf_Addr vaddr,size_t filesz)258 read_portion (struct read_state *read_state,
259 void **data, size_t *data_size,
260 GElf_Addr start, size_t segment,
261 GElf_Addr vaddr, size_t filesz)
262 {
263 /* Check whether we will have to read the segment data, or if it
264 can be returned from the existing buffer. */
265 if (filesz > *read_state->buffer_available
266 || vaddr - start > *read_state->buffer_available - filesz
267 /* If we're in string mode, then don't consider the buffer we have
268 sufficient unless it contains the terminator of the string. */
269 || (filesz == 0 && memchr (vaddr - start + *read_state->buffer, '\0',
270 (*read_state->buffer_available
271 - (vaddr - start))) == NULL))
272 {
273 *data = NULL;
274 *data_size = filesz;
275 return !(*read_state->memory_callback) (read_state->dwfl,
276 addr_segndx (read_state->dwfl,
277 segment, vaddr,
278 false),
279 data, data_size, vaddr, filesz,
280 read_state->memory_callback_arg);
281 }
282
283 /* We already have this whole note segment from our initial read. */
284 *data = vaddr - start + (*read_state->buffer);
285 *data_size = 0;
286 return false;
287 }
288
289 int
dwfl_segment_report_module(Dwfl * dwfl,int ndx,const char * name,Dwfl_Memory_Callback * memory_callback,void * memory_callback_arg,Dwfl_Module_Callback * read_eagerly,void * read_eagerly_arg,size_t maxread,const void * note_file,size_t note_file_size,const struct r_debug_info * r_debug_info)290 dwfl_segment_report_module (Dwfl *dwfl, int ndx, const char *name,
291 Dwfl_Memory_Callback *memory_callback,
292 void *memory_callback_arg,
293 Dwfl_Module_Callback *read_eagerly,
294 void *read_eagerly_arg,
295 size_t maxread,
296 const void *note_file, size_t note_file_size,
297 const struct r_debug_info *r_debug_info)
298 {
299 size_t segment = ndx;
300 struct read_state read_state;
301
302 if (segment >= dwfl->lookup_elts)
303 segment = dwfl->lookup_elts - 1;
304
305 while (segment > 0
306 && (dwfl->lookup_segndx[segment] > ndx
307 || dwfl->lookup_segndx[segment] == -1))
308 --segment;
309
310 while (dwfl->lookup_segndx[segment] < ndx)
311 if (++segment == dwfl->lookup_elts)
312 return 0;
313
314 GElf_Addr start = dwfl->lookup_addr[segment];
315
316 /* First read in the file header and check its sanity. */
317
318 void *buffer = NULL;
319 size_t buffer_available = INITIAL_READ;
320 Elf *elf = NULL;
321 int fd = -1;
322
323 read_state.dwfl = dwfl;
324 read_state.memory_callback = memory_callback;
325 read_state.memory_callback_arg = memory_callback_arg;
326 read_state.buffer = &buffer;
327 read_state.buffer_available = &buffer_available;
328
329 /* We might have to reserve some memory for the phdrs. Set to NULL
330 here so we can always safely free it. */
331 void *phdrsp = NULL;
332
333 /* Collect the build ID bits here. */
334 struct elf_build_id build_id;
335 build_id.memory = NULL;
336 build_id.len = 0;
337 build_id.vaddr = 0;
338
339 if (! (*memory_callback) (dwfl, ndx, &buffer, &buffer_available,
340 start, sizeof (Elf64_Ehdr), memory_callback_arg)
341 || memcmp (buffer, ELFMAG, SELFMAG) != 0)
342 goto out;
343
344 /* Extract the information we need from the file header. */
345 const unsigned char *e_ident;
346 unsigned char ei_class;
347 unsigned char ei_data;
348 uint16_t e_type;
349 union
350 {
351 Elf32_Ehdr e32;
352 Elf64_Ehdr e64;
353 } ehdr;
354 GElf_Off phoff;
355 uint_fast16_t phnum;
356 uint_fast16_t phentsize;
357 GElf_Off shdrs_end;
358 Elf_Data xlatefrom =
359 {
360 .d_type = ELF_T_EHDR,
361 .d_buf = (void *) buffer,
362 .d_version = EV_CURRENT,
363 };
364 Elf_Data xlateto =
365 {
366 .d_type = ELF_T_EHDR,
367 .d_buf = &ehdr,
368 .d_size = sizeof ehdr,
369 .d_version = EV_CURRENT,
370 };
371 e_ident = ((const unsigned char *) buffer);
372 ei_class = e_ident[EI_CLASS];
373 ei_data = e_ident[EI_DATA];
374 /* buffer may be unaligned, in which case xlatetom would not work.
375 xlatetom does work when the in and out d_buf are equal (but not
376 for any other overlap). */
377 size_t ehdr_align = (ei_class == ELFCLASS32
378 ? __alignof__ (Elf32_Ehdr)
379 : __alignof__ (Elf64_Ehdr));
380 if (((uintptr_t) buffer & (ehdr_align - 1)) != 0)
381 {
382 memcpy (&ehdr, buffer,
383 (ei_class == ELFCLASS32
384 ? sizeof (Elf32_Ehdr)
385 : sizeof (Elf64_Ehdr)));
386 xlatefrom.d_buf = &ehdr;
387 }
388 switch (ei_class)
389 {
390 case ELFCLASS32:
391 xlatefrom.d_size = sizeof (Elf32_Ehdr);
392 if (elf32_xlatetom (&xlateto, &xlatefrom, ei_data) == NULL)
393 goto out;
394 e_type = ehdr.e32.e_type;
395 phoff = ehdr.e32.e_phoff;
396 phnum = ehdr.e32.e_phnum;
397 phentsize = ehdr.e32.e_phentsize;
398 if (phentsize != sizeof (Elf32_Phdr))
399 goto out;
400 /* NOTE if the number of sections is > 0xff00 then e_shnum
401 is zero and the actual number would come from the section
402 zero sh_size field. We ignore this here because getting shdrs
403 is just a nice bonus (see below in the type == PT_LOAD case
404 where we trim the last segment). */
405 shdrs_end = ehdr.e32.e_shoff + ehdr.e32.e_shnum * sizeof (Elf32_Shdr);
406 break;
407
408 case ELFCLASS64:
409 xlatefrom.d_size = sizeof (Elf64_Ehdr);
410 if (elf64_xlatetom (&xlateto, &xlatefrom, ei_data) == NULL)
411 goto out;
412 e_type = ehdr.e64.e_type;
413 phoff = ehdr.e64.e_phoff;
414 phnum = ehdr.e64.e_phnum;
415 phentsize = ehdr.e64.e_phentsize;
416 if (phentsize != sizeof (Elf64_Phdr))
417 goto out;
418 /* See the NOTE above for shdrs_end and ehdr.e32.e_shnum. */
419 shdrs_end = ehdr.e64.e_shoff + ehdr.e64.e_shnum * sizeof (Elf64_Shdr);
420 break;
421
422 default:
423 goto out;
424 }
425
426 /* The file header tells where to find the program headers.
427 These are what we need to find the boundaries of the module.
428 Without them, we don't have a module to report. */
429
430 if (phnum == 0)
431 goto out;
432
433 xlatefrom.d_type = xlateto.d_type = ELF_T_PHDR;
434 xlatefrom.d_size = phnum * phentsize;
435
436 void *ph_buffer = NULL;
437 size_t ph_buffer_size = 0;
438 if (read_portion (&read_state, &ph_buffer, &ph_buffer_size,
439 start, segment,
440 start + phoff, xlatefrom.d_size))
441 goto out;
442
443 xlatefrom.d_buf = ph_buffer;
444
445 bool class32 = ei_class == ELFCLASS32;
446 size_t phdr_size = class32 ? sizeof (Elf32_Phdr) : sizeof (Elf64_Phdr);
447 if (unlikely (phnum > SIZE_MAX / phdr_size))
448 goto out;
449 const size_t phdrsp_bytes = phnum * phdr_size;
450 phdrsp = malloc (phdrsp_bytes);
451 if (unlikely (phdrsp == NULL))
452 goto out;
453
454 xlateto.d_buf = phdrsp;
455 xlateto.d_size = phdrsp_bytes;
456
457 /* ph_ buffer may be unaligned, in which case xlatetom would not work.
458 xlatetom does work when the in and out d_buf are equal (but not
459 for any other overlap). */
460 size_t phdr_align = (class32
461 ? __alignof__ (Elf32_Phdr)
462 : __alignof__ (Elf64_Phdr));
463 if (((uintptr_t) ph_buffer & (phdr_align - 1)) != 0)
464 {
465 memcpy (phdrsp, ph_buffer, phdrsp_bytes);
466 xlatefrom.d_buf = phdrsp;
467 }
468
469 /* Track the bounds of the file visible in memory. */
470 GElf_Off file_trimmed_end = 0; /* Proper p_vaddr + p_filesz end. */
471 GElf_Off file_end = 0; /* Rounded up to effective page size. */
472 GElf_Off contiguous = 0; /* Visible as contiguous file from START. */
473 GElf_Off total_filesz = 0; /* Total size of data to read. */
474
475 /* Collect the bias between START and the containing PT_LOAD's p_vaddr. */
476 GElf_Addr bias = 0;
477 bool found_bias = false;
478
479 /* Collect the unbiased bounds of the module here. */
480 GElf_Addr module_start = -1l;
481 GElf_Addr module_end = 0;
482 GElf_Addr module_address_sync = 0;
483
484 /* If we see PT_DYNAMIC, record it here. */
485 GElf_Addr dyn_vaddr = 0;
486 GElf_Xword dyn_filesz = 0;
487
488 Elf32_Phdr *p32 = phdrsp;
489 Elf64_Phdr *p64 = phdrsp;
490 if ((ei_class == ELFCLASS32
491 && elf32_xlatetom (&xlateto, &xlatefrom, ei_data) == NULL)
492 || (ei_class == ELFCLASS64
493 && elf64_xlatetom (&xlateto, &xlatefrom, ei_data) == NULL))
494 {
495 found_bias = false; /* Trigger error check */
496 }
497 else
498 {
499 /* Consider each of the program headers we've read from the image. */
500 for (uint_fast16_t i = 0; i < phnum; ++i)
501 {
502 bool is32 = (ei_class == ELFCLASS32);
503 GElf_Word type = is32 ? p32[i].p_type : p64[i].p_type;
504 GElf_Addr vaddr = is32 ? p32[i].p_vaddr : p64[i].p_vaddr;
505 GElf_Xword memsz = is32 ? p32[i].p_memsz : p64[i].p_memsz;
506 GElf_Off offset = is32 ? p32[i].p_offset : p64[i].p_offset;
507 GElf_Xword filesz = is32 ? p32[i].p_filesz : p64[i].p_filesz;
508 GElf_Xword align = is32 ? p32[i].p_align : p64[i].p_align;
509
510 if (type == PT_DYNAMIC)
511 {
512 dyn_vaddr = vaddr;
513 dyn_filesz = filesz;
514 }
515 else if (type == PT_NOTE)
516 {
517 /* If we have already seen a build ID, we don't care any more. */
518 if (build_id.memory != NULL || filesz == 0)
519 continue; /* Next header */
520
521 /* We calculate from the p_offset of the note segment,
522 because we don't yet know the bias for its p_vaddr. */
523 const GElf_Addr note_vaddr = start + offset;
524 void *data = NULL;
525 size_t data_size = 0;
526 if (read_portion (&read_state, &data, &data_size,
527 start, segment, note_vaddr, filesz))
528 continue; /* Next header */
529
530 if (filesz > SIZE_MAX / sizeof (Elf32_Nhdr))
531 continue;
532
533 assert (sizeof (Elf32_Nhdr) == sizeof (Elf64_Nhdr));
534
535 void *notes;
536 if (ei_data == MY_ELFDATA
537 && (uintptr_t) data == (align == 8
538 ? NOTE_ALIGN8 ((uintptr_t) data)
539 : NOTE_ALIGN4 ((uintptr_t) data)))
540 notes = data;
541 else
542 {
543 const unsigned int xencoding = ehdr.e32.e_ident[EI_DATA];
544
545 if (filesz > SIZE_MAX / sizeof (Elf32_Nhdr))
546 continue;
547 notes = malloc (filesz);
548 if (unlikely (notes == NULL))
549 continue; /* Next header */
550 xlatefrom.d_type = xlateto.d_type = (align == 8
551 ? ELF_T_NHDR8
552 : ELF_T_NHDR);
553 xlatefrom.d_buf = (void *) data;
554 xlatefrom.d_size = filesz;
555 xlateto.d_buf = notes;
556 xlateto.d_size = filesz;
557
558 /* data may be unaligned, in which case xlatetom would not work.
559 xlatetom does work when the in and out d_buf are equal (but not
560 for any other overlap). */
561 if ((uintptr_t) data != (align == 8
562 ? NOTE_ALIGN8 ((uintptr_t) data)
563 : NOTE_ALIGN4 ((uintptr_t) data)))
564 {
565 memcpy (notes, data, filesz);
566 xlatefrom.d_buf = notes;
567 }
568
569 if (elf32_xlatetom (&xlateto, &xlatefrom, xencoding) == NULL)
570 {
571 free (notes);
572 finish_portion (&read_state, &data, &data_size);
573 continue;
574 }
575 }
576
577 const GElf_Nhdr *nh = notes;
578 size_t len = 0;
579 while (filesz - len > sizeof (*nh))
580 {
581 len += sizeof (*nh);
582
583 size_t namesz = nh->n_namesz;
584 namesz = align == 8 ? NOTE_ALIGN8 (namesz) : NOTE_ALIGN4 (namesz);
585 if (namesz > filesz - len || len + namesz < namesz)
586 break;
587
588 void *note_name = notes + len;
589 len += namesz;
590
591 size_t descsz = nh->n_descsz;
592 descsz = align == 8 ? NOTE_ALIGN8 (descsz) : NOTE_ALIGN4 (descsz);
593 if (descsz > filesz - len || len + descsz < descsz)
594 break;
595
596 void *note_desc = notes + len;
597 len += descsz;
598
599 /* We don't handle very short or really large build-ids. We need at
600 at least 3 and allow for up to 64 (normally ids are 20 long). */
601 #define MIN_BUILD_ID_BYTES 3
602 #define MAX_BUILD_ID_BYTES 64
603 if (nh->n_type == NT_GNU_BUILD_ID
604 && nh->n_descsz >= MIN_BUILD_ID_BYTES
605 && nh->n_descsz <= MAX_BUILD_ID_BYTES
606 && nh->n_namesz == sizeof "GNU"
607 && !memcmp (note_name, "GNU", sizeof "GNU"))
608 {
609 build_id.vaddr = (note_desc
610 - (const void *) notes
611 + note_vaddr);
612 build_id.len = nh->n_descsz;
613 build_id.memory = malloc (build_id.len);
614 if (likely (build_id.memory != NULL))
615 memcpy (build_id.memory, note_desc, build_id.len);
616 break;
617 }
618
619 nh = (void *) notes + len;
620 }
621
622 if (notes != data)
623 free (notes);
624 finish_portion (&read_state, &data, &data_size);
625 }
626 else if (type == PT_LOAD)
627 {
628 align = (dwfl->segment_align > 1
629 ? dwfl->segment_align : (align ?: 1));
630
631 GElf_Addr vaddr_end = (vaddr + memsz + align - 1) & -align;
632 GElf_Addr filesz_vaddr = (filesz < memsz
633 ? vaddr + filesz : vaddr_end);
634 GElf_Off filesz_offset = filesz_vaddr - vaddr + offset;
635
636 if (file_trimmed_end < offset + filesz)
637 {
638 file_trimmed_end = offset + filesz;
639
640 /* Trim the last segment so we don't bother with zeros
641 in the last page that are off the end of the file.
642 However, if the extra bit in that page includes the
643 section headers, keep them. */
644 if (shdrs_end <= filesz_offset
645 && shdrs_end > file_trimmed_end)
646 {
647 filesz += shdrs_end - file_trimmed_end;
648 file_trimmed_end = shdrs_end;
649 }
650 }
651
652 total_filesz += filesz;
653
654 if (file_end < filesz_offset)
655 {
656 file_end = filesz_offset;
657 if (filesz_vaddr - start == filesz_offset)
658 contiguous = file_end;
659 }
660
661 if (!found_bias && (offset & -align) == 0
662 && likely (filesz_offset >= phoff + phnum * phentsize))
663 {
664 bias = start - vaddr;
665 found_bias = true;
666 }
667
668 if ((vaddr & -align) < module_start)
669 {
670 module_start = vaddr & -align;
671 module_address_sync = vaddr + memsz;
672 }
673
674 if (module_end < vaddr_end)
675 module_end = vaddr_end;
676 }
677 }
678 }
679
680 finish_portion (&read_state, &ph_buffer, &ph_buffer_size);
681
682 /* We must have seen the segment covering offset 0, or else the ELF
683 header we read at START was not produced by these program headers. */
684 if (unlikely (!found_bias))
685 goto out;
686
687 /* Now we know enough to report a module for sure: its bounds. */
688 module_start += bias;
689 module_end += bias;
690
691 dyn_vaddr += bias;
692
693 /* NAME found from link map has precedence over DT_SONAME possibly read
694 below. */
695 bool name_is_final = false;
696
697 /* Try to match up DYN_VADDR against L_LD as found in link map.
698 Segments sniffing may guess invalid address as the first read-only memory
699 mapping may not be dumped to the core file (if ELF headers are not dumped)
700 and the ELF header is dumped first with the read/write mapping of the same
701 file at higher addresses. */
702 if (r_debug_info != NULL)
703 for (const struct r_debug_info_module *module = r_debug_info->module;
704 module != NULL; module = module->next)
705 if (module_start <= module->l_ld && module->l_ld < module_end)
706 {
707 /* L_LD read from link map must be right while DYN_VADDR is unsafe.
708 Therefore subtract DYN_VADDR and add L_LD to get a possibly
709 corrective displacement for all addresses computed so far. */
710 GElf_Addr fixup = module->l_ld - dyn_vaddr;
711 if ((fixup & (dwfl->segment_align - 1)) == 0
712 && module_start + fixup <= module->l_ld
713 && module->l_ld < module_end + fixup)
714 {
715 module_start += fixup;
716 module_end += fixup;
717 dyn_vaddr += fixup;
718 bias += fixup;
719 if (module->name[0] != '\0')
720 {
721 name = xbasename (module->name);
722 name_is_final = true;
723 }
724 break;
725 }
726 }
727
728 if (r_debug_info != NULL)
729 {
730 bool skip_this_module = false;
731 for (struct r_debug_info_module *module = r_debug_info->module;
732 module != NULL; module = module->next)
733 if ((module_end > module->start && module_start < module->end)
734 || dyn_vaddr == module->l_ld)
735 {
736 if (module->elf != NULL
737 && invalid_elf (module->elf, module->disk_file_has_build_id,
738 &build_id))
739 {
740 /* If MODULE's build-id doesn't match the disk file's
741 build-id, close ELF only if MODULE and ELF refer to
742 different builds of files with the same name. This
743 prevents premature closure of the correct ELF in cases
744 where segments of a module are non-contiguous in memory. */
745 if (name != NULL && module->name[0] != '\0'
746 && strcmp (xbasename (module->name), xbasename (name)) == 0)
747 {
748 elf_end (module->elf);
749 close (module->fd);
750 module->elf = NULL;
751 module->fd = -1;
752 }
753 }
754 else if (module->elf != NULL)
755 {
756 /* This module has already been reported. */
757 skip_this_module = true;
758 }
759 else
760 {
761 /* Only report this module if we haven't already done so. */
762 for (Dwfl_Module *mod = dwfl->modulelist; mod != NULL;
763 mod = mod->next)
764 if (mod->low_addr == module_start
765 && mod->high_addr == module_end)
766 skip_this_module = true;
767 }
768 }
769 if (skip_this_module)
770 goto out;
771 }
772
773 const char *file_note_name = handle_file_note (module_start, module_end,
774 ei_class, ei_data,
775 note_file, note_file_size);
776 if (file_note_name)
777 {
778 name = file_note_name;
779 name_is_final = true;
780 bool invalid = false;
781 fd = open (name, O_RDONLY);
782 if (fd >= 0)
783 {
784 Dwfl_Error error = __libdw_open_file (&fd, &elf, true, false);
785 if (error == DWFL_E_NOERROR)
786 invalid = invalid_elf (elf, true /* disk_file_has_build_id */,
787 &build_id);
788 }
789 if (invalid)
790 {
791 /* The file was there, but the build_id didn't match. We
792 still want to report the module, but need to get the ELF
793 some other way if possible. */
794 close (fd);
795 fd = -1;
796 elf_end (elf);
797 elf = NULL;
798 }
799 }
800
801 /* Examine its .dynamic section to get more interesting details.
802 If it has DT_SONAME, we'll use that as the module name.
803 If it has a DT_DEBUG, then it's actually a PIE rather than a DSO.
804 We need its DT_STRTAB and DT_STRSZ to decipher DT_SONAME,
805 and they also tell us the essential portion of the file
806 for fetching symbols. */
807 GElf_Addr soname_stroff = 0;
808 GElf_Addr dynstr_vaddr = 0;
809 GElf_Xword dynstrsz = 0;
810 bool execlike = false;
811 const size_t dyn_entsize = (ei_class == ELFCLASS32
812 ? sizeof (Elf32_Dyn) : sizeof (Elf64_Dyn));
813 void *dyn_data = NULL;
814 size_t dyn_data_size = 0;
815 if (dyn_filesz != 0 && dyn_filesz % dyn_entsize == 0
816 && ! read_portion (&read_state, &dyn_data, &dyn_data_size,
817 start, segment, dyn_vaddr, dyn_filesz))
818 {
819 if ((dyn_filesz / dyn_entsize) == 0
820 || dyn_filesz > (SIZE_MAX / dyn_entsize))
821 goto out;
822 void *dyns = malloc (dyn_filesz);
823 Elf32_Dyn *d32 = dyns;
824 Elf64_Dyn *d64 = dyns;
825 if (unlikely (dyns == NULL))
826 goto out;
827
828 xlatefrom.d_type = xlateto.d_type = ELF_T_DYN;
829 xlatefrom.d_buf = (void *) dyn_data;
830 xlatefrom.d_size = dyn_filesz;
831 xlateto.d_buf = dyns;
832 xlateto.d_size = dyn_filesz;
833
834 /* dyn_data may be unaligned, in which case xlatetom would not work.
835 xlatetom does work when the in and out d_buf are equal (but not
836 for any other overlap). */
837 bool is32 = (ei_class == ELFCLASS32);
838 size_t dyn_align = (is32
839 ? __alignof__ (Elf32_Dyn)
840 : __alignof__ (Elf64_Dyn));
841 if (((uintptr_t) dyn_data & (dyn_align - 1)) != 0)
842 {
843 memcpy (dyns, dyn_data, dyn_filesz);
844 xlatefrom.d_buf = dyns;
845 }
846
847 if ((is32 && elf32_xlatetom (&xlateto, &xlatefrom, ei_data) != NULL)
848 || (!is32 && elf64_xlatetom (&xlateto, &xlatefrom, ei_data) != NULL))
849 {
850 size_t n = (is32
851 ? (dyn_filesz / sizeof (Elf32_Dyn))
852 : (dyn_filesz / sizeof (Elf64_Dyn)));
853 for (size_t i = 0; i < n; ++i)
854 {
855 GElf_Sxword tag = is32 ? d32[i].d_tag : d64[i].d_tag;
856 GElf_Xword val = is32 ? d32[i].d_un.d_val : d64[i].d_un.d_val;
857
858 if (tag == DT_DEBUG)
859 execlike = true;
860 else if (tag == DT_SONAME)
861 soname_stroff = val;
862 else if (tag == DT_STRTAB)
863 dynstr_vaddr = val;
864 else if (tag == DT_STRSZ)
865 dynstrsz = val;
866 else
867 continue;
868
869 if (soname_stroff != 0 && dynstr_vaddr != 0 && dynstrsz != 0)
870 break;
871 }
872 }
873 free (dyns);
874 }
875 finish_portion (&read_state, &dyn_data, &dyn_data_size);
876
877 /* We'll use the name passed in or a stupid default if not DT_SONAME. */
878 if (name == NULL)
879 name = e_type == ET_EXEC ? "[exe]" : execlike ? "[pie]" : "[dso]";
880
881 void *soname = NULL;
882 size_t soname_size = 0;
883 if (! name_is_final && dynstrsz != 0 && dynstr_vaddr != 0)
884 {
885 /* We know the bounds of the .dynstr section.
886
887 The DYNSTR_VADDR pointer comes from the .dynamic section
888 (DT_STRTAB, detected above). Ordinarily the dynamic linker
889 will have adjusted this pointer in place so it's now an
890 absolute address. But sometimes .dynamic is read-only (in
891 vDSOs and odd architectures), and sometimes the adjustment
892 just hasn't happened yet in the memory image we looked at.
893 So treat DYNSTR_VADDR as an absolute address if it falls
894 within the module bounds, or try applying the phdr bias
895 when that adjusts it to fall within the module bounds. */
896
897 if ((dynstr_vaddr < module_start || dynstr_vaddr >= module_end)
898 && dynstr_vaddr + bias >= module_start
899 && dynstr_vaddr + bias < module_end)
900 dynstr_vaddr += bias;
901
902 if (unlikely (dynstr_vaddr + dynstrsz > module_end))
903 dynstrsz = 0;
904
905 /* Try to get the DT_SONAME string. */
906 if (soname_stroff != 0 && soname_stroff + 1 < dynstrsz
907 && ! read_portion (&read_state, &soname, &soname_size,
908 start, segment,
909 dynstr_vaddr + soname_stroff, 0))
910 name = soname;
911 }
912
913 /* Now that we have chosen the module's name and bounds, report it.
914 If we found a build ID, report that too. */
915
916 Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, name,
917 module_start, module_end);
918
919 // !execlike && ET_EXEC is PIE.
920 // execlike && !ET_EXEC is a static executable.
921 if (mod != NULL && (execlike || ehdr.e32.e_type == ET_EXEC))
922 mod->is_executable = true;
923
924 if (likely (mod != NULL) && build_id.memory != NULL
925 && unlikely (INTUSE(dwfl_module_report_build_id) (mod,
926 build_id.memory,
927 build_id.len,
928 build_id.vaddr)))
929 {
930 mod->gc = true;
931 mod = NULL;
932 }
933
934 /* At this point we do not need BUILD_ID or NAME any more.
935 They have been copied. */
936 free (build_id.memory);
937 build_id.memory = NULL;
938 finish_portion (&read_state, &soname, &soname_size);
939
940 if (unlikely (mod == NULL))
941 {
942 ndx = -1;
943 goto out;
944 }
945 else
946 ndx++;
947
948 /* We have reported the module. Now let the caller decide whether we
949 should read the whole thing in right now. */
950
951 const GElf_Off cost = (contiguous < file_trimmed_end ? total_filesz
952 : buffer_available >= contiguous ? 0
953 : contiguous - buffer_available);
954 const GElf_Off worthwhile = ((dynstr_vaddr == 0 || dynstrsz == 0) ? 0
955 : dynstr_vaddr + dynstrsz - start);
956 const GElf_Off whole = MAX (file_trimmed_end, shdrs_end);
957
958 if (elf == NULL
959 && (*read_eagerly) (MODCB_ARGS (mod), &buffer, &buffer_available,
960 cost, worthwhile, whole, contiguous,
961 read_eagerly_arg, &elf)
962 && elf == NULL)
963 {
964 /* The caller wants to read the whole file in right now, but hasn't
965 done it for us. Fill in a local image of the virtual file. */
966
967 if (file_trimmed_end > maxread)
968 file_trimmed_end = maxread;
969
970 void *contents = calloc (1, file_trimmed_end);
971 if (unlikely (contents == NULL))
972 goto out;
973
974 if (contiguous < file_trimmed_end)
975 {
976 /* We can't use the memory image verbatim as the file image.
977 So we'll be reading into a local image of the virtual file. */
978 for (uint_fast16_t i = 0; i < phnum; ++i)
979 {
980 bool is32 = (ei_class == ELFCLASS32);
981 GElf_Word type = is32 ? p32[i].p_type : p64[i].p_type;
982
983 if (type != PT_LOAD)
984 continue;
985
986 GElf_Addr vaddr = is32 ? p32[i].p_vaddr : p64[i].p_vaddr;
987 GElf_Off offset = is32 ? p32[i].p_offset : p64[i].p_offset;
988 GElf_Xword filesz = is32 ? p32[i].p_filesz : p64[i].p_filesz;
989
990 /* Don't try to read beyond the actual end of file. */
991 if (offset >= file_trimmed_end)
992 continue;
993
994 void *into = contents + offset;
995 size_t read_size = MIN (filesz, file_trimmed_end - offset);
996 (*memory_callback) (dwfl, addr_segndx (dwfl, segment,
997 vaddr + bias, false),
998 &into, &read_size, vaddr + bias, read_size,
999 memory_callback_arg);
1000 }
1001 }
1002 else
1003 {
1004 /* The whole file sits contiguous in memory,
1005 but the caller didn't want to just do it. */
1006
1007 const size_t have = MIN (buffer_available, file_trimmed_end);
1008 memcpy (contents, buffer, have);
1009
1010 if (have < file_trimmed_end)
1011 {
1012 void *into = contents + have;
1013 size_t read_size = file_trimmed_end - have;
1014 (*memory_callback) (dwfl,
1015 addr_segndx (dwfl, segment,
1016 start + have, false),
1017 &into, &read_size, start + have,
1018 read_size, memory_callback_arg);
1019 }
1020 }
1021
1022 elf = elf_memory (contents, file_trimmed_end);
1023 if (unlikely (elf == NULL))
1024 free (contents);
1025 else
1026 elf->flags |= ELF_F_MALLOCED;
1027 }
1028
1029 if (elf != NULL && mod->main.elf == NULL)
1030 {
1031 /* Install the file in the module. */
1032 mod->main.elf = elf;
1033 mod->main.fd = fd;
1034 elf = NULL;
1035 fd = -1;
1036 mod->main.vaddr = module_start - bias;
1037 mod->main.address_sync = module_address_sync;
1038 mod->main_bias = bias;
1039 }
1040
1041 out:
1042 if (build_id.memory != NULL)
1043 free (build_id.memory);
1044 free (phdrsp);
1045 if (buffer != NULL)
1046 (*memory_callback) (dwfl, -1, &buffer, &buffer_available, 0, 0,
1047 memory_callback_arg);
1048
1049 if (elf != NULL)
1050 elf_end (elf);
1051 if (fd != -1)
1052 close (fd);
1053 return ndx;
1054 }
1055