xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_rtld.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2014-2019 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "ac_rtld.h"
8 
9 #include "ac_binary.h"
10 #include "ac_gpu_info.h"
11 #include "util/compiler.h"
12 #include "util/u_dynarray.h"
13 #include "util/u_math.h"
14 
15 #include <gelf.h>
16 #include <libelf.h>
17 #include <stdarg.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
22 #ifndef EM_AMDGPU
23 // Old distributions may not have this enum constant
24 #define EM_AMDGPU 224
25 #endif
26 
27 #ifndef STT_AMDGPU_LDS
28 #define STT_AMDGPU_LDS 13 // this is deprecated -- remove
29 #endif
30 
31 #ifndef SHN_AMDGPU_LDS
32 #define SHN_AMDGPU_LDS 0xff00
33 #endif
34 
35 #ifndef R_AMDGPU_NONE
36 #define R_AMDGPU_NONE          0
37 #define R_AMDGPU_ABS32_LO      1
38 #define R_AMDGPU_ABS32_HI      2
39 #define R_AMDGPU_ABS64         3
40 #define R_AMDGPU_REL32         4
41 #define R_AMDGPU_REL64         5
42 #define R_AMDGPU_ABS32         6
43 #define R_AMDGPU_GOTPCREL      7
44 #define R_AMDGPU_GOTPCREL32_LO 8
45 #define R_AMDGPU_GOTPCREL32_HI 9
46 #define R_AMDGPU_REL32_LO      10
47 #define R_AMDGPU_REL32_HI      11
48 #define R_AMDGPU_RELATIVE64    13
49 #endif
50 
51 /* For the UMR disassembler. */
52 #define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
53 #define DEBUGGER_NUM_MARKERS        5
54 
/* Placement record for one ELF section of a shader part. */
struct ac_rtld_section {
   /* Set for SHF_ALLOC sections (other than notes); these are the sections
    * that get copied into the read/execute memory image. */
   bool is_rx : 1;
   /* Set for executable sections named ".text"; these are packed back to
    * back at the start of the image instead of being individually aligned. */
   bool is_pasted_text : 1;
   /* Byte offset of the section inside the read/execute memory image. */
   uint64_t offset;
   /* Section name as returned by elf_strptr() (owned by libelf). */
   const char *name;
};
61 
/* One shader part: an opened ELF plus the per-section placement table. */
struct ac_rtld_part {
   /* libelf handle created by elf_memory(); released with elf_end(). */
   Elf *elf;
   /* Array of num_sections entries, indexed by ELF section index. */
   struct ac_rtld_section *sections;
   /* Number of entries in sections (the ELF section header count). */
   unsigned num_sections;
};
67 
/**
 * Print an error message to stderr, prefixed with "ac_rtld error: " and
 * followed by a newline.
 *
 * \param fmt printf-style format string
 * \param va  argument list consumed by \p fmt
 */
static void report_errorvf(const char *fmt, va_list va)
{
   fputs("ac_rtld error: ", stderr);
   vfprintf(stderr, fmt, va);
   fputc('\n', stderr);
}
76 
77 static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
78 
report_errorf(const char * fmt,...)79 static void report_errorf(const char *fmt, ...)
80 {
81    va_list va;
82    va_start(va, fmt);
83    report_errorvf(fmt, va);
84    va_end(va);
85 }
86 
87 static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
88 
report_elf_errorf(const char * fmt,...)89 static void report_elf_errorf(const char *fmt, ...)
90 {
91    va_list va;
92    va_start(va, fmt);
93    report_errorvf(fmt, va);
94    va_end(va);
95 
96    fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
97 }
98 
99 /**
100  * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
101  * \p part_idx.
102  */
find_symbol(const struct util_dynarray * symbols,const char * name,unsigned part_idx)103 static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
104                                                 const char *name, unsigned part_idx)
105 {
106    util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) {
107       if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name))
108          return symbol;
109    }
110    return NULL;
111 }
112 
compare_symbol_by_align(const void * lhsp,const void * rhsp)113 static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
114 {
115    const struct ac_rtld_symbol *lhs = lhsp;
116    const struct ac_rtld_symbol *rhs = rhsp;
117    if (rhs->align > lhs->align)
118       return 1;
119    if (rhs->align < lhs->align)
120       return -1;
121    return 0;
122 }
123 
124 /**
125  * Sort the given symbol list by decreasing alignment and assign offsets.
126  */
layout_symbols(struct ac_rtld_symbol * symbols,unsigned num_symbols,uint64_t * ptotal_size)127 static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
128                            uint64_t *ptotal_size)
129 {
130    qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
131 
132    uint64_t total_size = *ptotal_size;
133 
134    for (unsigned i = 0; i < num_symbols; ++i) {
135       struct ac_rtld_symbol *s = &symbols[i];
136       assert(util_is_power_of_two_nonzero(s->align));
137 
138       total_size = align64(total_size, s->align);
139       s->offset = total_size;
140 
141       if (total_size + s->size < total_size) {
142          report_errorf("%s: size overflow", __func__);
143          return false;
144       }
145 
146       total_size += s->size;
147    }
148 
149    *ptotal_size = total_size;
150    return true;
151 }
152 
153 /**
154  * Read LDS symbols from the given \p section of the ELF of \p part and append
155  * them to the LDS symbols list.
156  *
157  * Shared LDS symbols are filtered out.
158  */
read_private_lds_symbols(struct ac_rtld_binary * binary,unsigned part_idx,Elf_Scn * section,uint32_t * lds_end_align)159 static bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx,
160                                      Elf_Scn *section, uint32_t *lds_end_align)
161 {
162 #define report_if(cond)                                                                            \
163    do {                                                                                            \
164       if ((cond)) {                                                                                \
165          report_errorf(#cond);                                                                     \
166          return false;                                                                             \
167       }                                                                                            \
168    } while (false)
169 #define report_elf_if(cond)                                                                        \
170    do {                                                                                            \
171       if ((cond)) {                                                                                \
172          report_elf_errorf(#cond);                                                                 \
173          return false;                                                                             \
174       }                                                                                            \
175    } while (false)
176 
177    struct ac_rtld_part *part = &binary->parts[part_idx];
178    Elf64_Shdr *shdr = elf64_getshdr(section);
179    uint32_t strtabidx = shdr->sh_link;
180    Elf_Data *symbols_data = elf_getdata(section, NULL);
181    report_elf_if(!symbols_data);
182 
183    const Elf64_Sym *symbol = symbols_data->d_buf;
184    size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
185 
186    for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
187       struct ac_rtld_symbol s = {0};
188 
189       if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
190          /* old-style LDS symbols from initial prototype -- remove eventually */
191          s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
192       } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
193          s.align = MIN2(symbol->st_value, 1u << 16);
194          report_if(!util_is_power_of_two_nonzero(s.align));
195       } else
196          continue;
197 
198       report_if(symbol->st_size > 1u << 29);
199 
200       s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
201       s.size = symbol->st_size;
202       s.part_idx = part_idx;
203 
204       if (!strcmp(s.name, "__lds_end")) {
205          report_elf_if(s.size != 0);
206          *lds_end_align = MAX2(*lds_end_align, s.align);
207          continue;
208       }
209 
210       const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx);
211       if (shared) {
212          report_elf_if(s.align > shared->align);
213          report_elf_if(s.size > shared->size);
214          continue;
215       }
216 
217       util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
218    }
219 
220    return true;
221 
222 #undef report_if
223 #undef report_elf_if
224 }
225 
226 /**
227  * Open a binary consisting of one or more shader parts.
228  *
229  * \param binary the uninitialized struct
230  * \param i binary opening parameters
231  */
ac_rtld_open(struct ac_rtld_binary * binary,struct ac_rtld_open_info i)232 bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
233 {
234    /* One of the libelf implementations
235     * (http://www.mr511.de/software/english.htm) requires calling
236     * elf_version() before elf_memory().
237     */
238    elf_version(EV_CURRENT);
239 
240    memset(binary, 0, sizeof(*binary));
241    memcpy(&binary->options, &i.options, sizeof(binary->options));
242    binary->wave_size = i.wave_size;
243    binary->gfx_level = i.info->gfx_level;
244    binary->num_parts = i.num_parts;
245    binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
246    if (!binary->parts)
247       return false;
248 
249    uint64_t pasted_text_size = 0;
250    uint64_t rx_align = 1;
251    uint64_t rx_size = 0;
252    uint64_t exec_size = 0;
253 
254 #define report_if(cond)                                                                            \
255    do {                                                                                            \
256       if ((cond)) {                                                                                \
257          report_errorf(#cond);                                                                     \
258          goto fail;                                                                                \
259       }                                                                                            \
260    } while (false)
261 #define report_elf_if(cond)                                                                        \
262    do {                                                                                            \
263       if ((cond)) {                                                                                \
264          report_elf_errorf(#cond);                                                                 \
265          goto fail;                                                                                \
266       }                                                                                            \
267    } while (false)
268 
269    /* Copy and layout shared LDS symbols. */
270    if (i.num_shared_lds_symbols) {
271       if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
272                                 i.num_shared_lds_symbols))
273          goto fail;
274 
275       memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
276    }
277 
278    util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol)
279       symbol->part_idx = ~0u;
280 
281    unsigned max_lds_size = i.info->gfx_level == GFX6 ? 32 * 1024 : 64 * 1024;
282 
283    uint64_t shared_lds_size = 0;
284    if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
285       goto fail;
286 
287    if (shared_lds_size > max_lds_size) {
288       fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
289               (unsigned)shared_lds_size, max_lds_size);
290       goto fail;
291    }
292    binary->lds_size = shared_lds_size;
293 
294    /* First pass over all parts: open ELFs, pre-determine the placement of
295     * sections in the memory image, and collect and layout private LDS symbols. */
296    uint32_t lds_end_align = 0;
297 
298    if (binary->options.halt_at_entry)
299       pasted_text_size += 4;
300 
301    for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
302       struct ac_rtld_part *part = &binary->parts[part_idx];
303       unsigned part_lds_symbols_begin =
304          util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
305 
306       part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
307       report_elf_if(!part->elf);
308 
309       const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
310       report_elf_if(!ehdr);
311       report_if(ehdr->e_machine != EM_AMDGPU);
312 
313       size_t section_str_index;
314       size_t num_shdrs;
315       report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
316       report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
317 
318       part->num_sections = num_shdrs;
319       part->sections = calloc(sizeof(*part->sections), num_shdrs);
320       report_if(!part->sections);
321 
322       Elf_Scn *section = NULL;
323       bool first_section = true;
324       while ((section = elf_nextscn(part->elf, section))) {
325          Elf64_Shdr *shdr = elf64_getshdr(section);
326          struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
327          s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
328          report_elf_if(!s->name);
329 
330          /* Cannot actually handle linked objects yet */
331          report_elf_if(shdr->sh_addr != 0);
332 
333          /* Alignment must be 0 or a power of two */
334          report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
335          uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
336 
337          if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) {
338             report_if(shdr->sh_flags & SHF_WRITE);
339 
340             s->is_rx = true;
341 
342             if (shdr->sh_flags & SHF_EXECINSTR) {
343                report_elf_if(shdr->sh_size & 3);
344 
345                if (!strcmp(s->name, ".text"))
346                   s->is_pasted_text = true;
347 
348                exec_size += shdr->sh_size;
349             }
350 
351             if (s->is_pasted_text) {
352                if (part_idx > 0 && first_section && binary->options.waitcnt_wa) {
353                   /* Reserve a dword at the beginning of this part. */
354                   exec_size += 4;
355                   pasted_text_size += 4;
356                   first_section = false;
357                }
358 
359                s->offset = pasted_text_size;
360                pasted_text_size += shdr->sh_size;
361             } else {
362                rx_align = align(rx_align, sh_align);
363                rx_size = align(rx_size, sh_align);
364                s->offset = rx_size;
365                rx_size += shdr->sh_size;
366             }
367          } else if (shdr->sh_type == SHT_SYMTAB) {
368             if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
369                goto fail;
370          }
371       }
372 
373       uint64_t part_lds_size = shared_lds_size;
374       if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol,
375                                                 part_lds_symbols_begin),
376                           util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) -
377                              part_lds_symbols_begin,
378                           &part_lds_size))
379          goto fail;
380       binary->lds_size = MAX2(binary->lds_size, part_lds_size);
381    }
382 
383    binary->rx_end_markers = pasted_text_size;
384    pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
385 
386    /* __lds_end is a special symbol that points at the end of the memory
387     * occupied by other LDS symbols. Its alignment is taken as the
388     * maximum of its alignment over all shader parts where it occurs.
389     */
390    if (lds_end_align) {
391       binary->lds_size = align(binary->lds_size, lds_end_align);
392 
393       struct ac_rtld_symbol *lds_end =
394          util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
395       lds_end->name = "__lds_end";
396       lds_end->size = 0;
397       lds_end->align = lds_end_align;
398       lds_end->offset = binary->lds_size;
399       lds_end->part_idx = ~0u;
400    }
401 
402    if (binary->lds_size > max_lds_size) {
403       fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
404               (unsigned)binary->lds_size, max_lds_size);
405       goto fail;
406    }
407 
408    /* Second pass: Adjust offsets of non-pasted text sections. */
409    binary->rx_size = pasted_text_size;
410    binary->rx_size = align(binary->rx_size, rx_align);
411 
412    for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
413       struct ac_rtld_part *part = &binary->parts[part_idx];
414       size_t num_shdrs;
415       elf_getshdrnum(part->elf, &num_shdrs);
416 
417       for (unsigned j = 0; j < num_shdrs; ++j) {
418          struct ac_rtld_section *s = &part->sections[j];
419          if (s->is_rx && !s->is_pasted_text)
420             s->offset += binary->rx_size;
421       }
422    }
423 
424    binary->rx_size += rx_size;
425    binary->exec_size = exec_size;
426 
427    return true;
428 
429 #undef report_if
430 #undef report_elf_if
431 
432 fail:
433    ac_rtld_close(binary);
434    return false;
435 }
436 
ac_rtld_close(struct ac_rtld_binary * binary)437 void ac_rtld_close(struct ac_rtld_binary *binary)
438 {
439    for (unsigned i = 0; i < binary->num_parts; ++i) {
440       struct ac_rtld_part *part = &binary->parts[i];
441       free(part->sections);
442       elf_end(part->elf);
443    }
444 
445    util_dynarray_fini(&binary->lds_symbols);
446    free(binary->parts);
447    binary->parts = NULL;
448    binary->num_parts = 0;
449 }
450 
get_section_by_name(struct ac_rtld_part * part,const char * name,const char ** data,size_t * nbytes)451 static bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data,
452                                 size_t *nbytes)
453 {
454    for (unsigned i = 0; i < part->num_sections; ++i) {
455       struct ac_rtld_section *s = &part->sections[i];
456       if (s->name && !strcmp(name, s->name)) {
457          Elf_Scn *target_scn = elf_getscn(part->elf, i);
458          Elf_Data *target_data = elf_getdata(target_scn, NULL);
459          if (!target_data) {
460             report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
461             return false;
462          }
463 
464          *data = target_data->d_buf;
465          *nbytes = target_data->d_size;
466          return true;
467       }
468    }
469    return false;
470 }
471 
ac_rtld_get_section_by_name(struct ac_rtld_binary * binary,const char * name,const char ** data,size_t * nbytes)472 bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
473                                  size_t *nbytes)
474 {
475    assert(binary->num_parts == 1);
476    return get_section_by_name(&binary->parts[0], name, data, nbytes);
477 }
478 
ac_rtld_read_config(const struct radeon_info * info,struct ac_rtld_binary * binary,struct ac_shader_config * config)479 bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
480                          struct ac_shader_config *config)
481 {
482    for (unsigned i = 0; i < binary->num_parts; ++i) {
483       struct ac_rtld_part *part = &binary->parts[i];
484       const char *config_data;
485       size_t config_nbytes;
486 
487       if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes))
488          return false;
489 
490       /* TODO: be precise about scratch use? */
491       struct ac_shader_config c = {0};
492       ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, info, &c);
493 
494       config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
495       config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
496       config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
497       config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
498       config->scratch_bytes_per_wave =
499          MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave);
500 
501       assert(i == 0 || config->float_mode == c.float_mode);
502       config->float_mode = c.float_mode;
503 
504       /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
505        * the main shader part is used. */
506       assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0);
507       config->spi_ps_input_ena = c.spi_ps_input_ena;
508       config->spi_ps_input_addr = c.spi_ps_input_addr;
509 
510       /* TODO: consistently use LDS symbols for this */
511       config->lds_size = MAX2(config->lds_size, c.lds_size);
512 
513       /* TODO: Should we combine these somehow? It's currently only
514        * used for radeonsi's compute, where multiple parts aren't used. */
515       assert(config->rsrc1 == 0 && config->rsrc2 == 0);
516       config->rsrc1 = c.rsrc1;
517       config->rsrc2 = c.rsrc2;
518    }
519 
520    return true;
521 }
522 
resolve_symbol(const struct ac_rtld_upload_info * u,unsigned part_idx,const Elf64_Sym * sym,const char * name,uint64_t * value)523 static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx,
524                            const Elf64_Sym *sym, const char *name, uint64_t *value)
525 {
526    /* TODO: properly disentangle the undef and the LDS cases once
527     * STT_AMDGPU_LDS is retired. */
528    if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
529       const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx);
530 
531       if (lds_sym) {
532          *value = lds_sym->offset;
533          return true;
534       }
535 
536       /* TODO: resolve from other parts */
537 
538       if (u->get_external_symbol(u->binary->gfx_level, u->cb_data, name, value))
539          return true;
540 
541       report_errorf("symbol %s: unknown", name);
542       return false;
543    }
544 
545    struct ac_rtld_part *part = &u->binary->parts[part_idx];
546    if (sym->st_shndx >= part->num_sections) {
547       report_errorf("symbol %s: section out of bounds", name);
548       return false;
549    }
550 
551    struct ac_rtld_section *s = &part->sections[sym->st_shndx];
552    if (!s->is_rx) {
553       report_errorf("symbol %s: bad section", name);
554       return false;
555    }
556 
557    uint64_t section_base = u->rx_va + s->offset;
558 
559    *value = section_base + sym->st_value;
560    return true;
561 }
562 
apply_relocs(const struct ac_rtld_upload_info * u,unsigned part_idx,const Elf64_Shdr * reloc_shdr,const Elf_Data * reloc_data)563 static bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx,
564                          const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data)
565 {
566 #define report_if(cond)                                                                            \
567    do {                                                                                            \
568       if ((cond)) {                                                                                \
569          report_errorf(#cond);                                                                     \
570          return false;                                                                             \
571       }                                                                                            \
572    } while (false)
573 #define report_elf_if(cond)                                                                        \
574    do {                                                                                            \
575       if ((cond)) {                                                                                \
576          report_elf_errorf(#cond);                                                                 \
577          return false;                                                                             \
578       }                                                                                            \
579    } while (false)
580 
581    struct ac_rtld_part *part = &u->binary->parts[part_idx];
582    Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
583    report_elf_if(!target_scn);
584 
585    Elf_Data *target_data = elf_getdata(target_scn, NULL);
586    report_elf_if(!target_data);
587 
588    Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
589    report_elf_if(!symbols_scn);
590 
591    Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
592    report_elf_if(!symbols_shdr);
593    uint32_t strtabidx = symbols_shdr->sh_link;
594 
595    Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
596    report_elf_if(!symbols_data);
597 
598    const Elf64_Sym *symbols = symbols_data->d_buf;
599    size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
600 
601    struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
602    report_if(!s->is_rx);
603 
604    const char *orig_base = target_data->d_buf;
605    char *dst_base = u->rx_ptr + s->offset;
606    uint64_t va_base = u->rx_va + s->offset;
607 
608    Elf64_Rel *rel = reloc_data->d_buf;
609    size_t num_relocs = reloc_data->d_size / sizeof(*rel);
610    for (size_t i = 0; i < num_relocs; ++i, ++rel) {
611       size_t r_sym = ELF64_R_SYM(rel->r_info);
612       unsigned r_type = ELF64_R_TYPE(rel->r_info);
613 
614       const char *orig_ptr = orig_base + rel->r_offset;
615       char *dst_ptr = dst_base + rel->r_offset;
616       uint64_t va = va_base + rel->r_offset;
617 
618       uint64_t symbol;
619       uint64_t addend;
620 
621       if (r_sym == STN_UNDEF) {
622          symbol = 0;
623       } else {
624          report_elf_if(r_sym >= num_symbols);
625 
626          const Elf64_Sym *sym = &symbols[r_sym];
627          const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name);
628          report_elf_if(!symbol_name);
629 
630          if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
631             return false;
632       }
633 
634       /* TODO: Should we also support .rela sections, where the
635        * addend is part of the relocation record? */
636 
637       /* Load the addend from the ELF instead of the destination,
638        * because the destination may be in VRAM. */
639       switch (r_type) {
640       case R_AMDGPU_ABS32:
641       case R_AMDGPU_ABS32_LO:
642       case R_AMDGPU_ABS32_HI:
643       case R_AMDGPU_REL32:
644       case R_AMDGPU_REL32_LO:
645       case R_AMDGPU_REL32_HI:
646          addend = *(const uint32_t *)orig_ptr;
647          break;
648       case R_AMDGPU_ABS64:
649       case R_AMDGPU_REL64:
650          addend = *(const uint64_t *)orig_ptr;
651          break;
652       default:
653          report_errorf("unsupported r_type == %u", r_type);
654          return false;
655       }
656 
657       uint64_t abs = symbol + addend;
658 
659       switch (r_type) {
660       case R_AMDGPU_ABS32:
661          assert((uint32_t)abs == abs);
662          FALLTHROUGH;
663       case R_AMDGPU_ABS32_LO:
664          *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
665          break;
666       case R_AMDGPU_ABS32_HI:
667          *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
668          break;
669       case R_AMDGPU_ABS64:
670          *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
671          break;
672       case R_AMDGPU_REL32:
673          assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
674          FALLTHROUGH;
675       case R_AMDGPU_REL32_LO:
676          *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
677          break;
678       case R_AMDGPU_REL32_HI:
679          *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
680          break;
681       case R_AMDGPU_REL64:
682          *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
683          break;
684       default:
685          unreachable("bad r_type");
686       }
687    }
688 
689    return true;
690 
691 #undef report_if
692 #undef report_elf_if
693 }
694 
695 /**
696  * Upload the binary or binaries to the provided GPU buffers, including
697  * relocations.
698  */
ac_rtld_upload(struct ac_rtld_upload_info * u)699 int ac_rtld_upload(struct ac_rtld_upload_info *u)
700 {
701 #define report_if(cond)                                                                            \
702    do {                                                                                            \
703       if ((cond)) {                                                                                \
704          report_errorf(#cond);                                                                     \
705          return -1;                                                                             \
706       }                                                                                            \
707    } while (false)
708 #define report_elf_if(cond)                                                                        \
709    do {                                                                                            \
710       if ((cond)) {                                                                                \
711          report_errorf(#cond);                                                                     \
712          return -1;                                                                             \
713       }                                                                                            \
714    } while (false)
715 
716    int size = 0;
717    if (u->binary->options.halt_at_entry) {
718       /* s_sethalt 1 */
719       *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
720    }
721 
722    /* First pass: upload raw section data and lay out private LDS symbols. */
723    for (unsigned i = 0; i < u->binary->num_parts; ++i) {
724       struct ac_rtld_part *part = &u->binary->parts[i];
725 
726       bool first_section = true;
727       Elf_Scn *section = NULL;
728       while ((section = elf_nextscn(part->elf, section))) {
729          Elf64_Shdr *shdr = elf64_getshdr(section);
730          struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
731 
732          if (!s->is_rx)
733             continue;
734 
735          report_if(shdr->sh_type != SHT_PROGBITS);
736 
737          Elf_Data *data = elf_getdata(section, NULL);
738          report_elf_if(!data || data->d_size != shdr->sh_size);
739 
740          if (i > 0 && first_section && u->binary->options.waitcnt_wa) {
741             assert(s->offset >= 4);
742             *(uint32_t *)(u->rx_ptr + s->offset - 4) = util_cpu_to_le32(0xbf880fff);
743             first_section = false;
744          }
745 
746          memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
747 
748          size = MAX2(size, s->offset + shdr->sh_size);
749       }
750    }
751 
752    if (u->binary->rx_end_markers) {
753       uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
754       for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
755          *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
756       size += 4 * DEBUGGER_NUM_MARKERS;
757    }
758 
759    /* Second pass: handle relocations, overwriting uploaded data where
760     * appropriate. */
761    for (unsigned i = 0; i < u->binary->num_parts; ++i) {
762       struct ac_rtld_part *part = &u->binary->parts[i];
763       Elf_Scn *section = NULL;
764       while ((section = elf_nextscn(part->elf, section))) {
765          Elf64_Shdr *shdr = elf64_getshdr(section);
766          if (shdr->sh_type == SHT_REL) {
767             Elf_Data *relocs = elf_getdata(section, NULL);
768             report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
769             if (!apply_relocs(u, i, shdr, relocs))
770                return -1;
771          } else if (shdr->sh_type == SHT_RELA) {
772             report_errorf("SHT_RELA not supported");
773             return -1;
774          }
775       }
776    }
777 
778    return size;
779 
780 #undef report_if
781 #undef report_elf_if
782 }
783