xref: /aosp_15_r20/external/google-breakpad/src/common/dwarf/bytereader.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
// Copyright 2010 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*9712c20fSFrederick Mayle 
29*9712c20fSFrederick Mayle #ifdef HAVE_CONFIG_H
30*9712c20fSFrederick Mayle #include <config.h>  // Must come first
31*9712c20fSFrederick Mayle #endif
32*9712c20fSFrederick Mayle 
33*9712c20fSFrederick Mayle #include <assert.h>
34*9712c20fSFrederick Mayle #include <stdint.h>
35*9712c20fSFrederick Mayle #include <stdlib.h>
36*9712c20fSFrederick Mayle 
37*9712c20fSFrederick Mayle #include "common/dwarf/bytereader-inl.h"
38*9712c20fSFrederick Mayle #include "common/dwarf/bytereader.h"
39*9712c20fSFrederick Mayle 
40*9712c20fSFrederick Mayle namespace google_breakpad {
41*9712c20fSFrederick Mayle 
ByteReader(enum Endianness endian)42*9712c20fSFrederick Mayle ByteReader::ByteReader(enum Endianness endian)
43*9712c20fSFrederick Mayle     :offset_reader_(NULL), address_reader_(NULL), endian_(endian),
44*9712c20fSFrederick Mayle      address_size_(0), offset_size_(0),
45*9712c20fSFrederick Mayle      have_section_base_(), have_text_base_(), have_data_base_(),
46*9712c20fSFrederick Mayle      have_function_base_() { }
47*9712c20fSFrederick Mayle 
~ByteReader()48*9712c20fSFrederick Mayle ByteReader::~ByteReader() { }
49*9712c20fSFrederick Mayle 
SetOffsetSize(uint8_t size)50*9712c20fSFrederick Mayle void ByteReader::SetOffsetSize(uint8_t size) {
51*9712c20fSFrederick Mayle   offset_size_ = size;
52*9712c20fSFrederick Mayle   assert(size == 4 || size == 8);
53*9712c20fSFrederick Mayle   if (size == 4) {
54*9712c20fSFrederick Mayle     this->offset_reader_ = &ByteReader::ReadFourBytes;
55*9712c20fSFrederick Mayle   } else {
56*9712c20fSFrederick Mayle     this->offset_reader_ = &ByteReader::ReadEightBytes;
57*9712c20fSFrederick Mayle   }
58*9712c20fSFrederick Mayle }
59*9712c20fSFrederick Mayle 
SetAddressSize(uint8_t size)60*9712c20fSFrederick Mayle void ByteReader::SetAddressSize(uint8_t size) {
61*9712c20fSFrederick Mayle   address_size_ = size;
62*9712c20fSFrederick Mayle   assert(size == 4 || size == 8);
63*9712c20fSFrederick Mayle   if (size == 4) {
64*9712c20fSFrederick Mayle     this->address_reader_ = &ByteReader::ReadFourBytes;
65*9712c20fSFrederick Mayle   } else {
66*9712c20fSFrederick Mayle     this->address_reader_ = &ByteReader::ReadEightBytes;
67*9712c20fSFrederick Mayle   }
68*9712c20fSFrederick Mayle }
69*9712c20fSFrederick Mayle 
ReadInitialLength(const uint8_t * start,size_t * len)70*9712c20fSFrederick Mayle uint64_t ByteReader::ReadInitialLength(const uint8_t* start, size_t* len) {
71*9712c20fSFrederick Mayle   const uint64_t initial_length = ReadFourBytes(start);
72*9712c20fSFrederick Mayle   start += 4;
73*9712c20fSFrederick Mayle 
74*9712c20fSFrederick Mayle   // In DWARF2/3, if the initial length is all 1 bits, then the offset
75*9712c20fSFrederick Mayle   // size is 8 and we need to read the next 8 bytes for the real length.
76*9712c20fSFrederick Mayle   if (initial_length == 0xffffffff) {
77*9712c20fSFrederick Mayle     SetOffsetSize(8);
78*9712c20fSFrederick Mayle     *len = 12;
79*9712c20fSFrederick Mayle     return ReadOffset(start);
80*9712c20fSFrederick Mayle   } else {
81*9712c20fSFrederick Mayle     SetOffsetSize(4);
82*9712c20fSFrederick Mayle     *len = 4;
83*9712c20fSFrederick Mayle   }
84*9712c20fSFrederick Mayle   return initial_length;
85*9712c20fSFrederick Mayle }
86*9712c20fSFrederick Mayle 
ValidEncoding(DwarfPointerEncoding encoding) const87*9712c20fSFrederick Mayle bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const {
88*9712c20fSFrederick Mayle   if (encoding == DW_EH_PE_omit) return true;
89*9712c20fSFrederick Mayle   if (encoding == DW_EH_PE_aligned) return true;
90*9712c20fSFrederick Mayle   if ((encoding & 0x7) > DW_EH_PE_udata8)
91*9712c20fSFrederick Mayle     return false;
92*9712c20fSFrederick Mayle   if ((encoding & 0x70) > DW_EH_PE_funcrel)
93*9712c20fSFrederick Mayle     return false;
94*9712c20fSFrederick Mayle   return true;
95*9712c20fSFrederick Mayle }
96*9712c20fSFrederick Mayle 
UsableEncoding(DwarfPointerEncoding encoding) const97*9712c20fSFrederick Mayle bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const {
98*9712c20fSFrederick Mayle   switch (encoding & 0x70) {
99*9712c20fSFrederick Mayle     case DW_EH_PE_absptr:  return true;
100*9712c20fSFrederick Mayle     case DW_EH_PE_pcrel:   return have_section_base_;
101*9712c20fSFrederick Mayle     case DW_EH_PE_textrel: return have_text_base_;
102*9712c20fSFrederick Mayle     case DW_EH_PE_datarel: return have_data_base_;
103*9712c20fSFrederick Mayle     case DW_EH_PE_funcrel: return have_function_base_;
104*9712c20fSFrederick Mayle     default:               return false;
105*9712c20fSFrederick Mayle   }
106*9712c20fSFrederick Mayle }
107*9712c20fSFrederick Mayle 
ReadEncodedPointer(const uint8_t * buffer,DwarfPointerEncoding encoding,size_t * len) const108*9712c20fSFrederick Mayle uint64_t ByteReader::ReadEncodedPointer(const uint8_t* buffer,
109*9712c20fSFrederick Mayle                                       DwarfPointerEncoding encoding,
110*9712c20fSFrederick Mayle                                       size_t* len) const {
111*9712c20fSFrederick Mayle   // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't
112*9712c20fSFrederick Mayle   // see it here.
113*9712c20fSFrederick Mayle   assert(encoding != DW_EH_PE_omit);
114*9712c20fSFrederick Mayle 
115*9712c20fSFrederick Mayle   // The Linux Standards Base 4.0 does not make this clear, but the
116*9712c20fSFrederick Mayle   // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c)
117*9712c20fSFrederick Mayle   // agree that aligned pointers are always absolute, machine-sized,
118*9712c20fSFrederick Mayle   // machine-signed pointers.
119*9712c20fSFrederick Mayle   if (encoding == DW_EH_PE_aligned) {
120*9712c20fSFrederick Mayle     assert(have_section_base_);
121*9712c20fSFrederick Mayle 
122*9712c20fSFrederick Mayle     // We don't need to align BUFFER in *our* address space. Rather, we
123*9712c20fSFrederick Mayle     // need to find the next position in our buffer that would be aligned
124*9712c20fSFrederick Mayle     // when the .eh_frame section the buffer contains is loaded into the
125*9712c20fSFrederick Mayle     // program's memory. So align assuming that buffer_base_ gets loaded at
126*9712c20fSFrederick Mayle     // address section_base_, where section_base_ itself may or may not be
127*9712c20fSFrederick Mayle     // aligned.
128*9712c20fSFrederick Mayle 
129*9712c20fSFrederick Mayle     // First, find the offset to START from the closest prior aligned
130*9712c20fSFrederick Mayle     // address.
131*9712c20fSFrederick Mayle     uint64_t skew = section_base_ & (AddressSize() - 1);
132*9712c20fSFrederick Mayle     // Now find the offset from that aligned address to buffer.
133*9712c20fSFrederick Mayle     uint64_t offset = skew + (buffer - buffer_base_);
134*9712c20fSFrederick Mayle     // Round up to the next boundary.
135*9712c20fSFrederick Mayle     uint64_t aligned = (offset + AddressSize() - 1) & -AddressSize();
136*9712c20fSFrederick Mayle     // Convert back to a pointer.
137*9712c20fSFrederick Mayle     const uint8_t* aligned_buffer = buffer_base_ + (aligned - skew);
138*9712c20fSFrederick Mayle     // Finally, store the length and actually fetch the pointer.
139*9712c20fSFrederick Mayle     *len = aligned_buffer - buffer + AddressSize();
140*9712c20fSFrederick Mayle     return ReadAddress(aligned_buffer);
141*9712c20fSFrederick Mayle   }
142*9712c20fSFrederick Mayle 
143*9712c20fSFrederick Mayle   // Extract the value first, ignoring whether it's a pointer or an
144*9712c20fSFrederick Mayle   // offset relative to some base.
145*9712c20fSFrederick Mayle   uint64_t offset;
146*9712c20fSFrederick Mayle   switch (encoding & 0x0f) {
147*9712c20fSFrederick Mayle     case DW_EH_PE_absptr:
148*9712c20fSFrederick Mayle       // DW_EH_PE_absptr is weird, as it is used as a meaningful value for
149*9712c20fSFrederick Mayle       // both the high and low nybble of encoding bytes. When it appears in
150*9712c20fSFrederick Mayle       // the high nybble, it means that the pointer is absolute, not an
151*9712c20fSFrederick Mayle       // offset from some base address. When it appears in the low nybble,
152*9712c20fSFrederick Mayle       // as here, it means that the pointer is stored as a normal
153*9712c20fSFrederick Mayle       // machine-sized and machine-signed address. A low nybble of
154*9712c20fSFrederick Mayle       // DW_EH_PE_absptr does not imply that the pointer is absolute; it is
155*9712c20fSFrederick Mayle       // correct for us to treat the value as an offset from a base address
156*9712c20fSFrederick Mayle       // if the upper nybble is not DW_EH_PE_absptr.
157*9712c20fSFrederick Mayle       offset = ReadAddress(buffer);
158*9712c20fSFrederick Mayle       *len = AddressSize();
159*9712c20fSFrederick Mayle       break;
160*9712c20fSFrederick Mayle 
161*9712c20fSFrederick Mayle     case DW_EH_PE_uleb128:
162*9712c20fSFrederick Mayle       offset = ReadUnsignedLEB128(buffer, len);
163*9712c20fSFrederick Mayle       break;
164*9712c20fSFrederick Mayle 
165*9712c20fSFrederick Mayle     case DW_EH_PE_udata2:
166*9712c20fSFrederick Mayle       offset = ReadTwoBytes(buffer);
167*9712c20fSFrederick Mayle       *len = 2;
168*9712c20fSFrederick Mayle       break;
169*9712c20fSFrederick Mayle 
170*9712c20fSFrederick Mayle     case DW_EH_PE_udata4:
171*9712c20fSFrederick Mayle       offset = ReadFourBytes(buffer);
172*9712c20fSFrederick Mayle       *len = 4;
173*9712c20fSFrederick Mayle       break;
174*9712c20fSFrederick Mayle 
175*9712c20fSFrederick Mayle     case DW_EH_PE_udata8:
176*9712c20fSFrederick Mayle       offset = ReadEightBytes(buffer);
177*9712c20fSFrederick Mayle       *len = 8;
178*9712c20fSFrederick Mayle       break;
179*9712c20fSFrederick Mayle 
180*9712c20fSFrederick Mayle     case DW_EH_PE_sleb128:
181*9712c20fSFrederick Mayle       offset = ReadSignedLEB128(buffer, len);
182*9712c20fSFrederick Mayle       break;
183*9712c20fSFrederick Mayle 
184*9712c20fSFrederick Mayle     case DW_EH_PE_sdata2:
185*9712c20fSFrederick Mayle       offset = ReadTwoBytes(buffer);
186*9712c20fSFrederick Mayle       // Sign-extend from 16 bits.
187*9712c20fSFrederick Mayle       offset = (offset ^ 0x8000) - 0x8000;
188*9712c20fSFrederick Mayle       *len = 2;
189*9712c20fSFrederick Mayle       break;
190*9712c20fSFrederick Mayle 
191*9712c20fSFrederick Mayle     case DW_EH_PE_sdata4:
192*9712c20fSFrederick Mayle       offset = ReadFourBytes(buffer);
193*9712c20fSFrederick Mayle       // Sign-extend from 32 bits.
194*9712c20fSFrederick Mayle       offset = (offset ^ 0x80000000ULL) - 0x80000000ULL;
195*9712c20fSFrederick Mayle       *len = 4;
196*9712c20fSFrederick Mayle       break;
197*9712c20fSFrederick Mayle 
198*9712c20fSFrederick Mayle     case DW_EH_PE_sdata8:
199*9712c20fSFrederick Mayle       // No need to sign-extend; this is the full width of our type.
200*9712c20fSFrederick Mayle       offset = ReadEightBytes(buffer);
201*9712c20fSFrederick Mayle       *len = 8;
202*9712c20fSFrederick Mayle       break;
203*9712c20fSFrederick Mayle 
204*9712c20fSFrederick Mayle     default:
205*9712c20fSFrederick Mayle       abort();
206*9712c20fSFrederick Mayle   }
207*9712c20fSFrederick Mayle 
208*9712c20fSFrederick Mayle   // Find the appropriate base address.
209*9712c20fSFrederick Mayle   uint64_t base;
210*9712c20fSFrederick Mayle   switch (encoding & 0x70) {
211*9712c20fSFrederick Mayle     case DW_EH_PE_absptr:
212*9712c20fSFrederick Mayle       base = 0;
213*9712c20fSFrederick Mayle       break;
214*9712c20fSFrederick Mayle 
215*9712c20fSFrederick Mayle     case DW_EH_PE_pcrel:
216*9712c20fSFrederick Mayle       assert(have_section_base_);
217*9712c20fSFrederick Mayle       base = section_base_ + (buffer - buffer_base_);
218*9712c20fSFrederick Mayle       break;
219*9712c20fSFrederick Mayle 
220*9712c20fSFrederick Mayle     case DW_EH_PE_textrel:
221*9712c20fSFrederick Mayle       assert(have_text_base_);
222*9712c20fSFrederick Mayle       base = text_base_;
223*9712c20fSFrederick Mayle       break;
224*9712c20fSFrederick Mayle 
225*9712c20fSFrederick Mayle     case DW_EH_PE_datarel:
226*9712c20fSFrederick Mayle       assert(have_data_base_);
227*9712c20fSFrederick Mayle       base = data_base_;
228*9712c20fSFrederick Mayle       break;
229*9712c20fSFrederick Mayle 
230*9712c20fSFrederick Mayle     case DW_EH_PE_funcrel:
231*9712c20fSFrederick Mayle       assert(have_function_base_);
232*9712c20fSFrederick Mayle       base = function_base_;
233*9712c20fSFrederick Mayle       break;
234*9712c20fSFrederick Mayle 
235*9712c20fSFrederick Mayle     default:
236*9712c20fSFrederick Mayle       abort();
237*9712c20fSFrederick Mayle   }
238*9712c20fSFrederick Mayle 
239*9712c20fSFrederick Mayle   uint64_t pointer = base + offset;
240*9712c20fSFrederick Mayle 
241*9712c20fSFrederick Mayle   // Remove inappropriate upper bits.
242*9712c20fSFrederick Mayle   if (AddressSize() == 4)
243*9712c20fSFrederick Mayle     pointer = pointer & 0xffffffff;
244*9712c20fSFrederick Mayle   else
245*9712c20fSFrederick Mayle     assert(AddressSize() == sizeof(uint64_t));
246*9712c20fSFrederick Mayle 
247*9712c20fSFrederick Mayle   return pointer;
248*9712c20fSFrederick Mayle }
249*9712c20fSFrederick Mayle 
GetEndianness() const250*9712c20fSFrederick Mayle Endianness ByteReader::GetEndianness() const {
251*9712c20fSFrederick Mayle   return endian_;
252*9712c20fSFrederick Mayle }
253*9712c20fSFrederick Mayle 
254*9712c20fSFrederick Mayle }  // namespace google_breakpad
255