xref: /aosp_15_r20/external/google-breakpad/src/common/test_assembler.h (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1*9712c20fSFrederick Mayle // -*- mode: C++ -*-
2*9712c20fSFrederick Mayle 
3*9712c20fSFrederick Mayle // Copyright 2010 Google LLC
4*9712c20fSFrederick Mayle //
5*9712c20fSFrederick Mayle // Redistribution and use in source and binary forms, with or without
6*9712c20fSFrederick Mayle // modification, are permitted provided that the following conditions are
7*9712c20fSFrederick Mayle // met:
8*9712c20fSFrederick Mayle //
9*9712c20fSFrederick Mayle //     * Redistributions of source code must retain the above copyright
10*9712c20fSFrederick Mayle // notice, this list of conditions and the following disclaimer.
11*9712c20fSFrederick Mayle //     * Redistributions in binary form must reproduce the above
12*9712c20fSFrederick Mayle // copyright notice, this list of conditions and the following disclaimer
13*9712c20fSFrederick Mayle // in the documentation and/or other materials provided with the
14*9712c20fSFrederick Mayle // distribution.
15*9712c20fSFrederick Mayle //     * Neither the name of Google LLC nor the names of its
16*9712c20fSFrederick Mayle // contributors may be used to endorse or promote products derived from
17*9712c20fSFrederick Mayle // this software without specific prior written permission.
18*9712c20fSFrederick Mayle //
19*9712c20fSFrederick Mayle // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20*9712c20fSFrederick Mayle // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21*9712c20fSFrederick Mayle // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22*9712c20fSFrederick Mayle // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23*9712c20fSFrederick Mayle // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24*9712c20fSFrederick Mayle // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25*9712c20fSFrederick Mayle // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26*9712c20fSFrederick Mayle // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27*9712c20fSFrederick Mayle // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28*9712c20fSFrederick Mayle // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29*9712c20fSFrederick Mayle // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30*9712c20fSFrederick Mayle 
31*9712c20fSFrederick Mayle // Original author: Jim Blandy <[email protected]> <[email protected]>
32*9712c20fSFrederick Mayle 
// test_assembler.h: interface to classes for building complex binary streams.
34*9712c20fSFrederick Mayle 
35*9712c20fSFrederick Mayle // To test the Breakpad symbol dumper and processor thoroughly, for
36*9712c20fSFrederick Mayle // all combinations of host system and minidump processor
37*9712c20fSFrederick Mayle // architecture, we need to be able to easily generate complex test
38*9712c20fSFrederick Mayle // data like debugging information and minidump files.
39*9712c20fSFrederick Mayle //
40*9712c20fSFrederick Mayle // For example, if we want our unit tests to provide full code
41*9712c20fSFrederick Mayle // coverage for stack walking, it may be difficult to persuade the
42*9712c20fSFrederick Mayle // compiler to generate every possible sort of stack walking
43*9712c20fSFrederick Mayle // information that we want to support; there are probably DWARF CFI
44*9712c20fSFrederick Mayle // opcodes that GCC never emits. Similarly, if we want to test our
45*9712c20fSFrederick Mayle // error handling, we will need to generate damaged minidumps or
46*9712c20fSFrederick Mayle // debugging information that (we hope) the client or compiler will
47*9712c20fSFrederick Mayle // never produce on its own.
48*9712c20fSFrederick Mayle //
49*9712c20fSFrederick Mayle // google_breakpad::TestAssembler provides a predictable and
50*9712c20fSFrederick Mayle // (relatively) simple way to generate complex formatted data streams
51*9712c20fSFrederick Mayle // like minidumps and CFI. Furthermore, because TestAssembler is
52*9712c20fSFrederick Mayle // portable, developers without access to (say) Visual Studio or a
53*9712c20fSFrederick Mayle // SPARC assembler can still work on test data for those targets.
54*9712c20fSFrederick Mayle 
55*9712c20fSFrederick Mayle #ifndef PROCESSOR_TEST_ASSEMBLER_H_
56*9712c20fSFrederick Mayle #define PROCESSOR_TEST_ASSEMBLER_H_
57*9712c20fSFrederick Mayle 
58*9712c20fSFrederick Mayle #include <list>
59*9712c20fSFrederick Mayle #include <vector>
60*9712c20fSFrederick Mayle #include <string>
61*9712c20fSFrederick Mayle 
62*9712c20fSFrederick Mayle #include "common/using_std_string.h"
63*9712c20fSFrederick Mayle #include "google_breakpad/common/breakpad_types.h"
64*9712c20fSFrederick Mayle 
65*9712c20fSFrederick Mayle namespace google_breakpad {
66*9712c20fSFrederick Mayle 
67*9712c20fSFrederick Mayle using std::list;
68*9712c20fSFrederick Mayle using std::vector;
69*9712c20fSFrederick Mayle 
70*9712c20fSFrederick Mayle namespace test_assembler {
71*9712c20fSFrederick Mayle 
// A Label represents a value not yet known that we need to store in a
// section. As long as all the labels a section refers to are defined
// by the time we retrieve its contents as bytes, we can use undefined
// labels freely in that section's construction.
//
// A label can be in one of three states:
// - undefined,
// - defined as the sum of some other label and a constant, or
// - a constant.
//
// A label's value never changes, but it can accumulate constraints.
// Adding labels and integers is permitted, and yields a label.
// Subtracting a constant from a label is permitted, and also yields a
// label. Subtracting two labels that have some relationship to each
// other is permitted, and yields a constant.
//
// For example:
//
//   Label a;               // a's value is undefined
//   Label b;               // b's value is undefined
//   {
//     Label c = a + 4;     // okay, even though a's value is unknown
//     b = c + 4;           // also okay; b is now a+8
//   }
//   Label d = b - 2;       // okay; d == a+6, even though c is gone
//   d.Value();             // error: d's value is not yet known
//   d - a;                 // is 6, even though their values are not known
//   a = 12;                // now b == 20, and d == 18
//   d.Value();             // 18: no longer an error
//   b.Value();             // 20
//   d = 10;                // error: d is already defined.
//
// Label objects' lifetimes are unconstrained: notice that, in the
// above example, even though a and b are only related through c, and
// c goes out of scope, the assignment to a sets b's value as well. In
// particular, it's not necessary to ensure that a Label lives beyond
// Sections that refer to it.
class Label {
 public:
  Label();                      // An undefined label.
  Label(uint64_t value);        // A label with a fixed value. Not explicit:
                                // integers convert implicitly to Labels.
  Label(const Label& value);    // A label equal to another.
  ~Label();

  // Return this label's value; it must be known.
  //
  // This is a named member function rather than a conversion
  // operator. Providing it as a cast operator would be nifty, but the
  // conversions happen in unexpected places. In particular, ISO C++
  // says that Label + size_t becomes ambiguous, because it can't
  // decide whether to convert the Label to a uint64_t and then to a
  // size_t, or use the overloaded operator that returns a new label,
  // even though the former could fail if the label is not yet defined
  // and the latter won't.
  uint64_t Value() const;

  // Define this label as the constant VALUE, or as equal to the label
  // VALUE. Return a reference to this label.
  Label& operator=(uint64_t value);
  Label& operator=(const Label& value);

  // Return a label whose value is this label's value plus ADDEND, or
  // minus SUBTRAHEND. This label's value need not be known.
  Label operator+(uint64_t addend) const;
  Label operator-(uint64_t subtrahend) const;

  // Return the constant difference between this label and SUBTRAHEND.
  // The two labels must be related to each other, but their values
  // need not be known.
  uint64_t operator-(const Label& subtrahend) const;

  // We could also provide == and != that work on undefined, but
  // related, labels.

  // Return true if this label's value is known. If VALUE_P is given,
  // set *VALUE_P to the known value if returning true.
  bool IsKnownConstant(uint64_t* value_p = NULL) const;

  // Return true if the offset from LABEL to this label is known. If
  // OFFSET_P is given, set *OFFSET_P to the offset when returning true.
  //
  // You can think of l.IsKnownOffsetFrom(m, &d) as being like 'd = l-m',
  // except that it also returns a value indicating whether the
  // subtraction is possible given what we currently know of l and m.
  // It can be possible even if we don't know l and m's values. For
  // example:
  //
  //   Label l, m;
  //   m = l + 10;
  //   l.IsKnownConstant();             // false
  //   m.IsKnownConstant();             // false
  //   uint64_t d;
  //   l.IsKnownOffsetFrom(m, &d);      // true, and sets d to -10.
  //   l-m                              // -10
  //   m-l                              // 10
  //   m.Value()                        // error: m's value is not known
  //
  // Offsets are uint64_t values, so the "-10" above is represented
  // modulo 2^64 (that is, as 2^64 - 10).
  bool IsKnownOffsetFrom(const Label& label, uint64_t* offset_p = NULL) const;

 private:
  // A label's value, or if that is not yet known, how the value is
  // related to other labels' values. A binding may be:
  // - a known constant,
  // - constrained to be equal to some other binding plus a constant, or
  // - unconstrained, and free to take on any value.
  //
  // Many labels may point to a single binding, and each binding may
  // refer to another, so bindings and labels form trees whose leaves
  // are labels, whose interior nodes (and roots) are bindings, and
  // where links point from children to parents. Bindings are
  // reference counted, allowing labels to be lightweight, copyable,
  // assignable, placed in containers, and so on.
  class Binding {
   public:
    Binding();
    Binding(uint64_t addend);
    ~Binding();

    // Increment our reference count.
    void Acquire() { reference_count_++; }
    // Decrement our reference count, and return true if it is zero.
    bool Release() { return --reference_count_ == 0; }

    // Set this binding to be equal to BINDING + ADDEND. If BINDING is
    // NULL, then set this binding to the known constant ADDEND.
    // Update every binding on this binding's chain to point directly
    // to BINDING, or to be a constant, with addends adjusted
    // appropriately.
    void Set(Binding* binding, uint64_t addend);

    // Return what we know about the value of this binding.
    // - If this binding's value is a known constant, set BASE to
    //   NULL, and set ADDEND to its value.
    // - If this binding is not a known constant but related to other
    //   bindings, set BASE to the binding at the end of the relation
    //   chain (which will always be unconstrained), and set ADDEND to the
    //   value to add to that binding's value to get this binding's
    //   value.
    // - If this binding is unconstrained, set BASE to this, and leave
    //   ADDEND unchanged.
    void Get(Binding** base, uint64_t* addend);

   private:
    // There are three cases:
    //
    // - A binding representing a known constant value has base_ NULL,
    //   and addend_ equal to the value.
    //
    // - A binding representing a completely unconstrained value has
    //   base_ pointing to this; addend_ is unused.
    //
    // - A binding whose value is related to some other binding's
    //   value has base_ pointing to that other binding, and addend_
    //   set to the amount to add to that binding's value to get this
    //   binding's value. We only represent relationships of the form
    //   x = y+c.
    //
    // Thus, the base_ links form a chain terminating in either a
    // known constant value or a completely unconstrained value. Most
    // operations on bindings do path compression: they change every
    // binding on the chain to point directly to the final value,
    // adjusting addends as appropriate.
    Binding* base_;
    uint64_t addend_;

    // The number of Labels and Bindings pointing to this binding.
    // (When a binding points to itself, indicating a completely
    // unconstrained binding, that doesn't count as a reference.)
    int reference_count_;
  };

  // This label's value.
  Binding* value_;
};
234*9712c20fSFrederick Mayle 
235*9712c20fSFrederick Mayle inline Label operator+(uint64_t a, const Label& l) { return l + a; }
236*9712c20fSFrederick Mayle // Note that int-Label isn't defined, as negating a Label is not an
237*9712c20fSFrederick Mayle // operation we support.
238*9712c20fSFrederick Mayle 
// Conventions for representing larger numbers as sequences of bytes.
enum Endianness {
  kBigEndian,        // Big-endian: the most significant byte comes first.
  kLittleEndian,     // Little-endian: the least significant byte comes first.
  kUnsetEndian,      // Used internally; the default for a Section constructed
                     // without an explicit endianness.
};
245*9712c20fSFrederick Mayle 
246*9712c20fSFrederick Mayle // A section is a sequence of bytes, constructed by appending bytes
247*9712c20fSFrederick Mayle // to the end. Sections have a convenient and flexible set of member
248*9712c20fSFrederick Mayle // functions for appending data in various formats: big-endian and
249*9712c20fSFrederick Mayle // little-endian signed and unsigned values of different sizes;
250*9712c20fSFrederick Mayle // LEB128 and ULEB128 values (see below), and raw blocks of bytes.
251*9712c20fSFrederick Mayle //
252*9712c20fSFrederick Mayle // If you need to append a value to a section that is not convenient
253*9712c20fSFrederick Mayle // to compute immediately, you can create a label, append the
254*9712c20fSFrederick Mayle // label's value to the section, and then set the label's value
255*9712c20fSFrederick Mayle // later, when it's convenient to do so. Once a label's value is
256*9712c20fSFrederick Mayle // known, the section class takes care of updating all previously
257*9712c20fSFrederick Mayle // appended references to it.
258*9712c20fSFrederick Mayle //
259*9712c20fSFrederick Mayle // Once all the labels to which a section refers have had their
260*9712c20fSFrederick Mayle // values determined, you can get a copy of the section's contents
261*9712c20fSFrederick Mayle // as a string.
262*9712c20fSFrederick Mayle //
263*9712c20fSFrederick Mayle // Note that there is no specified "start of section" label. This is
264*9712c20fSFrederick Mayle // because there are typically several different meanings for "the
265*9712c20fSFrederick Mayle // start of a section": the offset of the section within an object
266*9712c20fSFrederick Mayle // file, the address in memory at which the section's content appear,
267*9712c20fSFrederick Mayle // and so on. It's up to the code that uses the Section class to
268*9712c20fSFrederick Mayle // keep track of these explicitly, as they depend on the application.
269*9712c20fSFrederick Mayle class Section {
270*9712c20fSFrederick Mayle  public:
271*9712c20fSFrederick Mayle   Section(Endianness endianness = kUnsetEndian)
endianness_(endianness)272*9712c20fSFrederick Mayle       : endianness_(endianness) { };
273*9712c20fSFrederick Mayle 
274*9712c20fSFrederick Mayle   // A base class destructor should be either public and virtual,
275*9712c20fSFrederick Mayle   // or protected and nonvirtual.
~Section()276*9712c20fSFrederick Mayle   virtual ~Section() { };
277*9712c20fSFrederick Mayle 
278*9712c20fSFrederick Mayle   // Set the default endianness of this section to ENDIANNESS. This
279*9712c20fSFrederick Mayle   // sets the behavior of the D<N> appending functions. If the
280*9712c20fSFrederick Mayle   // assembler's default endianness was set, this is the
set_endianness(Endianness endianness)281*9712c20fSFrederick Mayle   void set_endianness(Endianness endianness) {
282*9712c20fSFrederick Mayle     endianness_ = endianness;
283*9712c20fSFrederick Mayle   }
284*9712c20fSFrederick Mayle 
285*9712c20fSFrederick Mayle   // Return the default endianness of this section.
endianness()286*9712c20fSFrederick Mayle   Endianness endianness() const { return endianness_; }
287*9712c20fSFrederick Mayle 
288*9712c20fSFrederick Mayle   // Append the SIZE bytes at DATA or the contents of STRING to the
289*9712c20fSFrederick Mayle   // end of this section. Return a reference to this section.
Append(const uint8_t * data,size_t size)290*9712c20fSFrederick Mayle   Section& Append(const uint8_t* data, size_t size) {
291*9712c20fSFrederick Mayle     contents_.append(reinterpret_cast<const char*>(data), size);
292*9712c20fSFrederick Mayle     return *this;
293*9712c20fSFrederick Mayle   };
Append(const string & data)294*9712c20fSFrederick Mayle   Section& Append(const string& data) {
295*9712c20fSFrederick Mayle     contents_.append(data);
296*9712c20fSFrederick Mayle     return *this;
297*9712c20fSFrederick Mayle   };
298*9712c20fSFrederick Mayle 
299*9712c20fSFrederick Mayle   // Append SIZE copies of BYTE to the end of this section. Return a
300*9712c20fSFrederick Mayle   // reference to this section.
Append(size_t size,uint8_t byte)301*9712c20fSFrederick Mayle   Section& Append(size_t size, uint8_t byte) {
302*9712c20fSFrederick Mayle     contents_.append(size, (char) byte);
303*9712c20fSFrederick Mayle     return *this;
304*9712c20fSFrederick Mayle   }
305*9712c20fSFrederick Mayle 
306*9712c20fSFrederick Mayle   // Append NUMBER to this section. ENDIANNESS is the endianness to
307*9712c20fSFrederick Mayle   // use to write the number. SIZE is the length of the number in
308*9712c20fSFrederick Mayle   // bytes. Return a reference to this section.
309*9712c20fSFrederick Mayle   Section& Append(Endianness endianness, size_t size, uint64_t number);
310*9712c20fSFrederick Mayle   Section& Append(Endianness endianness, size_t size, const Label& label);
311*9712c20fSFrederick Mayle 
312*9712c20fSFrederick Mayle   // Append SECTION to the end of this section. The labels SECTION
313*9712c20fSFrederick Mayle   // refers to need not be defined yet.
314*9712c20fSFrederick Mayle   //
315*9712c20fSFrederick Mayle   // Note that this has no effect on any Labels' values, or on
316*9712c20fSFrederick Mayle   // SECTION. If placing SECTION within 'this' provides new
317*9712c20fSFrederick Mayle   // constraints on existing labels' values, then it's up to the
318*9712c20fSFrederick Mayle   // caller to fiddle with those labels as needed.
319*9712c20fSFrederick Mayle   Section& Append(const Section& section);
320*9712c20fSFrederick Mayle 
321*9712c20fSFrederick Mayle   // Append the contents of DATA as a series of bytes terminated by
322*9712c20fSFrederick Mayle   // a NULL character.
AppendCString(const string & data)323*9712c20fSFrederick Mayle   Section& AppendCString(const string& data) {
324*9712c20fSFrederick Mayle     Append(data);
325*9712c20fSFrederick Mayle     contents_ += '\0';
326*9712c20fSFrederick Mayle     return *this;
327*9712c20fSFrederick Mayle   }
328*9712c20fSFrederick Mayle 
329*9712c20fSFrederick Mayle   // Append at most SIZE bytes from DATA; if DATA is less than SIZE bytes
330*9712c20fSFrederick Mayle   // long, pad with '\0' characters.
AppendCString(const string & data,size_t size)331*9712c20fSFrederick Mayle   Section& AppendCString(const string& data, size_t size) {
332*9712c20fSFrederick Mayle     contents_.append(data, 0, size);
333*9712c20fSFrederick Mayle     if (data.size() < size)
334*9712c20fSFrederick Mayle       Append(size - data.size(), 0);
335*9712c20fSFrederick Mayle     return *this;
336*9712c20fSFrederick Mayle   }
337*9712c20fSFrederick Mayle 
338*9712c20fSFrederick Mayle   // Append VALUE or LABEL to this section, with the given bit width and
339*9712c20fSFrederick Mayle   // endianness. Return a reference to this section.
340*9712c20fSFrederick Mayle   //
341*9712c20fSFrederick Mayle   // The names of these functions have the form <ENDIANNESS><BITWIDTH>:
342*9712c20fSFrederick Mayle   // <ENDIANNESS> is either 'L' (little-endian, least significant byte first),
343*9712c20fSFrederick Mayle   //                        'B' (big-endian, most significant byte first), or
344*9712c20fSFrederick Mayle   //                        'D' (default, the section's default endianness)
345*9712c20fSFrederick Mayle   // <BITWIDTH> is 8, 16, 32, or 64.
346*9712c20fSFrederick Mayle   //
347*9712c20fSFrederick Mayle   // Since endianness doesn't matter for a single byte, all the
348*9712c20fSFrederick Mayle   // <BITWIDTH>=8 functions are equivalent.
349*9712c20fSFrederick Mayle   //
350*9712c20fSFrederick Mayle   // These can be used to write both signed and unsigned values, as
351*9712c20fSFrederick Mayle   // the compiler will properly sign-extend a signed value before
352*9712c20fSFrederick Mayle   // passing it to the function, at which point the function's
353*9712c20fSFrederick Mayle   // behavior is the same either way.
L8(uint8_t value)354*9712c20fSFrederick Mayle   Section& L8(uint8_t value) { contents_ += value; return *this; }
B8(uint8_t value)355*9712c20fSFrederick Mayle   Section& B8(uint8_t value) { contents_ += value; return *this; }
D8(uint8_t value)356*9712c20fSFrederick Mayle   Section& D8(uint8_t value) { contents_ += value; return *this; }
357*9712c20fSFrederick Mayle   Section &L16(uint16_t), &L32(uint32_t), &L64(uint64_t),
358*9712c20fSFrederick Mayle           &B16(uint16_t), &B32(uint32_t), &B64(uint64_t),
359*9712c20fSFrederick Mayle           &D16(uint16_t), &D32(uint32_t), &D64(uint64_t);
360*9712c20fSFrederick Mayle   Section &L8(const Label& label),  &L16(const Label& label),
361*9712c20fSFrederick Mayle           &L32(const Label& label), &L64(const Label& label),
362*9712c20fSFrederick Mayle           &B8(const Label& label),  &B16(const Label& label),
363*9712c20fSFrederick Mayle           &B32(const Label& label), &B64(const Label& label),
364*9712c20fSFrederick Mayle           &D8(const Label& label),  &D16(const Label& label),
365*9712c20fSFrederick Mayle           &D32(const Label& label), &D64(const Label& label);
366*9712c20fSFrederick Mayle 
367*9712c20fSFrederick Mayle   // Append VALUE in a signed LEB128 (Little-Endian Base 128) form.
368*9712c20fSFrederick Mayle   //
369*9712c20fSFrederick Mayle   // The signed LEB128 representation of an integer N is a variable
370*9712c20fSFrederick Mayle   // number of bytes:
371*9712c20fSFrederick Mayle   //
372*9712c20fSFrederick Mayle   // - If N is between -0x40 and 0x3f, then its signed LEB128
373*9712c20fSFrederick Mayle   //   representation is a single byte whose value is N.
374*9712c20fSFrederick Mayle   //
375*9712c20fSFrederick Mayle   // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
376*9712c20fSFrederick Mayle   //   0x80, followed by the signed LEB128 representation of N / 128,
377*9712c20fSFrederick Mayle   //   rounded towards negative infinity.
378*9712c20fSFrederick Mayle   //
379*9712c20fSFrederick Mayle   // In other words, we break VALUE into groups of seven bits, put
380*9712c20fSFrederick Mayle   // them in little-endian order, and then write them as eight-bit
381*9712c20fSFrederick Mayle   // bytes with the high bit on all but the last.
382*9712c20fSFrederick Mayle   //
383*9712c20fSFrederick Mayle   // Note that VALUE cannot be a Label (we would have to implement
384*9712c20fSFrederick Mayle   // relaxation).
385*9712c20fSFrederick Mayle   Section& LEB128(long long value);
386*9712c20fSFrederick Mayle 
387*9712c20fSFrederick Mayle   // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form.
388*9712c20fSFrederick Mayle   //
389*9712c20fSFrederick Mayle   // The unsigned LEB128 representation of an integer N is a variable
390*9712c20fSFrederick Mayle   // number of bytes:
391*9712c20fSFrederick Mayle   //
392*9712c20fSFrederick Mayle   // - If N is between 0 and 0x7f, then its unsigned LEB128
393*9712c20fSFrederick Mayle   //   representation is a single byte whose value is N.
394*9712c20fSFrederick Mayle   //
395*9712c20fSFrederick Mayle   // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
396*9712c20fSFrederick Mayle   //   0x80, followed by the unsigned LEB128 representation of N /
397*9712c20fSFrederick Mayle   //   128, rounded towards negative infinity.
398*9712c20fSFrederick Mayle   //
399*9712c20fSFrederick Mayle   // Note that VALUE cannot be a Label (we would have to implement
400*9712c20fSFrederick Mayle   // relaxation).
401*9712c20fSFrederick Mayle   Section& ULEB128(uint64_t value);
402*9712c20fSFrederick Mayle 
403*9712c20fSFrederick Mayle   // Jump to the next location aligned on an ALIGNMENT-byte boundary,
404*9712c20fSFrederick Mayle   // relative to the start of the section. Fill the gap with PAD_BYTE.
405*9712c20fSFrederick Mayle   // ALIGNMENT must be a power of two. Return a reference to this
406*9712c20fSFrederick Mayle   // section.
407*9712c20fSFrederick Mayle   Section& Align(size_t alignment, uint8_t pad_byte = 0);
408*9712c20fSFrederick Mayle 
409*9712c20fSFrederick Mayle   // Clear the contents of this section.
410*9712c20fSFrederick Mayle   void Clear();
411*9712c20fSFrederick Mayle 
412*9712c20fSFrederick Mayle   // Return the current size of the section.
Size()413*9712c20fSFrederick Mayle   size_t Size() const { return contents_.size(); }
414*9712c20fSFrederick Mayle 
415*9712c20fSFrederick Mayle   // Return a label representing the start of the section.
416*9712c20fSFrederick Mayle   //
417*9712c20fSFrederick Mayle   // It is up to the user whether this label represents the section's
418*9712c20fSFrederick Mayle   // position in an object file, the section's address in memory, or
419*9712c20fSFrederick Mayle   // what have you; some applications may need both, in which case
420*9712c20fSFrederick Mayle   // this simple-minded interface won't be enough. This class only
421*9712c20fSFrederick Mayle   // provides a single start label, for use with the Here and Mark
422*9712c20fSFrederick Mayle   // member functions.
423*9712c20fSFrederick Mayle   //
424*9712c20fSFrederick Mayle   // Ideally, we'd provide this in a subclass that actually knows more
425*9712c20fSFrederick Mayle   // about the application at hand and can provide an appropriate
426*9712c20fSFrederick Mayle   // collection of start labels. But then the appending member
427*9712c20fSFrederick Mayle   // functions like Append and D32 would return a reference to the
428*9712c20fSFrederick Mayle   // base class, not the derived class, and the chaining won't work.
429*9712c20fSFrederick Mayle   // Since the only value here is in pretty notation, that's a fatal
430*9712c20fSFrederick Mayle   // flaw.
start()431*9712c20fSFrederick Mayle   Label start() const { return start_; }
432*9712c20fSFrederick Mayle 
433*9712c20fSFrederick Mayle   // Return a label representing the point at which the next Appended
434*9712c20fSFrederick Mayle   // item will appear in the section, relative to start().
Here()435*9712c20fSFrederick Mayle   Label Here() const { return start_ + Size(); }
436*9712c20fSFrederick Mayle 
437*9712c20fSFrederick Mayle   // Set *LABEL to Here, and return a reference to this section.
Mark(Label * label)438*9712c20fSFrederick Mayle   Section& Mark(Label* label) { *label = Here(); return *this; }
439*9712c20fSFrederick Mayle 
440*9712c20fSFrederick Mayle   // If there are no undefined label references left in this
441*9712c20fSFrederick Mayle   // section, set CONTENTS to the contents of this section, as a
442*9712c20fSFrederick Mayle   // string, and clear this section. Return true on success, or false
443*9712c20fSFrederick Mayle   // if there were still undefined labels.
444*9712c20fSFrederick Mayle   bool GetContents(string* contents);
445*9712c20fSFrederick Mayle 
446*9712c20fSFrederick Mayle  private:
447*9712c20fSFrederick Mayle   // Used internally. A reference to a label's value.
448*9712c20fSFrederick Mayle   struct Reference {
ReferenceReference449*9712c20fSFrederick Mayle     Reference(size_t set_offset, Endianness set_endianness,  size_t set_size,
450*9712c20fSFrederick Mayle               const Label& set_label)
451*9712c20fSFrederick Mayle         : offset(set_offset), endianness(set_endianness), size(set_size),
452*9712c20fSFrederick Mayle           label(set_label) { }
453*9712c20fSFrederick Mayle 
454*9712c20fSFrederick Mayle     // The offset of the reference within the section.
455*9712c20fSFrederick Mayle     size_t offset;
456*9712c20fSFrederick Mayle 
457*9712c20fSFrederick Mayle     // The endianness of the reference.
458*9712c20fSFrederick Mayle     Endianness endianness;
459*9712c20fSFrederick Mayle 
460*9712c20fSFrederick Mayle     // The size of the reference.
461*9712c20fSFrederick Mayle     size_t size;
462*9712c20fSFrederick Mayle 
463*9712c20fSFrederick Mayle     // The label to which this is a reference.
464*9712c20fSFrederick Mayle     Label label;
465*9712c20fSFrederick Mayle   };
466*9712c20fSFrederick Mayle 
467*9712c20fSFrederick Mayle   // The default endianness of this section.
468*9712c20fSFrederick Mayle   Endianness endianness_;
469*9712c20fSFrederick Mayle 
470*9712c20fSFrederick Mayle   // The contents of the section.
471*9712c20fSFrederick Mayle   string contents_;
472*9712c20fSFrederick Mayle 
473*9712c20fSFrederick Mayle   // References to labels within those contents.
474*9712c20fSFrederick Mayle   vector<Reference> references_;
475*9712c20fSFrederick Mayle 
476*9712c20fSFrederick Mayle   // A label referring to the beginning of the section.
477*9712c20fSFrederick Mayle   Label start_;
478*9712c20fSFrederick Mayle };
479*9712c20fSFrederick Mayle 
480*9712c20fSFrederick Mayle }  // namespace test_assembler
481*9712c20fSFrederick Mayle }  // namespace google_breakpad
482*9712c20fSFrederick Mayle 
483*9712c20fSFrederick Mayle #endif  // PROCESSOR_TEST_ASSEMBLER_H_
484