// -*- mode: C++ -*-

// Copyright 2010 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <[email protected]> <[email protected]>

// test-assembler.h: interface to class for building complex binary streams.

// To test the Breakpad symbol dumper and processor thoroughly, for
// all combinations of host system and minidump processor
// architecture, we need to be able to easily generate complex test
// data like debugging information and minidump files.
//
// For example, if we want our unit tests to provide full code
// coverage for stack walking, it may be difficult to persuade the
// compiler to generate every possible sort of stack walking
// information that we want to support; there are probably DWARF CFI
// opcodes that GCC never emits. Similarly, if we want to test our
// error handling, we will need to generate damaged minidumps or
// debugging information that (we hope) the client or compiler will
// never produce on its own.
//
// google_breakpad::TestAssembler provides a predictable and
// (relatively) simple way to generate complex formatted data streams
// like minidumps and CFI. Furthermore, because TestAssembler is
// portable, developers without access to (say) Visual Studio or a
// SPARC assembler can still work on test data for those targets.

#ifndef PROCESSOR_TEST_ASSEMBLER_H_
#define PROCESSOR_TEST_ASSEMBLER_H_

#include <cstddef>
#include <list>
#include <string>
#include <vector>

#include "common/using_std_string.h"
#include "google_breakpad/common/breakpad_types.h"

namespace google_breakpad {

using std::list;
using std::vector;

namespace test_assembler {

// A Label represents a value not yet known that we need to store in a
// section. As long as all the labels a section refers to are defined
// by the time we retrieve its contents as bytes, we can use undefined
// labels freely in that section's construction.
//
// A label can be in one of three states:
// - undefined,
// - defined as the sum of some other label and a constant, or
// - a constant.
//
// A label's value never changes, but it can accumulate constraints.
// Adding labels and integers is permitted, and yields a label.
// Subtracting a constant from a label is permitted, and also yields a
// label. Subtracting two labels that have some relationship to each
// other is permitted, and yields a constant.
//
// For example:
//
//   Label a;               // a's value is undefined
//   Label b;               // b's value is undefined
//   {
//     Label c = a + 4;     // okay, even though a's value is unknown
//     b = c + 4;           // also okay; b is now a+8
//   }
//   Label d = b - 2;       // okay; d == a+6, even though c is gone
//   d.Value();             // error: d's value is not yet known
//   d - a;                 // is 6, even though their values are not known
//   a = 12;                // now b == 20, and d == 18
//   d.Value();             // 18: no longer an error
//   b.Value();             // 20
//   d = 10;                // error: d is already defined.
//
// Label objects' lifetimes are unconstrained: notice that, in the
// above example, even though a and b are only related through c, and
// c goes out of scope, the assignment to a sets b's value as well. In
// particular, it's not necessary to ensure that a Label lives beyond
// Sections that refer to it.
class Label {
 public:
  Label();                      // An undefined label.
  Label(uint64_t value);        // A label with a fixed value. Not explicit,
                                // so integers convert to Labels implicitly.
  Label(const Label& value);    // A label equal to another.
  ~Label();

  // Return this label's value; it must be known.
  //
  // Providing this as a cast operator is nifty, but the conversions
  // happen in unexpected places. In particular, ISO C++ says that
  // Label + size_t becomes ambiguous, because it can't decide whether
  // to convert the Label to a uint64_t and then to a size_t, or use
  // the overloaded operator that returns a new label, even though the
  // former could fail if the label is not yet defined and the latter won't.
  uint64_t Value() const;

  // Constrain this label to be equal to VALUE, and return a reference
  // to this label. As the example above shows, assigning to a label
  // whose value is already defined is an error.
  Label& operator=(uint64_t value);
  Label& operator=(const Label& value);

  // Return a label whose value is this label's value plus ADDEND (or
  // minus SUBTRAHEND). This label's own value need not be known yet.
  Label operator+(uint64_t addend) const;
  Label operator-(uint64_t subtrahend) const;

  // Return the constant difference between this label and SUBTRAHEND.
  // The two labels must be related to each other, but their values
  // need not be known.
  uint64_t operator-(const Label& subtrahend) const;

  // We could also provide == and != that work on undefined, but
  // related, labels.

  // Return true if this label's value is known. If VALUE_P is given,
  // set *VALUE_P to the known value if returning true.
  bool IsKnownConstant(uint64_t* value_p = NULL) const;

  // Return true if the offset from LABEL to this label is known. If
  // OFFSET_P is given, set *OFFSET_P to the offset when returning true.
  //
  // You can think of l.IsKnownOffsetFrom(m, &d) as being like 'd = l-m',
  // except that it also returns a value indicating whether the
  // subtraction is possible given what we currently know of l and m.
  // It can be possible even if we don't know l and m's values. For
  // example:
  //
  //   Label l, m;
  //   m = l + 10;
  //   l.IsKnownConstant();             // false
  //   m.IsKnownConstant();             // false
  //   uint64_t d;
  //   l.IsKnownOffsetFrom(m, &d);      // true, and sets d to -10
  //                                    // (stored in a uint64_t).
  //   l-m                              // -10
  //   m-l                              // 10
  //   m.Value()                        // error: m's value is not known
  bool IsKnownOffsetFrom(const Label& label, uint64_t* offset_p = NULL) const;

 private:
  // A label's value, or if that is not yet known, how the value is
  // related to other labels' values. A binding may be:
  // - a known constant,
  // - constrained to be equal to some other binding plus a constant, or
  // - unconstrained, and free to take on any value.
  //
  // Many labels may point to a single binding, and each binding may
  // refer to another, so bindings and labels form trees whose leaves
  // are labels, whose interior nodes (and roots) are bindings, and
  // where links point from children to parents. Bindings are
  // reference counted, allowing labels to be lightweight, copyable,
  // assignable, placed in containers, and so on.
  class Binding {
   public:
    // NOTE(review): both constructors are defined out of line;
    // presumably the first creates an unconstrained binding and the
    // second a known constant equal to ADDEND -- confirm against the
    // implementation file.
    Binding();
    Binding(uint64_t addend);
    ~Binding();

    // Increment our reference count.
    void Acquire() { ++reference_count_; }
    // Decrement our reference count, and return true if it is zero.
    bool Release() { return --reference_count_ == 0; }

    // Set this binding to be equal to BINDING + ADDEND. If BINDING is
    // NULL, then set this binding to the known constant ADDEND.
    // Update every binding on this binding's chain to point directly
    // to BINDING, or to be a constant, with addends adjusted
    // appropriately.
    void Set(Binding* binding, uint64_t addend);

    // Return what we know about the value of this binding.
    // - If this binding's value is a known constant, set *BASE to
    //   NULL, and set *ADDEND to its value.
    // - If this binding is not a known constant but related to other
    //   bindings, set *BASE to the binding at the end of the relation
    //   chain (which will always be unconstrained), and set *ADDEND to
    //   the value to add to that binding's value to get this binding's
    //   value.
    // - If this binding is unconstrained, set *BASE to this, and leave
    //   *ADDEND unchanged.
    void Get(Binding** base, uint64_t* addend);

   private:
    // There are three cases:
    //
    // - A binding representing a known constant value has base_ NULL,
    //   and addend_ equal to the value.
    //
    // - A binding representing a completely unconstrained value has
    //   base_ pointing to this; addend_ is unused.
    //
    // - A binding whose value is related to some other binding's
    //   value has base_ pointing to that other binding, and addend_
    //   set to the amount to add to that binding's value to get this
    //   binding's value. We only represent relationships of the form
    //   x = y+c.
    //
    // Thus, the base_ links form a chain terminating in either a
    // known constant value or a completely unconstrained value. Most
    // operations on bindings do path compression: they change every
    // binding on the chain to point directly to the final value,
    // adjusting addends as appropriate.
    Binding* base_;
    uint64_t addend_;

    // The number of Labels and Bindings pointing to this binding.
    // (When a binding points to itself, indicating a completely
    // unconstrained binding, that doesn't count as a reference.)
    int reference_count_;
  };

  // This label's value.
  Binding* value_;
};

// Integer + Label: addition commutes, so defer to Label::operator+.
inline Label operator+(uint64_t a, const Label& l) { return l + a; }
// Note that int-Label isn't defined, as negating a Label is not an
// operation we support.

// Conventions for representing larger numbers as sequences of bytes.
enum Endianness {
  kBigEndian,        // Big-endian: the most significant byte comes first.
  kLittleEndian,     // Little-endian: the least significant byte comes first.
  kUnsetEndian,      // used internally
};

// A section is a sequence of bytes, constructed by appending bytes
// to the end. Sections have a convenient and flexible set of member
// functions for appending data in various formats: big-endian and
// little-endian signed and unsigned values of different sizes;
// LEB128 and ULEB128 values (see below), and raw blocks of bytes.
//
// If you need to append a value to a section that is not convenient
// to compute immediately, you can create a label, append the
// label's value to the section, and then set the label's value
// later, when it's convenient to do so. Once a label's value is
// known, the section class takes care of updating all previously
// appended references to it.
258 // 259 // Once all the labels to which a section refers have had their 260 // values determined, you can get a copy of the section's contents 261 // as a string. 262 // 263 // Note that there is no specified "start of section" label. This is 264 // because there are typically several different meanings for "the 265 // start of a section": the offset of the section within an object 266 // file, the address in memory at which the section's content appear, 267 // and so on. It's up to the code that uses the Section class to 268 // keep track of these explicitly, as they depend on the application. 269 class Section { 270 public: 271 Section(Endianness endianness = kUnsetEndian) endianness_(endianness)272 : endianness_(endianness) { }; 273 274 // A base class destructor should be either public and virtual, 275 // or protected and nonvirtual. ~Section()276 virtual ~Section() { }; 277 278 // Set the default endianness of this section to ENDIANNESS. This 279 // sets the behavior of the D<N> appending functions. If the 280 // assembler's default endianness was set, this is the set_endianness(Endianness endianness)281 void set_endianness(Endianness endianness) { 282 endianness_ = endianness; 283 } 284 285 // Return the default endianness of this section. endianness()286 Endianness endianness() const { return endianness_; } 287 288 // Append the SIZE bytes at DATA or the contents of STRING to the 289 // end of this section. Return a reference to this section. Append(const uint8_t * data,size_t size)290 Section& Append(const uint8_t* data, size_t size) { 291 contents_.append(reinterpret_cast<const char*>(data), size); 292 return *this; 293 }; Append(const string & data)294 Section& Append(const string& data) { 295 contents_.append(data); 296 return *this; 297 }; 298 299 // Append SIZE copies of BYTE to the end of this section. Return a 300 // reference to this section. 
Append(size_t size,uint8_t byte)301 Section& Append(size_t size, uint8_t byte) { 302 contents_.append(size, (char) byte); 303 return *this; 304 } 305 306 // Append NUMBER to this section. ENDIANNESS is the endianness to 307 // use to write the number. SIZE is the length of the number in 308 // bytes. Return a reference to this section. 309 Section& Append(Endianness endianness, size_t size, uint64_t number); 310 Section& Append(Endianness endianness, size_t size, const Label& label); 311 312 // Append SECTION to the end of this section. The labels SECTION 313 // refers to need not be defined yet. 314 // 315 // Note that this has no effect on any Labels' values, or on 316 // SECTION. If placing SECTION within 'this' provides new 317 // constraints on existing labels' values, then it's up to the 318 // caller to fiddle with those labels as needed. 319 Section& Append(const Section& section); 320 321 // Append the contents of DATA as a series of bytes terminated by 322 // a NULL character. AppendCString(const string & data)323 Section& AppendCString(const string& data) { 324 Append(data); 325 contents_ += '\0'; 326 return *this; 327 } 328 329 // Append at most SIZE bytes from DATA; if DATA is less than SIZE bytes 330 // long, pad with '\0' characters. AppendCString(const string & data,size_t size)331 Section& AppendCString(const string& data, size_t size) { 332 contents_.append(data, 0, size); 333 if (data.size() < size) 334 Append(size - data.size(), 0); 335 return *this; 336 } 337 338 // Append VALUE or LABEL to this section, with the given bit width and 339 // endianness. Return a reference to this section. 340 // 341 // The names of these functions have the form <ENDIANNESS><BITWIDTH>: 342 // <ENDIANNESS> is either 'L' (little-endian, least significant byte first), 343 // 'B' (big-endian, most significant byte first), or 344 // 'D' (default, the section's default endianness) 345 // <BITWIDTH> is 8, 16, 32, or 64. 
346 // 347 // Since endianness doesn't matter for a single byte, all the 348 // <BITWIDTH>=8 functions are equivalent. 349 // 350 // These can be used to write both signed and unsigned values, as 351 // the compiler will properly sign-extend a signed value before 352 // passing it to the function, at which point the function's 353 // behavior is the same either way. L8(uint8_t value)354 Section& L8(uint8_t value) { contents_ += value; return *this; } B8(uint8_t value)355 Section& B8(uint8_t value) { contents_ += value; return *this; } D8(uint8_t value)356 Section& D8(uint8_t value) { contents_ += value; return *this; } 357 Section &L16(uint16_t), &L32(uint32_t), &L64(uint64_t), 358 &B16(uint16_t), &B32(uint32_t), &B64(uint64_t), 359 &D16(uint16_t), &D32(uint32_t), &D64(uint64_t); 360 Section &L8(const Label& label), &L16(const Label& label), 361 &L32(const Label& label), &L64(const Label& label), 362 &B8(const Label& label), &B16(const Label& label), 363 &B32(const Label& label), &B64(const Label& label), 364 &D8(const Label& label), &D16(const Label& label), 365 &D32(const Label& label), &D64(const Label& label); 366 367 // Append VALUE in a signed LEB128 (Little-Endian Base 128) form. 368 // 369 // The signed LEB128 representation of an integer N is a variable 370 // number of bytes: 371 // 372 // - If N is between -0x40 and 0x3f, then its signed LEB128 373 // representation is a single byte whose value is N. 374 // 375 // - Otherwise, its signed LEB128 representation is (N & 0x7f) | 376 // 0x80, followed by the signed LEB128 representation of N / 128, 377 // rounded towards negative infinity. 378 // 379 // In other words, we break VALUE into groups of seven bits, put 380 // them in little-endian order, and then write them as eight-bit 381 // bytes with the high bit on all but the last. 382 // 383 // Note that VALUE cannot be a Label (we would have to implement 384 // relaxation). 
385 Section& LEB128(long long value); 386 387 // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form. 388 // 389 // The unsigned LEB128 representation of an integer N is a variable 390 // number of bytes: 391 // 392 // - If N is between 0 and 0x7f, then its unsigned LEB128 393 // representation is a single byte whose value is N. 394 // 395 // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | 396 // 0x80, followed by the unsigned LEB128 representation of N / 397 // 128, rounded towards negative infinity. 398 // 399 // Note that VALUE cannot be a Label (we would have to implement 400 // relaxation). 401 Section& ULEB128(uint64_t value); 402 403 // Jump to the next location aligned on an ALIGNMENT-byte boundary, 404 // relative to the start of the section. Fill the gap with PAD_BYTE. 405 // ALIGNMENT must be a power of two. Return a reference to this 406 // section. 407 Section& Align(size_t alignment, uint8_t pad_byte = 0); 408 409 // Clear the contents of this section. 410 void Clear(); 411 412 // Return the current size of the section. Size()413 size_t Size() const { return contents_.size(); } 414 415 // Return a label representing the start of the section. 416 // 417 // It is up to the user whether this label represents the section's 418 // position in an object file, the section's address in memory, or 419 // what have you; some applications may need both, in which case 420 // this simple-minded interface won't be enough. This class only 421 // provides a single start label, for use with the Here and Mark 422 // member functions. 423 // 424 // Ideally, we'd provide this in a subclass that actually knows more 425 // about the application at hand and can provide an appropriate 426 // collection of start labels. But then the appending member 427 // functions like Append and D32 would return a reference to the 428 // base class, not the derived class, and the chaining won't work. 
429 // Since the only value here is in pretty notation, that's a fatal 430 // flaw. start()431 Label start() const { return start_; } 432 433 // Return a label representing the point at which the next Appended 434 // item will appear in the section, relative to start(). Here()435 Label Here() const { return start_ + Size(); } 436 437 // Set *LABEL to Here, and return a reference to this section. Mark(Label * label)438 Section& Mark(Label* label) { *label = Here(); return *this; } 439 440 // If there are no undefined label references left in this 441 // section, set CONTENTS to the contents of this section, as a 442 // string, and clear this section. Return true on success, or false 443 // if there were still undefined labels. 444 bool GetContents(string* contents); 445 446 private: 447 // Used internally. A reference to a label's value. 448 struct Reference { ReferenceReference449 Reference(size_t set_offset, Endianness set_endianness, size_t set_size, 450 const Label& set_label) 451 : offset(set_offset), endianness(set_endianness), size(set_size), 452 label(set_label) { } 453 454 // The offset of the reference within the section. 455 size_t offset; 456 457 // The endianness of the reference. 458 Endianness endianness; 459 460 // The size of the reference. 461 size_t size; 462 463 // The label to which this is a reference. 464 Label label; 465 }; 466 467 // The default endianness of this section. 468 Endianness endianness_; 469 470 // The contents of the section. 471 string contents_; 472 473 // References to labels within those contents. 474 vector<Reference> references_; 475 476 // A label referring to the beginning of the section. 477 Label start_; 478 }; 479 480 } // namespace test_assembler 481 } // namespace google_breakpad 482 483 #endif // PROCESSOR_TEST_ASSEMBLER_H_ 484