xref: /aosp_15_r20/external/zucchini/disassembler.h (revision a03ca8b91e029cd15055c20c78c2e087c84792e4)
1*a03ca8b9SKrzysztof Kosiński // Copyright 2017 The Chromium Authors. All rights reserved.
2*a03ca8b9SKrzysztof Kosiński // Use of this source code is governed by a BSD-style license that can be
3*a03ca8b9SKrzysztof Kosiński // found in the LICENSE file.
4*a03ca8b9SKrzysztof Kosiński 
5*a03ca8b9SKrzysztof Kosiński #ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
6*a03ca8b9SKrzysztof Kosiński #define COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
7*a03ca8b9SKrzysztof Kosiński 
8*a03ca8b9SKrzysztof Kosiński #include <stddef.h>
9*a03ca8b9SKrzysztof Kosiński 
10*a03ca8b9SKrzysztof Kosiński #include <memory>
11*a03ca8b9SKrzysztof Kosiński #include <optional>
12*a03ca8b9SKrzysztof Kosiński #include <string>
13*a03ca8b9SKrzysztof Kosiński #include <vector>
14*a03ca8b9SKrzysztof Kosiński 
15*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/buffer_view.h"
16*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/image_utils.h"
17*a03ca8b9SKrzysztof Kosiński 
18*a03ca8b9SKrzysztof Kosiński namespace zucchini {
19*a03ca8b9SKrzysztof Kosiński 
20*a03ca8b9SKrzysztof Kosiński // A vacuous ReferenceReader that produces no references.
21*a03ca8b9SKrzysztof Kosiński class EmptyReferenceReader : public ReferenceReader {
22*a03ca8b9SKrzysztof Kosiński  public:
  // Overrides the GetNext() inherited from ReferenceReader. Defined out of
  // line; given the class description it presumably always returns
  // std::nullopt -- confirm against the implementation file.
23*a03ca8b9SKrzysztof Kosiński   std::optional<Reference> GetNext() override;
24*a03ca8b9SKrzysztof Kosiński };
25*a03ca8b9SKrzysztof Kosiński 
26*a03ca8b9SKrzysztof Kosiński // A vacuous ReferenceWriter that does not write.
27*a03ca8b9SKrzysztof Kosiński class EmptyReferenceWriter : public ReferenceWriter {
28*a03ca8b9SKrzysztof Kosiński  public:
  // Overrides the PutNext() inherited from ReferenceWriter. Defined out of
  // line; given the class description it presumably ignores |reference| --
  // confirm against the implementation file.
29*a03ca8b9SKrzysztof Kosiński   void PutNext(Reference reference) override;
30*a03ca8b9SKrzysztof Kosiński };
31*a03ca8b9SKrzysztof Kosiński 
32*a03ca8b9SKrzysztof Kosiński // Disassembler needs to be defined before ReferenceGroup because the latter
33*a03ca8b9SKrzysztof Kosiński // stores pointers to member functions of the former, and we use a compiler flag,
34*a03ca8b9SKrzysztof Kosiński // -fcomplete-member-pointers, which enforces that member pointer base types are
35*a03ca8b9SKrzysztof Kosiński // complete. This flag helps prevent us from running into problems in the
36*a03ca8b9SKrzysztof Kosiński // Microsoft C++ ABI (see https://crbug.com/847724).
37*a03ca8b9SKrzysztof Kosiński 
// Forward declaration only: Disassembler::MakeReferenceGroups() names
// ReferenceGroup in its return type before the class itself is defined.
38*a03ca8b9SKrzysztof Kosiński class ReferenceGroup;
39*a03ca8b9SKrzysztof Kosiński 
40*a03ca8b9SKrzysztof Kosiński // A Disassembler is used to encapsulate architecture specific operations, to:
41*a03ca8b9SKrzysztof Kosiński // - Describe types of references found in the architecture using traits.
42*a03ca8b9SKrzysztof Kosiński // - Extract references contained in an image file.
43*a03ca8b9SKrzysztof Kosiński // - Correct target for some references.
44*a03ca8b9SKrzysztof Kosiński class Disassembler {
45*a03ca8b9SKrzysztof Kosiński  public:
46*a03ca8b9SKrzysztof Kosiński   // Attempts to parse |image| and create an architecture-specific Disassembler,
47*a03ca8b9SKrzysztof Kosiński   // as determined by DIS, which is inherited from Disassembler. Returns an
48*a03ca8b9SKrzysztof Kosiński   // instance of DIS if successful, and null otherwise.
  // DIS must be default-constructible, since the instance is created with
  // std::make_unique<DIS>() before Parse() is invoked on it.
49*a03ca8b9SKrzysztof Kosiński   template <class DIS>
Make(ConstBufferView image)50*a03ca8b9SKrzysztof Kosiński   static std::unique_ptr<DIS> Make(ConstBufferView image) {
51*a03ca8b9SKrzysztof Kosiński     auto disasm = std::make_unique<DIS>();
    // A failed parse discards the freshly created instance.
52*a03ca8b9SKrzysztof Kosiński     if (!disasm->Parse(image))
53*a03ca8b9SKrzysztof Kosiński       return nullptr;
54*a03ca8b9SKrzysztof Kosiński     return disasm;
55*a03ca8b9SKrzysztof Kosiński   }
56*a03ca8b9SKrzysztof Kosiński 
  // Not copy-constructible or copy-assignable.
57*a03ca8b9SKrzysztof Kosiński   Disassembler(const Disassembler&) = delete;
58*a03ca8b9SKrzysztof Kosiński   const Disassembler& operator=(const Disassembler&) = delete;
59*a03ca8b9SKrzysztof Kosiński   virtual ~Disassembler();
60*a03ca8b9SKrzysztof Kosiński 
61*a03ca8b9SKrzysztof Kosiński   // Returns the type of executable handled by the Disassembler.
62*a03ca8b9SKrzysztof Kosiński   virtual ExecutableType GetExeType() const = 0;
63*a03ca8b9SKrzysztof Kosiński 
64*a03ca8b9SKrzysztof Kosiński   // Returns a more detailed description of the executable type.
65*a03ca8b9SKrzysztof Kosiński   virtual std::string GetExeTypeString() const = 0;
66*a03ca8b9SKrzysztof Kosiński 
67*a03ca8b9SKrzysztof Kosiński   // Creates and returns a vector that contains all groups of references.
68*a03ca8b9SKrzysztof Kosiński   // Groups must be aggregated by pool.
69*a03ca8b9SKrzysztof Kosiński   virtual std::vector<ReferenceGroup> MakeReferenceGroups() const = 0;
70*a03ca8b9SKrzysztof Kosiński 
  // Returns the stored image view (|image_|).
image()71*a03ca8b9SKrzysztof Kosiński   ConstBufferView image() const { return image_; }
  // Returns the size reported by |image_|.
size()72*a03ca8b9SKrzysztof Kosiński   size_t size() const { return image_.size(); }
73*a03ca8b9SKrzysztof Kosiński 
  // Returns the value of |num_equivalence_iterations_|.
num_equivalence_iterations()74*a03ca8b9SKrzysztof Kosiński   int num_equivalence_iterations() const { return num_equivalence_iterations_; }
75*a03ca8b9SKrzysztof Kosiński 
76*a03ca8b9SKrzysztof Kosiński  protected:
  // Protected, so the class cannot be constructed directly by outside code.
  // Defined out of line; presumably stores its argument in
  // |num_equivalence_iterations_| -- confirm against the implementation file.
77*a03ca8b9SKrzysztof Kosiński   explicit Disassembler(int num_equivalence_iterations);
78*a03ca8b9SKrzysztof Kosiński 
79*a03ca8b9SKrzysztof Kosiński   // Parses |image| and initializes internal states. Returns true on success.
80*a03ca8b9SKrzysztof Kosiński   // This must be called once and before any other operation.
81*a03ca8b9SKrzysztof Kosiński   virtual bool Parse(ConstBufferView image) = 0;
82*a03ca8b9SKrzysztof Kosiński 
83*a03ca8b9SKrzysztof Kosiński   // Raw image data. After Parse(), a Disassembler should shrink this to contain
84*a03ca8b9SKrzysztof Kosiński   // only the portion containing the executable file it recognizes.
85*a03ca8b9SKrzysztof Kosiński   ConstBufferView image_;
86*a03ca8b9SKrzysztof Kosiński 
87*a03ca8b9SKrzysztof Kosiński   // The number of iterations to run for equivalence map generation. This should
88*a03ca8b9SKrzysztof Kosiński   // roughly be the max length of reference indirection chains.
89*a03ca8b9SKrzysztof Kosiński   int num_equivalence_iterations_;
90*a03ca8b9SKrzysztof Kosiński };
91*a03ca8b9SKrzysztof Kosiński 
92*a03ca8b9SKrzysztof Kosiński // A ReferenceGroup is associated with a specific |type| and has convenience
93*a03ca8b9SKrzysztof Kosiński // methods to obtain readers and writers for that type. A ReferenceGroup does
94*a03ca8b9SKrzysztof Kosiński // not store references; it is a lightweight class that communicates with the
95*a03ca8b9SKrzysztof Kosiński // disassembler to operate on them.
96*a03ca8b9SKrzysztof Kosiński class ReferenceGroup {
97*a03ca8b9SKrzysztof Kosiński  public:
98*a03ca8b9SKrzysztof Kosiński   // Member function pointer used to obtain a ReferenceReader.
99*a03ca8b9SKrzysztof Kosiński   using ReaderFactory = std::unique_ptr<ReferenceReader> (
100*a03ca8b9SKrzysztof Kosiński       Disassembler::*)(offset_t lower, offset_t upper);
101*a03ca8b9SKrzysztof Kosiński 
102*a03ca8b9SKrzysztof Kosiński   // Member function pointer used to obtain a ReferenceWriter.
103*a03ca8b9SKrzysztof Kosiński   using WriterFactory = std::unique_ptr<ReferenceWriter> (Disassembler::*)(
104*a03ca8b9SKrzysztof Kosiński       MutableBufferView image);
105*a03ca8b9SKrzysztof Kosiński 
106*a03ca8b9SKrzysztof Kosiński   // RefinedReaderFactory and RefinedWriterFactory don't have to be
107*a03ca8b9SKrzysztof Kosiński   // identical to ReaderFactory and WriterFactory, but they must be
108*a03ca8b9SKrzysztof Kosiński   // convertible to them via static_cast. As a result, they can be pointers to
109*a03ca8b9SKrzysztof Kosiński   // member functions of a class derived from Disassembler.
110*a03ca8b9SKrzysztof Kosiński   template <class RefinedReaderFactory, class RefinedWriterFactory>
ReferenceGroup(ReferenceTypeTraits traits,RefinedReaderFactory reader_factory,RefinedWriterFactory writer_factory)111*a03ca8b9SKrzysztof Kosiński   ReferenceGroup(ReferenceTypeTraits traits,
112*a03ca8b9SKrzysztof Kosiński                  RefinedReaderFactory reader_factory,
113*a03ca8b9SKrzysztof Kosiński                  RefinedWriterFactory writer_factory)
114*a03ca8b9SKrzysztof Kosiński       : traits_(traits),
115*a03ca8b9SKrzysztof Kosiński         reader_factory_(static_cast<ReaderFactory>(reader_factory)),
116*a03ca8b9SKrzysztof Kosiński         writer_factory_(static_cast<WriterFactory>(writer_factory)) {}
117*a03ca8b9SKrzysztof Kosiński 
118*a03ca8b9SKrzysztof Kosiński   // Returns a reader for all references in the binary.
119*a03ca8b9SKrzysztof Kosiński   // Invalidates any other writer or reader previously obtained for |disasm|.
120*a03ca8b9SKrzysztof Kosiński   std::unique_ptr<ReferenceReader> GetReader(Disassembler* disasm) const;
121*a03ca8b9SKrzysztof Kosiński 
122*a03ca8b9SKrzysztof Kosiński   // Returns a reader for references whose bytes are entirely contained in
123*a03ca8b9SKrzysztof Kosiński   // |[lower, upper)|.
124*a03ca8b9SKrzysztof Kosiński   // Invalidates any other writer or reader previously obtained for |disasm|.
125*a03ca8b9SKrzysztof Kosiński   std::unique_ptr<ReferenceReader> GetReader(offset_t lower,
126*a03ca8b9SKrzysztof Kosiński                                              offset_t upper,
127*a03ca8b9SKrzysztof Kosiński                                              Disassembler* disasm) const;
128*a03ca8b9SKrzysztof Kosiński 
129*a03ca8b9SKrzysztof Kosiński   // Returns a writer for references in |image|, assuming that |image| was the
130*a03ca8b9SKrzysztof Kosiński   // same one initially parsed by |disasm|.
131*a03ca8b9SKrzysztof Kosiński   // Invalidates any other writer or reader previously obtained for |disasm|.
132*a03ca8b9SKrzysztof Kosiński   std::unique_ptr<ReferenceWriter> GetWriter(MutableBufferView image,
133*a03ca8b9SKrzysztof Kosiński                                              Disassembler* disasm) const;
134*a03ca8b9SKrzysztof Kosiński 
135*a03ca8b9SKrzysztof Kosiński   // Returns traits describing the reference type.
traits()136*a03ca8b9SKrzysztof Kosiński   const ReferenceTypeTraits& traits() const { return traits_; }
137*a03ca8b9SKrzysztof Kosiński 
138*a03ca8b9SKrzysztof Kosiński   // Shorthand for traits().width.
width()139*a03ca8b9SKrzysztof Kosiński   offset_t width() const { return traits().width; }
140*a03ca8b9SKrzysztof Kosiński 
141*a03ca8b9SKrzysztof Kosiński   // Shorthand for traits().type_tag.
type_tag()142*a03ca8b9SKrzysztof Kosiński   TypeTag type_tag() const { return traits().type_tag; }
143*a03ca8b9SKrzysztof Kosiński 
144*a03ca8b9SKrzysztof Kosiński   // Shorthand for traits().pool_tag.
pool_tag()145*a03ca8b9SKrzysztof Kosiński   PoolTag pool_tag() const { return traits().pool_tag; }
146*a03ca8b9SKrzysztof Kosiński 
147*a03ca8b9SKrzysztof Kosiński  private:
  // Copy of the traits passed to the constructor.
148*a03ca8b9SKrzysztof Kosiński   ReferenceTypeTraits traits_;
  // Factories held as pointers to Disassembler member functions. They default
  // to nullptr and are overwritten by the constructor's static_cast results.
149*a03ca8b9SKrzysztof Kosiński   ReaderFactory reader_factory_ = nullptr;
150*a03ca8b9SKrzysztof Kosiński   WriterFactory writer_factory_ = nullptr;
151*a03ca8b9SKrzysztof Kosiński };
152*a03ca8b9SKrzysztof Kosiński 
153*a03ca8b9SKrzysztof Kosiński }  // namespace zucchini
154*a03ca8b9SKrzysztof Kosiński 
155*a03ca8b9SKrzysztof Kosiński #endif  // COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
156