// xref: /aosp_15_r20/external/zucchini/disassembler.h (revision a03ca8b91e029cd15055c20c78c2e087c84792e4)
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
6 #define COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
7 
8 #include <stddef.h>
9 
10 #include <memory>
11 #include <optional>
12 #include <string>
13 #include <vector>
14 
15 #include "components/zucchini/buffer_view.h"
16 #include "components/zucchini/image_utils.h"
17 
18 namespace zucchini {
19 
20 // A vacuous ReferenceReader that produces no references.
21 class EmptyReferenceReader : public ReferenceReader {
22  public:
23   std::optional<Reference> GetNext() override;
24 };
25 
26 // A vacuous EmptyReferenceWriter that does not write.
27 class EmptyReferenceWriter : public ReferenceWriter {
28  public:
29   void PutNext(Reference reference) override;
30 };
31 
// Disassembler needs to be defined before ReferenceGroup because the latter
// contains member pointers based on the former, and we use a compiler flag,
// -fcomplete-member-pointers, which enforces that member pointer base types are
// complete. This flag helps prevent us from running into problems in the
// Microsoft C++ ABI (see https://crbug.com/847724).
37 
38 class ReferenceGroup;
39 
40 // A Disassembler is used to encapsulate architecture specific operations, to:
41 // - Describe types of references found in the architecture using traits.
42 // - Extract references contained in an image file.
43 // - Correct target for some references.
44 class Disassembler {
45  public:
46   // Attempts to parse |image| and create an architecture-specifc Disassembler,
47   // as determined by DIS, which is inherited from Disassembler. Returns an
48   // instance of DIS if successful, and null otherwise.
49   template <class DIS>
Make(ConstBufferView image)50   static std::unique_ptr<DIS> Make(ConstBufferView image) {
51     auto disasm = std::make_unique<DIS>();
52     if (!disasm->Parse(image))
53       return nullptr;
54     return disasm;
55   }
56 
57   Disassembler(const Disassembler&) = delete;
58   const Disassembler& operator=(const Disassembler&) = delete;
59   virtual ~Disassembler();
60 
61   // Returns the type of executable handled by the Disassembler.
62   virtual ExecutableType GetExeType() const = 0;
63 
64   // Returns a more detailed description of the executable type.
65   virtual std::string GetExeTypeString() const = 0;
66 
67   // Creates and returns a vector that contains all groups of references.
68   // Groups must be aggregated by pool.
69   virtual std::vector<ReferenceGroup> MakeReferenceGroups() const = 0;
70 
image()71   ConstBufferView image() const { return image_; }
size()72   size_t size() const { return image_.size(); }
73 
num_equivalence_iterations()74   int num_equivalence_iterations() const { return num_equivalence_iterations_; }
75 
76  protected:
77   explicit Disassembler(int num_equivalence_iterations);
78 
79   // Parses |image| and initializes internal states. Returns true on success.
80   // This must be called once and before any other operation.
81   virtual bool Parse(ConstBufferView image) = 0;
82 
83   // Raw image data. After Parse(), a Disassembler should shrink this to contain
84   // only the portion containing the executable file it recognizes.
85   ConstBufferView image_;
86 
87   // The number of iterations to run for equivalence map generation. This should
88   // roughly be the max length of reference indirection chains.
89   int num_equivalence_iterations_;
90 };
91 
92 // A ReferenceGroup is associated with a specific |type| and has convenience
93 // methods to obtain readers and writers for that type. A ReferenceGroup does
94 // not store references; it is a lightweight class that communicates with the
95 // disassembler to operate on them.
96 class ReferenceGroup {
97  public:
98   // Member function pointer used to obtain a ReferenceReader.
99   using ReaderFactory = std::unique_ptr<ReferenceReader> (
100       Disassembler::*)(offset_t lower, offset_t upper);
101 
102   // Member function pointer used to obtain a ReferenceWriter.
103   using WriterFactory = std::unique_ptr<ReferenceWriter> (Disassembler::*)(
104       MutableBufferView image);
105 
106   // RefinedGeneratorFactory and RefinedReceptorFactory don't have to be
107   // identical to GeneratorFactory and ReceptorFactory, but they must be
108   // convertible. As a result, they can be pointer to member function of a
109   // derived Disassembler.
110   template <class RefinedReaderFactory, class RefinedWriterFactory>
ReferenceGroup(ReferenceTypeTraits traits,RefinedReaderFactory reader_factory,RefinedWriterFactory writer_factory)111   ReferenceGroup(ReferenceTypeTraits traits,
112                  RefinedReaderFactory reader_factory,
113                  RefinedWriterFactory writer_factory)
114       : traits_(traits),
115         reader_factory_(static_cast<ReaderFactory>(reader_factory)),
116         writer_factory_(static_cast<WriterFactory>(writer_factory)) {}
117 
118   // Returns a reader for all references in the binary.
119   // Invalidates any other writer or reader previously obtained for |disasm|.
120   std::unique_ptr<ReferenceReader> GetReader(Disassembler* disasm) const;
121 
122   // Returns a reader for references whose bytes are entirely contained in
123   // |[lower, upper)|.
124   // Invalidates any other writer or reader previously obtained for |disasm|.
125   std::unique_ptr<ReferenceReader> GetReader(offset_t lower,
126                                              offset_t upper,
127                                              Disassembler* disasm) const;
128 
129   // Returns a writer for references in |image|, assuming that |image| was the
130   // same one initially parsed by |disasm|.
131   // Invalidates any other writer or reader previously obtained for |disasm|.
132   std::unique_ptr<ReferenceWriter> GetWriter(MutableBufferView image,
133                                              Disassembler* disasm) const;
134 
135   // Returns traits describing the reference type.
traits()136   const ReferenceTypeTraits& traits() const { return traits_; }
137 
138   // Shorthand for traits().width.
width()139   offset_t width() const { return traits().width; }
140 
141   // Shorthand for traits().type_tag.
type_tag()142   TypeTag type_tag() const { return traits().type_tag; }
143 
144   // Shorthand for traits().pool_tag.
pool_tag()145   PoolTag pool_tag() const { return traits().pool_tag; }
146 
147  private:
148   ReferenceTypeTraits traits_;
149   ReaderFactory reader_factory_ = nullptr;
150   WriterFactory writer_factory_ = nullptr;
151 };
152 
153 }  // namespace zucchini
154 
155 #endif  // COMPONENTS_ZUCCHINI_DISASSEMBLER_H_
156