1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_
6 #define COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_
7 
8 #include <stdint.h>
9 #include <stdlib.h>
10 
11 #include <memory>
12 #include <optional>
13 #include <string>
14 #include <vector>
15 
16 #include "components/zucchini/disassembler.h"
17 #include "components/zucchini/image_utils.h"
18 #include "components/zucchini/type_ztf.h"
19 
20 namespace zucchini {
21 
22 // Disassembler for text based files. This file format is supported for
23 // debugging Zucchini and is not intended for production usage.
24 //
25 // A valid Zucchini Text Format (ZTF) file is specified as follows:
26 //
27 // Header:
28 //   The first four bytes must be - 'Z' 'T' 'x' 't'
29 // Footer:
30 //   The last five bytes must be  - 't' 'x' 'T' 'Z' '\n'
31 //   (note that terminating new line is required).
32 // Content:
33 //   The content can be any sequence of printable ASCII characters and new line
34 //   (but not carriage return). This excludes the sequence that comprises the
35 //   Footer.
36 // References:
37 //   A reference is either Absolute or Relative. All references must begin and
38 //   end with a pair of enclosing characters <open>, <close>. The options are:
39 //     - Angles:      '<' and '>'
40 //     - Braces:      '{' and '}'
41 //     - Brackets:    '[' and ']'
42 //     - Parentheses: '(' and ')'
43 //
44 //   A reference contains three items:
45 //     - A line number       <line>
46 //     - A delimiter     ',' <delimiter>
47 //     - A column number     <col>
48 //     <line> and <col> may contain 1-3 digits and both must contain the same
49 //     number of digits. If a number is too short then it can be left-padded
50 //     with '0'.
51 //
52 //   For Absolute references, <line> and <col> are 1-based (i.e. positive)
53 //   index of line and column numbers of a character in the ZTF. This follows
54 //   standard convention for text editors. Note that "\n" is considered to be
55 //   part of a preceding line.
56 //
57 //     <open><line><delimiter><col><close>
58 //
//   For Relative references, <line> and <col> are integer deltas of the
//   target's (absolute) line and column relative to the line and column of the
//   reference's first byte (i.e. <open>). Relative references have a <sign>
//   ('+' or '-') before each of <line> and <col>. For the special case of "0",
//   "00", etc., <sign> must be '+'.
64 //
65 //     <open><sign><line><delimiter><sign><col><close>
66 //
//   If a reference points outside the target, either in writing or reading, it
//   is considered invalid and ignored. The same applies if it overflows a line,
//   e.g. if a line is 10 characters long and a reference targets character 11
//   of that line, it is rejected. Lines are delimited with '\n', which is
//   counted toward the line length.
72 //
73 //   If a reference is to be written that would overwrite a '\n' character it is
74 //   ignored as this would break all other line values.
75 
// Upper bound on the number of digits that the <line> and <col> fields of a
// ZTF reference may contain.
enum : size_t {
  kMaxDigitCount = 3,
};
77 
78 // Helper class for translating among offset_t, ztf::LineCol and
79 // ztf::DeltaLineCol.
80 class ZtfTranslator {
81  public:
82   ZtfTranslator();
83   ZtfTranslator(const ZtfTranslator&) = delete;
84   const ZtfTranslator& operator=(const ZtfTranslator&) = delete;
85   ~ZtfTranslator();
86 
87   // Initializes |line_starts_| with the contents of |image|.
88   bool Init(ConstBufferView image);
89 
90   // Checks if |lc| is a valid location in the file.
91   bool IsValid(ztf::LineCol lc) const;
92 
93   // Checks if |dlc| relative to |offset| is a valid location in the file.
94   bool IsValid(offset_t offset, ztf::DeltaLineCol dlc) const;
95 
96   // Returns the offset corresponding to |line_col| if it is valid. Otherwise
97   // returns |kInvalidOffset|.
98   offset_t LineColToOffset(ztf::LineCol line_col) const;
99 
100   // Returns the ztf::LineCol for an |offset| if it is valid. Otherwise returns
101   // std::nullopt.
102   std::optional<ztf::LineCol> OffsetToLineCol(offset_t offset) const;
103 
104  private:
105   // Returns an iterator to the range containing |offset|. Which is represented
106   // by the starting offset. The next element will contain the upper bound of
107   // the range.
108   std::vector<offset_t>::const_iterator SearchForRange(offset_t offset) const;
109 
110   // Returns the length of a 1-indexed line. The caller is expected to check
111   // that the requested line exists.
112   offset_t LineLength(uint16_t line) const;
113 
NumLines()114   offset_t NumLines() const {
115     return static_cast<offset_t>(line_starts_.size() - 1);
116   }
117 
118   // |line_starts_| is a sorted list of each line's starting offset, along with
119   // the image size as the sentinel; it looks like {0, ..., image.size}.
120   std::vector<offset_t> line_starts_;
121 };
122 
123 // Disassembler for Zucchini Text Format (ZTF).
124 class DisassemblerZtf : public Disassembler {
125  public:
126   static constexpr uint16_t kVersion = 1;
127 
128   // Target Pools
129   enum ReferencePool : uint8_t {
130     kAngles,      // <>
131     kBraces,      // {}
132     kBrackets,    // []
133     kParentheses  // ()
134   };
135 
136   // Type breakdown. Should contain all permutations of ReferencePool, Abs|Rel
137   // and the possible number of digits (1-3).
138   enum ReferenceType : uint8_t {
139     kAnglesAbs1,
140     kAnglesAbs2,
141     kAnglesAbs3,
142     kAnglesRel1,
143     kAnglesRel2,
144     kAnglesRel3,
145     kBracesAbs1,
146     kBracesAbs2,
147     kBracesAbs3,
148     kBracesRel1,
149     kBracesRel2,
150     kBracesRel3,
151     kBracketsAbs1,
152     kBracketsAbs2,
153     kBracketsAbs3,
154     kBracketsRel1,
155     kBracketsRel2,
156     kBracketsRel3,
157     kParenthesesAbs1,
158     kParenthesesAbs2,
159     kParenthesesAbs3,
160     kParenthesesRel1,
161     kParenthesesRel2,
162     kParenthesesRel3,
163     kNumTypes
164   };
165 
166   DisassemblerZtf();
167   DisassemblerZtf(const DisassemblerZtf&) = delete;
168   const DisassemblerZtf& operator=(const DisassemblerZtf&) = delete;
169   ~DisassemblerZtf() override;
170 
171   // Applies quick checks to determine if |image| *may* point to the start of a
172   // ZTF file. Returns true on success.
173   static bool QuickDetect(ConstBufferView image);
174 
175   // Disassembler:
176   ExecutableType GetExeType() const override;
177   std::string GetExeTypeString() const override;
178   std::vector<ReferenceGroup> MakeReferenceGroups() const override;
179 
180   // Reference Readers, templated to allow configurable digit count and pool.
181   template <uint8_t digits, ReferencePool pool>
182   std::unique_ptr<ReferenceReader> MakeReadAbs(offset_t lo, offset_t hi);
183   template <uint8_t digits, ReferencePool pool>
184   std::unique_ptr<ReferenceReader> MakeReadRel(offset_t lo, offset_t hi);
185 
186   // Reference Writers, templated to allow configurable digit count and pool.
187   template <uint8_t digits, ReferencePool pool>
188   std::unique_ptr<ReferenceWriter> MakeWriteAbs(MutableBufferView image);
189   template <uint8_t digits, ReferencePool pool>
190   std::unique_ptr<ReferenceWriter> MakeWriteRel(MutableBufferView image);
191 
192  private:
193   friend Disassembler;
194 
195   // Disassembler:
196   bool Parse(ConstBufferView image) override;
197 
198   ZtfTranslator translator_;
199 };
200 
201 }  // namespace zucchini
202 
203 #endif  // COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_
204