xref: /aosp_15_r20/external/zucchini/disassembler_ztf.h (revision a03ca8b91e029cd15055c20c78c2e087c84792e4)
1*a03ca8b9SKrzysztof Kosiński // Copyright 2018 The Chromium Authors. All rights reserved.
2*a03ca8b9SKrzysztof Kosiński // Use of this source code is governed by a BSD-style license that can be
3*a03ca8b9SKrzysztof Kosiński // found in the LICENSE file.
4*a03ca8b9SKrzysztof Kosiński 
5*a03ca8b9SKrzysztof Kosiński #ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_
6*a03ca8b9SKrzysztof Kosiński #define COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_
7*a03ca8b9SKrzysztof Kosiński 
8*a03ca8b9SKrzysztof Kosiński #include <stdint.h>
9*a03ca8b9SKrzysztof Kosiński #include <stdlib.h>
10*a03ca8b9SKrzysztof Kosiński 
11*a03ca8b9SKrzysztof Kosiński #include <memory>
12*a03ca8b9SKrzysztof Kosiński #include <optional>
13*a03ca8b9SKrzysztof Kosiński #include <string>
14*a03ca8b9SKrzysztof Kosiński #include <vector>
15*a03ca8b9SKrzysztof Kosiński 
16*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/disassembler.h"
17*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/image_utils.h"
18*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/type_ztf.h"
19*a03ca8b9SKrzysztof Kosiński 
20*a03ca8b9SKrzysztof Kosiński namespace zucchini {
21*a03ca8b9SKrzysztof Kosiński 
22*a03ca8b9SKrzysztof Kosiński // Disassembler for text based files. This file format is supported for
23*a03ca8b9SKrzysztof Kosiński // debugging Zucchini and is not intended for production usage.
24*a03ca8b9SKrzysztof Kosiński //
25*a03ca8b9SKrzysztof Kosiński // A valid Zucchini Text Format (ZTF) file is specified as follows:
26*a03ca8b9SKrzysztof Kosiński //
27*a03ca8b9SKrzysztof Kosiński // Header:
28*a03ca8b9SKrzysztof Kosiński //   The first four bytes must be - 'Z' 'T' 'x' 't'
29*a03ca8b9SKrzysztof Kosiński // Footer:
30*a03ca8b9SKrzysztof Kosiński //   The last five bytes must be  - 't' 'x' 'T' 'Z' '\n'
31*a03ca8b9SKrzysztof Kosiński //   (note that terminating new line is required).
32*a03ca8b9SKrzysztof Kosiński // Content:
33*a03ca8b9SKrzysztof Kosiński //   The content can be any sequence of printable ASCII characters and new line
34*a03ca8b9SKrzysztof Kosiński //   (but not carriage return). This excludes the sequence that comprises the
35*a03ca8b9SKrzysztof Kosiński //   Footer.
36*a03ca8b9SKrzysztof Kosiński // References:
37*a03ca8b9SKrzysztof Kosiński //   A reference is either Absolute or Relative. All references must begin and
38*a03ca8b9SKrzysztof Kosiński //   end with a pair of enclosing characters <open>, <close>. The options are:
39*a03ca8b9SKrzysztof Kosiński //     - Angles:      '<' and '>'
40*a03ca8b9SKrzysztof Kosiński //     - Braces:      '{' and '}'
41*a03ca8b9SKrzysztof Kosiński //     - Brackets:    '[' and ']'
42*a03ca8b9SKrzysztof Kosiński //     - Parentheses: '(' and ')'
43*a03ca8b9SKrzysztof Kosiński //
44*a03ca8b9SKrzysztof Kosiński //   A reference contains three items:
45*a03ca8b9SKrzysztof Kosiński //     - A line number       <line>
46*a03ca8b9SKrzysztof Kosiński //     - A delimiter     ',' <delimiter>
47*a03ca8b9SKrzysztof Kosiński //     - A column number     <col>
48*a03ca8b9SKrzysztof Kosiński //     <line> and <col> may contain 1-3 digits and both must contain the same
49*a03ca8b9SKrzysztof Kosiński //     number of digits. If a number is too short then it can be left-padded
50*a03ca8b9SKrzysztof Kosiński //     with '0'.
51*a03ca8b9SKrzysztof Kosiński //
52*a03ca8b9SKrzysztof Kosiński //   For Absolute references, <line> and <col> are 1-based (i.e. positive)
53*a03ca8b9SKrzysztof Kosiński //   index of line and column numbers of a character in the ZTF. This follows
54*a03ca8b9SKrzysztof Kosiński //   standard convention for text editors. Note that "\n" is considered to be
55*a03ca8b9SKrzysztof Kosiński //   part of a preceding line.
56*a03ca8b9SKrzysztof Kosiński //
57*a03ca8b9SKrzysztof Kosiński //     <open><line><delimiter><col><close>
58*a03ca8b9SKrzysztof Kosiński //
59*a03ca8b9SKrzysztof Kosiński //   For Relative references, <line> and <col> are integer offset deltas of the
60*a03ca8b9SKrzysztof Kosiński //   target's (absolute) line and column relative to the line and column of the
61*a03ca8b9SKrzysztof Kosiński //   reference's first byte (i.e. <open>). Relative references have <sign> ('+'
62*a03ca8b9SKrzysztof Kosiński //   or '-') before <line> and <col>. For the special case of "0", "00", etc.,
63*a03ca8b9SKrzysztof Kosiński //   <sign> must be "+".
64*a03ca8b9SKrzysztof Kosiński //
65*a03ca8b9SKrzysztof Kosiński //     <open><sign><line><delimiter><sign><col><close>
66*a03ca8b9SKrzysztof Kosiński //
67*a03ca8b9SKrzysztof Kosiński //   If a reference points outside the target either in writing or reading it is
68*a03ca8b9SKrzysztof Kosiński //   considered invalid and ignored. The same applies if it overflows a line,
69*a03ca8b9SKrzysztof Kosiński //   i.e., if a line is 10 characters long and a reference targets character 11
70*a03ca8b9SKrzysztof Kosiński //   of that line, it is rejected. Lines are delimited with '\n' which is
71*a03ca8b9SKrzysztof Kosiński //   counted toward the line length.
72*a03ca8b9SKrzysztof Kosiński //
73*a03ca8b9SKrzysztof Kosiński //   If a reference is to be written that would overwrite a '\n' character it is
74*a03ca8b9SKrzysztof Kosiński //   ignored as this would break all other line values.
75*a03ca8b9SKrzysztof Kosiński 
// Digit-count limit used by the ZTF disassembler. The value 3 matches the
// "1-3 digits" limit on <line> and <col> given in the format description above.
// Declared as an unnamed enum with underlying type size_t, so the constant has
// no storage of its own.
76*a03ca8b9SKrzysztof Kosiński enum : size_t { kMaxDigitCount = 3 };
77*a03ca8b9SKrzysztof Kosiński 
78*a03ca8b9SKrzysztof Kosiński // Helper class for translating among offset_t, ztf::LineCol and
79*a03ca8b9SKrzysztof Kosiński // ztf::DeltaLineCol.
// The class is non-copyable (copy construction and copy assignment are
// deleted). All lookups are answered from |line_starts_|, which Init() fills.
// NOTE(review): presumably Init() must succeed before any other member is
// called -- confirm against the implementation file.
80*a03ca8b9SKrzysztof Kosiński class ZtfTranslator {
81*a03ca8b9SKrzysztof Kosiński  public:
82*a03ca8b9SKrzysztof Kosiński   ZtfTranslator();
83*a03ca8b9SKrzysztof Kosiński   ZtfTranslator(const ZtfTranslator&) = delete;
84*a03ca8b9SKrzysztof Kosiński   const ZtfTranslator& operator=(const ZtfTranslator&) = delete;
85*a03ca8b9SKrzysztof Kosiński   ~ZtfTranslator();
86*a03ca8b9SKrzysztof Kosiński 
87*a03ca8b9SKrzysztof Kosiński   // Initializes |line_starts_| with the contents of |image|.
  // NOTE(review): the meaning of the bool result is not visible in this
  // header; presumably false signals that |image| could not be indexed.
88*a03ca8b9SKrzysztof Kosiński   bool Init(ConstBufferView image);
89*a03ca8b9SKrzysztof Kosiński 
90*a03ca8b9SKrzysztof Kosiński   // Checks if |lc| is a valid location in the file.
91*a03ca8b9SKrzysztof Kosiński   bool IsValid(ztf::LineCol lc) const;
92*a03ca8b9SKrzysztof Kosiński 
93*a03ca8b9SKrzysztof Kosiński   // Checks if |dlc| relative to |offset| is a valid location in the file.
94*a03ca8b9SKrzysztof Kosiński   bool IsValid(offset_t offset, ztf::DeltaLineCol dlc) const;
95*a03ca8b9SKrzysztof Kosiński 
96*a03ca8b9SKrzysztof Kosiński   // Returns the offset corresponding to |line_col| if it is valid. Otherwise
97*a03ca8b9SKrzysztof Kosiński   // returns |kInvalidOffset|.
98*a03ca8b9SKrzysztof Kosiński   offset_t LineColToOffset(ztf::LineCol line_col) const;
99*a03ca8b9SKrzysztof Kosiński 
100*a03ca8b9SKrzysztof Kosiński   // Returns the ztf::LineCol for an |offset| if it is valid. Otherwise returns
101*a03ca8b9SKrzysztof Kosiński   // std::nullopt.
102*a03ca8b9SKrzysztof Kosiński   std::optional<ztf::LineCol> OffsetToLineCol(offset_t offset) const;
103*a03ca8b9SKrzysztof Kosiński 
104*a03ca8b9SKrzysztof Kosiński  private:
105*a03ca8b9SKrzysztof Kosiński   // Returns an iterator to the range containing |offset|, which is represented
106*a03ca8b9SKrzysztof Kosiński   // by its starting offset. The next element will contain the upper bound of
107*a03ca8b9SKrzysztof Kosiński   // the range.
108*a03ca8b9SKrzysztof Kosiński   std::vector<offset_t>::const_iterator SearchForRange(offset_t offset) const;
109*a03ca8b9SKrzysztof Kosiński 
110*a03ca8b9SKrzysztof Kosiński   // Returns the length of a 1-indexed line. The caller is expected to check
111*a03ca8b9SKrzysztof Kosiński   // that the requested line exists.
112*a03ca8b9SKrzysztof Kosiński   offset_t LineLength(uint16_t line) const;
113*a03ca8b9SKrzysztof Kosiński 
  // Returns the number of lines, computed as one less than the number of
  // entries in |line_starts_| (the final entry is the image-size sentinel, not
  // a line start).
  // NOTE(review): size() is unsigned, so the subtraction wraps around when
  // |line_starts_| is empty. Presumably Init() always stores at least one
  // entry before this is called -- confirm, or guard against the empty case.
NumLines()114*a03ca8b9SKrzysztof Kosiński   offset_t NumLines() const {
115*a03ca8b9SKrzysztof Kosiński     return static_cast<offset_t>(line_starts_.size() - 1);
116*a03ca8b9SKrzysztof Kosiński   }
117*a03ca8b9SKrzysztof Kosiński 
118*a03ca8b9SKrzysztof Kosiński   // |line_starts_| is a sorted list of each line's starting offset, along with
119*a03ca8b9SKrzysztof Kosiński   // the image size as the sentinel; it looks like {0, ..., image.size}.
120*a03ca8b9SKrzysztof Kosiński   std::vector<offset_t> line_starts_;
121*a03ca8b9SKrzysztof Kosiński };
122*a03ca8b9SKrzysztof Kosiński 
123*a03ca8b9SKrzysztof Kosiński // Disassembler for Zucchini Text Format (ZTF).
// Derives from Disassembler and overrides GetExeType(), GetExeTypeString(),
// MakeReferenceGroups() and Parse(). The class is non-copyable (copy
// construction and copy assignment are deleted).
124*a03ca8b9SKrzysztof Kosiński class DisassemblerZtf : public Disassembler {
125*a03ca8b9SKrzysztof Kosiński  public:
  // NOTE(review): how |kVersion| is consumed is not visible in this header;
  // presumably it identifies the revision of this disassembler -- confirm.
126*a03ca8b9SKrzysztof Kosiński   static constexpr uint16_t kVersion = 1;
127*a03ca8b9SKrzysztof Kosiński 
128*a03ca8b9SKrzysztof Kosiński   // Target Pools
  // One pool per pair of enclosing characters; values are 0..3 in declaration
  // order.
129*a03ca8b9SKrzysztof Kosiński   enum ReferencePool : uint8_t {
130*a03ca8b9SKrzysztof Kosiński     kAngles,      // <>
131*a03ca8b9SKrzysztof Kosiński     kBraces,      // {}
132*a03ca8b9SKrzysztof Kosiński     kBrackets,    // []
133*a03ca8b9SKrzysztof Kosiński     kParentheses  // ()
134*a03ca8b9SKrzysztof Kosiński   };
135*a03ca8b9SKrzysztof Kosiński 
136*a03ca8b9SKrzysztof Kosiński   // Type breakdown. Should contain all permutations of ReferencePool, Abs|Rel
137*a03ca8b9SKrzysztof Kosiński   // and the possible number of digits (1-3).
  // Enumerators are grouped by pool (in ReferencePool order), then Abs before
  // Rel, then by ascending digit count. |kNumTypes| comes last and therefore
  // equals the number of types: 4 pools * 2 kinds * 3 digit counts = 24.
138*a03ca8b9SKrzysztof Kosiński   enum ReferenceType : uint8_t {
139*a03ca8b9SKrzysztof Kosiński     kAnglesAbs1,
140*a03ca8b9SKrzysztof Kosiński     kAnglesAbs2,
141*a03ca8b9SKrzysztof Kosiński     kAnglesAbs3,
142*a03ca8b9SKrzysztof Kosiński     kAnglesRel1,
143*a03ca8b9SKrzysztof Kosiński     kAnglesRel2,
144*a03ca8b9SKrzysztof Kosiński     kAnglesRel3,
145*a03ca8b9SKrzysztof Kosiński     kBracesAbs1,
146*a03ca8b9SKrzysztof Kosiński     kBracesAbs2,
147*a03ca8b9SKrzysztof Kosiński     kBracesAbs3,
148*a03ca8b9SKrzysztof Kosiński     kBracesRel1,
149*a03ca8b9SKrzysztof Kosiński     kBracesRel2,
150*a03ca8b9SKrzysztof Kosiński     kBracesRel3,
151*a03ca8b9SKrzysztof Kosiński     kBracketsAbs1,
152*a03ca8b9SKrzysztof Kosiński     kBracketsAbs2,
153*a03ca8b9SKrzysztof Kosiński     kBracketsAbs3,
154*a03ca8b9SKrzysztof Kosiński     kBracketsRel1,
155*a03ca8b9SKrzysztof Kosiński     kBracketsRel2,
156*a03ca8b9SKrzysztof Kosiński     kBracketsRel3,
157*a03ca8b9SKrzysztof Kosiński     kParenthesesAbs1,
158*a03ca8b9SKrzysztof Kosiński     kParenthesesAbs2,
159*a03ca8b9SKrzysztof Kosiński     kParenthesesAbs3,
160*a03ca8b9SKrzysztof Kosiński     kParenthesesRel1,
161*a03ca8b9SKrzysztof Kosiński     kParenthesesRel2,
162*a03ca8b9SKrzysztof Kosiński     kParenthesesRel3,
163*a03ca8b9SKrzysztof Kosiński     kNumTypes
164*a03ca8b9SKrzysztof Kosiński   };
165*a03ca8b9SKrzysztof Kosiński 
166*a03ca8b9SKrzysztof Kosiński   DisassemblerZtf();
167*a03ca8b9SKrzysztof Kosiński   DisassemblerZtf(const DisassemblerZtf&) = delete;
168*a03ca8b9SKrzysztof Kosiński   const DisassemblerZtf& operator=(const DisassemblerZtf&) = delete;
169*a03ca8b9SKrzysztof Kosiński   ~DisassemblerZtf() override;
170*a03ca8b9SKrzysztof Kosiński 
171*a03ca8b9SKrzysztof Kosiński   // Applies quick checks to determine if |image| *may* point to the start of a
172*a03ca8b9SKrzysztof Kosiński   // ZTF file. Returns true on success.
173*a03ca8b9SKrzysztof Kosiński   static bool QuickDetect(ConstBufferView image);
174*a03ca8b9SKrzysztof Kosiński 
175*a03ca8b9SKrzysztof Kosiński   // Disassembler:
176*a03ca8b9SKrzysztof Kosiński   ExecutableType GetExeType() const override;
177*a03ca8b9SKrzysztof Kosiński   std::string GetExeTypeString() const override;
178*a03ca8b9SKrzysztof Kosiński   std::vector<ReferenceGroup> MakeReferenceGroups() const override;
179*a03ca8b9SKrzysztof Kosiński 
180*a03ca8b9SKrzysztof Kosiński   // Reference Readers, templated to allow configurable digit count and pool.
  // NOTE(review): |lo| and |hi| presumably bound the offset range to scan for
  // references -- confirm (including whether |hi| is exclusive) in the
  // implementation file.
181*a03ca8b9SKrzysztof Kosiński   template <uint8_t digits, ReferencePool pool>
182*a03ca8b9SKrzysztof Kosiński   std::unique_ptr<ReferenceReader> MakeReadAbs(offset_t lo, offset_t hi);
183*a03ca8b9SKrzysztof Kosiński   template <uint8_t digits, ReferencePool pool>
184*a03ca8b9SKrzysztof Kosiński   std::unique_ptr<ReferenceReader> MakeReadRel(offset_t lo, offset_t hi);
185*a03ca8b9SKrzysztof Kosiński 
186*a03ca8b9SKrzysztof Kosiński   // Reference Writers, templated to allow configurable digit count and pool.
  // NOTE(review): |image| is presumably the mutable buffer the returned writer
  // modifies -- confirm in the implementation file.
187*a03ca8b9SKrzysztof Kosiński   template <uint8_t digits, ReferencePool pool>
188*a03ca8b9SKrzysztof Kosiński   std::unique_ptr<ReferenceWriter> MakeWriteAbs(MutableBufferView image);
189*a03ca8b9SKrzysztof Kosiński   template <uint8_t digits, ReferencePool pool>
190*a03ca8b9SKrzysztof Kosiński   std::unique_ptr<ReferenceWriter> MakeWriteRel(MutableBufferView image);
191*a03ca8b9SKrzysztof Kosiński 
192*a03ca8b9SKrzysztof Kosiński  private:
  // Gives the Disassembler base class access to private members of this class
  // (Parse() below is private). NOTE(review): presumably needed by a factory
  // in Disassembler -- confirm.
193*a03ca8b9SKrzysztof Kosiński   friend Disassembler;
194*a03ca8b9SKrzysztof Kosiński 
195*a03ca8b9SKrzysztof Kosiński   // Disassembler:
196*a03ca8b9SKrzysztof Kosiński   bool Parse(ConstBufferView image) override;
197*a03ca8b9SKrzysztof Kosiński 
  // Translator between offsets and line/column positions.
  // NOTE(review): presumably initialized from the image in Parse() -- confirm.
198*a03ca8b9SKrzysztof Kosiński   ZtfTranslator translator_;
199*a03ca8b9SKrzysztof Kosiński };
200*a03ca8b9SKrzysztof Kosiński 
201*a03ca8b9SKrzysztof Kosiński }  // namespace zucchini
202*a03ca8b9SKrzysztof Kosiński 
203*a03ca8b9SKrzysztof Kosiński #endif  // COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_
204