1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
6 #define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
7 
8 #include <stdint.h>
9 
10 #include <tuple>
11 #include <vector>
12 
13 #include "components/zucchini/algorithm.h"
14 #include "components/zucchini/image_utils.h"
15 
16 namespace zucchini {
17 
18 // There are several ways to reason about addresses in an image:
19 // - Offset: Position relative to start of image.
20 // - VA (Virtual Address): Virtual memory address of a loaded image. This is
21 //   subject to relocation by the OS.
22 // - RVA (Relative Virtual Address): VA relative to some base address. This is
23 //   the preferred way to specify pointers in an image.
24 //
25 // Zucchini is primarily concerned with offsets and RVAs. Executable images like
26 // PE and ELF are organized into sections. Each section specifies offset and RVA
27 // ranges as:
28 //   {Offset start, offset size, RVA start, RVA size}.
29 // This constitutes a basic unit to translate between offsets and RVAs. Note:
30 // |offset size| < |RVA size| is possible. For example, the .bss section can can
31 // have zero-filled statically-allocated data that have no corresponding bytes
32 // on image (to save space). This poses a problem for Zucchini, which stores
33 // addresses as offsets: now we'd have "dangling RVAs" that don't map to
34 // offsets! Some ways to handling this are:
35 // 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means
36 //    some reference targets would escape detection and processing.
37 // 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data
38 //    must not be read on these fake offsets, which are only valid as target
39 //    addresses for reference matching.
40 // As for |RVA size| < |offset size|, the extra portion just gets ignored.
41 //
42 // Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to
43 // fake offsets by adding a large value. This value can be chosen as an
44 // exclusive upper bound of all offsets (i.e., image size). This allows them to
45 // be easily detected and processed as a special-case.
46 // TODO(huangs): Investigate option (1), now that the refactored code makes
47 // experimentation easier.
48 // TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t|
49 // ranges and create "fake" units to accommodate dangling RVAs. Then
50 // AddressTranslator can be simplified.
51 
52 // Virtual Address relative to some base address (RVA). There's distinction
53 // between "valid RVA" and "existent RVA":
54 // - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|.
55 // - Existent RVA: An RVA that has semantic meaning in an image, and may
56 //   translate to an offset in an image or (if a dangling RVA) a fake offset.
57 //   All existent RVAs are valid RVAs.
58 using rva_t = uint32_t;
59 // Divide by 2 to match |kOffsetBound|.
60 constexpr rva_t kRvaBound = static_cast<rva_t>(-1) / 2;
61 constexpr rva_t kInvalidRva = static_cast<rva_t>(-2);
62 
63 // A utility to translate between offsets and RVAs in an image.
64 class AddressTranslator {
65  public:
66   // A basic unit for address translation, roughly maps to a section, but may
67   // be processed (e.g., merged) as an optimization.
68   struct Unit {
offset_endUnit69     offset_t offset_end() const { return offset_begin + offset_size; }
rva_endUnit70     rva_t rva_end() const { return rva_begin + rva_size; }
IsEmptyUnit71     bool IsEmpty() const {
72       // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed
73       // yet, and once it is then it's empty.
74       // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but
75       // is not empty.
76       return rva_size == 0;
77     }
CoversOffsetUnit78     bool CoversOffset(offset_t offset) const {
79       return RangeCovers(offset_begin, offset_size, offset);
80     }
CoversRvaUnit81     bool CoversRva(rva_t rva) const {
82       return RangeCovers(rva_begin, rva_size, rva);
83     }
CoversDanglingRvaUnit84     bool CoversDanglingRva(rva_t rva) const {
85       return CoversRva(rva) && rva - rva_begin >= offset_size;
86     }
87     // Assumes valid |offset| (*cannot* be fake offset).
OffsetToRvaUnsafeUnit88     rva_t OffsetToRvaUnsafe(offset_t offset) const {
89       return offset - offset_begin + rva_begin;
90     }
91     // Assumes valid |rva| (*can* be danging RVA).
RvaToOffsetUnsafeUnit92     offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const {
93       rva_t delta = rva - rva_begin;
94       return delta < offset_size ? delta + offset_begin
95                                  : fake_offset_begin + rva;
96     }
HasDanglingRvaUnit97     bool HasDanglingRva() const { return rva_size > offset_size; }
98     friend bool operator==(const Unit& a, const Unit& b) {
99       return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) ==
100              std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size);
101     }
102 
103     offset_t offset_begin;
104     offset_t offset_size;
105     rva_t rva_begin;
106     rva_t rva_size;
107   };
108 
109   // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit
110   // found, to reduce the number of OffsetToUnit() calls for clustered queries.
111   class OffsetToRvaCache {
112    public:
113     // Embeds |translator| for use. Now object lifetime is tied to |translator|
114     // lifetime.
115     explicit OffsetToRvaCache(const AddressTranslator& translator);
116     OffsetToRvaCache(const OffsetToRvaCache&) = delete;
117     const OffsetToRvaCache& operator=(const OffsetToRvaCache&) = delete;
118 
119     rva_t Convert(offset_t offset) const;
120 
121    private:
122     const AddressTranslator& translator_;
123     mutable const AddressTranslator::Unit* cached_unit_ = nullptr;
124   };
125 
126   // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit
127   // found, to reduce the number of RvaToUnit() calls for clustered queries.
128   class RvaToOffsetCache {
129    public:
130     // Embeds |translator| for use. Now object lifetime is tied to |translator|
131     // lifetime.
132     explicit RvaToOffsetCache(const AddressTranslator& translator);
133     RvaToOffsetCache(const RvaToOffsetCache&) = delete;
134     const RvaToOffsetCache& operator=(const RvaToOffsetCache&) = delete;
135 
136     bool IsValid(rva_t rva) const;
137 
138     offset_t Convert(rva_t rva) const;
139 
140    private:
141     const AddressTranslator& translator_;
142     mutable const AddressTranslator::Unit* cached_unit_ = nullptr;
143   };
144 
145   enum Status {
146     kSuccess = 0,
147     kErrorOverflow,
148     kErrorBadOverlap,
149     kErrorBadOverlapDanglingRva,
150     kErrorFakeOffsetBeginTooLarge,
151   };
152 
153   AddressTranslator();
154   AddressTranslator(AddressTranslator&&);
155   AddressTranslator(const AddressTranslator&) = delete;
156   const AddressTranslator& operator=(const AddressTranslator&) = delete;
157   ~AddressTranslator();
158 
159   // Consumes |units| to populate data in this class. Performs consistency
160   // checks and overlapping Units. Returns Status to indicate success.
161   Status Initialize(std::vector<Unit>&& units);
162 
163   // Returns the (possibly dangling) RVA corresponding to |offset|, or
164   // kInvalidRva if not found.
165   rva_t OffsetToRva(offset_t offset) const;
166 
167   // Returns the (possibly fake) offset corresponding to |rva|, or
168   // kInvalidOffset if not found (i.e., |rva| is non-existent).
169   offset_t RvaToOffset(rva_t rva) const;
170 
171   // For testing.
fake_offset_begin()172   offset_t fake_offset_begin() const { return fake_offset_begin_; }
173 
units_sorted_by_offset()174   const std::vector<Unit>& units_sorted_by_offset() const {
175     return units_sorted_by_offset_;
176   }
177 
units_sorted_by_rva()178   const std::vector<Unit>& units_sorted_by_rva() const {
179     return units_sorted_by_rva_;
180   }
181 
182  private:
183   // Helper to find the Unit that contains given |offset| or |rva|. Returns null
184   // if not found.
185   const Unit* OffsetToUnit(offset_t offset) const;
186   const Unit* RvaToUnit(rva_t rva) const;
187 
188   // Storage of Units. All offset ranges are non-empty and disjoint. Likewise
189   // for all RVA ranges.
190   std::vector<Unit> units_sorted_by_offset_;
191   std::vector<Unit> units_sorted_by_rva_;
192 
193   // Conversion factor to translate between dangling RVAs and fake offsets.
194   offset_t fake_offset_begin_;
195 };
196 
197 }  // namespace zucchini
198 
199 #endif  // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
200