1 // Copyright 2017 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ 6 #define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ 7 8 #include <stdint.h> 9 10 #include <tuple> 11 #include <vector> 12 13 #include "components/zucchini/algorithm.h" 14 #include "components/zucchini/image_utils.h" 15 16 namespace zucchini { 17 18 // There are several ways to reason about addresses in an image: 19 // - Offset: Position relative to start of image. 20 // - VA (Virtual Address): Virtual memory address of a loaded image. This is 21 // subject to relocation by the OS. 22 // - RVA (Relative Virtual Address): VA relative to some base address. This is 23 // the preferred way to specify pointers in an image. 24 // 25 // Zucchini is primarily concerned with offsets and RVAs. Executable images like 26 // PE and ELF are organized into sections. Each section specifies offset and RVA 27 // ranges as: 28 // {Offset start, offset size, RVA start, RVA size}. 29 // This constitutes a basic unit to translate between offsets and RVAs. Note: 30 // |offset size| < |RVA size| is possible. For example, the .bss section can can 31 // have zero-filled statically-allocated data that have no corresponding bytes 32 // on image (to save space). This poses a problem for Zucchini, which stores 33 // addresses as offsets: now we'd have "dangling RVAs" that don't map to 34 // offsets! Some ways to handling this are: 35 // 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means 36 // some reference targets would escape detection and processing. 37 // 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data 38 // must not be read on these fake offsets, which are only valid as target 39 // addresses for reference matching. 40 // As for |RVA size| < |offset size|, the extra portion just gets ignored. 41 // 42 // Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to 43 // fake offsets by adding a large value. This value can be chosen as an 44 // exclusive upper bound of all offsets (i.e., image size). This allows them to 45 // be easily detected and processed as a special-case. 46 // TODO(huangs): Investigate option (1), now that the refactored code makes 47 // experimentation easier. 48 // TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t| 49 // ranges and create "fake" units to accommodate dangling RVAs. Then 50 // AddressTranslator can be simplified. 51 52 // Virtual Address relative to some base address (RVA). There's distinction 53 // between "valid RVA" and "existent RVA": 54 // - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|. 55 // - Existent RVA: An RVA that has semantic meaning in an image, and may 56 // translate to an offset in an image or (if a dangling RVA) a fake offset. 57 // All existent RVAs are valid RVAs. 58 using rva_t = uint32_t; 59 // Divide by 2 to match |kOffsetBound|. 60 constexpr rva_t kRvaBound = static_cast<rva_t>(-1) / 2; 61 constexpr rva_t kInvalidRva = static_cast<rva_t>(-2); 62 63 // A utility to translate between offsets and RVAs in an image. 64 class AddressTranslator { 65 public: 66 // A basic unit for address translation, roughly maps to a section, but may 67 // be processed (e.g., merged) as an optimization. 68 struct Unit { offset_endUnit69 offset_t offset_end() const { return offset_begin + offset_size; } rva_endUnit70 rva_t rva_end() const { return rva_begin + rva_size; } IsEmptyUnit71 bool IsEmpty() const { 72 // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed 73 // yet, and once it is then it's empty. 74 // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but 75 // is not empty. 76 return rva_size == 0; 77 } CoversOffsetUnit78 bool CoversOffset(offset_t offset) const { 79 return RangeCovers(offset_begin, offset_size, offset); 80 } CoversRvaUnit81 bool CoversRva(rva_t rva) const { 82 return RangeCovers(rva_begin, rva_size, rva); 83 } CoversDanglingRvaUnit84 bool CoversDanglingRva(rva_t rva) const { 85 return CoversRva(rva) && rva - rva_begin >= offset_size; 86 } 87 // Assumes valid |offset| (*cannot* be fake offset). OffsetToRvaUnsafeUnit88 rva_t OffsetToRvaUnsafe(offset_t offset) const { 89 return offset - offset_begin + rva_begin; 90 } 91 // Assumes valid |rva| (*can* be danging RVA). RvaToOffsetUnsafeUnit92 offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const { 93 rva_t delta = rva - rva_begin; 94 return delta < offset_size ? delta + offset_begin 95 : fake_offset_begin + rva; 96 } HasDanglingRvaUnit97 bool HasDanglingRva() const { return rva_size > offset_size; } 98 friend bool operator==(const Unit& a, const Unit& b) { 99 return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) == 100 std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size); 101 } 102 103 offset_t offset_begin; 104 offset_t offset_size; 105 rva_t rva_begin; 106 rva_t rva_size; 107 }; 108 109 // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit 110 // found, to reduce the number of OffsetToUnit() calls for clustered queries. 111 class OffsetToRvaCache { 112 public: 113 // Embeds |translator| for use. Now object lifetime is tied to |translator| 114 // lifetime. 115 explicit OffsetToRvaCache(const AddressTranslator& translator); 116 OffsetToRvaCache(const OffsetToRvaCache&) = delete; 117 const OffsetToRvaCache& operator=(const OffsetToRvaCache&) = delete; 118 119 rva_t Convert(offset_t offset) const; 120 121 private: 122 const AddressTranslator& translator_; 123 mutable const AddressTranslator::Unit* cached_unit_ = nullptr; 124 }; 125 126 // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit 127 // found, to reduce the number of RvaToUnit() calls for clustered queries. 128 class RvaToOffsetCache { 129 public: 130 // Embeds |translator| for use. Now object lifetime is tied to |translator| 131 // lifetime. 132 explicit RvaToOffsetCache(const AddressTranslator& translator); 133 RvaToOffsetCache(const RvaToOffsetCache&) = delete; 134 const RvaToOffsetCache& operator=(const RvaToOffsetCache&) = delete; 135 136 bool IsValid(rva_t rva) const; 137 138 offset_t Convert(rva_t rva) const; 139 140 private: 141 const AddressTranslator& translator_; 142 mutable const AddressTranslator::Unit* cached_unit_ = nullptr; 143 }; 144 145 enum Status { 146 kSuccess = 0, 147 kErrorOverflow, 148 kErrorBadOverlap, 149 kErrorBadOverlapDanglingRva, 150 kErrorFakeOffsetBeginTooLarge, 151 }; 152 153 AddressTranslator(); 154 AddressTranslator(AddressTranslator&&); 155 AddressTranslator(const AddressTranslator&) = delete; 156 const AddressTranslator& operator=(const AddressTranslator&) = delete; 157 ~AddressTranslator(); 158 159 // Consumes |units| to populate data in this class. Performs consistency 160 // checks and overlapping Units. Returns Status to indicate success. 161 Status Initialize(std::vector<Unit>&& units); 162 163 // Returns the (possibly dangling) RVA corresponding to |offset|, or 164 // kInvalidRva if not found. 165 rva_t OffsetToRva(offset_t offset) const; 166 167 // Returns the (possibly fake) offset corresponding to |rva|, or 168 // kInvalidOffset if not found (i.e., |rva| is non-existent). 169 offset_t RvaToOffset(rva_t rva) const; 170 171 // For testing. fake_offset_begin()172 offset_t fake_offset_begin() const { return fake_offset_begin_; } 173 units_sorted_by_offset()174 const std::vector<Unit>& units_sorted_by_offset() const { 175 return units_sorted_by_offset_; 176 } 177 units_sorted_by_rva()178 const std::vector<Unit>& units_sorted_by_rva() const { 179 return units_sorted_by_rva_; 180 } 181 182 private: 183 // Helper to find the Unit that contains given |offset| or |rva|. Returns null 184 // if not found. 185 const Unit* OffsetToUnit(offset_t offset) const; 186 const Unit* RvaToUnit(rva_t rva) const; 187 188 // Storage of Units. All offset ranges are non-empty and disjoint. Likewise 189 // for all RVA ranges. 190 std::vector<Unit> units_sorted_by_offset_; 191 std::vector<Unit> units_sorted_by_rva_; 192 193 // Conversion factor to translate between dangling RVAs and fake offsets. 194 offset_t fake_offset_begin_; 195 }; 196 197 } // namespace zucchini 198 199 #endif // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ 200