#pragma once

#include <ATen/Tensor.h>

#include <cstddef>
#include <cstdint>
#include <functional>
#include <utility>
#include <vector>

namespace at::functionalization {

// See Note [Functionalization Pass In Core]

// ViewMeta is a class used by the functionalization pass to navigate between
// a base tensor and a view tensor.
// For example, if I call `b = a.view1(...)`
// the functionalization pass will generate and store a ViewMeta on b that looks
// like:
//
// ViewMeta(
//   [<captures>](const Tensor& base, int64_t mutated_view_idx) {
//     return base.view1(...);
//   },
//   [<captures>](const at::Tensor& base, const at::Tensor& mutated_view,
//   int64_t mutated_view_idx) -> at::Tensor {
//     return at::functionalization::impl::view1_inverse(base, mutated_view,
//     ...);
//   }
//
// The forward_fn lambda describes how to replay view1 on a tensor.
//
// The reverse_fn lambda describes how, given a tensor that is already a view,
// how to get the corresponding base tensor. See Note [Functionalization Pass:
// View Inverses] for details.
32 struct ViewMeta { 33 ViewMeta( 34 std::function<Tensor(const Tensor&, int64_t)> forward, 35 std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse, 36 bool has_symbolic_inputs, 37 bool is_multi_output = false, 38 bool is_as_strided = false, 39 int64_t out_idx = 0) forward_fnViewMeta40 : forward_fn(std::move(forward)), 41 reverse_fn(std::move(reverse)), 42 out_index(out_idx), 43 is_multi_output(is_multi_output), 44 is_as_strided(is_as_strided), 45 has_symbolic_inputs(has_symbolic_inputs) {} 46 47 std::function<Tensor(const Tensor&, int64_t)> forward_fn; 48 std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse_fn; 49 // See Note [out_idx in ViewMeta] 50 int64_t out_index; 51 52 // Tells us if this is a multi-output view 53 bool is_multi_output; 54 55 bool is_as_strided; 56 57 // Tells us if this view operation has any symbolic inputs 58 bool has_symbolic_inputs; 59 60 // Returns a copy of the current ViewMeta, if out_idx matches the current 61 // out_index. Otherwise, returns a new ViewMeta with the same forward/reverse 62 // functions, but a new out index. 63 ViewMeta to_out_idx(int64_t out_idx); 64 }; 65 66 // FunctionalStorageImpl is a subclass of StorageImpl used by the 67 // functionalization pass. It has no underlying data (similar to meta storage). 68 // It also knows how to reflect mutations to tensors in the absence of a valid 69 // data pointer. 70 // 71 // A storage represents the state shared by (potentially multiple) views of the 72 // same tensor. For example, in the following code: 73 // 74 // b = a.view1(...) 75 // c = b.view2(...) 76 // b.add_(1) 77 // --> storage.add_update(b, {view1_meta}) 78 // 79 // The call to add_(1) will result in a call to alias.add_update(b, 80 // {view1_meta}), queueing up the mutation from b onto the alias. Later, suppose 81 // c is used in an expression (e.g. you try to print c, or pass it to an 82 // operator). Doing so will involve "syncing" c. 
First we apply any pending 83 // updates to the alias, and then we regenerate c by replaying its views off of 84 // the updated alias. E.g: 85 // 86 // print(str(c)) 87 // --> c.sync_() 88 // --> alias.apply_updates() // after this, the alias will be updated to 89 // reflect the mutation to b 90 struct TORCH_API FunctionalStorageImpl : public c10::StorageImpl { 91 public: 92 struct Update { 93 // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) 94 const at::Tensor new_val; 95 // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) 96 const std::vector<ViewMeta> view_metas; 97 }; 98 99 explicit FunctionalStorageImpl(const Tensor& value); 100 101 void add_update( 102 const Tensor& updated_val, 103 const std::vector<ViewMeta>& view_metas); 104 bool apply_updates(); baseFunctionalStorageImpl105 const Tensor& base() { 106 return base_; 107 } generationFunctionalStorageImpl108 size_t generation() const { 109 return generation_; 110 } freezeFunctionalStorageImpl111 void freeze() { 112 frozen_ = true; 113 } 114 get_storage_sizeFunctionalStorageImpl115 c10::SymInt get_storage_size(bool before) { 116 if (before) { 117 return original_storage_size_; 118 } else { 119 return curr_storage_size_; 120 } 121 } 122 123 ~FunctionalStorageImpl() override = default; 124 mark_mutationFunctionalStorageImpl125 void mark_mutation() { 126 mutation_counter_++; 127 } mark_mutation_during_no_grad_or_inference_modeFunctionalStorageImpl128 void mark_mutation_during_no_grad_or_inference_mode() { 129 mutation_counter_during_no_grad_or_inference_mode_++; 130 } mark_mutation_hidden_from_autogradFunctionalStorageImpl131 void mark_mutation_hidden_from_autograd() { 132 mutation_counter_hidden_from_autograd_++; 133 } 134 are_all_mutations_under_no_grad_or_inference_modeFunctionalStorageImpl135 bool are_all_mutations_under_no_grad_or_inference_mode() const { 136 auto non_autograd_mutations = 137 mutation_counter_during_no_grad_or_inference_mode_ + 138 
mutation_counter_hidden_from_autograd_; 139 // The <= is because both counters will technically be incremented, if we 140 // perform e.g. a triton kernel mutation under no_grad 141 return mutation_counter_ <= non_autograd_mutations; 142 } 143 are_all_mutations_hidden_from_autogradFunctionalStorageImpl144 bool are_all_mutations_hidden_from_autograd() const { 145 // mutations under no_grad / inference_mode are technically not hidden from 146 // autograd - they change the version counter 147 return mutation_counter_ <= mutation_counter_hidden_from_autograd_; 148 } 149 mark_inductor_storage_resizeFunctionalStorageImpl150 void mark_inductor_storage_resize(c10::SymInt new_size) { 151 inductor_storage_resized_ = true; 152 curr_storage_size_ = std::move(new_size); 153 } 154 was_inductor_storage_resizedFunctionalStorageImpl155 bool was_inductor_storage_resized() { 156 return inductor_storage_resized_; 157 } 158 159 private: 160 // NB: base_ should always point to a tensor BELOW the current 161 // functionalization layer. This is mainly to avoid reference cycles. e.g. 162 // given `b = a.view(...)` Both a.storage_ and b.storage_ are a 163 // FunctionStorageImpl containing an Walualias, with contains a Tensor 164 // `base_`. In this case (where a and b are FunctionalTensorWrapper's), base_ 165 // should point not to a, but to a's unwrapped value, a.value_` See Note 166 // [Functionalization: Walualias Removal] for a diagram that shows this 167 // visually. 168 at::Tensor base_; 169 std::vector<Update> updates_; 170 // generation_ gets incremented every time a mutation is queued onto the 171 // alias. It is used to determine if a given tensor is "up to date", or if it 172 // needs to be regenerated from the alias. 173 size_t generation_ = 0; 174 // If frozen, no more mutations are allowed on this storage. Once frozen, a 175 // storage cannot be unfrozen. 
176 bool frozen_ = false; 177 178 // These mutation counters are bumped on the storage 179 // whenever a FunctionalTensorWrapper experiences a mutation. 180 // When the mutation is under no_grad, or comes from a triton kernel, we also 181 // bump the corresponding during_no_grad or hidden_from_autograd counters. Why 182 // do we need to detect these two situations separately from "normal" input 183 // mutations? (1) "normal" input mutations can mutate autograd metadata like 184 // .grad_fn, 185 // in which case they need to be replayed outside of the compiled graph 186 // (2) "no_grad" input mutations are generally safe to keep in the graph (and 187 // compile), 188 // but they bump the tensor's VC, so we need to mark_dirty() on the inputs 189 // in torch.compile 190 // (3) mutations that are fully hidden from autograd (e.g. from a triton 191 // kernel) 192 // do not mutate any autograd state, and be fully kept in the graph 193 // When we detect that an input was mutated, we need to be able to tell if: 194 // (1) all of the mutations were from triton kernels 195 // (2) all of the mutations were under no_grad 196 uint64_t mutation_counter_during_no_grad_or_inference_mode_ = 0; 197 uint64_t mutation_counter_ = 0; 198 uint64_t mutation_counter_hidden_from_autograd_ = 0; 199 200 // Used to tell if: 201 // (1) There were any storage resizes on a graph input 202 // (2) The original/curr storage size tell us if these resizes result in a nop 203 bool inductor_storage_resized_ = false; 204 c10::SymInt original_storage_size_; 205 c10::SymInt curr_storage_size_; 206 }; 207 208 } // namespace at::functionalization 209