xref: /aosp_15_r20/external/pytorch/aten/src/ATen/FunctionalStorageImpl.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
#pragma once

#include <ATen/Tensor.h>

#include <utility>

namespace at::functionalization {

// See Note [Functionalization Pass In Core]

// ViewMeta is a class used by the functionalization pass to navigate between
// a base tensor and a view tensor.
// For example, if I call `b = a.view1(...)`
// the functionalization pass will generate and store a ViewMeta on b that looks
// like:
//
// ViewMeta(
//   [<captures>](const Tensor& base, int64_t mutated_view_idx) {
//     return base.view1(...);
//   },
//   [<captures>](const at::Tensor& base, const at::Tensor& mutated_view,
//   int64_t mutated_view_idx) -> at::Tensor {
//     return at::functionalization::impl::view1_inverse(base, mutated_view,
//     ...);
//   }
// )
//
// The forward_fn lambda describes how to replay view1 on a tensor.
//
// The reverse_fn lambda describes, given a tensor that is already a view,
// how to get the corresponding base tensor. See Note [Functionalization Pass:
// View Inverses] for details.
struct ViewMeta {
  ViewMeta(
      std::function<Tensor(const Tensor&, int64_t)> forward,
      std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse,
      bool has_symbolic_inputs,
      bool is_multi_output = false,
      bool is_as_strided = false,
      int64_t out_idx = 0)
      : forward_fn(std::move(forward)),
        reverse_fn(std::move(reverse)),
        out_index(out_idx),
        is_multi_output(is_multi_output),
        is_as_strided(is_as_strided),
        has_symbolic_inputs(has_symbolic_inputs) {}

  std::function<Tensor(const Tensor&, int64_t)> forward_fn;
  std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse_fn;
  // See Note [out_idx in ViewMeta]
  int64_t out_index;

  // Tells us if this is a multi-output view
  bool is_multi_output;

  bool is_as_strided;

  // Tells us if this view operation has any symbolic inputs
  bool has_symbolic_inputs;

  // Returns a copy of the current ViewMeta, if out_idx matches the current
  // out_index. Otherwise, returns a new ViewMeta with the same forward/reverse
  // functions, but a new out index.
  ViewMeta to_out_idx(int64_t out_idx);
};
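
// Illustration only (a hedged sketch, not code generated by the
// functionalization pass): a ViewMeta for `b = a.view(sizes)` might be built
// roughly as follows. `view_inverse` is a hypothetical stand-in for the
// at::functionalization::impl::*_inverse helpers referenced in the comment
// above.
//
//   std::vector<int64_t> sizes = {2, 3};
//   at::functionalization::ViewMeta view_meta(
//       /*forward=*/
//       [sizes](const at::Tensor& base, int64_t /*mutated_view_idx*/) {
//         // Replay the view on a (possibly updated) base.
//         return base.view(sizes);
//       },
//       /*reverse=*/
//       [sizes](const at::Tensor& base, const at::Tensor& mutated_view,
//               int64_t /*mutated_view_idx*/) -> at::Tensor {
//         // Scatter the mutated view back into the base (hypothetical helper).
//         return view_inverse(base, mutated_view, sizes);
//       },
//       /*has_symbolic_inputs=*/false);
//   // view(...) is a single-output view and not as_strided, so the remaining
//   // constructor arguments keep their defaults.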

// FunctionalStorageImpl is a subclass of StorageImpl used by the
// functionalization pass. It has no underlying data (similar to meta storage).
// It also knows how to reflect mutations to tensors in the absence of a valid
// data pointer.
//
// A storage represents the state shared by (potentially multiple) views of the
// same tensor. For example, in the following code:
//
// b = a.view1(...)
// c = b.view2(...)
// b.add_(1)
// --> storage.add_update(b, {view1_meta})
//
// The call to add_(1) will result in a call to storage.add_update(b,
// {view1_meta}), queueing up the mutation from b onto the alias. Later, suppose
// c is used in an expression (e.g. you try to print c, or pass it to an
// operator). Doing so will involve "syncing" c. First we apply any pending
// updates to the alias, and then we regenerate c by replaying its views off of
// the updated alias. E.g.:
//
// print(str(c))
// --> c.sync_()
//     --> storage.apply_updates() // after this, the alias will be updated to
//     reflect the mutation to b
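//
// Illustration only (a hedged sketch of how the machinery above might drive
// this class; the real call sites live in the functionalization kernels and
// FunctionalTensorWrapper, and the variable names below are made up):
//
//   // b.add_(1): queue the mutation on the shared storage.
//   storage->add_update(/*updated_val=*/b_value, /*view_metas=*/{view1_meta});
//
//   // Syncing c: first fold any pending updates into the alias ...
//   if (storage->apply_updates()) {
//     // ... then regenerate c by replaying {view1_meta, view2_meta} on
//     // storage->base(), using generation() to tell whether c was stale.
//   }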
struct TORCH_API FunctionalStorageImpl : public c10::StorageImpl {
 public:
  struct Update {
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
    const at::Tensor new_val;
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
    const std::vector<ViewMeta> view_metas;
  };

  explicit FunctionalStorageImpl(const Tensor& value);

  void add_update(
      const Tensor& updated_val,
      const std::vector<ViewMeta>& view_metas);
  bool apply_updates();
  const Tensor& base() {
    return base_;
  }
  size_t generation() const {
    return generation_;
  }
  void freeze() {
    frozen_ = true;
  }

  c10::SymInt get_storage_size(bool before) {
    if (before) {
      return original_storage_size_;
    } else {
      return curr_storage_size_;
    }
  }

  ~FunctionalStorageImpl() override = default;

  void mark_mutation() {
    mutation_counter_++;
  }
  void mark_mutation_during_no_grad_or_inference_mode() {
    mutation_counter_during_no_grad_or_inference_mode_++;
  }
  void mark_mutation_hidden_from_autograd() {
    mutation_counter_hidden_from_autograd_++;
  }

  bool are_all_mutations_under_no_grad_or_inference_mode() const {
    auto non_autograd_mutations =
        mutation_counter_during_no_grad_or_inference_mode_ +
        mutation_counter_hidden_from_autograd_;
    // The <= is because both counters will technically be incremented if we
    // perform e.g. a triton kernel mutation under no_grad
    return mutation_counter_ <= non_autograd_mutations;
  }
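  // Worked example for are_all_mutations_under_no_grad_or_inference_mode():
  // per the comments above and below, a single triton kernel mutation under
  // no_grad bumps mutation_counter_ once, but bumps both
  // mutation_counter_during_no_grad_or_inference_mode_ and
  // mutation_counter_hidden_from_autograd_, so mutation_counter_ == 1 while
  // non_autograd_mutations == 2, and the <= check still holds.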

  bool are_all_mutations_hidden_from_autograd() const {
    // mutations under no_grad / inference_mode are technically not hidden from
    // autograd - they change the version counter
    return mutation_counter_ <= mutation_counter_hidden_from_autograd_;
  }

  void mark_inductor_storage_resize(c10::SymInt new_size) {
    inductor_storage_resized_ = true;
    curr_storage_size_ = std::move(new_size);
  }

  bool was_inductor_storage_resized() {
    return inductor_storage_resized_;
  }
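
  // Illustration only (an assumed scenario, and assuming original_storage_size_
  // records the input's storage size at construction): if a graph input's
  // storage is resized to 0 and later resized back to its original number of
  // bytes, inductor_storage_resized_ ends up true while curr_storage_size_
  // equals original_storage_size_, which is how the "resizes resulted in a
  // nop" case described below can be detected.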

 private:
  // NB: base_ should always point to a tensor BELOW the current
  // functionalization layer. This is mainly to avoid reference cycles. e.g.
  // given `b = a.view(...)`, both a.storage_ and b.storage_ are a
  // FunctionalStorageImpl containing an Alias, which contains a Tensor
  // `base_`. In this case (where a and b are FunctionalTensorWrapper's), base_
  // should point not to a, but to a's unwrapped value, `a.value_`. See Note
  // [Functionalization: Alias Removal] for a diagram that shows this
  // visually.
  at::Tensor base_;
  std::vector<Update> updates_;
  // generation_ gets incremented every time a mutation is queued onto the
  // alias. It is used to determine if a given tensor is "up to date", or if it
  // needs to be regenerated from the alias.
  size_t generation_ = 0;
  // If frozen, no more mutations are allowed on this storage.  Once frozen, a
  // storage cannot be unfrozen.
  bool frozen_ = false;

  // These mutation counters are bumped on the storage
  // whenever a FunctionalTensorWrapper experiences a mutation.
  // When the mutation is under no_grad, or comes from a triton kernel, we also
  // bump the corresponding during_no_grad or hidden_from_autograd counters.
  // Why do we need to detect these two situations separately from "normal"
  // input mutations?
  // (1) "normal" input mutations can mutate autograd metadata like .grad_fn,
  //     in which case they need to be replayed outside of the compiled graph
  // (2) "no_grad" input mutations are generally safe to keep in the graph (and
  //     compile), but they bump the tensor's VC, so we need to mark_dirty()
  //     on the inputs in torch.compile
  // (3) mutations that are fully hidden from autograd (e.g. from a triton
  //     kernel) do not mutate any autograd state, and can be fully kept in
  //     the graph
  // When we detect that an input was mutated, we need to be able to tell if:
  // (1) all of the mutations were from triton kernels
  // (2) all of the mutations were under no_grad
  uint64_t mutation_counter_during_no_grad_or_inference_mode_ = 0;
  uint64_t mutation_counter_ = 0;
  uint64_t mutation_counter_hidden_from_autograd_ = 0;

  // Used to tell:
  // (1) whether there were any storage resizes on a graph input
  // (2) whether those resizes resulted in a nop (by comparing the
  //     original/curr storage sizes)
  bool inductor_storage_resized_ = false;
  c10::SymInt original_storage_size_;
  c10::SymInt curr_storage_size_;
};

} // namespace at::functionalization