/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

// @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName

#include <optional>
#include <stack>

#include <executorch/backends/vulkan/runtime/api/api.h>

#include <executorch/backends/vulkan/runtime/graph/GraphConfig.h>

#include <executorch/backends/vulkan/runtime/graph/containers/SharedObject.h>
#include <executorch/backends/vulkan/runtime/graph/containers/Value.h>

#include <executorch/backends/vulkan/runtime/graph/ops/ExecuteNode.h>
#include <executorch/backends/vulkan/runtime/graph/ops/PrepackNode.h>

namespace vkcompute {

// Define valid scalar types that the Value class can
// accept
template <typename T>
struct is_valid_scalar_type : std::false_type {};

template <>
struct is_valid_scalar_type<int64_t> : std::true_type {};

template <>
struct is_valid_scalar_type<double> : std::true_type {};

template <>
struct is_valid_scalar_type<bool> : std::true_type {};

//
// Guarded Pointer Classes
//

class ComputeGraph;

#define DECL_VALUE_PTR_CLASS(classname, ctype)                         \
  class classname final {                                              \
    ComputeGraph* const graph_;                                        \
    ctype* ptr_;                                                       \
                                                                       \
   public:                                                             \
    explicit classname(ComputeGraph* const graph, const ValueRef idx); \
    ctype* operator->() const;                                         \
    ctype& operator*() const;                                          \
    ~classname();                                                      \
  };

DECL_VALUE_PTR_CLASS(vTensorPtr, api::vTensor)
DECL_VALUE_PTR_CLASS(TensorRefPtr, TensorRef)
DECL_VALUE_PTR_CLASS(StagingPtr, api::StagingBuffer)
DECL_VALUE_PTR_CLASS(IntListPtr, std::vector<int64_t>)
DECL_VALUE_PTR_CLASS(DoubleListPtr, std::vector<double>)
DECL_VALUE_PTR_CLASS(BoolListPtr, std::vector<bool>)
DECL_VALUE_PTR_CLASS(ValueListPtr, std::vector<ValueRef>)
DECL_VALUE_PTR_CLASS(SymIntPtr, SymInt)

#undef DECL_VALUE_PTR_CLASS

//
// TmpTensor
//

/*
 * This struct is used to recycle the memory of temporary tensors that are
 * created during the execution of a node. Upon construction, this struct
 * checks `tmp_shared_object_idxs_` of the provided `ComputeGraph` instance to
 * see whether any shared objects are available; if none are, a new one is
 * created. A tensor value is then added to the `ComputeGraph` instance with
 * the requested specifications. Upon destruction, the shared object index of
 * the temporary tensor is returned to `tmp_shared_object_idxs_`.
 *
 * Note that instances of this struct can be used as if they were `ValueRef`
 * thanks to a custom conversion operator.
 *
 * This class should only be used to create tensors whose lifetimes exist only
 * within a well-defined scope (i.e. within a function).
 */
struct TmpTensor {
  ComputeGraph* graph_p;
  int64_t sobj_idx;
  ValueRef vref;

  //
  // Match all available overloads of `add_tensor`
  //

  TmpTensor(
      ComputeGraph* const graph_ptr,
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::StorageType storage_type,
      const utils::GPUMemoryLayout memory_layout);

  TmpTensor(
      ComputeGraph* const graph_ptr,
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::StorageType storage_type);

  TmpTensor(
      ComputeGraph* const graph_ptr,
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::GPUMemoryLayout memory_layout);

  TmpTensor(
      ComputeGraph* const graph_ptr,
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype);

  // No copy construction or assignment
  TmpTensor(TmpTensor& other) = delete;
  TmpTensor& operator=(TmpTensor& other) = delete;

  // No move construction or assignment
  TmpTensor(TmpTensor&& other) = delete;
  TmpTensor& operator=(TmpTensor&& other) = delete;

  // Custom cast to ValueRef
  operator ValueRef() const {
    return vref;
  }

  ~TmpTensor();

 private:
  // Helper function to get the first available shared object index or request
  // that a new one be created.
  int64_t get_sobj_idx();
};
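
// Illustrative sketch, not part of the API surface: a TmpTensor is typically
// created inside an op implementation so that its shared object index is
// returned to the pool when it goes out of scope. The names `graph`, `sizes`,
// and the `vkapi::kFloat` constant below are assumptions for illustration.
//
//   {
//     TmpTensor scratch(&graph, sizes, vkapi::kFloat);
//     // `scratch` converts implicitly to ValueRef, so it can be passed to
//     // any function expecting one, e.g. graph.sizes_of(scratch);
//   } // destructor returns the shared object index for reuse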

//
// ComputeGraph
//

/*
 * This is the core data structure used to execute Vulkan models in graph mode.
 * As opposed to ATen/eager mode, where a command buffer is encoded for every
 * inference (since ops are executed as the model runs), in graph mode the ops
 * that compose the model are intended to be parsed only once, after which a
 * command buffer is encoded. Model inference then executes the cached command
 * buffer without needing to encode a new one.
 */
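// Typical lifecycle, as a hedged sketch (identifiers such as `config`,
// `sizes`, `w_sizes`, `w_data`, `data`, `out_data`, and `numel` are
// placeholders, not declared in this header):
//
//   ComputeGraph graph(config);
//   IOValueRef in = graph.add_input_tensor(sizes, vkapi::kFloat);
//   ValueRef w = graph.add_tensorref(w_sizes, vkapi::kFloat, w_data);
//   // ... operator implementations append PrepackNodes / ExecuteNodes ...
//   ValueRef out = graph.add_tensor(sizes, vkapi::kFloat);
//   graph.set_output_tensor(out);
//
//   graph.prepare();
//   graph.encode_prepack();
//   graph.prepack();
//   graph.encode_execute();
//
//   // Per inference: fill staging, run the cached command buffer, read back.
//   graph.copy_into_staging(in.staging, data, numel);
//   graph.execute();
//   graph.copy_from_staging(graph.outputs()[0].staging, out_data, numel);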
class ComputeGraph final {
 public:
  explicit ComputeGraph(GraphConfig config);

  ComputeGraph(ComputeGraph&&) = default;
  ComputeGraph& operator=(ComputeGraph&&) = default;

  ~ComputeGraph();

 private:
  GraphConfig config_;
  vkapi::DescriptorPoolConfig prepack_descriptor_counts_;
  vkapi::DescriptorPoolConfig execute_descriptor_counts_;

  std::unique_ptr<api::Context> context_;

  std::vector<SharedObject> shared_objects_;
  // This stack is used by `TmpTensor` instances to recycle shared objects
  // for temporary tensors. See the comments of `TmpTensor` for more details
  std::stack<int64_t> tmp_shared_object_idxs_;

  std::vector<Value> values_;
  std::vector<api::ParamsBuffer> param_ubos_;

  std::vector<std::unique_ptr<PrepackNode>> prepack_nodes_;
  std::vector<std::unique_ptr<ExecuteNode>> execute_nodes_;

  std::vector<IOValueRef> inputs_;
  std::vector<IOValueRef> outputs_;

 protected:
  size_t values_in_use_ = 0;

 public:
  //
  // Accessors
  //

  inline api::Context* context() {
    return context_.get();
  }

  inline std::vector<IOValueRef>& inputs() {
    return inputs_;
  }

  inline std::vector<IOValueRef>& outputs() {
    return outputs_;
  }

  inline std::vector<std::unique_ptr<PrepackNode>>& prepack_nodes() {
    return prepack_nodes_;
  }

  inline std::vector<std::unique_ptr<ExecuteNode>>& execute_nodes() {
    return execute_nodes_;
  }

  inline GraphConfig& graphconfig() {
    return config_;
  }

  //
  // Value Extraction
  //

#define GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(ptr_type, short_name, type_name) \
  inline ptr_type get_##short_name(const ValueRef idx) {                   \
    return ptr_type(this, idx);                                            \
  }                                                                        \
  inline bool val_is_##short_name(const ValueRef idx) {                    \
    return values_.at(idx).is##type_name();                                \
  }

  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(vTensorPtr, tensor, Tensor)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(TensorRefPtr, tref, TensorRef)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(StagingPtr, staging, Staging)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(IntListPtr, int_list, IntList)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(DoubleListPtr, double_list, DoubleList)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(BoolListPtr, bool_list, BoolList)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(ValueListPtr, value_list, ValueList)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(SymIntPtr, symint, SymInt)

#undef GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS
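
  // Illustrative sketch of the accessors generated above (`graph` and `idx`
  // are placeholder names): the returned pointer types are scoped guards and
  // should not outlive the block that created them.
  //
  //   if (graph.val_is_tensor(idx)) {
  //     vTensorPtr t = graph.get_tensor(idx);
  //     const auto sizes = t->sizes();
  //   } // guard released here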

#define GET_AND_CHECK_VAL_AS_TYPE_FNS(ctype, short_name, type_name) \
  inline ctype get_##short_name(const ValueRef idx) {               \
    return values_.at(idx).to##type_name();                         \
  }                                                                 \
  inline bool val_is_##short_name(const ValueRef idx) {             \
    return values_.at(idx).is##type_name();                         \
  }

  GET_AND_CHECK_VAL_AS_TYPE_FNS(int64_t, int, Int)
  GET_AND_CHECK_VAL_AS_TYPE_FNS(double, double, Double)
  GET_AND_CHECK_VAL_AS_TYPE_FNS(bool, bool, Bool)
  GET_AND_CHECK_VAL_AS_TYPE_FNS(std::string, string, String)

#undef GET_AND_CHECK_VAL_AS_TYPE_FNS
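
  // Similarly, the macro above generates by-value accessors, e.g. (with a
  // hypothetical `graph` and `idx`):
  //
  //   if (graph.val_is_int(idx)) {
  //     int64_t v = graph.get_int(idx);
  //   }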

  inline bool val_is_none(const ValueRef idx) {
    return idx == kDummyValueRef ? true : values_.at(idx).isNone();
  }

  inline TypeTag get_val_type(const ValueRef idx) {
    return values_.at(idx).type();
  }

  //
  // Tensor Properties Accessors
  //

  std::vector<int64_t> sizes_of(const ValueRef idx) const;

  /*
   * Returns the size of the tensor at `idx` along the specified dimension.
   * Negative indexing is allowed.
   */
  template <typename T>
  T size_at(const int64_t dim, const ValueRef idx) const {
    const Value& val = values_.at(idx);
    if (val.isTensor()) {
      return static_cast<T>(utils::val_at(dim, val.toConstTensor().sizes()));
    } else if (val.isTensorRef()) {
      return static_cast<T>(utils::val_at(dim, val.toConstTensorRef().sizes));
    }
    VK_THROW("Could not get sizes of value with type ", val.type());
  }
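
  // For example (sketch; `idx` refers to a tensor value in this graph):
  //   size_at<int64_t>(-1, idx) returns the size of the innermost (last)
  //   dimension, while size_at<int64_t>(0, idx) returns the outermost one.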

  int64_t dim_of(const ValueRef idx) const;

  std::vector<int64_t> dim_order_of(const ValueRef idx) const;

  std::vector<int64_t> strides_of(const ValueRef idx) const;

  vkapi::ScalarType dtype_of(const ValueRef idx) const;

  inline const utils::ivec3& logical_limits_of(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().logical_limits();
  }

  inline int32_t numel_of(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().numel();
  }

  inline utils::StorageType storage_type_of(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().storage_type();
  }

  inline bool is_buffer_storage(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().has_buffer_storage();
  }

  inline bool val_is_view_of(const ValueRef maybe_view, const ValueRef base)
      const {
    return values_.at(maybe_view)
        .toConstTensor()
        .is_view_of(values_.at(base).toConstTensor());
  }

  inline utils::GPUMemoryLayout estimate_memory_layout_of(
      const ValueRef idx) const {
    return values_.at(idx).toConstTensor().estimate_memory_layout();
  }

  inline int32_t hashed_layout_of(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().hashed_layout();
  }

  inline int32_t packed_dim_of(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().packed_dim();
  }

  inline int32_t concat_dim_of(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().concat_dim();
  }

  inline vkapi::BufferBindInfo sizes_ubo(const ValueRef idx) {
    return values_.at(idx).toTensor().sizes_ubo();
  }

  inline vkapi::BufferBindInfo strides_ubo(const ValueRef idx) {
    return values_.at(idx).toTensor().strides_ubo();
  }

  inline vkapi::BufferBindInfo numel_ubo(const ValueRef idx) {
    return values_.at(idx).toTensor().numel_ubo();
  }

  inline bool has_standard_axis_map(const ValueRef idx) {
    return values_.at(idx).toTensor().has_standard_axis_map();
  }

  inline vkapi::BufferBindInfo logical_limits_ubo(const ValueRef idx) {
    return values_.at(idx).toTensor().logical_limits_ubo();
  }

  //
  // Scalar Value Extraction
  //

  template <typename T>
  T extract_scalar(const ValueRef idx) {
    Value& value = values_.at(idx);
    if (value.isInt()) {
      return static_cast<T>(value.toInt());
    }
    if (value.isDouble()) {
      return static_cast<T>(value.toDouble());
    }
    if (value.isBool()) {
      return static_cast<T>(value.toBool());
    }
    VK_THROW("Cannot extract scalar from Value with type ", value.type());
  }

  template <typename T>
  std::optional<T> extract_optional_scalar(const ValueRef idx) {
    if (val_is_none(idx)) {
      return ::std::nullopt;
    } else {
      return extract_scalar<T>(idx);
    }
  }
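
  // Sketch: `extract_scalar<T>` converts whichever scalar Value is stored at
  // `idx` (Int, Double, or Bool) to T, while `extract_optional_scalar<T>`
  // maps a None value to std::nullopt instead. For instance (hypothetical
  // `graph` and `idx`):
  //   float alpha = graph.extract_scalar<float>(idx);
  //   std::optional<float> beta = graph.extract_optional_scalar<float>(idx);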

  std::string extract_string(const ValueRef idx) {
    return values_.at(idx).toString();
  }

  template <
      typename T,
      typename std::enable_if<
          std::is_integral<T>::value && std::is_signed<T>::value,
          int>::type = 0>
  T extract_whcn_dim(const ValueRef idx, const int64_t ndim) {
    T dim = extract_scalar<T>(idx);
    // Normalize dim to account for negative indexing
    dim = (dim % ndim + ndim) % ndim;
    // Assume original value is NCHW ordering, obtain the WHCN ordering
    return ndim - 1 - dim;
  }
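
  // Worked example (sketch): with ndim = 4 and a stored dim of -1 (the
  // innermost NCHW dimension, i.e. width), normalization gives dim = 3 and
  // the function returns 4 - 1 - 3 = 0, which is the W index in WHCN order.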

  //
  // Utility functions
  //

  /*
   * Returns a suggested storage type (i.e. buffer or texture) that can be used
   * to construct `api::vTensor`s. The storage type is typically determined by
   * the GPU reported by the Vulkan context, unless a storage type override is
   * defined in the graph configuration. Some GPU architectures work better
   * with buffer storage, and others with texture storage. Currently, only
   * texture storage is supported.
   */
  utils::StorageType suggested_storage_type();

  /*
   * Returns a suggested memory layout (i.e. channels, width, or height packed)
   * that can be used to construct `api::vTensor`s. The memory layout impacts
   * which dimension will be treated as the vectorized dimension. For texture
   * storage, elements along the vectorized dimension are packed into texels.
   * The suggested memory layout is determined based on the sizes of the tensor,
   * unless a memory layout override is defined in the graph configuration.
   */
  utils::GPUMemoryLayout suggested_memory_layout(
      const std::vector<int64_t>& sizes);

  //
  // Graph Building
  //

 private:
  void check_no_active_value_ptrs();

 public:
  /*
   * Add an `api::vTensor` value to the graph with the specified properties.
   * There are various convenience overloads of this function that may be used
   * instead.
   */
  ValueRef add_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::StorageType storage_type,
      const utils::GPUMemoryLayout memory_layout,
      const int64_t shared_object_idx = -1);

  /*
   * Add an `api::vTensor` value to the graph with the specified properties. The
   * suggested memory layout will be used to construct the `api::vTensor`.
   */
  ValueRef add_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::StorageType storage_type,
      const int64_t shared_object_idx = -1);

  /*
   * Add an `api::vTensor` value to the graph with the specified properties. The
   * suggested storage type will be used to construct the `api::vTensor`.
   */
  ValueRef add_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::GPUMemoryLayout memory_layout,
      const int64_t shared_object_idx = -1);

  /*
   * Add an `api::vTensor` value to the graph with the specified properties. The
   * suggested storage type and memory layout will be used to construct the
   * `api::vTensor`.
   */
  ValueRef add_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const int64_t shared_object_idx = -1);

  /*
   * Add an `api::vTensor` value to the graph with the specified image.
   */
  ValueRef add_tensor(const vkapi::VulkanImage& image);

  /*
   * Add an `api::vTensor` value to the graph with the properties of `vref`.
   */
  ValueRef add_tensor_like(
      const ValueRef vref,
      const utils::StorageType storage_type,
      const utils::GPUMemoryLayout memory_layout);

  /*
   * Add an `api::vTensor` value to the graph with the properties of `vref`. The
   * suggested storage type will be used to construct the `api::vTensor`.
   */
  ValueRef add_tensor_like(
      const ValueRef vref,
      const utils::GPUMemoryLayout memory_layout);

  /*
   * Use the copy constructor of `api::vTensor` to create a "view" of the
   * `vTensor` value at `vref`. See the copy constructor of `api::vTensor` for
   * more details.
   */
  ValueRef add_tensor_view(const ValueRef vref);

  /*
   * Use the copy constructor of `api::vTensor` to create a "view" of the
   * `vTensor` value at `vref` with different sizes and dim order. See the copy
   * constructor of `api::vTensor` for more details.
   */
  ValueRef add_tensor_view(
      const ValueRef vref,
      const std::vector<int64_t>& sizes,
      const std::vector<int64_t>& dim_order,
      const size_t offset_numel = 0);

  /*
   * Add a `TensorRef` value to the graph with the specified properties. A
   * `TensorRef` is a reference to an `api::vTensor` whose data is stored in an
   * external CPU buffer.
   */
  ValueRef add_tensorref(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const void* const data);

  /*
   * Add a staging buffer to the graph. Staging buffers are data buffers that
   * use memory visible to both the CPU and GPU, and are therefore used as an
   * intermediary when transferring data between the CPU and GPU.
   */
  ValueRef add_staging(const vkapi::ScalarType dtype, const size_t numel);

  ValueRef add_none();

  template <typename T>
  typename std::enable_if<is_valid_scalar_type<T>::value, ValueRef>::type
  add_scalar(T value);

  template <typename T>
  typename std::enable_if<is_valid_scalar_type<T>::value, ValueRef>::type
  add_scalar_list(std::vector<T>&& value);

  ValueRef add_value_list(std::vector<ValueRef>&& value);

  ValueRef add_string(std::string&& str);

  ValueRef add_symint(const int32_t val);

  ValueRef set_input_tensor(const ValueRef idx, const bool use_staging = true);
  ValueRef set_output_tensor(const ValueRef idx, const bool use_staging = true);

  template <typename Block>
  vkapi::BufferBindInfo create_params_buffer(const Block& data) {
    param_ubos_.emplace_back(api::ParamsBuffer(context_.get(), data));
    return vkapi::BufferBindInfo(param_ubos_.back().buffer());
  }
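
  // Sketch of typical use inside an op implementation (the `Params` struct
  // and `graph` below are illustrative, not part of this header): a trivially
  // copyable block is uploaded as a uniform buffer that can be bound to a
  // compute shader.
  //
  //   struct Params {
  //     int32_t ndim;
  //     float alpha;
  //   };
  //   vkapi::BufferBindInfo ubo =
  //       graph.create_params_buffer(Params{4, 1.0f});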

  /*
   * Given a ValueRef, do the following depending on the type of the Value:
   * - If it is a SymInt, return the BufferBindInfo of the ParamsBuffer object
   *   backing the SymInt.
   * - If it is a regular Int, create a new ParamsBuffer using the integer value
   *   and return the BufferBindInfo of the created ParamsBuffer.
   */
  vkapi::BufferBindInfo get_or_create_int_param_buffer(const ValueRef idx);

  void set_symint(const ValueRef idx, const int32_t val);

  int32_t read_symint(const ValueRef idx);

  inline void set_val_as_input(const ValueRef idx) {
    inputs_.push_back({idx, kDummyValueRef});
  }

  inline void set_val_as_output(const ValueRef idx) {
    outputs_.push_back({idx, kDummyValueRef});
  }

  /*
   * Convenience function to add an input tensor along with its staging buffer
   */
  inline IOValueRef add_input_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const int64_t shared_object_idx = -1) {
    ValueRef t = add_tensor(sizes, dtype, shared_object_idx);
    ValueRef staging = set_input_tensor(t);
    return {t, staging};
  }

  /*
   * Convenience function to add an input tensor with a specific memory layout
   * along with its staging buffer
   */
  inline IOValueRef add_input_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::GPUMemoryLayout memory_layout,
      const int64_t shared_object_idx = -1) {
    ValueRef t = add_tensor(sizes, dtype, memory_layout, shared_object_idx);
    ValueRef staging = set_input_tensor(t);
    return {t, staging};
  }

  /*
   * Convenience function to add an input tensor with a specific storage type
   * along with its staging buffer
   */
  inline IOValueRef add_input_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::StorageType storage_type,
      const int64_t shared_object_idx = -1) {
    ValueRef t = add_tensor(sizes, dtype, storage_type, shared_object_idx);
    ValueRef staging = set_input_tensor(t);
    return {t, staging};
  }

  /*
   * Add an input tensor with the specified properties along with its staging
   * buffer.
   */
  inline IOValueRef add_input_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::StorageType storage_type,
      const utils::GPUMemoryLayout memory_layout,
      const int64_t shared_object_idx = -1) {
    ValueRef t = add_tensor(
        sizes, dtype, storage_type, memory_layout, shared_object_idx);
    ValueRef staging = set_input_tensor(t);
    return {t, staging};
  }
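
  // Usage sketch (hypothetical `graph`, `sizes`, `input_data`, and `numel`):
  // the returned IOValueRef bundles the tensor value with its staging buffer.
  //
  //   IOValueRef in = graph.add_input_tensor(sizes, vkapi::kFloat);
  //   graph.copy_into_staging(in.staging, input_data, numel);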

  SharedObject& get_shared_object(const int64_t idx);

  //
  // Graph Preparation
  //

  void update_descriptor_counts(
      const vkapi::ShaderInfo& shader_info,
      bool execute);

  void prepare();

  //
  // Dispatch Utilities
  //

  /*
   * Create a global workgroup size for a given `api::vTensor` value, assuming
   * that every shader invocation calculates one texel element of the output
   * tensor.
   *
   * For tensors that use texture storage, the image extents of the
   * `api::vTensor` will be used to set the global workgroup size.
   *
   * For tensors that use buffer storage, the number of texels in the texel
   * buffer will be used to set the x component of the global workgroup size.
   * All other components will be set to 1 (i.e. {ntexels, 1, 1} will be
   * returned).
   */
  utils::uvec3 create_global_wg_size(const ValueRef idx);

  /*
   * Suggest a local workgroup size for a given global workgroup size.
   *
   * The local workgroup size is chosen to try to minimize the number of
   * inactive invocations.
   *
   * Currently, the local workgroup size is hard-coded to contain a total of 64
   * shader invocations. In the future, this value may be made configurable.
   */
  utils::uvec3 create_local_wg_size(const utils::uvec3 global_wg_size);

  /*
   * Convenience function to suggest a local workgroup size for a given
   * `api::vTensor` value, assuming that every shader invocation calculates one
   * texel element of the output tensor.
   */
  utils::uvec3 create_local_wg_size(const ValueRef idx);
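
  // Dispatch sketch (hypothetical `graph` and output value `out`): derive the
  // global size from the output tensor, then a matching local size.
  //
  //   utils::uvec3 global_wg = graph.create_global_wg_size(out);
  //   utils::uvec3 local_wg = graph.create_local_wg_size(global_wg);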

  //
  // Input/Output
  //

  void
  copy_into_staging(const ValueRef idx, const void* data, const size_t numel);
  void copy_from_staging(const ValueRef idx, void* data, const size_t numel);

  //
  // Graph Prepacking
  //

  void encode_prepack();
  void prepack() const;

  //
  // Graph Execution
  //

  void encode_execute();
  void execute() const;

  //
  // Dynamic Shape support
  //

  void resize_input(const int64_t idx, const std::vector<int64_t>& new_sizes);
  void propagate_resize();

  //
  // Miscellaneous Utilities
  //

  inline bool int16_shader_types_enabled() const {
    return context_->adapter_ptr()->supports_int16_shader_types();
  }

  /*
   * Check whether the GPU supports 8 bit buffers.
   */
  inline bool int8_buffers_enabled() const {
    return context_->adapter_ptr()->has_full_int8_buffers_support();
  }

  //
  // Debug support (implemented in Logging.cpp)
  //

  void print_readable();

  //
  // Friend classes
  //

  friend class vTensorPtr;
  friend class TensorRefPtr;
  friend class StagingPtr;
  friend class IntListPtr;
  friend class DoubleListPtr;
  friend class BoolListPtr;
  friend class ValueListPtr;
  friend class SymIntPtr;

  friend struct TmpTensor;
};

template <typename T>
inline typename std::enable_if<is_valid_scalar_type<T>::value, ValueRef>::type
ComputeGraph::add_scalar(T value) {
  ValueRef idx(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();
  values_.emplace_back(value);
  return idx;
}

template <typename T>
inline typename std::enable_if<is_valid_scalar_type<T>::value, ValueRef>::type
ComputeGraph::add_scalar_list(std::vector<T>&& value) {
  ValueRef idx(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();
  values_.emplace_back(std::move(value));
  return idx;
}

} // namespace vkcompute