/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

// @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName

#include <optional>
#include <stack>

#include <executorch/backends/vulkan/runtime/api/api.h>

#include <executorch/backends/vulkan/runtime/graph/GraphConfig.h>

#include <executorch/backends/vulkan/runtime/graph/containers/SharedObject.h>
#include <executorch/backends/vulkan/runtime/graph/containers/Value.h>

#include <executorch/backends/vulkan/runtime/graph/ops/ExecuteNode.h>
#include <executorch/backends/vulkan/runtime/graph/ops/PrepackNode.h>

namespace vkcompute {

// Define valid scalar types that the Value class can accept
template <typename T>
struct is_valid_scalar_type : std::false_type {};

template <>
struct is_valid_scalar_type<int64_t> : std::true_type {};

template <>
struct is_valid_scalar_type<double> : std::true_type {};

template <>
struct is_valid_scalar_type<bool> : std::true_type {};

//
// Guarded Pointer Classes
//

class ComputeGraph;

#define DECL_VALUE_PTR_CLASS(classname, ctype)                          \
  class classname final {                                               \
    ComputeGraph* const graph_;                                         \
    ctype* ptr_;                                                        \
                                                                        \
   public:                                                              \
    explicit classname(ComputeGraph* const graph, const ValueRef idx);  \
    ctype* operator->() const;                                          \
    ctype& operator*() const;                                           \
    ~classname();                                                       \
  };

DECL_VALUE_PTR_CLASS(vTensorPtr, api::vTensor)
DECL_VALUE_PTR_CLASS(TensorRefPtr, TensorRef)
DECL_VALUE_PTR_CLASS(StagingPtr, api::StagingBuffer)
DECL_VALUE_PTR_CLASS(IntListPtr, std::vector<int64_t>)
DECL_VALUE_PTR_CLASS(DoubleListPtr, std::vector<double>)
DECL_VALUE_PTR_CLASS(BoolListPtr, std::vector<bool>)
DECL_VALUE_PTR_CLASS(ValueListPtr, std::vector<ValueRef>)
DECL_VALUE_PTR_CLASS(SymIntPtr, SymInt);

#undef DECL_VALUE_PTR_CLASS

//
// TmpTensor
//

/*
 * This struct is used to recycle the memory of temporary tensors that are
 * created during the execution of a node. Upon construction, this struct will
 * check the `tmp_shared_object_idxs_` of the provided `ComputeGraph` instance
 * to see if any shared objects are available; if none are, a new one is
 * created. A tensor value is then added to the `ComputeGraph` instance with
 * the requested specifications. Upon destruction, the shared object index of
 * the temporary tensor is returned to `tmp_shared_object_idxs_`.
 *
 * Note that instances of this struct can be used as if they were `ValueRef`
 * due to the implementation of a custom casting operator.
 *
 * This class should only be used to create tensors whose lifetimes exist only
 * in a well-defined scope (i.e. within a function).
 */
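
/*
 * A minimal usage sketch (illustrative only; assumes an existing
 * `ComputeGraph* graph` and a valid tensor ValueRef `in`):
 *
 *   {
 *     // Borrows a recycled shared object, or creates a new one, for the
 *     // duration of the enclosing scope.
 *     TmpTensor tmp(graph, graph->sizes_of(in), graph->dtype_of(in));
 *     // `tmp` converts implicitly to ValueRef, so it can be passed to any
 *     // function that expects one.
 *     ...
 *   } // `tmp` is destroyed here and its shared object index is recycled.
 */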
struct TmpTensor {
  ComputeGraph* graph_p;
  int64_t sobj_idx;
  ValueRef vref;

  //
  // Match all available overloads of `add_tensor`
  //

  TmpTensor(
      ComputeGraph* const graph_ptr,
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::StorageType storage_type,
      const utils::GPUMemoryLayout memory_layout);

  TmpTensor(
      ComputeGraph* const graph_ptr,
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::StorageType storage_type);

  TmpTensor(
      ComputeGraph* const graph_ptr,
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::GPUMemoryLayout memory_layout);

  TmpTensor(
      ComputeGraph* const graph_ptr,
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype);

  // No copy construction or assignment
  TmpTensor(TmpTensor& other) = delete;
  TmpTensor& operator=(TmpTensor& other) = delete;

  // No move construction or assignment
  TmpTensor(TmpTensor&& other) = delete;
  TmpTensor& operator=(TmpTensor&& other) = delete;

  // Custom cast to ValueRef
  operator ValueRef() const {
    return vref;
  };

  ~TmpTensor();

 private:
  // Helper function to get the first available shared object index, or to
  // request that a new one be created.
  int64_t get_sobj_idx();
};

//
// ComputeGraph
//

/*
 * This is the core data structure used to execute Vulkan models in graph mode.
 * As opposed to ATen/eager mode, where a command buffer is encoded for every
 * inference (since ops are executed as the model runs), in graph mode the ops
 * that compose the model are intended to be parsed only once, after which a
 * command buffer will be encoded. Model inference will then execute the cached
 * command buffer without needing to encode a new one.
 */
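
/*
 * A rough lifecycle sketch (illustrative only; the sizes, dtype constant, and
 * host-side vectors below are made up for the example, and ops are assumed to
 * be added through the operator registry, which is not shown):
 *
 *   ComputeGraph graph(GraphConfig());
 *   // 1. Build the graph: declare inputs, ops, and outputs.
 *   IOValueRef in = graph.add_input_tensor({1, 3, 16, 16}, vkapi::kFloat);
 *   // ... add ops that consume `in.value` and produce `out` ...
 *   graph.set_output_tensor(out);
 *   // 2. Prepare and encode command buffers once.
 *   graph.prepare();
 *   graph.encode_prepack();
 *   graph.prepack();
 *   graph.encode_execute();
 *   // 3. Each inference re-submits the cached command buffer.
 *   graph.copy_into_staging(in.staging, host_in.data(), host_in.size());
 *   graph.execute();
 *   graph.copy_from_staging(
 *       graph.outputs()[0].staging, host_out.data(), host_out.size());
 */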
class ComputeGraph final {
 public:
  explicit ComputeGraph(GraphConfig config);

  ComputeGraph(ComputeGraph&&) = default;
  ComputeGraph& operator=(ComputeGraph&&) = default;

  ~ComputeGraph();

 private:
  GraphConfig config_;
  vkapi::DescriptorPoolConfig prepack_descriptor_counts_;
  vkapi::DescriptorPoolConfig execute_descriptor_counts_;

  std::unique_ptr<api::Context> context_;

  std::vector<SharedObject> shared_objects_;
  // This stack is used by `TmpTensor` instances to recycle shared objects
  // for temporary tensors. See the comments of `TmpTensor` for more details.
  std::stack<int64_t> tmp_shared_object_idxs_;

  std::vector<Value> values_;
  std::vector<api::ParamsBuffer> param_ubos_;

  std::vector<std::unique_ptr<PrepackNode>> prepack_nodes_;
  std::vector<std::unique_ptr<ExecuteNode>> execute_nodes_;

  std::vector<IOValueRef> inputs_;
  std::vector<IOValueRef> outputs_;

 protected:
  size_t values_in_use_ = 0;

 public:
  //
  // Accessors
  //

  inline api::Context* context() {
    return context_.get();
  }

  inline std::vector<IOValueRef>& inputs() {
    return inputs_;
  }

  inline std::vector<IOValueRef>& outputs() {
    return outputs_;
  }

  inline std::vector<std::unique_ptr<PrepackNode>>& prepack_nodes() {
    return prepack_nodes_;
  }

  inline std::vector<std::unique_ptr<ExecuteNode>>& execute_nodes() {
    return execute_nodes_;
  }

  inline GraphConfig& graphconfig() {
    return config_;
  }

  //
  // Value Extraction
  //

#define GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(ptr_type, short_name, type_name) \
  inline ptr_type get_##short_name(const ValueRef idx) {                   \
    return ptr_type(this, idx);                                            \
  }                                                                        \
  inline bool val_is_##short_name(const ValueRef idx) {                    \
    return values_.at(idx).is##type_name();                                \
  }

  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(vTensorPtr, tensor, Tensor)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(TensorRefPtr, tref, TensorRef)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(StagingPtr, staging, Staging)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(IntListPtr, int_list, IntList)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(DoubleListPtr, double_list, DoubleList)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(BoolListPtr, bool_list, BoolList)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(ValueListPtr, value_list, ValueList)
  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(SymIntPtr, symint, SymInt);

#undef GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS

#define GET_AND_CHECK_VAL_AS_TYPE_FNS(ctype, short_name, type_name) \
  inline ctype get_##short_name(const ValueRef idx) {               \
    return values_.at(idx).to##type_name();                         \
  }                                                                 \
  inline bool val_is_##short_name(const ValueRef idx) {             \
    return values_.at(idx).is##type_name();                         \
  }

  GET_AND_CHECK_VAL_AS_TYPE_FNS(int64_t, int, Int)
  GET_AND_CHECK_VAL_AS_TYPE_FNS(double, double, Double)
  GET_AND_CHECK_VAL_AS_TYPE_FNS(bool, bool, Bool)
  GET_AND_CHECK_VAL_AS_TYPE_FNS(std::string, string, String)

#undef GET_AND_CHECK_VAL_AS_TYPE_FNS

  inline bool val_is_none(const ValueRef idx) {
    return idx == kDummyValueRef ? true : values_.at(idx).isNone();
  }

  inline TypeTag get_val_type(const ValueRef idx) {
    return values_.at(idx).type();
  }
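
  // A brief sketch of how the generated accessors are typically used
  // (illustrative only):
  //
  //   if (graph.val_is_int_list(idx)) {
  //     // Guarded pointer; keep its lifetime short, and avoid adding new
  //     // values to the graph while it is alive (see
  //     // `check_no_active_value_ptrs()` below).
  //     IntListPtr int_list = graph.get_int_list(idx);
  //     const size_t len = int_list->size();
  //   } else if (graph.val_is_int(idx)) {
  //     const int64_t v = graph.get_int(idx);
  //   }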

  //
  // Tensor Properties Accessors
  //

  std::vector<int64_t> sizes_of(const ValueRef idx) const;

  /*
   * Returns the size of the tensor at `idx` along the specified dimension.
   * Negative indexing is allowed.
   */
  template <typename T>
  T size_at(const int64_t dim, const ValueRef idx) const {
    const Value& val = values_.at(idx);
    if (val.isTensor()) {
      return static_cast<T>(utils::val_at(dim, val.toConstTensor().sizes()));
    } else if (val.isTensorRef()) {
      return static_cast<T>(utils::val_at(dim, val.toConstTensorRef().sizes));
    }
    VK_THROW("Could not get sizes of value with type ", val.type());
  }

  int64_t dim_of(const ValueRef idx) const;

  std::vector<int64_t> dim_order_of(const ValueRef idx) const;

  std::vector<int64_t> strides_of(const ValueRef idx) const;

  vkapi::ScalarType dtype_of(const ValueRef idx) const;

  inline const utils::ivec3& logical_limits_of(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().logical_limits();
  }

  inline int32_t numel_of(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().numel();
  }

  inline utils::StorageType storage_type_of(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().storage_type();
  }

  inline bool is_buffer_storage(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().has_buffer_storage();
  }

  inline bool val_is_view_of(const ValueRef maybe_view, const ValueRef base)
      const {
    return values_.at(maybe_view)
        .toConstTensor()
        .is_view_of(values_.at(base).toConstTensor());
  }

  inline utils::GPUMemoryLayout estimate_memory_layout_of(
      const ValueRef idx) const {
    return values_.at(idx).toConstTensor().estimate_memory_layout();
  }

  inline int32_t hashed_layout_of(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().hashed_layout();
  }

  inline int32_t packed_dim_of(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().packed_dim();
  }

  inline int32_t concat_dim_of(const ValueRef idx) const {
    return values_.at(idx).toConstTensor().concat_dim();
  }

  inline vkapi::BufferBindInfo sizes_ubo(const ValueRef idx) {
    return values_.at(idx).toTensor().sizes_ubo();
  }

  inline vkapi::BufferBindInfo strides_ubo(const ValueRef idx) {
    return values_.at(idx).toTensor().strides_ubo();
  }

  inline vkapi::BufferBindInfo numel_ubo(const ValueRef idx) {
    return values_.at(idx).toTensor().numel_ubo();
  }

  inline bool has_standard_axis_map(const ValueRef idx) {
    return values_.at(idx).toTensor().has_standard_axis_map();
  }

  inline vkapi::BufferBindInfo logical_limits_ubo(const ValueRef idx) {
    return values_.at(idx).toTensor().logical_limits_ubo();
  }

  //
  // Scalar Value Extraction
  //

  template <typename T>
  T extract_scalar(const ValueRef idx) {
    Value& value = values_.at(idx);
    if (value.isInt()) {
      return static_cast<T>(value.toInt());
    }
    if (value.isDouble()) {
      return static_cast<T>(value.toDouble());
    }
    if (value.isBool()) {
      return static_cast<T>(value.toBool());
    }
    VK_THROW("Cannot extract scalar from Value with type ", value.type());
  }

  template <typename T>
  std::optional<T> extract_optional_scalar(const ValueRef idx) {
    if (val_is_none(idx)) {
      return ::std::nullopt;
    } else {
      return extract_scalar<T>(idx);
    }
  }

  std::string extract_string(const ValueRef idx) {
    return values_.at(idx).toString();
  }

  template <
      typename T,
      typename std::enable_if<
          std::is_integral<T>::value && std::is_signed<T>::value,
          int>::type = 0>
  T extract_whcn_dim(const ValueRef idx, const int64_t ndim) {
    T dim = extract_scalar<T>(idx);
    // Normalize dim to account for negative indexing
    dim = (dim % ndim + ndim) % ndim;
    // Assume original value is NCHW ordering, obtain the WHCN ordering
    return ndim - 1 - dim;
  }
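
  // Worked example (for illustration): with ndim = 4 (NCHW), a dim value of
  // -3 or 1 (the channels dim) normalizes to 1 and is converted to WHCN dim
  // 4 - 1 - 1 = 2; the width dim (NCHW index 3) maps to WHCN index 0.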

  //
  // Utility functions
  //

  /*
   * Returns a suggested storage type (i.e. buffer or texture) that can be used
   * to construct `api::vTensor`s. The storage type is typically determined by
   * the GPU reported by the Vulkan context, unless a storage type override is
   * defined in the graph configuration. Some GPU architectures work better
   * with buffer storage, and others with texture storage. Currently, only
   * texture storage is supported.
   */
  utils::StorageType suggested_storage_type();

  /*
   * Returns a suggested memory layout (i.e. channels, width, or height packed)
   * that can be used to construct `api::vTensor`s. The memory layout impacts
   * which dimension will be treated as the vectorized dimension. For texture
   * storage, elements along the vectorized dimension are packed into texels.
   * The suggested memory layout is determined based on the sizes of the
   * tensor, unless a memory layout override is defined in the graph
   * configuration.
   */
  utils::GPUMemoryLayout suggested_memory_layout(
      const std::vector<int64_t>& sizes);

  //
  // Graph Building
  //

 private:
  void check_no_active_value_ptrs();

 public:
  /*
   * Add an `api::vTensor` value to the graph with the specified properties.
   * There are various convenience overloads of this function that may be used
   * instead.
   */
  ValueRef add_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::StorageType storage_type,
      const utils::GPUMemoryLayout memory_layout,
      const int64_t shared_object_idx = -1);

  /*
   * Add an `api::vTensor` value to the graph with the specified properties.
   * The suggested memory layout will be used to construct the `api::vTensor`.
   */
  ValueRef add_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::StorageType storage_type,
      const int64_t shared_object_idx = -1);

  /*
   * Add an `api::vTensor` value to the graph with the specified properties.
   * The suggested storage type will be used to construct the `api::vTensor`.
   */
  ValueRef add_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::GPUMemoryLayout memory_layout,
      const int64_t shared_object_idx = -1);

  /*
   * Add an `api::vTensor` value to the graph with the specified properties.
   * The suggested storage type and memory layout will be used to construct
   * the `api::vTensor`.
   */
  ValueRef add_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const int64_t shared_object_idx = -1);

  /*
   * Add an `api::vTensor` value to the graph with the specified image.
   */
  ValueRef add_tensor(const vkapi::VulkanImage& image);
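
  // Typical usage sketch (illustrative; the dtype, storage, and layout
  // constants shown are examples):
  //
  //   // Let the graph suggest the storage type and memory layout:
  //   const ValueRef a = graph.add_tensor({1, 64, 32, 32}, vkapi::kFloat);
  //   // Or specify them explicitly:
  //   const ValueRef b = graph.add_tensor(
  //       {1, 64, 32, 32},
  //       vkapi::kFloat,
  //       utils::kTexture3D,
  //       utils::kChannelsPacked);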

  /*
   * Add an `api::vTensor` value to the graph with the properties of `vref`.
   */
  ValueRef add_tensor_like(
      const ValueRef vref,
      const utils::StorageType storage_type,
      const utils::GPUMemoryLayout memory_layout);

  /*
   * Add an `api::vTensor` value to the graph with the properties of `vref`.
   * The suggested storage type will be used to construct the `api::vTensor`.
   */
  ValueRef add_tensor_like(
      const ValueRef vref,
      const utils::GPUMemoryLayout memory_layout);

  /*
   * Use the copy constructor of `api::vTensor` to create a "view" of the
   * `vTensor` value at `vref`. See the copy constructor of `api::vTensor` for
   * more details.
   */
  ValueRef add_tensor_view(const ValueRef vref);

  /*
   * Use the copy constructor of `api::vTensor` to create a "view" of the
   * `vTensor` value at `vref` with different sizes and dim order. See the copy
   * constructor of `api::vTensor` for more details.
   */
  ValueRef add_tensor_view(
      const ValueRef vref,
      const std::vector<int64_t>& sizes,
      const std::vector<int64_t>& dim_order,
      const size_t offset_numel = 0);

  /*
   * Add a `TensorRef` value to the graph with the specified properties. A
   * `TensorRef` is a reference to an `api::vTensor` whose data is stored in an
   * external CPU buffer.
   */
  ValueRef add_tensorref(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const void* const data);

  /*
   * Add a staging buffer to the graph. Staging buffers are data buffers that
   * use memory visible to both the CPU and GPU, and are therefore used as an
   * intermediary when transferring data between the CPU and GPU.
   */
  ValueRef add_staging(const vkapi::ScalarType dtype, const size_t numel);

  ValueRef add_none();

  template <typename T>
  typename std::enable_if<is_valid_scalar_type<T>::value, ValueRef>::type
  add_scalar(T value);

  template <typename T>
  typename std::enable_if<is_valid_scalar_type<T>::value, ValueRef>::type
  add_scalar_list(std::vector<T>&& value);

  ValueRef add_value_list(std::vector<ValueRef>&& value);

  ValueRef add_string(std::string&& str);

  ValueRef add_symint(const int32_t val);

  ValueRef set_input_tensor(const ValueRef idx, const bool use_staging = true);
  ValueRef set_output_tensor(const ValueRef idx, const bool use_staging = true);

  template <typename Block>
  vkapi::BufferBindInfo create_params_buffer(const Block& data) {
    param_ubos_.emplace_back(api::ParamsBuffer(context_.get(), data));
    return vkapi::BufferBindInfo(param_ubos_.back().buffer());
  }
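
  // Sketch (illustrative; `ResizeParams` is a hypothetical struct): any
  // trivially copyable block of data can be used to back a params UBO.
  //
  //   struct ResizeParams final {
  //     int32_t out_width;
  //     int32_t out_height;
  //   };
  //   const ResizeParams params{out_w, out_h};
  //   const vkapi::BufferBindInfo ubo = graph.create_params_buffer(params);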

  /*
   * Given a ValueRef, do the following depending on the type of the Value:
   * - If it is a SymInt, return the BufferBindInfo of the ParamsBuffer object
   *   backing the SymInt.
   * - If it is a regular Int, create a new ParamsBuffer using the integer
   *   value and return the BufferBindInfo of the created ParamsBuffer.
   */
  vkapi::BufferBindInfo get_or_create_int_param_buffer(const ValueRef idx);

  void set_symint(const ValueRef idx, const int32_t val);

  int32_t read_symint(const ValueRef idx);

  inline void set_val_as_input(const ValueRef idx) {
    inputs_.push_back({idx, kDummyValueRef});
  }

  inline void set_val_as_output(const ValueRef idx) {
    outputs_.push_back({idx, kDummyValueRef});
  }

  /*
   * Convenience function to add an input tensor along with its staging buffer.
   */
  inline IOValueRef add_input_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const int64_t shared_object_idx = -1) {
    ValueRef t = add_tensor(sizes, dtype, shared_object_idx);
    ValueRef staging = set_input_tensor(t);
    return {t, staging};
  }

  /*
   * Convenience function to add an input tensor with a specific memory layout
   * along with its staging buffer.
   */
  inline IOValueRef add_input_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::GPUMemoryLayout memory_layout,
      const int64_t shared_object_idx = -1) {
    ValueRef t = add_tensor(sizes, dtype, memory_layout, shared_object_idx);
    ValueRef staging = set_input_tensor(t);
    return {t, staging};
  }

  /*
   * Convenience function to add an input tensor with a specific storage type
   * along with its staging buffer.
   */
  inline IOValueRef add_input_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::StorageType storage_type,
      const int64_t shared_object_idx = -1) {
    ValueRef t = add_tensor(sizes, dtype, storage_type, shared_object_idx);
    ValueRef staging = set_input_tensor(t);
    return {t, staging};
  }

  /*
   * Add an input tensor with the specified properties along with its staging
   * buffer.
   */
  inline IOValueRef add_input_tensor(
      const std::vector<int64_t>& sizes,
      const vkapi::ScalarType dtype,
      const utils::StorageType storage_type,
      const utils::GPUMemoryLayout memory_layout,
      const int64_t shared_object_idx = -1) {
    ValueRef t = add_tensor(
        sizes, dtype, storage_type, memory_layout, shared_object_idx);
    ValueRef staging = set_input_tensor(t);
    return {t, staging};
  }

  SharedObject& get_shared_object(const int64_t idx);

  //
  // Graph Preparation
  //

  void update_descriptor_counts(
      const vkapi::ShaderInfo& shader_info,
      bool execute);

  void prepare();

  //
  // Dispatch Utilities
  //

  /*
   * Create a global workgroup size for a given `api::vTensor` value, assuming
   * that every shader invocation calculates one texel element of the output
   * tensor.
   *
   * For tensors that use texture storage, the image extents of the
   * `api::vTensor` will be used to set the global workgroup size.
   *
   * For tensors that use buffer storage, the number of texels in the texel
   * buffer will be used to set the x component of the global workgroup size.
   * All other components will be set to 1 (i.e. {ntexels, 1, 1} will be
   * returned).
   */
  utils::uvec3 create_global_wg_size(const ValueRef idx);

  /*
   * Suggest a local workgroup size for a given global workgroup size.
   *
   * The local workgroup size will be formed to try to minimize the number of
   * inactive invocations.
   *
   * Currently, the local workgroup size is hard-coded to contain a total of 64
   * shader invocations. In the future, this value may be made configurable.
   */
  utils::uvec3 create_local_wg_size(const utils::uvec3 global_wg_size);

  /*
   * Convenience function to suggest a local workgroup size for a given
   * `api::vTensor` value, assuming that every shader invocation calculates one
   * texel element of the output tensor.
   */
  utils::uvec3 create_local_wg_size(const ValueRef idx);
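
  // Sketch of how an op implementation might size a dispatch (illustrative
  // only; `out` is assumed to be a tensor value in the graph):
  //
  //   const utils::uvec3 global_wg_size = graph.create_global_wg_size(out);
  //   const utils::uvec3 local_wg_size =
  //       graph.create_local_wg_size(global_wg_size);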

  //
  // Input/Output
  //

  void
  copy_into_staging(const ValueRef idx, const void* data, const size_t numel);
  void copy_from_staging(const ValueRef idx, void* data, const size_t numel);

  //
  // Graph Prepacking
  //

  void encode_prepack();
  void prepack() const;

  //
  // Graph Execution
  //

  void encode_execute();
  void execute() const;

  //
  // Dynamic Shape support
  //

  void resize_input(const int64_t idx, const std::vector<int64_t>& new_sizes);
  void propagate_resize();

  //
  // Miscellaneous Utilities
  //

  inline bool int16_shader_types_enabled() const {
    return context_->adapter_ptr()->supports_int16_shader_types();
  }

  /*
   * Check whether the GPU supports 8-bit buffers.
   */
  inline bool int8_buffers_enabled() const {
    return context_->adapter_ptr()->has_full_int8_buffers_support();
  }

  //
  // Debug support (implemented in Logging.cpp)
  //

  void print_readable();

  //
  // Friend classes
  //

  friend class vTensorPtr;
  friend class TensorRefPtr;
  friend class StagingPtr;
  friend class IntListPtr;
  friend class DoubleListPtr;
  friend class BoolListPtr;
  friend class ValueListPtr;
  friend class SymIntPtr;

  friend struct TmpTensor;
};

template <typename T>
inline typename std::enable_if<is_valid_scalar_type<T>::value, ValueRef>::type
ComputeGraph::add_scalar(T value) {
  ValueRef idx(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();
  values_.emplace_back(value);
  return idx;
}

template <typename T>
inline typename std::enable_if<is_valid_scalar_type<T>::value, ValueRef>::type
ComputeGraph::add_scalar_list(std::vector<T>&& value) {
  ValueRef idx(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();
  values_.emplace_back(std::move(value));
  return idx;
}

} // namespace vkcompute