xref: /aosp_15_r20/external/executorch/backends/vulkan/runtime/graph/ops/PrepackNode.cpp (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/backends/vulkan/runtime/graph/ops/PrepackNode.h>

#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>

#include <executorch/backends/vulkan/runtime/graph/ops/utils/BindingUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h>

namespace vkcompute {

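// Returns a no-op compute shader whose variant matches the dtype and storage
// type of the packed tensor, so that it can be dispatched against that tensor.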
vkapi::ShaderInfo get_noop_shader(ComputeGraph& graph, const ValueRef packed) {
  std::string noop_shader_name("no_op");
  vTensorPtr t_packed = graph.get_tensor(packed);
  add_dtype_suffix(noop_shader_name, *t_packed);
  add_storage_type_suffix(noop_shader_name, *t_packed);
  return VK_KERNEL_FROM_STR(noop_shader_name);
}
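
// Illustrative example (the suffix spellings are assumptions based on the
// naming conventions in ShaderNameUtils.h, not verified against the shader
// library): for a float tensor backed by a 3D texture, the composed name
// would be along the lines of "no_op_float_texture3d", which
// VK_KERNEL_FROM_STR then resolves to the registered shader of that name.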

PrepackNode::PrepackNode(
    ComputeGraph& graph,
    const vkapi::ShaderInfo& shader,
    const utils::uvec3& global_workgroup_size,
    const utils::uvec3& local_workgroup_size,
    const ValueRef tref,
    const ValueRef packed,
    const vkapi::ParamsBindList& params,
    const vkapi::SpecVarList& spec_vars)
    : shader_(shader),
      noop_shader_(get_noop_shader(graph, packed)),
      global_workgroup_size_(global_workgroup_size),
      local_workgroup_size_(local_workgroup_size),
      tref_(tref),
      packed_(packed),
      params_(params),
      spec_vars_(spec_vars) {
  graph.update_descriptor_counts(shader, /*execute = */ false);
  graph.update_descriptor_counts(noop_shader_, /*execute = */ false);
}
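
// Usage sketch (illustrative only; the shader name and workgroup-size helpers
// are assumptions, not part of this file). An op implementation typically
// appends a PrepackNode to the graph to upload and pack a constant tensor:
//
//   graph.prepack_nodes().emplace_back(new PrepackNode(
//       graph,
//       VK_KERNEL_FROM_STR("nchw_to_image"), // packing shader (assumed name)
//       graph.create_global_wg_size(packed),
//       graph.create_local_wg_size(packed),
//       tref,    // ValueRef to the TensorRef holding the CPU-side data
//       packed,  // ValueRef to the destination vTensor
//       {},      // params
//       {}));    // spec_vars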

api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
  vTensorPtr packed = graph->get_tensor(packed_);

  // If no TensorRef is provided, create a staging buffer of zeros according to
  // the vkapi::vTensor metadata.
  if (graph->val_is_none(tref_)) {
    size_t numel = utils::multiply_integers(packed->sizes());
    api::StagingBuffer staging(graph->context(), packed->dtype(), numel);
    staging.set_staging_zeros();
    return staging;
  }

  TensorRefPtr tref = graph->get_tref(tref_);
  size_t numel = utils::multiply_integers(tref->sizes);
  api::StagingBuffer staging(graph->context(), tref->dtype, numel);
  size_t nbytes = numel * vkapi::element_size(tref->dtype);
  staging.copy_from(tref->data, nbytes);
  return staging;
}
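
// Worked example (values illustrative): for a TensorRef with sizes {2, 3, 4}
// and a 4-byte dtype such as Float, numel = 2 * 3 * 4 = 24 and
// nbytes = 24 * 4 = 96, so 96 bytes are copied from tref->data into the
// host-visible staging buffer before the packing shader runs.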

void PrepackNode::encode(ComputeGraph* graph) {
  api::Context* const context = graph->context();

  vTensorPtr packed = graph->get_tensor(packed_);
  api::StagingBuffer staging = create_staging_buffer(graph);

  std::unique_lock<std::mutex> cmd_lock = context->dispatch_lock();

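  // First dispatch: run the packing shader, which reads the staging buffer
  // and writes the packed tensor. dispatch_lock() above serializes access to
  // the context's command buffer, keeping both dispatches recorded back to
  // back even when multiple threads encode against the same context.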
  {
    vkapi::PipelineBarrier pipeline_barrier{};
    vkapi::DescriptorSet descriptor_set =
        context->get_descriptor_set(shader_, local_workgroup_size_, spec_vars_);

    // Binding order must match the packing shader's layout: the packed tensor
    // at binding 0 (with write access), the staging buffer at binding 1, and
    // any parameter UBOs after that.
    uint32_t idx = 0;
    bind_tensor_to_descriptor_set(
        *packed,
        pipeline_barrier,
        vkapi::MemoryAccessType::WRITE,
        descriptor_set,
        idx++);
    bind_staging_to_descriptor_set(staging, descriptor_set, idx++);
    bind_params_to_descriptor_set(params_, descriptor_set, idx);

    context->register_shader_dispatch(
        descriptor_set, pipeline_barrier, shader_, global_workgroup_size_);
  }

  // Submit a compute shader that performs a no-op with the packed tensor in
  // order to trigger an image layout transition from GENERAL to
  // READ_ONLY_OPTIMAL. This ensures that future uses of the tensor will be
  // bound with the correct image layout.
  {
    vkapi::PipelineBarrier pipeline_barrier{};
    vkapi::DescriptorSet descriptor_set =
        context->get_descriptor_set(noop_shader_, {1, 1, 1});

    bind_tensor_to_descriptor_set(
        *packed,
        pipeline_barrier,
        vkapi::MemoryAccessType::READ,
        descriptor_set,
        0);

    context->register_shader_dispatch(
        descriptor_set, pipeline_barrier, noop_shader_, {1, 1, 1});
  }
}
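
// Note that encode() only records the two dispatches into the context's
// current command buffer; the GPU work actually runs when the graph later
// submits that command buffer during its prepack stage.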

} // namespace vkcompute