xref: /aosp_15_r20/external/executorch/extension/pybindings/pybindings.cpp (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #include <algorithm>
10 #include <cstdio>
11 #include <iostream>
12 #include <memory>
13 #include <stdexcept>
14 #include <unordered_map>
15 
16 #include <pybind11/iostream.h>
17 #include <pybind11/pybind11.h>
18 #include <pybind11/stl.h>
19 
20 #include <executorch/devtools/bundled_program/bundled_program.h>
21 #include <executorch/devtools/bundled_program/schema/bundled_program_schema_generated.h>
22 #include <executorch/devtools/etdump/etdump_flatcc.h>
23 #include <executorch/extension/data_loader/buffer_data_loader.h>
24 #include <executorch/extension/data_loader/mmap_data_loader.h>
25 #include <executorch/extension/memory_allocator/malloc_memory_allocator.h>
26 #include <executorch/runtime/core/data_loader.h>
27 #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
28 #include <executorch/runtime/executor/method.h>
29 #include <executorch/runtime/executor/program.h>
30 #include <executorch/runtime/kernel/operator_registry.h>
31 #include <executorch/runtime/platform/assert.h>
32 #include <executorch/runtime/platform/platform.h>
33 #include <executorch/runtime/platform/profiler.h>
34 #include <executorch/runtime/platform/runtime.h>
35 
36 #include <ATen/Functions.h>
37 #include <ATen/Tensor.h>
38 #include <ATen/core/functional.h>
39 #include <c10/core/ScalarTypeToTypeMeta.h>
40 #include <torch/csrc/utils/pybind.h>
41 #include <torch/python.h>
42 
43 #ifndef USE_ATEN_LIB
44 #include <c10/core/impl/LocalDispatchKeySet.h>
45 #include <executorch/extension/aten_util/aten_bridge.h>
46 #endif
47 
48 /// Throws a runtime_error with the provided message if `error` is not `Ok`.
49 #define THROW_IF_ERROR(error, message, ...)                       \
50   ({                                                              \
51     if ((error) != Error::Ok) {                                   \
52       char msg_buf[128];                                          \
53       snprintf(msg_buf, sizeof(msg_buf), message, ##__VA_ARGS__); \
54       /* pybind will convert this to a python exception. */       \
55       throw std::runtime_error(msg_buf);                          \
56     }                                                             \
57   })
58 
59 #define THROW_INDEX_IF_ERROR(error, message, ...)                 \
60   ({                                                              \
61     if ((error) != Error::Ok) {                                   \
62       char msg_buf[128];                                          \
63       snprintf(msg_buf, sizeof(msg_buf), message, ##__VA_ARGS__); \
64       /* pybind will convert this to a python exception. */       \
65       throw std::out_of_range(msg_buf);                           \
66     }                                                             \
67   })
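// Example (illustrative sketch, not part of the original file): a typical
// call site for these macros. The `err` and `inputs` variables here are
// hypothetical.
//
//   Error err = method.set_inputs(inputs);
//   THROW_IF_ERROR(
//       err,
//       "set_inputs failed with error 0x%" PRIx32,
//       static_cast<uint32_t>(err));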
68 
69 // Our logs work by writing to stderr. By default this is done through fprintf
70 // (as defined in posix.cpp), whose output does not show up in Python
71 // environments. Here we override the PAL to use std::cerr, which can be
72 // properly redirected by scoped_estream_redirect.
73 void et_pal_emit_log_message(
74     et_timestamp_t timestamp,
75     et_pal_log_level_t level,
76     const char* filename,
77     ET_UNUSED const char* function,
78     size_t line,
79     const char* message,
80     ET_UNUSED size_t length) {
81   std::cerr << "[" << filename << ":" << line << "] " << message << std::endl;
82 }
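// Illustrative sketch (hypothetical usage, not part of the original file):
// with the override above in place, a pybind11 stream guard can capture the
// runtime's logs so they appear in Python:
//
//   py::scoped_estream_redirect guard;  // redirects std::cerr to sys.stderr
//   ET_LOG(Info, "this log line is now visible in a Python notebook");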
83 
84 namespace py = pybind11;
85 using executorch::bundled_program::verify_method_outputs;
86 using ::executorch::extension::BufferDataLoader;
87 using ::executorch::extension::MallocMemoryAllocator;
88 using ::executorch::extension::MmapDataLoader;
89 using ::executorch::runtime::ArrayRef;
90 using ::executorch::runtime::DataLoader;
91 using ::executorch::runtime::Error;
92 using ::executorch::runtime::EValue;
93 using ::executorch::runtime::EventTracerDebugLogLevel;
94 using ::executorch::runtime::get_registered_kernels;
95 using ::executorch::runtime::HierarchicalAllocator;
96 using ::executorch::runtime::Kernel;
97 using ::executorch::runtime::MemoryAllocator;
98 using ::executorch::runtime::MemoryManager;
99 using ::executorch::runtime::Method;
100 using ::executorch::runtime::prof_result_t;
101 using ::executorch::runtime::Program;
102 using ::executorch::runtime::Result;
103 using ::executorch::runtime::Span;
104 using ::executorch::runtime::Tag;
105 using torch::executor::etdump_result;
106 using torch::executor::ETDumpGen;
107 
108 #ifndef USE_ATEN_LIB
109 using ::executorch::extension::alias_attensor_to_etensor;
110 using ::executorch::extension::alias_etensor_to_attensor;
111 using ::executorch::extension::torch_to_executorch_scalar_type;
112 #endif // !USE_ATEN_LIB
113 
114 namespace executorch {
115 namespace extension {
116 namespace pybindings {
117 
118 namespace {
119 
120 void write_data_to_file(const std::string& path, void* buf, size_t size) {
121   FILE* f = fopen(path.c_str(), "w+");
122   if (!f) {
123     throw std::runtime_error(
124         "Failed to open file " + path + ": " + strerror(errno));
125   }
126   size_t num_written = fwrite(buf, 1, size, f);
127   if (num_written != size) {
128     fclose(f);
129     throw std::runtime_error("Failed to write etdump to file " + path);
130   }
131   int err = fclose(f);
132   if (err) {
133     throw std::runtime_error(
134         "Failed to close etdump file " + path + ": " + strerror(err));
135   }
136 }
137 
138 void setup_output_storage(
139     Method& method,
140     const std::vector<Span<uint8_t>>& output_storages) {
141   if (output_storages.size() != method.outputs_size()) {
142     THROW_IF_ERROR(
143         Error::InvalidArgument,
144         "number of output storages %zu does not match number of outputs %zu",
145         output_storages.size(),
146         method.outputs_size());
147   }
148   for (size_t i = 0; i < output_storages.size(); ++i) {
149     if (output_storages[i].size() == 0) {
150       // Skip empty output storages; this happens for non-tensor outputs
151       // and memory-planned outputs.
152       continue;
153     }
154     Error output_status = method.set_output_data_ptr(
155         output_storages[i].data(), output_storages[i].size(), i);
156     // We should already be skipping non-tensor and memory-planned outputs,
157     // so any error here is real.
158     THROW_IF_ERROR(
159         output_status,
160         "set_output_data_ptr failed for output %zu with error 0x%" PRIx32,
161         i,
162         static_cast<uint32_t>(output_status));
163   }
164 }
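// Illustrative sketch (hypothetical buffers, not part of the original file):
// how a caller might prepare per-output storage before execution. Real
// callers size the buffers from MethodMeta (see make_output_storages() below).
//
//   std::vector<std::vector<uint8_t>> buffers = /* one buffer per output */;
//   std::vector<Span<uint8_t>> spans;
//   for (auto& b : buffers) {
//     spans.emplace_back(b.data(), b.size());
//   }
//   setup_output_storage(method, spans);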
165 
166 class Module final {
167  public:
168   explicit Module(
169       std::unique_ptr<DataLoader> loader,
170       std::unique_ptr<ETDumpGen> tracer = nullptr,
171       size_t debug_buffer_size = 0,
172       Program::Verification program_verification =
173           Program::Verification::InternalConsistency)
174       : loader_(std::move(loader)),
175         event_tracer_(std::move(tracer)),
176         debug_buffer_size_(debug_buffer_size) {
177     ::executorch::runtime::runtime_init();
178     Result<Program> program =
179         Program::load(loader_.get(), program_verification);
180     THROW_IF_ERROR(
181         program.error(),
182         "loading program failed with error: 0x%" PRIx32,
183         static_cast<uint32_t>(program.error()));
184     program_ = std::make_unique<Program>(std::move(program.get()));
185 
186     // Figure out the size of each non_const layer we need to support every
187     // method in the program. A map is easier to use than a list because we
188     // don't know in advance how many non_const arenas there will be.
189     std::map<size_t, int64_t> non_const_buffer_sizes;
190     for (size_t i = 0; i < program_->num_methods(); ++i) {
191       auto name = program_->get_method_name(i).get();
192       auto method_meta = program_->method_meta(name).get();
193       for (size_t j = 0; j < method_meta.num_non_const_buffers(); j++) {
194         int64_t buffer_size = method_meta.non_const_buffer_size(j).get();
195         if (non_const_buffer_sizes.find(j) == non_const_buffer_sizes.end()) {
196           non_const_buffer_sizes.insert({j, buffer_size});
197         } else {
198           non_const_buffer_sizes[j] =
199               std::max(non_const_buffer_sizes[j], buffer_size);
200         }
201       }
202     }
203 
204     // Allocate the arenas. Using a vector because we need to remember the
205     // size as well, which makes it easier than a unique_ptr.
206     std::vector<std::vector<uint8_t>> non_const_buffers_;
207     for (std::map<size_t, int64_t>::iterator i = non_const_buffer_sizes.begin();
208          i != non_const_buffer_sizes.end();
209          i++) {
210       non_const_buffers_.push_back(std::vector<uint8_t>(i->second));
211     }
212 
213     memory_ = std::make_unique<Memory>(std::move(non_const_buffers_));
214     if (event_tracer_ && debug_buffer_size > 0) {
215       // If a debug buffer was requested for the ETDump, allocate it and make
216       // sure its lifetime is as long as the event_tracer.
217       debug_buffer_ = std::make_unique<uint8_t[]>(debug_buffer_size);
218       event_tracer_->set_debug_buffer(get_etdump_debug_buffer());
219       event_tracer_->set_event_tracer_debug_level(
220           EventTracerDebugLogLevel::kIntermediateOutputs);
221     }
222 
223     // Load methods
224     for (size_t i = 0; i < program_->num_methods(); ++i) {
225       auto name = program_->get_method_name(i).get();
226       // It's safe to use the same memory manager for all methods because
227       // we can guarantee that only one will be executing at a time.
228       // Everything in this module runs on a single thread.
229       Result<Method> method = program_->load_method(
230           name, memory_->mem_manager(), event_tracer_.get());
231       THROW_IF_ERROR(
232           method.error(),
233           "loading method %s failed with error 0x%" PRIx32,
234           name,
235           static_cast<uint32_t>(method.error()));
236       methods_.insert(
237           {std::string(name),
238            std::make_unique<Method>(std::move(method.get()))});
239     }
240   }
241 
242   Module(const Module&) = delete;
243   Module& operator=(const Module&) = delete;
244   Module(Module&&) = default;
245   Module& operator=(Module&&) = default;
246 
247   /// Executes the specified method on the provided inputs and returns its
248   /// outputs.
249   std::vector<EValue> run_method(
250       const std::string& method_name,
251       const std::vector<EValue>& args,
252       const std::optional<std::vector<Span<uint8_t>>>& output_storages =
253           std::nullopt) {
254     auto& method = get_method(method_name);
255     exec_aten::ArrayRef<EValue> input_evalue_list(args.data(), args.size());
256 
257     Error set_inputs_status = method.set_inputs(input_evalue_list);
258     THROW_IF_ERROR(
259         set_inputs_status,
260         "method->set_inputs() for method '%s' failed with error 0x%" PRIx32,
261         method_name.c_str(),
262         static_cast<uint32_t>(set_inputs_status));
263 
264 #ifdef USE_ATEN_LIB
265     // [TLS handling] This is to workaround an assertion failure
266     // (https://fburl.com/code/302jyn8d) running `gelu` in ATen mode in fbcode
267     // (such as bento). The problem is ExecuTorch ATen mode doesn't have
268     // Thread Local State, but `torch-cpp` is assuming tls init is done. There
269     // are two more checks: MKLDNN disabled and C10_MOBILE, if any of them is
270     // true we won't be hitting this assertion error. However in `torch-cpp`
271     // lib both checks are false. Production impact: this should not make any
272     // impact in production environment, given that in xplat we are depending
273     // on a library that enables C10_MOBILE (`torch_mobile_core`).
274     c10::impl::ExcludeDispatchKeyGuard no_autograd(
275         c10::autograd_dispatch_keyset);
276 #endif
277     if (output_storages) {
278       setup_output_storage(method, *output_storages);
279     }
280     Error execute_status = method.execute();
281     THROW_IF_ERROR(
282         execute_status,
283         "method->execute() failed with error 0x%" PRIx32,
284         static_cast<uint32_t>(execute_status));
285     // process outputs
286     return get_outputs(method_name);
287   }
288 
289   std::vector<EValue> get_outputs(const std::string& method_name) {
290     auto& method = methods_[method_name];
291     std::vector<EValue> result(method->outputs_size());
292 
293     Error get_outputs_status =
294         method->get_outputs(result.data(), method->outputs_size());
295     THROW_IF_ERROR(
296         get_outputs_status,
297         "method->get_outputs() for method '%s' failed with error 0x%" PRIx32,
298         method_name.c_str(),
299         static_cast<uint32_t>(get_outputs_status));
300 
301     return result;
302   }
303 
304   Method& get_method(const std::string& method_name) {
305     if (methods_.count(method_name) == 0) {
306       THROW_IF_ERROR(
307           Error::InvalidArgument,
308           "no such method in program: %s",
309           method_name.c_str());
310     }
311     return *methods_[method_name].get();
312   }
313 
314   /// Returns the names of all methods in the program.
315   std::vector<std::string> method_names() const {
316     std::vector<std::string> names;
317     for (const auto& method : methods_) {
318       names.push_back(method.first);
319     }
320     return names;
321   }
322 
323   bool has_etdump() {
324     return static_cast<bool>(event_tracer_);
325   }
326 
327   ETDumpGen& etdump() {
328     return *event_tracer_;
329   }
330 
331   bool has_etdump_debug_buffer() const {
332     return static_cast<bool>(debug_buffer_);
333   }
334 
335   Span<uint8_t> get_etdump_debug_buffer() {
336     return Span<uint8_t>(debug_buffer_.get(), debug_buffer_size_);
337   }
338 
339  private:
340   /// A wrapper/util class for executorch memory allocations/manager.
341   class Memory {
342    public:
343     explicit Memory(std::vector<std::vector<uint8_t>>&& non_const_buffers)
344         : runtime_allocator_(),
345           non_const_buffers_(std::move(non_const_buffers)),
346           non_const_spans_(create_non_const_spans()),
347           non_const_allocator_(
348               {non_const_spans_.data(), non_const_spans_.size()}),
349           mem_manager_(
350               &const_allocator_,
351               &non_const_allocator_,
352               &runtime_allocator_,
353               &temp_allocator_) {}
354 
355     /// Returns a pointer to the internal memory manager; the Memory instance
356     /// must outlive this pointer.
357     MemoryManager* mem_manager() {
358       return &mem_manager_;
359     }
360 
361     Memory(const Memory&) = delete;
362     Memory& operator=(const Memory&) = delete;
363 
364    private:
365     MemoryAllocator const_allocator_{MemoryAllocator(0, nullptr)};
366 
367     MallocMemoryAllocator runtime_allocator_;
368 
369     MemoryAllocator temp_allocator_{MemoryAllocator(0, nullptr)};
370 
371     std::vector<std::vector<uint8_t>> non_const_buffers_;
372 
373     std::vector<Span<uint8_t>> non_const_spans_;
374 
375     HierarchicalAllocator non_const_allocator_;
376 
377     MemoryManager mem_manager_;
378 
379     std::vector<Span<uint8_t>> create_non_const_spans() {
380       std::vector<Span<uint8_t>> result;
381       for (size_t i = 0; i < non_const_buffers_.size(); i++) {
382         result.push_back(
383             {non_const_buffers_[i].data(), non_const_buffers_[i].size()});
384       }
385       return result;
386     }
387   };
388 
389   std::unique_ptr<Memory> memory_;
390   std::unique_ptr<DataLoader> loader_; // program_ points to this.
391   std::unique_ptr<const Program> program_; // methods_ entries point to this.
392   std::unordered_map<std::string, std::unique_ptr<Method>> methods_;
393   std::unique_ptr<ETDumpGen> event_tracer_;
394   std::unique_ptr<uint8_t[]> debug_buffer_;
395   size_t debug_buffer_size_;
396 };
397 
398 inline std::unique_ptr<Module> load_module_from_buffer(
399     const void* ptr,
400     size_t ptr_len,
401     bool enable_etdump,
402     size_t debug_buffer_size,
403     Program::Verification program_verification) {
404   EXECUTORCH_SCOPE_PROF("load_module_from_buffer");
405   auto loader = std::make_unique<BufferDataLoader>(ptr, ptr_len);
406   return std::make_unique<Module>(
407       std::move(loader),
408       enable_etdump ? std::make_unique<torch::executor::ETDumpGen>() : nullptr,
409       debug_buffer_size,
410       program_verification);
411 }
412 
413 inline std::unique_ptr<Module> load_module_from_file(
414     const std::string& path,
415     bool enable_etdump,
416     size_t debug_buffer_size,
417     Program::Verification program_verification) {
418   EXECUTORCH_SCOPE_PROF("load_module_from_file");
419 
420   Result<MmapDataLoader> res = MmapDataLoader::from(
421       path.c_str(), MmapDataLoader::MlockConfig::UseMlockIgnoreErrors);
422   THROW_IF_ERROR(
423       res.error(),
424       "Failed to create MmapDataLoader from file %s, error: 0x%" PRIx32,
425       path.c_str(),
426       static_cast<uint32_t>(res.error()));
427 
428   auto loader = std::make_unique<MmapDataLoader>(std::move(res.get()));
429   return std::make_unique<Module>(
430       std::move(loader),
431       enable_etdump ? std::make_unique<torch::executor::ETDumpGen>() : nullptr,
432       debug_buffer_size,
433       program_verification);
434 }
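// Illustrative sketch (hypothetical path, not part of the original file):
// loading a module directly in C++ with the helpers above.
//
//   auto module = load_module_from_file(
//       "/tmp/model.pte",
//       /*enable_etdump=*/false,
//       /*debug_buffer_size=*/0,
//       Program::Verification::InternalConsistency);
//   std::vector<EValue> outputs = module->run_method("forward", inputs);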
435 
436 static constexpr size_t kDEFAULT_BUNDLED_INPUT_POOL_SIZE = 16 * 1024U;
437 
438 struct PyBundledModule final {
439   explicit PyBundledModule(
440       const py::bytes& buffer,
441       uint32_t bundled_input_pool_size)
442       : bundled_program_ptr_(buffer),
443         program_ptr_(static_cast<const void*>(
444             bundled_program_flatbuffer::GetBundledProgram(
445                 get_bundled_program_ptr())
446                 ->program()
447                 ->data())),
448         program_len_(bundled_program_flatbuffer::GetBundledProgram(
449                          get_bundled_program_ptr())
450                          ->program()
451                          ->size()) {}
452 
453   static std::unique_ptr<PyBundledModule> load_from_buffer(
454       const py::bytes& buffer,
455       uint32_t bundled_input_pool_size) {
456     return std::make_unique<PyBundledModule>(buffer, bundled_input_pool_size);
457   }
458 
459   const void* get_bundled_program_ptr() {
460     return bundled_program_ptr_.cast<std::string_view>().data();
461   }
462 
463   const void* get_program_ptr() {
464     return program_ptr_;
465   }
466 
467   size_t get_program_len() {
468     return program_len_;
469   }
470 
471  private:
472   // Store the bytes object instead of a raw pointer so that this module will
473   // keep the bytes alive.
474   const py::bytes bundled_program_ptr_;
475   const void* program_ptr_;
476   size_t program_len_;
477 };
478 
479 /// Expose a subset of TensorInfo information to python.
480 struct PyTensorInfo final {
481   explicit PyTensorInfo(
482       std::shared_ptr<Module> module,
483       torch::executor::TensorInfo info)
484       : module_(std::move(module)), info_(info) {}
485 
486   py::tuple sizes() const {
487     const auto shape = info_.sizes();
488     py::tuple tup(shape.size());
489     for (size_t i = 0; i < shape.size(); ++i) {
490       tup[i] = py::cast(shape[i]);
491     }
492     return tup;
493   }
494 
495   int8_t dtype() const {
496     return static_cast<std::underlying_type<exec_aten::ScalarType>::type>(
497         info_.scalar_type());
498   }
499 
500   bool is_memory_planned() const {
501     return info_.is_memory_planned();
502   }
503 
504   size_t nbytes() const {
505     return info_.nbytes();
506   }
507 
508   std::string repr() const {
509     std::string size_str = "[";
510     for (const auto& d : info_.sizes()) {
511       size_str.append(std::to_string(d));
512       size_str.append(", ");
513     }
514     if (size_str.length() >= 2) {
515       // Pop the last two characters (comma and space) and add the close bracket.
516       size_str.pop_back();
517       size_str.pop_back();
518     }
519     size_str.append("]");
520     return "TensorInfo(sizes=" + size_str + ", dtype=" +
521         std::string(executorch::runtime::toString(info_.scalar_type())) +
522         ", is_memory_planned=" +
523         (info_.is_memory_planned() ? "True" : "False") +
524         ", nbytes=" + std::to_string(info_.nbytes()) + ")";
525   }
526 
527  private:
528   // TensorInfo relies on module to be alive.
529   std::shared_ptr<Module> module_;
530   torch::executor::TensorInfo info_;
531 };
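// Illustrative example of the repr() output above (values hypothetical):
//
//   TensorInfo(sizes=[1, 3, 224, 224], dtype=Float, is_memory_planned=True, nbytes=602112)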
532 
533 /// Expose a subset of MethodMeta information to python.
534 struct PyMethodMeta final {
535   explicit PyMethodMeta(
536       std::shared_ptr<Module> module,
537       torch::executor::MethodMeta meta)
538       : module_(std::move(module)), meta_(meta) {}
539 
540   const char* name() const {
541     return meta_.name();
542   }
543 
544   size_t num_inputs() const {
545     return meta_.num_inputs();
546   }
547 
548   std::unique_ptr<PyTensorInfo> input_tensor_meta(size_t index) const {
549     const auto result = meta_.input_tensor_meta(index);
550     THROW_INDEX_IF_ERROR(
551         result.error(), "Cannot get input tensor meta at %zu", index);
552     return std::make_unique<PyTensorInfo>(module_, result.get());
553   }
554 
555   size_t num_outputs() const {
556     return meta_.num_outputs();
557   }
558 
559   std::unique_ptr<PyTensorInfo> output_tensor_meta(size_t index) const {
560     const auto result = meta_.output_tensor_meta(index);
561     THROW_INDEX_IF_ERROR(
562         result.error(), "Cannot get output tensor meta at %zu", index);
563     return std::make_unique<PyTensorInfo>(module_, result.get());
564   }
565 
566   py::str repr() const {
567     py::list input_meta_strs;
568     for (size_t i = 0; i < meta_.num_inputs(); ++i) {
569       input_meta_strs.append(py::str(input_tensor_meta(i)->repr()));
570     }
571     py::list output_meta_strs;
572     for (size_t i = 0; i < meta_.num_outputs(); ++i) {
573       output_meta_strs.append(py::str(output_tensor_meta(i)->repr()));
574     }
575     // Add quotes to be more similar to Python's repr for strings.
576     py::str format =
577         "MethodMeta(name='{}', num_inputs={}, input_tensor_meta={}, num_outputs={}, output_tensor_meta={})";
578     return format.format(
579         std::string(meta_.name()),
580         std::to_string(meta_.num_inputs()),
581         input_meta_strs,
582         std::to_string(meta_.num_outputs()),
583         output_meta_strs);
584   }
585 
586  private:
587   // Must keep the Module object alive or else the meta object is invalidated.
588   std::shared_ptr<Module> module_;
589   torch::executor::MethodMeta meta_;
590 };
591 
592 struct PyModule final {
593   explicit PyModule(
594       const py::bytes& buffer,
595       bool enable_etdump,
596       size_t debug_buffer_size = 0,
597       Program::Verification program_verification =
598           Program::Verification::InternalConsistency)
599       : module_(load_module_from_buffer(
600             buffer.cast<std::string_view>().data(),
601             py::len(buffer),
602             enable_etdump,
603             debug_buffer_size,
604             program_verification)) {}
605 
606   explicit PyModule(
607       const void* ptr,
608       size_t ptr_len,
609       bool enable_etdump,
610       size_t debug_buffer_size = 0,
611       Program::Verification program_verification =
612           Program::Verification::InternalConsistency)
613       : module_(load_module_from_buffer(
614             ptr,
615             ptr_len,
616             enable_etdump,
617             debug_buffer_size,
618             program_verification)) {}
619 
620   explicit PyModule(
621       const std::string& path,
622       bool enable_etdump,
623       size_t debug_buffer_size = 0,
624       Program::Verification program_verification =
625           Program::Verification::InternalConsistency)
626       : module_(load_module_from_file(
627             path,
628             enable_etdump,
629             debug_buffer_size,
630             program_verification)) {}
631 
632   PyModule(const PyModule&) = delete;
633   PyModule& operator=(const PyModule&) = delete;
634   PyModule(PyModule&&) = default;
635   PyModule& operator=(PyModule&&) = default;
636 
637   // Module is only valid as long as the python buffer is alive.
638   static std::unique_ptr<PyModule> load_from_buffer(
639       const py::bytes& buffer,
640       bool enable_etdump,
641       size_t debug_buffer_size = 0,
642       Program::Verification program_verification =
643           Program::Verification::InternalConsistency) {
644     return std::make_unique<PyModule>(
645         buffer, enable_etdump, debug_buffer_size, program_verification);
646   }
647   static std::unique_ptr<PyModule> load_from_file(
648       const std::string& path,
649       bool enable_etdump,
650       size_t debug_buffer_size = 0,
651       Program::Verification program_verification =
652           Program::Verification::InternalConsistency) {
653     return std::make_unique<PyModule>(
654         path, enable_etdump, debug_buffer_size, program_verification);
655   }
656 
657   static std::unique_ptr<PyModule> load_from_bundled_program(
658       PyBundledModule& m,
659       bool enable_etdump,
660       size_t debug_buffer_size = 0) {
661     return std::make_unique<PyModule>(
662         m.get_program_ptr(),
663         m.get_program_len(),
664         enable_etdump,
665         debug_buffer_size);
666   }
667 
668   py::list run_method(
669       const std::string& method_name,
670       const py::sequence& inputs,
671       bool clone_outputs = true) {
672     const auto inputs_size = py::len(inputs);
673     std::vector<EValue> cpp_inputs;
674     cpp_inputs.reserve(inputs_size);
675 
676 #ifndef USE_ATEN_LIB // Portable mode
677     // Keep the ETensors and their metadata in scope for
678     // Module->run_method.
679     std::vector<torch::executor::TensorImpl> input_tensors;
680     std::vector<std::vector<torch::executor::Tensor::SizesType>> input_sizes;
681     std::vector<std::vector<torch::executor::Tensor::StridesType>>
682         input_strides;
683     std::vector<std::vector<torch::executor::Tensor::DimOrderType>>
684         input_dim_order;
685     // We store pointers to these vector elements, so it is important to
686     // reserve up front so that we don't invalidate them on a vector resize.
687     // We don't need to do this for the others since they are vectors of
688     // vectors, and we don't store a pointer to the root-level vector data.
689     input_tensors.reserve(inputs_size);
690 #endif
691 
692     // Convert python objects into EValues.
693     for (size_t i = 0; i < inputs_size; ++i) {
694       auto python_input = inputs[i];
695       const std::string& type_str = py::str(python_input.get_type());
696       if (type_str == "<class 'torch.Tensor'>") {
697         auto at_tensor = python_input.cast<at::Tensor>();
698         // alias_etensor_to_attensor will assert on this later, so to better
699         // propagate the error up to Python we check early and throw an exception.
700         if (!at_tensor.is_contiguous()) {
701           auto error_msg = "Input " + std::to_string(i) + " for method " +
702               method_name + " is not contiguous.";
703           throw std::runtime_error(error_msg);
704         }
705 
706 #ifdef USE_ATEN_LIB
707         EValue evalue(at_tensor);
708 #else
709         // convert at::Tensor to torch::executor::Tensor
710         auto type =
711             torch_to_executorch_scalar_type(at_tensor.options().dtype());
712         size_t dim = at_tensor.dim();
713         // can't directly alias at::Tensor sizes and strides due to the
714         // int64 vs int32 typing conflict.
715         input_sizes.emplace_back(
716             at_tensor.sizes().begin(), at_tensor.sizes().end());
717         input_strides.emplace_back(
718             at_tensor.strides().begin(), at_tensor.strides().end());
719 
720         // Only works for MemoryFormat::Contiguous inputs
721         std::vector<torch::executor::Tensor::DimOrderType> dim_order;
722         for (size_t cur_dim = 0; cur_dim < dim; cur_dim++) {
723           dim_order.push_back(cur_dim);
724         }
725         input_dim_order.push_back(std::move(dim_order));
726         input_tensors.emplace_back(
727             type,
728             dim,
729             input_sizes.back().data(),
730             nullptr,
731             input_dim_order.back().data(),
732             input_strides.back().data());
733 
734         torch::executor::Tensor temp =
735             torch::executor::Tensor(&input_tensors.back());
736         alias_etensor_to_attensor(at_tensor, temp);
737         EValue evalue(temp);
738 #endif
739 
740         cpp_inputs.push_back(evalue);
741       } else if (py::isinstance<py::none>(python_input)) {
742         cpp_inputs.push_back(EValue());
743       } else if (py::isinstance<py::bool_>(python_input)) {
744         cpp_inputs.push_back(EValue(py::cast<bool>(python_input)));
745       } else if (py::isinstance<py::int_>(python_input)) {
746         cpp_inputs.push_back(EValue(py::cast<int64_t>(python_input)));
747       } else {
748         ET_ASSERT_UNREACHABLE_MSG("Unsupported pytype: %s", type_str.c_str());
749       }
750     }
751 
752     const auto& method = module_->get_method(method_name);
753     const auto num_outputs = method.outputs_size();
754     output_storages_ = make_output_storages(method);
755     std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
756     for (size_t i = 0; i < output_storages_.size(); ++i) {
757       output_storage_spans[i] =
758           Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
759     }
760     auto outputs =
761         module_->run_method(method_name, cpp_inputs, output_storage_spans);
762 
763     // Retrieve outputs
764     return get_outputs_as_py_list(outputs, clone_outputs);
765   }
766 
767   py::list forward(const py::sequence& inputs, bool clone_outputs = true) {
768     return run_method("forward", inputs, clone_outputs);
769   }
770 
771   py::list forward_single_input(
772       const torch::Tensor& inputTensor,
773       bool clone_outputs = true) {
774     py::list py_list;
775     py_list.append(py::cast(inputTensor));
776     return run_method("forward", py_list, clone_outputs);
777   }
778 
779   bool has_etdump() {
780     return module_->has_etdump();
781   }
782 
783   void write_etdump_result_to_file(
784       const std::string& path,
785       const py::object& debug_buffer_path) {
786     if (!has_etdump()) {
787       throw std::runtime_error("No etdump found");
788     }
789     auto& etdump = module_->etdump();
790     etdump_result result = etdump.get_etdump_data();
791     if (result.buf != nullptr && result.size > 0) {
792       write_data_to_file(path, result.buf, result.size);
793       free(result.buf);
794       if (module_->has_etdump_debug_buffer() &&
795           py::isinstance<py::str>(debug_buffer_path)) {
796         // Also write out the debug buffer to a separate file if requested.
797         std::string debug_buffer_path_str =
798             py::cast<py::str>(debug_buffer_path);
799         const auto debug_buffer = module_->get_etdump_debug_buffer();
800         write_data_to_file(
801             debug_buffer_path_str, debug_buffer.data(), debug_buffer.size());
802       }
803     } else {
804       ET_LOG(
805           Info,
806           "No etdump data found, try rebuilding with "
807           "the CMake option EXECUTORCH_ENABLE_EVENT_TRACER or with "
808           "buck run --config executorch.event_tracer_enabled=true");
809     }
810   }
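// Illustrative sketch (hedged, not part of the original file) of the ETDump
// flow from Python; the module name and paths are hypothetical:
//
//   mod = portable_lib._load_for_executorch("model.pte", enable_etdump=True)
//   mod.forward([torch.ones(2, 2)])
//   mod.write_etdump_result_to_file("model.etdump")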
811 
812   void load_bundled_input(
813       PyBundledModule& m,
814       const std::string method_name,
815       size_t testset_idx) {
816     const void* bundled_program_ptr = m.get_bundled_program_ptr();
817     Error status = executorch::bundled_program::load_bundled_input(
818         module_->get_method(method_name), bundled_program_ptr, testset_idx);
819     THROW_IF_ERROR(
820         status,
821         "load_bundled_input failed with status 0x%" PRIx32,
822         static_cast<uint32_t>(status));
823   }
824 
825   py::list verify_result_with_bundled_expected_output(
826       PyBundledModule& m,
827       const std::string method_name,
828       size_t testset_idx,
829       double rtol = 1e-5,
830       double atol = 1e-8) {
831     const void* bundled_program_ptr = m.get_bundled_program_ptr();
832     auto& method = module_->get_method(method_name);
833     Error status = executorch::bundled_program::load_bundled_input(
834         method, bundled_program_ptr, testset_idx);
835     THROW_IF_ERROR(
836         status,
837         "load_bundled_input failed with status 0x%" PRIx32,
838         static_cast<uint32_t>(status));
839     py::list outputs = plan_execute(method_name);
840     status = executorch::bundled_program::verify_method_outputs(
841         method, bundled_program_ptr, testset_idx, rtol, atol);
842     THROW_IF_ERROR(
843         status,
844         "Result verification failed with status %" PRIu32,
845         static_cast<uint32_t>(status));
846     return outputs;
847   }
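// Illustrative sketch (hedged, not part of the original file) of the
// bundled-program flow from Python; names other than the bound functions are
// hypothetical:
//
//   bundle = portable_lib._load_bundled_program_from_buffer(bp_bytes)
//   mod = portable_lib._load_for_executorch_from_bundled_program(bundle)
//   mod.load_bundled_input(bundle, "forward", 0)
//   outputs = mod.verify_result_with_bundled_expected_output(bundle, "forward", 0)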
848 
849   py::list plan_execute(
850       const std::string method_name,
851       bool clone_outputs = true) {
852     auto& method = module_->get_method(method_name);
853     // Need to pre-allocate space for outputs just like in run_method.
854     const auto num_outputs = method.outputs_size();
855     output_storages_ = make_output_storages(method);
856     std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
857     for (size_t i = 0; i < output_storages_.size(); ++i) {
858       output_storage_spans[i] =
859           Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
860     }
861     setup_output_storage(method, output_storage_spans);
862     auto status = method.execute();
863     THROW_IF_ERROR(
864         status,
865         "executing execution plan for method '%s' failed with error: 0x%" PRIx32,
866         method_name.c_str(), static_cast<uint32_t>(status));
867     const auto outputs = module_->get_outputs(method_name);
868     return get_outputs_as_py_list(outputs, clone_outputs);
869   }
870 
871   py::list get_outputs_as_py_list(
872       const std::vector<EValue>& outputs,
873       bool clone_outputs = true) {
874     const auto outputs_size = outputs.size();
875     py::list list(outputs_size);
876     for (size_t i = 0; i < outputs_size; ++i) {
877       auto& v = outputs[i];
878       if (Tag::None == v.tag) {
879         list[i] = py::none();
880       } else if (Tag::Int == v.tag) {
881         list[i] = py::cast(v.toInt());
882       } else if (Tag::Double == v.tag) {
883         list[i] = py::cast(v.toDouble());
884       } else if (Tag::Bool == v.tag) {
885         list[i] = py::cast(v.toBool());
886       } else if (Tag::String == v.tag) {
887         list[i] = py::cast(std::string(v.toString().data()));
888       } else if (Tag::Tensor == v.tag) {
889 #ifdef USE_ATEN_LIB
890         // Clone so the outputs in python do not share a lifetime with the
891         // module object
892         if (clone_outputs) {
893           list[i] = py::cast(v.toTensor().clone());
894         } else {
895           list[i] = py::cast(v.toTensor());
896         }
897 #else
898         if (clone_outputs) {
899           list[i] = py::cast(alias_attensor_to_etensor(v.toTensor()).clone());
900         } else {
901           list[i] = py::cast(alias_attensor_to_etensor(v.toTensor()));
902         }
903 #endif
904       } else {
905         ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
906       }
907     }
908     return list;
909   }
910 
911   std::unique_ptr<PyMethodMeta> method_meta(const std::string method_name) {
912     auto& method = module_->get_method(method_name);
913     return std::make_unique<PyMethodMeta>(module_, method.method_meta());
914   }
915 
916   std::vector<std::string> method_names() {
917     return module_->method_names();
918   }
919 
920  private:
921   std::shared_ptr<Module> module_;
922   // Keep the output storages alive until they can be compared, as is needed
923   // for bundled programs.
924   std::vector<std::vector<uint8_t>> output_storages_;
925 
926   std::vector<std::vector<uint8_t>> make_output_storages(const Method& method) {
927     const auto num_outputs = method.outputs_size();
928     // Create a buffer for each output tensor. Memory-planned outputs and
929     // non-tensor outputs get an empty buffer in this list, which is ignored later.
930     std::vector<std::vector<uint8_t>> output_storages;
931     output_storages.reserve(num_outputs);
932     auto meta = method.method_meta();
933     for (size_t i = 0; i < num_outputs; ++i) {
934       auto output_type = meta.output_tag(i);
935       THROW_IF_ERROR(
936           output_type.error(), "Failed to get output type for output %zu", i);
937       if (output_type.get() != Tag::Tensor) {
938         // Skip allocating storage for non-tensor outputs.
939         output_storages.emplace_back();
940         continue;
941       }
942       const auto& output_tensor_meta =
943           method.method_meta().output_tensor_meta(i);
944       THROW_IF_ERROR(
945           output_tensor_meta.error(),
946           "Failed to get output tensor meta for output %zu",
947           i);
948       if (output_tensor_meta.get().is_memory_planned()) {
949         // Skip allocating storage for planned memory outputs.
950         output_storages.emplace_back();
951         continue;
952       }
953       // Allocate storage for the output tensor.
954       const size_t output_size = output_tensor_meta.get().nbytes();
955       output_storages.emplace_back(output_size);
956     }
957     return output_storages;
958   }
959 };
960 
961 void create_profile_block(const std::string& name) {
962   EXECUTORCH_PROFILE_CREATE_BLOCK(name.c_str());
963 }
964 
965 py::list get_operator_names() {
966   Span<const Kernel> kernels = get_registered_kernels();
967   py::list res;
968   for (const Kernel& k : kernels) {
969     if (k.name_ != nullptr) {
970       res.append(py::cast(k.name_));
971     }
972   }
973   return res;
974 }
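// Illustrative note: from Python this surfaces as _get_operator_names(),
// returning names like "aten::add.out"; the exact contents depend on which
// kernels were linked into the extension.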
975 
976 } // namespace
977 
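// Illustrative sketch (hedged, not part of the original file): typical Python
// usage of the bindings defined below, assuming the extension is built as
// executorch.extension.pybindings.portable_lib (the actual module name comes
// from EXECUTORCH_PYTHON_MODULE_NAME at build time):
//
//   from executorch.extension.pybindings import portable_lib
//   mod = portable_lib._load_for_executorch("/path/to/model.pte")
//   outputs = mod.forward([torch.ones(2, 2)])
//   meta = mod.method_meta("forward")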
978 PYBIND11_MODULE(EXECUTORCH_PYTHON_MODULE_NAME, m) {
979   // Redirects cout and cerr to the Python environment for the function calls this guards.
980   auto call_guard = py::
981       call_guard<py::scoped_ostream_redirect, py::scoped_estream_redirect>();
982 
983   // Bind the verification enum to python.
984   py::enum_<Program::Verification>(m, "Verification")
985       .value("Minimal", Program::Verification::Minimal)
986       .value("InternalConsistency", Program::Verification::InternalConsistency);
987 
988   m.def(
989       "_load_for_executorch",
990       PyModule::load_from_file,
991       py::arg("path"),
992       py::arg("enable_etdump") = false,
993       py::arg("debug_buffer_size") = 0,
994       py::arg("program_verification") =
995           Program::Verification::InternalConsistency,
996       call_guard);
997   m.def(
998       "_load_for_executorch_from_buffer",
999       &PyModule::load_from_buffer,
1000       py::arg("buffer"),
1001       py::arg("enable_etdump") = false,
1002       py::arg("debug_buffer_size") = 0,
1003       py::arg("program_verification") =
1004           Program::Verification::InternalConsistency,
1005       call_guard);
1006   m.def(
1007       "_load_for_executorch_from_bundled_program",
1008       &PyModule::load_from_bundled_program,
1009       py::arg("ptr"),
1010       py::arg("enable_etdump") = false,
1011       py::arg("debug_buffer_size") = 0,
1012       call_guard);
1013   m.def(
1014       "_load_bundled_program_from_buffer",
1015       &PyBundledModule::load_from_buffer,
1016       py::arg("buffer"),
1017       py::arg("non_const_pool_size") = kDEFAULT_BUNDLED_INPUT_POOL_SIZE,
1018       call_guard);
1019   m.def(
1020       "_dump_profile_results",
1021       []() {
1022         prof_result_t prof_result;
1023         EXECUTORCH_DUMP_PROFILE_RESULTS(&prof_result);
1024         return py::bytes(
1025             reinterpret_cast<const char*>(prof_result.prof_data),
1026             prof_result.num_bytes);
1027       },
1028       call_guard);
1029   m.def("_get_operator_names", &get_operator_names);
1030   m.def("_create_profile_block", &create_profile_block, call_guard);
1031   m.def(
1032       "_reset_profile_results",
1033       []() { EXECUTORCH_RESET_PROFILE_RESULTS(); },
1034       call_guard);
1035 
1036   py::class_<PyModule>(m, "ExecuTorchModule")
1037       .def("load_bundled_input", &PyModule::load_bundled_input, call_guard)
1038       .def(
1039           "verify_result_with_bundled_expected_output",
1040           &PyModule::verify_result_with_bundled_expected_output,
1041           py::arg("bundle"),
1042           py::arg("method_name"),
1043           py::arg("testset_idx"),
1044           py::arg("rtol") = 1e-5,
1045           py::arg("atol") = 1e-8,
1046           call_guard)
1047       .def(
1048           "plan_execute",
1049           &PyModule::plan_execute,
1050           py::arg("method_name"),
1051           py::arg("clone_outputs") = true,
1052           call_guard)
1053       .def(
1054           "method_meta",
1055           &PyModule::method_meta,
1056           py::arg("method_name"),
1057           call_guard)
1058       .def("method_names", &PyModule::method_names, call_guard)
1059       .def(
1060           "run_method",
1061           &PyModule::run_method,
1062           py::arg("method_name"),
1063           py::arg("inputs") = py::list(),
1064           py::arg("clone_outputs") = true,
1065           call_guard)
1066       .def(
1067           "forward",
1068           &PyModule::forward,
1069           py::arg("inputs") = py::list(),
1070           py::arg("clone_outputs") = true,
1071           call_guard)
1072       .def("has_etdump", &PyModule::has_etdump, call_guard)
1073       .def(
1074           "write_etdump_result_to_file",
1075           &PyModule::write_etdump_result_to_file,
1076           py::arg("path"),
1077           py::arg("debug_buffer_path") = py::none(),
1078           call_guard)
1079       .def(
1080           "__call__",
1081           &PyModule::forward,
1082           py::arg("inputs") = py::list(),
1083           py::arg("clone_outputs") = true,
1084           call_guard)
1085       .def(
1086           "__call__",
1087           &PyModule::forward_single_input,
1088           py::arg("inputs") = py::list(),
1089           py::arg("clone_outputs") = true,
1090           call_guard);
1091 
1092   py::class_<PyBundledModule>(m, "BundledModule");
1093   py::class_<PyTensorInfo>(m, "TensorInfo")
1094       .def("sizes", &PyTensorInfo::sizes, call_guard)
1095       .def("dtype", &PyTensorInfo::dtype, call_guard)
1096       .def("is_memory_planned", &PyTensorInfo::is_memory_planned, call_guard)
1097       .def("nbytes", &PyTensorInfo::nbytes, call_guard)
1098       .def("__repr__", &PyTensorInfo::repr, call_guard);
1099   py::class_<PyMethodMeta>(m, "MethodMeta")
1100       .def("name", &PyMethodMeta::name, call_guard)
1101       .def("num_inputs", &PyMethodMeta::num_inputs, call_guard)
1102       .def("num_outputs", &PyMethodMeta::num_outputs, call_guard)
1103       .def(
1104           "input_tensor_meta",
1105           &PyMethodMeta::input_tensor_meta,
1106           py::arg("index"),
1107           call_guard)
1108       .def(
1109           "output_tensor_meta",
1110           &PyMethodMeta::output_tensor_meta,
1111           py::arg("index"),
1112           call_guard)
1113       .def("__repr__", &PyMethodMeta::repr, call_guard);
1114 }
1115 
1116 } // namespace pybindings
1117 } // namespace extension
1118 } // namespace executorch
1119