1 /*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9 #include <algorithm>
10 #include <cstdio>
11 #include <iostream>
12 #include <memory>
13 #include <stdexcept>
14 #include <unordered_map>
15
16 #include <pybind11/iostream.h>
17 #include <pybind11/pybind11.h>
18 #include <pybind11/stl.h>
19
20 #include <executorch/devtools/bundled_program/bundled_program.h>
21 #include <executorch/devtools/bundled_program/schema/bundled_program_schema_generated.h>
22 #include <executorch/devtools/etdump/etdump_flatcc.h>
23 #include <executorch/extension/data_loader/buffer_data_loader.h>
24 #include <executorch/extension/data_loader/mmap_data_loader.h>
25 #include <executorch/extension/memory_allocator/malloc_memory_allocator.h>
26 #include <executorch/runtime/core/data_loader.h>
27 #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
28 #include <executorch/runtime/executor/method.h>
29 #include <executorch/runtime/executor/program.h>
30 #include <executorch/runtime/kernel/operator_registry.h>
31 #include <executorch/runtime/platform/assert.h>
32 #include <executorch/runtime/platform/platform.h>
33 #include <executorch/runtime/platform/profiler.h>
34 #include <executorch/runtime/platform/runtime.h>
35
36 #include <ATen/Functions.h>
37 #include <ATen/Tensor.h>
38 #include <ATen/core/functional.h>
39 #include <c10/core/ScalarTypeToTypeMeta.h>
40 #include <torch/csrc/utils/pybind.h>
41 #include <torch/python.h>
42
43 #ifndef USE_ATEN_LIB
44 #include <c10/core/impl/LocalDispatchKeySet.h>
45 #include <executorch/extension/aten_util/aten_bridge.h>
46 #endif
47
48 /// Throws a runtime_error with the provided message if `error` is not `Ok`.
49 #define THROW_IF_ERROR(error, message, ...) \
50 ({ \
51 if ((error) != Error::Ok) { \
52 char msg_buf[128]; \
53 snprintf(msg_buf, sizeof(msg_buf), message, ##__VA_ARGS__); \
54 /* pybind will convert this to a python exception. */ \
55 throw std::runtime_error(msg_buf); \
56 } \
57 })
58
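/// Same as THROW_IF_ERROR, but throws std::out_of_range instead; used for
/// index lookups such as the MethodMeta tensor accessors below.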
59 #define THROW_INDEX_IF_ERROR(error, message, ...) \
60 ({ \
61 if ((error) != Error::Ok) { \
62 char msg_buf[128]; \
63 snprintf(msg_buf, sizeof(msg_buf), message, ##__VA_ARGS__); \
64 /* pybind will convert this to a python exception. */ \
65 throw std::out_of_range(msg_buf); \
66 } \
67 })
68
69 // Our logs work by writing to stderr. By default this is done through fprintf
70 // (as defined in posix.cpp), which does not show up in Python environments.
71 // Here we override the PAL to use std::cerr, which can be properly redirected by
72 // scoped_estream_redirect.
73 void et_pal_emit_log_message(
74 et_timestamp_t timestamp,
75 et_pal_log_level_t level,
76 const char* filename,
77 ET_UNUSED const char* function,
78 size_t line,
79 const char* message,
80 ET_UNUSED size_t length) {
81 std::cerr << "[" << filename << ":" << line << "] " << message << std::endl;
82 }
83
84 namespace py = pybind11;
85 using executorch::bundled_program::verify_method_outputs;
86 using ::executorch::extension::BufferDataLoader;
87 using ::executorch::extension::MallocMemoryAllocator;
88 using ::executorch::extension::MmapDataLoader;
89 using ::executorch::runtime::ArrayRef;
90 using ::executorch::runtime::DataLoader;
91 using ::executorch::runtime::Error;
92 using ::executorch::runtime::EValue;
93 using ::executorch::runtime::EventTracerDebugLogLevel;
94 using ::executorch::runtime::get_registered_kernels;
95 using ::executorch::runtime::HierarchicalAllocator;
96 using ::executorch::runtime::Kernel;
97 using ::executorch::runtime::MemoryAllocator;
98 using ::executorch::runtime::MemoryManager;
99 using ::executorch::runtime::Method;
100 using ::executorch::runtime::prof_result_t;
101 using ::executorch::runtime::Program;
102 using ::executorch::runtime::Result;
103 using ::executorch::runtime::Span;
104 using ::executorch::runtime::Tag;
105 using torch::executor::etdump_result;
106 using torch::executor::ETDumpGen;
107
108 #ifndef USE_ATEN_LIB
109 using ::executorch::extension::alias_attensor_to_etensor;
110 using ::executorch::extension::alias_etensor_to_attensor;
111 using ::executorch::extension::torch_to_executorch_scalar_type;
112 #endif // !USE_ATEN_LIB
113
114 namespace executorch {
115 namespace extension {
116 namespace pybindings {
117
118 namespace {
119
120 void write_data_to_file(const std::string& path, void* buf, size_t size) {
121 FILE* f = fopen(path.c_str(), "w+");
122 if (!f) {
123 throw std::runtime_error(
124 "Failed to open file " + path + ": " + strerror(errno));
125 }
126 size_t num_written = fwrite(buf, 1, size, f);
127 if (num_written != size) {
128 fclose(f);
129 throw std::runtime_error("Failed to write etdump to file " + path);
130 }
131 int err = fclose(f);
132 if (err) {
133 throw std::runtime_error(
134 "Failed to close etdump file " + path + ": " + strerror(err));
135 }
136 }
137
138 void setup_output_storage(
139 Method& method,
140 const std::vector<Span<uint8_t>>& output_storages) {
141 if (output_storages.size() != method.outputs_size()) {
142 THROW_IF_ERROR(
143 Error::InvalidArgument,
144 "number of output storages %zu does not match number of outputs %zu",
145 output_storages.size(),
146 method.outputs_size());
147 }
148 for (size_t i = 0; i < output_storages.size(); ++i) {
149 if (output_storages[i].size() == 0) {
150       // Skip empty output storages; this happens for non-tensor outputs
151       // and memory-planned outputs.
152 continue;
153 }
154 Error output_status = method.set_output_data_ptr(
155 output_storages[i].data(), output_storages[i].size(), i);
156     // We should already be skipping non-tensor outputs and memory-planned
157     // outputs, so any error is real.
158 THROW_IF_ERROR(
159 output_status,
160 "set_output_data_ptr failed for output %zu with error 0x%" PRIx32,
161 i,
162 static_cast<uint32_t>(output_status));
163 }
164 }
165
166 class Module final {
167 public:
168   explicit Module(
169 std::unique_ptr<DataLoader> loader,
170 std::unique_ptr<ETDumpGen> tracer = nullptr,
171 size_t debug_buffer_size = 0,
172 Program::Verification program_verification =
173 Program::Verification::InternalConsistency)
174 : loader_(std::move(loader)),
175 event_tracer_(std::move(tracer)),
176 debug_buffer_size_(debug_buffer_size) {
177 ::executorch::runtime::runtime_init();
178 Result<Program> program =
179 Program::load(loader_.get(), program_verification);
180 THROW_IF_ERROR(
181 program.error(),
182 "loading program failed with error: 0x%" PRIx32,
183 static_cast<uint32_t>(program.error()));
184 program_ = std::make_unique<Program>(std::move(program.get()));
185
186     // Figure out the size of each non_const layer we need to support every
187     // method in the program. A map is easier to use than a list because we
188     // don't know in advance how many non_const arenas there will be.
189 std::map<size_t, int64_t> non_const_buffer_sizes;
190 for (size_t i = 0; i < program_->num_methods(); ++i) {
191 auto name = program_->get_method_name(i).get();
192 auto method_meta = program_->method_meta(name).get();
193 for (size_t j = 0; j < method_meta.num_non_const_buffers(); j++) {
194 int64_t buffer_size = method_meta.non_const_buffer_size(j).get();
195 if (non_const_buffer_sizes.find(j) == non_const_buffer_sizes.end()) {
196 non_const_buffer_sizes.insert({j, buffer_size});
197 } else {
198 non_const_buffer_sizes[j] =
199 std::max(non_const_buffer_sizes[j], buffer_size);
200 }
201 }
202 }
203
204     // Allocate the arenas. Using a vector because we need to remember the size
205     // as well, so a vector is easier than a unique_ptr.
206 std::vector<std::vector<uint8_t>> non_const_buffers_;
207 for (std::map<size_t, int64_t>::iterator i = non_const_buffer_sizes.begin();
208 i != non_const_buffer_sizes.end();
209 i++) {
210 non_const_buffers_.push_back(std::vector<uint8_t>(i->second));
211 }
212
213 memory_ = std::make_unique<Memory>(std::move(non_const_buffers_));
214 if (event_tracer_ && debug_buffer_size > 0) {
215 // If a debug buffer was requested for the ETDump, allocate it and make
216 // sure its lifetime is as long as the event_tracer.
217 debug_buffer_ = std::make_unique<uint8_t[]>(debug_buffer_size);
218 event_tracer_->set_debug_buffer(get_etdump_debug_buffer());
219 event_tracer_->set_event_tracer_debug_level(
220 EventTracerDebugLogLevel::kIntermediateOutputs);
221 }
222
223 // Load methods
224 for (size_t i = 0; i < program_->num_methods(); ++i) {
225 auto name = program_->get_method_name(i).get();
226       // It's safe to use the same memory manager for all methods because
227       // we can guarantee that only one will be executing at a time.
228       // Everything in this module runs on a single thread.
229 Result<Method> method = program_->load_method(
230 name, memory_->mem_manager(), event_tracer_.get());
231 THROW_IF_ERROR(
232 method.error(),
233 "loading method %s failed with error 0x%" PRIx32,
234 name,
235 static_cast<uint32_t>(method.error()));
236 methods_.insert(
237 {std::string(name),
238 std::make_unique<Method>(std::move(method.get()))});
239 }
240 }
241
242 Module(const Module&) = delete;
243 Module& operator=(const Module&) = delete;
244 Module(Module&&) = default;
245 Module& operator=(Module&&) = default;
246
247 /// Executes the specified method on the provided inputs and returns its
248 /// outputs.
249   std::vector<EValue> run_method(
250 const std::string& method_name,
251 const std::vector<EValue>& args,
252 const std::optional<std::vector<Span<uint8_t>>>& output_storages =
253 std::nullopt) {
254 auto& method = get_method(method_name);
255 exec_aten::ArrayRef<EValue> input_evalue_list(args.data(), args.size());
256
257 Error set_inputs_status = method.set_inputs(input_evalue_list);
258 THROW_IF_ERROR(
259 set_inputs_status,
260 "method->set_inputs() for method '%s' failed with error 0x%" PRIx32,
261 method_name.c_str(),
262 static_cast<uint32_t>(set_inputs_status));
263
264 #ifdef USE_ATEN_LIB
265 // [TLS handling] This is to workaround an assertion failure
266 // (https://fburl.com/code/302jyn8d) running `gelu` in ATen mode in fbcode
267 // (such as bento). The problem is ExecuTorch ATen mode doesn't have
268 // Thread Local State, but `torch-cpp` is assuming tls init is done. There
269 // are two more checks: MKLDNN disabled and C10_MOBILE, if any of them is
270 // true we won't be hitting this assertion error. However in `torch-cpp`
271 // lib both checks are false. Production impact: this should not make any
272 // impact in production environment, given that in xplat we are depending
273 // on a library that enables C10_MOBILE (`torch_mobile_core`).
274 c10::impl::ExcludeDispatchKeyGuard no_autograd(
275 c10::autograd_dispatch_keyset);
276 #endif
277 if (output_storages) {
278 setup_output_storage(method, *output_storages);
279 }
280 Error execute_status = method.execute();
281 THROW_IF_ERROR(
282 execute_status,
283 "method->execute() failed with error 0x%" PRIx32,
284 static_cast<uint32_t>(execute_status));
285 // process outputs
286 return get_outputs(method_name);
287 }
288
289   std::vector<EValue> get_outputs(const std::string& method_name) {
290 auto& method = methods_[method_name];
291 std::vector<EValue> result(method->outputs_size());
292
293 Error get_outputs_status =
294 method->get_outputs(result.data(), method->outputs_size());
295 THROW_IF_ERROR(
296 get_outputs_status,
297 "method->get_outputs() for method '%s' failed with error 0x%" PRIx32,
298 method_name.c_str(),
299 static_cast<uint32_t>(get_outputs_status));
300
301 return result;
302 }
303
304   Method& get_method(const std::string& method_name) {
305 if (methods_.count(method_name) == 0) {
306 THROW_IF_ERROR(
307 Error::InvalidArgument,
308 "no such method in program: %s",
309 method_name.c_str());
310 }
311 return *methods_[method_name].get();
312 }
313
314 /// Returns the names of all methods in the program.
315   std::vector<std::string> method_names() const {
316 std::vector<std::string> names;
317 for (const auto& method : methods_) {
318 names.push_back(method.first);
319 }
320 return names;
321 }
322
323   bool has_etdump() {
324 return static_cast<bool>(event_tracer_);
325 }
326
327   ETDumpGen& etdump() {
328 return *event_tracer_;
329 }
330
331   bool has_etdump_debug_buffer() const {
332 return static_cast<bool>(debug_buffer_);
333 }
334
335   Span<uint8_t> get_etdump_debug_buffer() {
336 return Span<uint8_t>(debug_buffer_.get(), debug_buffer_size_);
337 }
338
339 private:
340 /// A wrapper/util class for executorch memory allocations/manager.
341 class Memory {
342 public:
343     explicit Memory(std::vector<std::vector<uint8_t>>&& non_const_buffers)
344 : runtime_allocator_(),
345 non_const_buffers_(std::move(non_const_buffers)),
346 non_const_spans_(create_non_const_spans()),
347 non_const_allocator_(
348 {non_const_spans_.data(), non_const_spans_.size()}),
349 mem_manager_(
350 &const_allocator_,
351 &non_const_allocator_,
352 &runtime_allocator_,
353 &temp_allocator_) {}
354
355     /// Returns a pointer to the internal memory manager; the Memory instance
356     /// must outlive this pointer.
357     MemoryManager* mem_manager() {
358 return &mem_manager_;
359 }
360
361 Memory(const Memory&) = delete;
362 Memory& operator=(const Memory&) = delete;
363
364 private:
365 MemoryAllocator const_allocator_{MemoryAllocator(0, nullptr)};
366
367 MallocMemoryAllocator runtime_allocator_;
368
369 MemoryAllocator temp_allocator_{MemoryAllocator(0, nullptr)};
370
371 std::vector<std::vector<uint8_t>> non_const_buffers_;
372
373 std::vector<Span<uint8_t>> non_const_spans_;
374
375 HierarchicalAllocator non_const_allocator_;
376
377 MemoryManager mem_manager_;
378
379     std::vector<Span<uint8_t>> create_non_const_spans() {
380 std::vector<Span<uint8_t>> result;
381 for (size_t i = 0; i < non_const_buffers_.size(); i++) {
382 result.push_back(
383 {non_const_buffers_[i].data(), non_const_buffers_[i].size()});
384 }
385 return result;
386 }
387 };
388
389 std::unique_ptr<Memory> memory_;
390 std::unique_ptr<DataLoader> loader_; // program_ points to this.
391   std::unique_ptr<const Program> program_; // methods_ entries point into this.
392 std::unordered_map<std::string, std::unique_ptr<Method>> methods_;
393 std::unique_ptr<ETDumpGen> event_tracer_;
394 std::unique_ptr<uint8_t[]> debug_buffer_;
395 size_t debug_buffer_size_;
396 };
397
398 inline std::unique_ptr<Module> load_module_from_buffer(
399 const void* ptr,
400 size_t ptr_len,
401 bool enable_etdump,
402 size_t debug_buffer_size,
403 Program::Verification program_verification) {
404 EXECUTORCH_SCOPE_PROF("load_module_from_buffer");
405 auto loader = std::make_unique<BufferDataLoader>(ptr, ptr_len);
406 return std::make_unique<Module>(
407 std::move(loader),
408 enable_etdump ? std::make_unique<torch::executor::ETDumpGen>() : nullptr,
409 debug_buffer_size,
410 program_verification);
411 }
412
413 inline std::unique_ptr<Module> load_module_from_file(
414 const std::string& path,
415 bool enable_etdump,
416 size_t debug_buffer_size,
417 Program::Verification program_verification) {
418 EXECUTORCH_SCOPE_PROF("load_module_from_file");
419
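  // mmap() the program file. UseMlockIgnoreErrors attempts to mlock() the
  // mapped pages but ignores any mlock() failures.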
420 Result<MmapDataLoader> res = MmapDataLoader::from(
421 path.c_str(), MmapDataLoader::MlockConfig::UseMlockIgnoreErrors);
422 THROW_IF_ERROR(
423 res.error(),
424 "Failed to create MmapDataLoader from file %s, error: 0x:%" PRIx32,
425 path.c_str(),
426 static_cast<uint32_t>(res.error()));
427
428 auto loader = std::make_unique<MmapDataLoader>(std::move(res.get()));
429 return std::make_unique<Module>(
430 std::move(loader),
431 enable_etdump ? std::make_unique<torch::executor::ETDumpGen>() : nullptr,
432 debug_buffer_size,
433 program_verification);
434 }
435
436 static constexpr size_t kDEFAULT_BUNDLED_INPUT_POOL_SIZE = 16 * 1024U;
437
438 struct PyBundledModule final {
439   explicit PyBundledModule(
440 const py::bytes& buffer,
441 uint32_t bundled_input_pool_size)
442 : bundled_program_ptr_(buffer),
443 program_ptr_(static_cast<const void*>(
444 bundled_program_flatbuffer::GetBundledProgram(
445 get_bundled_program_ptr())
446 ->program()
447 ->data())),
448 program_len_(bundled_program_flatbuffer::GetBundledProgram(
449 get_bundled_program_ptr())
450 ->program()
451 ->size()) {}
452
453   static std::unique_ptr<PyBundledModule> load_from_buffer(
454 const py::bytes& buffer,
455 uint32_t bundled_input_pool_size) {
456 return std::make_unique<PyBundledModule>(buffer, bundled_input_pool_size);
457 }
458
459   const void* get_bundled_program_ptr() {
460 return bundled_program_ptr_.cast<std::string_view>().data();
461 }
462
463   const void* get_program_ptr() {
464 return program_ptr_;
465 }
466
467   size_t get_program_len() {
468 return program_len_;
469 }
470
471 private:
472 // Store the bytes object instead of a raw pointer so that this module will
473 // keep the bytes alive.
474 const py::bytes bundled_program_ptr_;
475 const void* program_ptr_;
476 size_t program_len_;
477 };
478
479 /// Expose a subset of TensorInfo information to python.
480 struct PyTensorInfo final {
481   explicit PyTensorInfo(
482 std::shared_ptr<Module> module,
483 torch::executor::TensorInfo info)
484 : module_(std::move(module)), info_(info) {}
485
486   py::tuple sizes() const {
487 const auto shape = info_.sizes();
488 py::tuple tup(shape.size());
489 for (size_t i = 0; i < shape.size(); ++i) {
490 tup[i] = py::cast(shape[i]);
491 }
492 return tup;
493 }
494
495   int8_t dtype() const {
496 return static_cast<std::underlying_type<exec_aten::ScalarType>::type>(
497 info_.scalar_type());
498 }
499
500   bool is_memory_planned() const {
501 return info_.is_memory_planned();
502 }
503
504   size_t nbytes() const {
505 return info_.nbytes();
506 }
507
508   std::string repr() const {
509 std::string size_str = "[";
510 for (const auto& d : info_.sizes()) {
511 size_str.append(std::to_string(d));
512 size_str.append(", ");
513 }
514 if (size_str.length() >= 2) {
515       // Pop the last two characters (comma and space) and add the closing bracket.
516 size_str.pop_back();
517 size_str.pop_back();
518 }
519 size_str.append("]");
520 return "TensorInfo(sizes=" + size_str + ", dtype=" +
521 std::string(executorch::runtime::toString(info_.scalar_type())) +
522 ", is_memory_planned=" +
523 (info_.is_memory_planned() ? "True" : "False") +
524 ", nbytes=" + std::to_string(info_.nbytes()) + ")";
525 }
526
527 private:
528 // TensorInfo relies on module to be alive.
529 std::shared_ptr<Module> module_;
530 torch::executor::TensorInfo info_;
531 };
532
533 /// Expose a subset of MethodMeta information to python.
534 struct PyMethodMeta final {
535   explicit PyMethodMeta(
536 std::shared_ptr<Module> module,
537 torch::executor::MethodMeta meta)
538 : module_(std::move(module)), meta_(meta) {}
539
540   const char* name() const {
541 return meta_.name();
542 }
543
544   size_t num_inputs() const {
545 return meta_.num_inputs();
546 }
547
548   std::unique_ptr<PyTensorInfo> input_tensor_meta(size_t index) const {
549 const auto result = meta_.input_tensor_meta(index);
550 THROW_INDEX_IF_ERROR(
551 result.error(), "Cannot get input tensor meta at %zu", index);
552 return std::make_unique<PyTensorInfo>(module_, result.get());
553 }
554
555   size_t num_outputs() const {
556 return meta_.num_outputs();
557 }
558
559   std::unique_ptr<PyTensorInfo> output_tensor_meta(size_t index) const {
560 const auto result = meta_.output_tensor_meta(index);
561 THROW_INDEX_IF_ERROR(
562 result.error(), "Cannot get output tensor meta at %zu", index);
563 return std::make_unique<PyTensorInfo>(module_, result.get());
564 }
565
566   py::str repr() const {
567 py::list input_meta_strs;
568 for (size_t i = 0; i < meta_.num_inputs(); ++i) {
569 input_meta_strs.append(py::str(input_tensor_meta(i)->repr()));
570 }
571 py::list output_meta_strs;
572 for (size_t i = 0; i < meta_.num_outputs(); ++i) {
573 output_meta_strs.append(py::str(output_tensor_meta(i)->repr()));
574 }
575 // Add quotes to be more similar to Python's repr for strings.
576 py::str format =
577 "MethodMeta(name='{}', num_inputs={}, input_tensor_meta={}, num_outputs={}, output_tensor_meta={})";
578 return format.format(
579 std::string(meta_.name()),
580 std::to_string(meta_.num_inputs()),
581 input_meta_strs,
582 std::to_string(meta_.num_outputs()),
583 output_meta_strs);
584 }
585
586 private:
587 // Must keep the Module object alive or else the meta object is invalidated.
588 std::shared_ptr<Module> module_;
589 torch::executor::MethodMeta meta_;
590 };
591
592 struct PyModule final {
593   explicit PyModule(
594 const py::bytes& buffer,
595 bool enable_etdump,
596 size_t debug_buffer_size = 0,
597 Program::Verification program_verification =
598 Program::Verification::InternalConsistency)
599 : module_(load_module_from_buffer(
600 buffer.cast<std::string_view>().data(),
601 py::len(buffer),
602 enable_etdump,
603 debug_buffer_size,
604 program_verification)) {}
605
606   explicit PyModule(
607 const void* ptr,
608 size_t ptr_len,
609 bool enable_etdump,
610 size_t debug_buffer_size = 0,
611 Program::Verification program_verification =
612 Program::Verification::InternalConsistency)
613 : module_(load_module_from_buffer(
614 ptr,
615 ptr_len,
616 enable_etdump,
617 debug_buffer_size,
618 program_verification)) {}
619
620   explicit PyModule(
621 const std::string& path,
622 bool enable_etdump,
623 size_t debug_buffer_size = 0,
624 Program::Verification program_verification =
625 Program::Verification::InternalConsistency)
626 : module_(load_module_from_file(
627 path,
628 enable_etdump,
629 debug_buffer_size,
630 program_verification)) {}
631
632 PyModule(const PyModule&) = delete;
633 PyModule& operator=(const PyModule&) = delete;
634 PyModule(PyModule&&) = default;
635 PyModule& operator=(PyModule&&) = default;
636
637 // Module is only valid as long as the python buffer is alive.
638   static std::unique_ptr<PyModule> load_from_buffer(
639 const py::bytes& buffer,
640 bool enable_etdump,
641 size_t debug_buffer_size = 0,
642 Program::Verification program_verification =
643 Program::Verification::InternalConsistency) {
644 return std::make_unique<PyModule>(
645 buffer, enable_etdump, debug_buffer_size, program_verification);
646 }
647   static std::unique_ptr<PyModule> load_from_file(
648 const std::string& path,
649 bool enable_etdump,
650 size_t debug_buffer_size = 0,
651 Program::Verification program_verification =
652 Program::Verification::InternalConsistency) {
653 return std::make_unique<PyModule>(
654 path, enable_etdump, debug_buffer_size, program_verification);
655 }
656
657   static std::unique_ptr<PyModule> load_from_bundled_program(
658 PyBundledModule& m,
659 bool enable_etdump,
660 size_t debug_buffer_size = 0) {
661 return std::make_unique<PyModule>(
662 m.get_program_ptr(),
663 m.get_program_len(),
664 enable_etdump,
665 debug_buffer_size);
666 }
667
668   py::list run_method(
669 const std::string& method_name,
670 const py::sequence& inputs,
671 bool clone_outputs = true) {
672 const auto inputs_size = py::len(inputs);
673 std::vector<EValue> cpp_inputs;
674 cpp_inputs.reserve(inputs_size);
675
676 #ifndef USE_ATEN_LIB // Portable mode
677 // So the ETensors and their metadata stay in scope for
678 // Module->run_method.
679 std::vector<torch::executor::TensorImpl> input_tensors;
680 std::vector<std::vector<torch::executor::Tensor::SizesType>> input_sizes;
681 std::vector<std::vector<torch::executor::Tensor::StridesType>>
682 input_strides;
683 std::vector<std::vector<torch::executor::Tensor::DimOrderType>>
684 input_dim_order;
685     // We store pointers to these vector elements, so it is important to reserve
686     // up front so that we don't invalidate them on a vector resize. We don't need
687     // to do this for the others since they are vectors of vectors, and we don't
688     // store a pointer to the root-level vector data.
689 input_tensors.reserve(inputs_size);
690 #endif
691
692 // Convert python objects into EValues.
693 for (size_t i = 0; i < inputs_size; ++i) {
694 auto python_input = inputs[i];
695 const std::string& type_str = py::str(python_input.get_type());
696 if (type_str == "<class 'torch.Tensor'>") {
697 auto at_tensor = python_input.cast<at::Tensor>();
698 // alias_etensor_to_attensor will assert on this later, so to better
699         // propagate the error up to Python we check early and throw an exception.
700 if (!at_tensor.is_contiguous()) {
701 auto error_msg = "Input " + std::to_string(i) + "for method " +
702 method_name + " is not contiguous.";
703 throw std::runtime_error(error_msg);
704 }
705
706 #ifdef USE_ATEN_LIB
707 EValue evalue(at_tensor);
708 #else
709 // convert at::Tensor to torch::executor::Tensor
710 auto type =
711 torch_to_executorch_scalar_type(at_tensor.options().dtype());
712 size_t dim = at_tensor.dim();
713         // Can't directly alias at::Tensor sizes and strides due to the int64 vs
714         // int32 typing conflict.
715 input_sizes.emplace_back(
716 at_tensor.sizes().begin(), at_tensor.sizes().end());
717 input_strides.emplace_back(
718 at_tensor.strides().begin(), at_tensor.strides().end());
719
720 // Only works for MemoryFormat::Contiguous inputs
721 std::vector<torch::executor::Tensor::DimOrderType> dim_order;
722 for (size_t cur_dim = 0; cur_dim < dim; cur_dim++) {
723 dim_order.push_back(cur_dim);
724 }
725 input_dim_order.push_back(std::move(dim_order));
726 input_tensors.emplace_back(
727 type,
728 dim,
729 input_sizes.back().data(),
730 nullptr,
731 input_dim_order.back().data(),
732 input_strides.back().data());
733
734 torch::executor::Tensor temp =
735 torch::executor::Tensor(&input_tensors.back());
736 alias_etensor_to_attensor(at_tensor, temp);
737 EValue evalue(temp);
738 #endif
739
740 cpp_inputs.push_back(evalue);
741 } else if (py::isinstance<py::none>(python_input)) {
742 cpp_inputs.push_back(EValue());
743 } else if (py::isinstance<py::bool_>(python_input)) {
744 cpp_inputs.push_back(EValue(py::cast<bool>(python_input)));
745 } else if (py::isinstance<py::int_>(python_input)) {
746 cpp_inputs.push_back(EValue(py::cast<int64_t>(python_input)));
747 } else {
748 ET_ASSERT_UNREACHABLE_MSG("Unsupported pytype: %s", type_str.c_str());
749 }
750 }
751
752 const auto& method = module_->get_method(method_name);
753 const auto num_outputs = method.outputs_size();
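    // Pre-allocate user-owned buffers for any outputs that are not
    // memory-planned (see make_output_storages) and hand them to the method
    // before execution.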
754 output_storages_ = make_output_storages(method);
755 std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
756     for (size_t i = 0; i < output_storages_.size(); ++i) {
757 output_storage_spans[i] =
758 Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
759 }
760 auto outputs =
761 module_->run_method(method_name, cpp_inputs, output_storage_spans);
762
763 // Retrieve outputs
764 return get_outputs_as_py_list(outputs, clone_outputs);
765 }
766
767   py::list forward(const py::sequence& inputs, bool clone_outputs = true) {
768 return run_method("forward", inputs, clone_outputs);
769 }
770
771   py::list forward_single_input(
772 const torch::Tensor& inputTensor,
773 bool clone_outputs = true) {
774 py::list py_list;
775 py_list.append(py::cast(inputTensor));
776 return run_method("forward", py_list, clone_outputs);
777 }
778
779   bool has_etdump() {
780 return module_->has_etdump();
781 }
782
783   void write_etdump_result_to_file(
784 const std::string& path,
785 const py::object& debug_buffer_path) {
786 if (!has_etdump()) {
787 throw std::runtime_error("No etdump found");
788 }
789 auto& etdump = module_->etdump();
790 etdump_result result = etdump.get_etdump_data();
791 if (result.buf != nullptr && result.size > 0) {
792 write_data_to_file(path, result.buf, result.size);
793 free(result.buf);
794 if (module_->has_etdump_debug_buffer() &&
795 py::isinstance<py::str>(debug_buffer_path)) {
796 // Also write out the debug buffer to a separate file if requested.
797 std::string debug_buffer_path_str =
798 py::cast<py::str>(debug_buffer_path);
799 const auto debug_buffer = module_->get_etdump_debug_buffer();
800 write_data_to_file(
801 debug_buffer_path_str, debug_buffer.data(), debug_buffer.size());
802 }
803 } else {
804 ET_LOG(
805 Info,
806 "No etdump data found, try rebuilding with "
807 "the CMake option EXECUTORCH_ENABLE_EVENT_TRACER or with "
808 "buck run --config executorch.event_tracer_enabled=true");
809 }
810 }
811
812   void load_bundled_input(
813 PyBundledModule& m,
814 const std::string method_name,
815 size_t testset_idx) {
816 const void* bundled_program_ptr = m.get_bundled_program_ptr();
817 Error status = executorch::bundled_program::load_bundled_input(
818 module_->get_method(method_name), bundled_program_ptr, testset_idx);
819 THROW_IF_ERROR(
820 status,
821 "load_bundled_input failed with status 0x%" PRIx32,
822 static_cast<uint32_t>(status));
823 }
824
825   py::list verify_result_with_bundled_expected_output(
826 PyBundledModule& m,
827 const std::string method_name,
828 size_t testset_idx,
829 double rtol = 1e-5,
830 double atol = 1e-8) {
831 const void* bundled_program_ptr = m.get_bundled_program_ptr();
832 auto& method = module_->get_method(method_name);
833 Error status = executorch::bundled_program::load_bundled_input(
834 method, bundled_program_ptr, testset_idx);
835 THROW_IF_ERROR(
836 status,
837 "load_bundled_input failed with status 0x%" PRIx32,
838 static_cast<uint32_t>(status));
839 py::list outputs = plan_execute(method_name);
840 status = executorch::bundled_program::verify_method_outputs(
841 method, bundled_program_ptr, testset_idx, rtol, atol);
842 THROW_IF_ERROR(
843 status,
844 "Result verification failed with status %" PRIu32,
845 static_cast<uint32_t>(status));
846 return outputs;
847 }
848
849   py::list plan_execute(
850 const std::string method_name,
851 bool clone_outputs = true) {
852 auto& method = module_->get_method(method_name);
853 // Need to pre-allocate space for outputs just like in run_method.
854 const auto num_outputs = method.outputs_size();
855 output_storages_ = make_output_storages(method);
856 std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
857     for (size_t i = 0; i < output_storages_.size(); ++i) {
858 output_storage_spans[i] =
859 Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
860 }
861 setup_output_storage(method, output_storage_spans);
862 auto status = method.execute();
863 THROW_IF_ERROR(
864 status,
865 "executing execution plan for method 'forward' failed with error: 0x%" PRIx32,
866 static_cast<uint32_t>(status));
867 const auto outputs = module_->get_outputs(method_name);
868 return get_outputs_as_py_list(outputs, clone_outputs);
869 }
870
871   py::list get_outputs_as_py_list(
872 const std::vector<EValue>& outputs,
873 bool clone_outputs = true) {
874 const auto outputs_size = outputs.size();
875 py::list list(outputs_size);
876 for (size_t i = 0; i < outputs_size; ++i) {
877 auto& v = outputs[i];
878 if (Tag::None == v.tag) {
879 list[i] = py::none();
880 } else if (Tag::Int == v.tag) {
881 list[i] = py::cast(v.toInt());
882 } else if (Tag::Double == v.tag) {
883 list[i] = py::cast(v.toDouble());
884 } else if (Tag::Bool == v.tag) {
885 list[i] = py::cast(v.toBool());
886 } else if (Tag::String == v.tag) {
887 list[i] = py::cast(std::string(v.toString().data()));
888 } else if (Tag::Tensor == v.tag) {
889 #ifdef USE_ATEN_LIB
890 // Clone so the outputs in python do not share a lifetime with the
891 // module object
892 if (clone_outputs) {
893 list[i] = py::cast(v.toTensor().clone());
894 } else {
895 list[i] = py::cast(v.toTensor());
896 }
897 #else
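        // Wrap the output ETensor in an at::Tensor view; clone when requested so
        // the Python result does not alias module-owned memory.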
898 if (clone_outputs) {
899 list[i] = py::cast(alias_attensor_to_etensor(v.toTensor()).clone());
900 } else {
901 list[i] = py::cast(alias_attensor_to_etensor(v.toTensor()));
902 }
903 #endif
904 } else {
905 ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
906 }
907 }
908 return list;
909 }
910
911   std::unique_ptr<PyMethodMeta> method_meta(const std::string method_name) {
912 auto& method = module_->get_method(method_name);
913 return std::make_unique<PyMethodMeta>(module_, method.method_meta());
914 }
915
916   std::vector<std::string> method_names() {
917 return module_->method_names();
918 }
919
920 private:
921 std::shared_ptr<Module> module_;
922   // Need to keep the output storages alive until they can be compared, in the
923   // case of bundled programs.
924 std::vector<std::vector<uint8_t>> output_storages_;
925
926   std::vector<std::vector<uint8_t>> make_output_storages(const Method& method) {
927 const auto num_outputs = method.outputs_size();
928     // Create a buffer for each output tensor. Memory-planned outputs and
929     // non-tensor outputs get an empty buffer in this list, which is ignored later.
930 std::vector<std::vector<uint8_t>> output_storages;
931     output_storages.reserve(num_outputs);
932 auto meta = method.method_meta();
933 for (size_t i = 0; i < num_outputs; ++i) {
934 auto output_type = meta.output_tag(i);
935 THROW_IF_ERROR(
936 output_type.error(), "Failed to get output type for output %zu", i);
937 if (output_type.get() != Tag::Tensor) {
938 // Skip allocating storage for non-tensor outputs.
939 output_storages.emplace_back();
940 continue;
941 }
942 const auto& output_tensor_meta =
943 method.method_meta().output_tensor_meta(i);
944 THROW_IF_ERROR(
945 output_tensor_meta.error(),
946 "Failed to get output tensor meta for output %zu",
947 i);
948 if (output_tensor_meta.get().is_memory_planned()) {
949 // Skip allocating storage for planned memory outputs.
950 output_storages.emplace_back();
951 continue;
952 }
953 // Allocate storage for the output tensor.
954 const size_t output_size = output_tensor_meta.get().nbytes();
955 output_storages.emplace_back(output_size);
956 }
957 return output_storages;
958 }
959 };
960
961 void create_profile_block(const std::string& name) {
962 EXECUTORCH_PROFILE_CREATE_BLOCK(name.c_str());
963 }
964
965 py::list get_operator_names() {
966 Span<const Kernel> kernels = get_registered_kernels();
967 py::list res;
968 for (const Kernel& k : kernels) {
969 if (k.name_ != nullptr) {
970 res.append(py::cast(k.name_));
971 }
972 }
973 return res;
974 }
975
976 } // namespace
977
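// Example Python usage of the bindings defined below. The exact import path
// depends on how this extension module is packaged; the typical wrapper is
// executorch.extension.pybindings.portable_lib.
//
//   from executorch.extension.pybindings.portable_lib import _load_for_executorch
//   module = _load_for_executorch("/path/to/model.pte")
//   outputs = module.forward([torch.randn(1, 3, 224, 224)])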
978 PYBIND11_MODULE(EXECUTORCH_PYTHON_MODULE_NAME, m) {
979   // Redirect cout and cerr to the Python environment for the function calls this guards.
980 auto call_guard = py::
981 call_guard<py::scoped_ostream_redirect, py::scoped_estream_redirect>();
982
983 // Bind the verification enum to python.
984 py::enum_<Program::Verification>(m, "Verification")
985 .value("Minimal", Program::Verification::Minimal)
986 .value("InternalConsistency", Program::Verification::InternalConsistency);
987
988 m.def(
989 "_load_for_executorch",
990 PyModule::load_from_file,
991 py::arg("path"),
992 py::arg("enable_etdump") = false,
993 py::arg("debug_buffer_size") = 0,
994 py::arg("program_verification") =
995 Program::Verification::InternalConsistency,
996 call_guard);
997 m.def(
998 "_load_for_executorch_from_buffer",
999 &PyModule::load_from_buffer,
1000 py::arg("buffer"),
1001 py::arg("enable_etdump") = false,
1002 py::arg("debug_buffer_size") = 0,
1003 py::arg("program_verification") =
1004 Program::Verification::InternalConsistency,
1005 call_guard);
1006 m.def(
1007 "_load_for_executorch_from_bundled_program",
1008 &PyModule::load_from_bundled_program,
1009 py::arg("ptr"),
1010 py::arg("enable_etdump") = false,
1011 py::arg("debug_buffer_size") = 0,
1012 call_guard);
1013 m.def(
1014 "_load_bundled_program_from_buffer",
1015 &PyBundledModule::load_from_buffer,
1016 py::arg("buffer"),
1017 py::arg("non_const_pool_size") = kDEFAULT_BUNDLED_INPUT_POOL_SIZE,
1018 call_guard);
1019 m.def(
1020 "_dump_profile_results",
1021 []() {
1022 prof_result_t prof_result;
1023 EXECUTORCH_DUMP_PROFILE_RESULTS(&prof_result);
1024 return py::bytes(
1025 reinterpret_cast<const char*>(prof_result.prof_data),
1026 prof_result.num_bytes);
1027 },
1028 call_guard);
1029 m.def("_get_operator_names", &get_operator_names);
1030 m.def("_create_profile_block", &create_profile_block, call_guard);
1031 m.def(
1032 "_reset_profile_results",
1033 []() { EXECUTORCH_RESET_PROFILE_RESULTS(); },
1034 call_guard);
1035
1036 py::class_<PyModule>(m, "ExecuTorchModule")
1037 .def("load_bundled_input", &PyModule::load_bundled_input, call_guard)
1038 .def(
1039 "verify_result_with_bundled_expected_output",
1040 &PyModule::verify_result_with_bundled_expected_output,
1041 py::arg("bundle"),
1042 py::arg("method_name"),
1043 py::arg("testset_idx"),
1044 py::arg("rtol") = 1e-5,
1045 py::arg("atol") = 1e-8,
1046 call_guard)
1047 .def(
1048 "plan_execute",
1049 &PyModule::plan_execute,
1050 py::arg("method_name"),
1051 py::arg("clone_outputs") = true,
1052 call_guard)
1053 .def(
1054 "method_meta",
1055 &PyModule::method_meta,
1056 py::arg("method_name"),
1057 call_guard)
1058 .def("method_names", &PyModule::method_names, call_guard)
1059 .def(
1060 "run_method",
1061 &PyModule::run_method,
1062 py::arg("method_name"),
1063 py::arg("inputs") = py::list(),
1064 py::arg("clone_outputs") = true,
1065 call_guard)
1066 .def(
1067 "forward",
1068 &PyModule::forward,
1069 py::arg("inputs") = py::list(),
1070 py::arg("clone_outputs") = true,
1071 call_guard)
1072 .def("has_etdump", &PyModule::has_etdump, call_guard)
1073 .def(
1074 "write_etdump_result_to_file",
1075 &PyModule::write_etdump_result_to_file,
1076 py::arg("path"),
1077 py::arg("debug_buffer_path") = py::none(),
1078 call_guard)
1079 .def(
1080 "__call__",
1081 &PyModule::forward,
1082 py::arg("inputs") = py::list(),
1083 py::arg("clone_outputs") = true,
1084 call_guard)
1085 .def(
1086 "__call__",
1087 &PyModule::forward_single_input,
1088 py::arg("inputs") = py::list(),
1089 py::arg("clone_outputs") = true,
1090 call_guard);
1091
1092 py::class_<PyBundledModule>(m, "BundledModule");
1093 py::class_<PyTensorInfo>(m, "TensorInfo")
1094 .def("sizes", &PyTensorInfo::sizes, call_guard)
1095 .def("dtype", &PyTensorInfo::dtype, call_guard)
1096 .def("is_memory_planned", &PyTensorInfo::is_memory_planned, call_guard)
1097 .def("nbytes", &PyTensorInfo::nbytes, call_guard)
1098 .def("__repr__", &PyTensorInfo::repr, call_guard);
1099 py::class_<PyMethodMeta>(m, "MethodMeta")
1100 .def("name", &PyMethodMeta::name, call_guard)
1101 .def("num_inputs", &PyMethodMeta::num_inputs, call_guard)
1102 .def("num_outputs", &PyMethodMeta::num_outputs, call_guard)
1103 .def(
1104 "input_tensor_meta",
1105 &PyMethodMeta::input_tensor_meta,
1106 py::arg("index"),
1107 call_guard)
1108 .def(
1109 "output_tensor_meta",
1110 &PyMethodMeta::output_tensor_meta,
1111 py::arg("index"),
1112 call_guard)
1113 .def("__repr__", &PyMethodMeta::repr, call_guard);
1114 }
1115
1116 } // namespace pybindings
1117 } // namespace extension
1118 } // namespace executorch
1119