/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/examples/models/llava/runner/llava_runner.h>
#include <gflags/gflags.h>
#include <cinttypes> // PRId64, used when logging tensor sizes
#ifndef LLAVA_NO_TORCH_DUMMY_IMAGE
#include <torch/torch.h>
#else
#include <algorithm> // std::fill
#endif

#if defined(ET_USE_THREADPOOL)
#include <executorch/extension/threadpool/cpuinfo_utils.h>
#include <executorch/extension/threadpool/threadpool.h>
#endif

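// Minimal command-line driver for LLaVA (image + text) inference with
// ExecuTorch: it parses the flags below, loads an image, and asks the runner
// to generate text conditioned on the image and prompt.
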
DEFINE_string(
    model_path,
    "llava.pte",
    "Model serialized in flatbuffer format.");

DEFINE_string(tokenizer_path, "tokenizer.bin", "Path to the tokenizer file.");

DEFINE_string(prompt, "The answer to the ultimate question is", "Prompt.");

DEFINE_string(
    image_path,
    "",
    "Path to a .pt file holding a serialized torch tensor for an image, with the longest edge resized to 336.");

DEFINE_double(
    temperature,
    0.8f,
    "Sampling temperature. Default is 0.8. 0 = greedy argmax sampling (deterministic); lower temperature = more deterministic.");

DEFINE_int32(
    seq_len,
    1024,
    "Total number of tokens to generate (prompt + output). If the number of input tokens + seq_len exceeds the model's max_seq_len, the output is truncated to max_seq_len tokens.");

DEFINE_int32(
    cpu_threads,
    -1,
    "Number of CPU threads for inference. Defaults to -1, which uses a heuristic to derive the number of performant cores for the specific device.");

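// Example invocation (binary name and file paths are illustrative and depend
// on how the example was built and where the exported artifacts live):
//   ./llava_main --model_path=llava.pte --tokenizer_path=tokenizer.bin \
//       --image_path=image.pt --prompt="Describe the image." --seq_len=768
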
using executorch::extension::llm::Image;

int32_t main(int32_t argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  // Create a loader to get the data of the program file. There are other
  // DataLoaders that use mmap() or point to data that's already in memory,
  // and users can create their own DataLoaders to load from arbitrary sources.
  const char* model_path = FLAGS_model_path.c_str();

  const char* tokenizer_path = FLAGS_tokenizer_path.c_str();

  const char* prompt = FLAGS_prompt.c_str();

  std::string image_path = FLAGS_image_path;

  double temperature = FLAGS_temperature;

  int32_t seq_len = FLAGS_seq_len;

  int32_t cpu_threads = FLAGS_cpu_threads;

#if defined(ET_USE_THREADPOOL)
  uint32_t num_performant_cores = cpu_threads == -1
      ? ::executorch::extension::cpuinfo::get_num_performant_cores()
      : static_cast<uint32_t>(cpu_threads);
  ET_LOG(
      Info, "Resetting threadpool with num threads = %u", num_performant_cores);
  if (num_performant_cores > 0) {
    ::executorch::extension::threadpool::get_threadpool()
        ->_unsafe_reset_threadpool(num_performant_cores);
  }
#endif
  // Create the LLaVA runner from the serialized model, tokenizer, and
  // sampling temperature.
  example::LlavaRunner runner(model_path, tokenizer_path, temperature);

  // Load the image; its longest edge should already be resized to 336.
  std::vector<uint8_t> image_data;

#ifdef LLAVA_NO_TORCH_DUMMY_IMAGE
  // Build without torch: use a dummy all-black image instead of loading one.
  image_data.resize(3 * 240 * 336);
  std::fill(image_data.begin(), image_data.end(), 0); // black
  std::array<int32_t, 3> image_shape = {3, 240, 336};
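  // The dummy shape is CHW: image_shape[0] = channels (3), image_shape[1] =
  // height (240), image_shape[2] = width (336); the Image below takes its
  // width and height from those entries.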
  std::vector<Image> images = {
      {.data = image_data, .width = image_shape[2], .height = image_shape[1]}};
#else // LLAVA_NO_TORCH_DUMMY_IMAGE
  // cv::Mat image = cv::imread(image_path, cv::IMREAD_COLOR);
  // int longest_edge = std::max(image.rows, image.cols);
  // float scale_factor = 336.0f / longest_edge;
  // cv::Size new_size(image.cols * scale_factor, image.rows * scale_factor);
  // cv::Mat resized_image;
  // cv::resize(image, resized_image, new_size);
  // image_data.assign(resized_image.datastart, resized_image.dataend);
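  // The commented-out snippet above sketches an alternative that decodes and
  // resizes a raw image with OpenCV; it is not built here and would require an
  // OpenCV dependency. The path actually used below expects image_path to be a
  // .pt file containing a uint8 CHW tensor whose longest edge is 336, as
  // described by the --image_path flag.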
  torch::Tensor image_tensor;
  torch::load(image_tensor, image_path); // CHW
  ET_LOG(
      Info,
      "image size(0): %" PRId64 ", size(1): %" PRId64 ", size(2): %" PRId64,
      image_tensor.size(0),
      image_tensor.size(1),
      image_tensor.size(2));
  image_data.assign(
      image_tensor.data_ptr<uint8_t>(),
      image_tensor.data_ptr<uint8_t>() + image_tensor.numel());
  std::vector<Image> images = {
      {.data = image_data,
       .width = static_cast<int32_t>(image_tensor.size(2)),
       .height = static_cast<int32_t>(image_tensor.size(1))}};
#endif // LLAVA_NO_TORCH_DUMMY_IMAGE

  // Generate: prefill the image and prompt, then decode up to seq_len tokens
  // (prompt + output).
  runner.generate(std::move(images), prompt, seq_len);
  return 0;
}