xref: /aosp_15_r20/external/executorch/examples/qualcomm/qaihub_scripts/stable_diffusion/runner/runner.h (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) Qualcomm Innovation Center, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
// A simple diffusion runner that includes preprocessing and post-processing
// logic. The module takes a string prompt as input and emits a tensor as
// output.
11 
12 #pragma once
13 
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include <executorch/extension/module/module.h>
19 
20 namespace example {
21 
// Drives the three-stage stable-diffusion pipeline (text encoder -> UNet
// denoising loop -> VAE decoder) on pre-compiled QNN model binaries. The
// per-model scale/offset pairs describe the uint16 fixed-point quantization
// used at each model boundary; quant_tensor/dequant_tensor convert between
// float and quantized representations with them.
class Runner {
 public:
  // models_path: one compiled model binary per pipeline stage.
  // num_time_steps: number of denoising iterations for the scheduler.
  // guidance_scale: classifier-free-guidance weight blending the
  //   conditional and unconditional UNet outputs.
  // *_scale / *_offset: quantization parameters for the named model
  //   input/output tensors.
  // output_path: where the generated image tensor is written.
  // fix_latents: when true, the initial latent is read from a file
  //   (gen_latent_from_file) instead of sampled randomly, giving
  //   reproducible output.
  //
  // NOTE(review): unet_input_text_emb_offset is declared `float` here but
  // is stored in the `int unet_input_text_emb_offset_` member below, while
  // every other offset parameter is `const int` — presumably this should
  // be `const int` too. Confirm against the .cpp definition before
  // changing; the declaration and definition must stay in sync.
  // NOTE(review): output_path is taken by value; `const std::string&`
  // would avoid a copy, but changing it requires updating the matching
  // definition as well.
  explicit Runner(
      const std::vector<std::string>& models_path,
      const int num_time_steps,
      const float guidance_scale,
      const float text_encoder_output_scale,
      const int text_encoder_output_offset,
      const float unet_input_latent_scale,
      const int unet_input_latent_offset,
      const float unet_input_text_emb_scale,
      const float unet_input_text_emb_offset,
      const float unet_output_scale,
      const int unet_output_offset,
      const float vae_input_scale,
      const int vae_input_offset,
      const float vae_output_scale,
      const int vae_output_offset,
      const std::string output_path,
      const bool fix_latents);

  // Millisecond timestamps and aggregate durations collected across one
  // generate() call; consumed by print_performance().
  struct Stats {
    // Scaling factor for timestamps - in this case, we use ms.
    const long SCALING_FACTOR_UNITS_PER_SECOND = 1000;
    // Time stamps for the different stages of the execution
    // model_load_start_ms: Model loading time
    long model_load_start_ms;
    long model_load_end_ms;

    // tokenizer loading time
    long tokenizer_load_start_ms = 0;
    long tokenizer_load_end_ms = 0;

    // tokenizer parsing time
    long tokenizer_parsing_start_ms = 0;
    long tokenizer_parsing_end_ms = 0;

    // Total time to run generate
    long generate_start_ms = 0;
    long generate_end_ms = 0;

    // text encoder execution time
    long text_encoder_execution_time = 0;

    // Unet aggregation execution time over n steps for cond + uncond
    long unet_aggregate_execution_time = 0;

    // UNet aggregation post processing time over n steps for cond + uncond.
    // This is the time from processing unet's output until feeding it into the
    // next iteration.
    long unet_aggregate_post_processing_time = 0;

    // VAE execution time
    long vae_execution_time = 0;
  };

  // Returns true once all modules in modules_ have been loaded.
  bool is_loaded() const;
  // Loads every model binary; records model_load_* timestamps in stats_.
  executorch::runtime::Error load();
  // Builds vocab_to_token_map_ from a CLIP-style vocabulary JSON file.
  executorch::runtime::Error init_tokenizer(const std::string& vocab_json_path);
  // Prints the timings accumulated in stats_.
  executorch::runtime::Error print_performance();
  // Converts a prompt string into token ids (padded/truncated to
  // max_tokens_ — presumably; confirm in the .cpp).
  std::vector<int> tokenize(std::string prompt);
  // Reads a pre-recorded initial latent (used when fix_latents_ is true).
  std::vector<float> gen_latent_from_file();
  // Samples a random initial latent scaled by sigma.
  std::vector<float> gen_random_latent(float sigma);
  // One scheduler step: given the UNet output at step_index, advances
  // sample/prev_sample in place.
  void step(
      const std::vector<float>& model_output,
      const std::vector<float>& sigmas,
      std::vector<float>& sample,
      std::vector<float>& prev_sample,
      int step_index);
  // Exposes each loaded method's metadata (tensor shapes/dtypes) so the
  // caller can size I/O buffers.
  std::vector<executorch::runtime::Result<executorch::runtime::MethodMeta>>
  get_methods_meta();
  // Scheduler helpers: the discrete timesteps and their noise sigmas.
  std::vector<float> get_time_steps();
  std::vector<float> get_sigmas(const std::vector<float>& time_steps);
  // Scales `vec` by the sigma-dependent factor expected by the UNet,
  // writing into latent_model_input.
  void scale_model_input(
      const std::vector<float>& vec,
      std::vector<float>& latent_model_input,
      float sigma);
  // Parses a file listing pre-computed inputs (e.g. time embeddings).
  executorch::runtime::Error parse_input_list(std::string& path);
  // Runs the full pipeline for one prompt and writes the result to
  // output_path_.
  executorch::runtime::Error generate(std::string prompt);
  // float -> uint16 affine quantization with the given scale/offset.
  void quant_tensor(
      const std::vector<float>& fp_vec,
      std::vector<uint16_t>& quant_vec,
      float scale,
      int offset);
  // uint16 -> float dequantization, inverse of quant_tensor.
  void dequant_tensor(
      const std::vector<uint16_t>& quant_vec,
      std::vector<float>& fp_vec,
      float scale,
      int offset);

 private:
  Stats stats_;
  // One Module per pipeline stage, same order as models_path.
  std::vector<std::unique_ptr<executorch::extension::Module>> modules_;
  std::vector<std::string> method_names_;
  // Pre-quantized per-timestep embeddings fed to the UNet.
  std::vector<std::vector<uint16_t>> time_emb_list_;
  std::unordered_map<std::string, int32_t> vocab_to_token_map_;

  std::string output_path_;
  int num_time_steps_;
  float guidance_scale_;
  float text_encoder_output_scale_;
  int text_encoder_output_offset_;
  float unet_input_latent_scale_;
  int unet_input_latent_offset_;
  float unet_input_text_emb_scale_;
  int unet_input_text_emb_offset_;
  float unet_output_scale_;
  int unet_output_offset_;
  float vae_input_scale_;
  int vae_input_offset_;
  float vae_output_scale_;
  int vae_output_offset_;
  // Scheduler beta schedule bounds and training-timestep count
  // (standard stable-diffusion scheduler constants).
  const float beta_start_ = 0.00085;
  const float beta_end_ = 0.012;
  const int num_train_timesteps_ = 1000;
  // CLIP text encoder sequence length.
  const int max_tokens_ = 77;
  const bool fix_latents_ = false;
};
140 
141 } // namespace example
142