1 /* 2 * Copyright (c) Qualcomm Innovation Center, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under the BSD-style license found in the 6 * LICENSE file in the root directory of this source tree. 7 */ 8 9 // A simple diffusion runner that includes preprocessing and post processing 10 // logic. The module takes in a string as input and emites a tensor as output. 11 12 #pragma once 13 14 #include <string> 15 #include <unordered_map> 16 #include <vector> 17 18 #include <executorch/extension/module/module.h> 19 20 namespace example { 21 22 class Runner { 23 public: 24 explicit Runner( 25 const std::vector<std::string>& models_path, 26 const int num_time_steps, 27 const float guidance_scale, 28 const float text_encoder_output_scale, 29 const int text_encoder_output_offset, 30 const float unet_input_latent_scale, 31 const int unet_input_latent_offset, 32 const float unet_input_text_emb_scale, 33 const float unet_input_text_emb_offset, 34 const float unet_output_scale, 35 const int unet_output_offset, 36 const float vae_input_scale, 37 const int vae_input_offset, 38 const float vae_output_scale, 39 const int vae_output_offset, 40 const std::string output_path, 41 const bool fix_latents); 42 43 struct Stats { 44 // Scaling factor for timestamps - in this case, we use ms. 45 const long SCALING_FACTOR_UNITS_PER_SECOND = 1000; 46 // Time stamps for the different stages of the execution 47 // model_load_start_ms: Model loading time 48 long model_load_start_ms; 49 long model_load_end_ms; 50 51 // tokenizer loading time 52 long tokenizer_load_start_ms = 0; 53 long tokenizer_load_end_ms = 0; 54 55 // tokenizer parsing time 56 long tokenizer_parsing_start_ms = 0; 57 long tokenizer_parsing_end_ms = 0; 58 59 // Total time to run generate 60 long generate_start_ms = 0; 61 long generate_end_ms = 0; 62 63 // text encoder execution time 64 long text_encoder_execution_time = 0; 65 66 // Unet aggregation execution time over n steps for cond + uncond 67 long unet_aggregate_execution_time = 0; 68 69 // UNet aggregation post processing time over n steps for cond + uncond. 70 // This is the time from processing unet's output until feeding it into the 71 // next iteration. 72 long unet_aggregate_post_processing_time = 0; 73 74 // VAE execution time 75 long vae_execution_time = 0; 76 }; 77 78 bool is_loaded() const; 79 executorch::runtime::Error load(); 80 executorch::runtime::Error init_tokenizer(const std::string& vocab_json_path); 81 executorch::runtime::Error print_performance(); 82 std::vector<int> tokenize(std::string prompt); 83 std::vector<float> gen_latent_from_file(); 84 std::vector<float> gen_random_latent(float sigma); 85 void step( 86 const std::vector<float>& model_output, 87 const std::vector<float>& sigmas, 88 std::vector<float>& sample, 89 std::vector<float>& prev_sample, 90 int step_index); 91 std::vector<executorch::runtime::Result<executorch::runtime::MethodMeta>> 92 get_methods_meta(); 93 std::vector<float> get_time_steps(); 94 std::vector<float> get_sigmas(const std::vector<float>& time_steps); 95 void scale_model_input( 96 const std::vector<float>& vec, 97 std::vector<float>& latent_model_input, 98 float sigma); 99 executorch::runtime::Error parse_input_list(std::string& path); 100 executorch::runtime::Error generate(std::string prompt); 101 void quant_tensor( 102 const std::vector<float>& fp_vec, 103 std::vector<uint16_t>& quant_vec, 104 float scale, 105 int offset); 106 void dequant_tensor( 107 const std::vector<uint16_t>& quant_vec, 108 std::vector<float>& fp_vec, 109 float scale, 110 int offset); 111 112 private: 113 Stats stats_; 114 std::vector<std::unique_ptr<executorch::extension::Module>> modules_; 115 std::vector<std::string> method_names_; 116 std::vector<std::vector<uint16_t>> time_emb_list_; 117 std::unordered_map<std::string, int32_t> vocab_to_token_map_; 118 119 std::string output_path_; 120 int num_time_steps_; 121 float guidance_scale_; 122 float text_encoder_output_scale_; 123 int text_encoder_output_offset_; 124 float unet_input_latent_scale_; 125 int unet_input_latent_offset_; 126 float unet_input_text_emb_scale_; 127 int unet_input_text_emb_offset_; 128 float unet_output_scale_; 129 int unet_output_offset_; 130 float vae_input_scale_; 131 int vae_input_offset_; 132 float vae_output_scale_; 133 int vae_output_offset_; 134 const float beta_start_ = 0.00085; 135 const float beta_end_ = 0.012; 136 const int num_train_timesteps_ = 1000; 137 const int max_tokens_ = 77; 138 const bool fix_latents_ = false; 139 }; 140 141 } // namespace example 142