xref: /aosp_15_r20/external/executorch/examples/qualcomm/qaihub_scripts/stable_diffusion/runner/runner.cpp (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) Qualcomm Innovation Center, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // A simple stable diffusion runner that includes preprocessing and post
10 // processing logic. The module takes in a string as input and emits a tensor as
11 // output.
12 
#include <executorch/examples/qualcomm/qaihub_scripts/stable_diffusion/runner/runner.h>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <random>
#include <regex>
#include <utility>

#include <executorch/extension/llm/runner/util.h>
#include <executorch/extension/tensor/tensor.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/platform/log.h>
24 
25 using executorch::extension::from_blob;
26 using executorch::extension::Module;
27 using executorch::extension::TensorPtr;
28 using executorch::extension::llm::time_in_ms;
29 using executorch::runtime::Error;
30 using executorch::runtime::MethodMeta;
31 using executorch::runtime::Result;
32 
33 namespace example {
34 
Runner(const std::vector<std::string> & models_path,const int num_time_steps,const float guidance_scale,const float text_encoder_output_scale,const int text_encoder_output_offset,const float unet_input_latent_scale,const int unet_input_latent_offset,const float unet_input_text_emb_scale,const float unet_input_text_emb_offset,const float unet_output_scale,const int unet_output_offset,const float vae_input_scale,const int vae_input_offset,const float vae_output_scale,const int vae_output_offset,const std::string output_path,const bool fix_latents)35 Runner::Runner(
36     const std::vector<std::string>& models_path,
37     const int num_time_steps,
38     const float guidance_scale,
39     const float text_encoder_output_scale,
40     const int text_encoder_output_offset,
41     const float unet_input_latent_scale,
42     const int unet_input_latent_offset,
43     const float unet_input_text_emb_scale,
44     const float unet_input_text_emb_offset,
45     const float unet_output_scale,
46     const int unet_output_offset,
47     const float vae_input_scale,
48     const int vae_input_offset,
49     const float vae_output_scale,
50     const int vae_output_offset,
51     const std::string output_path,
52     const bool fix_latents)
53     : num_time_steps_(num_time_steps),
54       guidance_scale_(guidance_scale),
55       text_encoder_output_scale_(text_encoder_output_scale),
56       text_encoder_output_offset_(text_encoder_output_offset),
57       unet_input_latent_scale_(unet_input_latent_scale),
58       unet_input_latent_offset_(unet_input_latent_offset),
59       unet_input_text_emb_scale_(unet_input_text_emb_scale),
60       unet_input_text_emb_offset_(unet_input_text_emb_offset),
61       unet_output_scale_(unet_output_scale),
62       unet_output_offset_(unet_output_offset),
63       vae_input_scale_(vae_input_scale),
64       vae_input_offset_(vae_input_offset),
65       vae_output_scale_(vae_output_scale),
66       vae_output_offset_(vae_output_offset),
67       output_path_(output_path),
68       fix_latents_(fix_latents) {
69   for (int i = 0; i < models_path.size(); i++) {
70     modules_.push_back(std::make_unique<Module>(
71         models_path[i], Module::LoadMode::MmapUseMlockIgnoreErrors));
72     ET_LOG(Info, "creating module: model_path=%s", models_path[i].c_str());
73   }
74 }
75 
get_methods_meta()76 std::vector<Result<MethodMeta>> Runner::get_methods_meta() {
77   std::vector<Result<MethodMeta>> methods_meta;
78   for (size_t i = 0; i < modules_.size(); ++i) {
79     methods_meta.emplace_back(modules_[i]->method_meta(method_names_[i]));
80   }
81   return methods_meta;
82 }
83 
is_loaded() const84 bool Runner::is_loaded() const {
85   bool loaded = true;
86   for (const std::unique_ptr<Module>& module : modules_) {
87     loaded &= module->is_loaded();
88   }
89   return loaded;
90 }
91 
load()92 Error Runner::load() {
93   if (is_loaded()) {
94     return Error::Ok;
95   }
96   stats_.model_load_start_ms = time_in_ms();
97   for (auto& module : modules_) {
98     method_names_.emplace_back(*module->method_names()->begin());
99     ET_CHECK_OK_OR_RETURN_ERROR(module->load_method(method_names_.back()));
100   }
101   stats_.model_load_end_ms = time_in_ms();
102   return Error::Ok;
103 }
104 
parse_input_list(std::string & path)105 Error Runner::parse_input_list(std::string& path) {
106   // Fill in data for input
107   std::ifstream input_list(path);
108   time_emb_list_.reserve(num_time_steps_);
109   ET_CHECK_MSG(input_list.is_open(), "Input list error opening file");
110   std::string time_emb_file;
111   for (int i = 0; i < num_time_steps_; i++) {
112     std::getline(input_list, time_emb_file);
113     std::ifstream is;
114     is.open(time_emb_file, std::ios::binary);
115     is.seekg(0, std::ios::end);
116     size_t filesize = is.tellg();
117     is.seekg(0, std::ios::beg);
118     std::vector<uint16_t> time_emb;
119     time_emb.resize(filesize / sizeof(uint16_t));
120     is.read(reinterpret_cast<char*>(time_emb.data()), filesize);
121     time_emb_list_.push_back(time_emb);
122   }
123   return Error::Ok;
124 }
125 
// Builds vocab_to_token_map_ from a CLIP-style vocab.json written as one JSON
// object {"token": id, ...}. Parsing is done with regexes, not a JSON
// library: the file is split at every `<digit>,"` boundary between adjacent
// entries, and each fragment is matched as a single `"token":id` pair.
// NOTE(review): if the file cannot be opened this still returns Error::Ok,
// leaving the map empty — confirm callers expect that.
Error Runner::init_tokenizer(const std::string& vocab_json_path) {
  ET_LOG(Info, "Loading Tokenizer from json");
  stats_.tokenizer_load_start_ms = time_in_ms();
  std::ifstream fin(vocab_json_path);
  // Extracts one `"token":id` pair from `target`, un-escapes the token text,
  // and records it in vocab_to_token_map_.
  auto update_map = [this](std::string& target, std::regex& re) {
    std::smatch sm;
    std::regex_search(target, sm, re);
    // replace special character, please extend this if any corner case found
    std::string text = sm[1];
    // Un-escape JSON `\"` and `\\`; map CLIP's end-of-word marker `</w>` to a
    // trailing space, matching the `word += " "` lookup done in tokenize().
    std::unordered_map<std::string, std::regex> post_process = {
        {"\"", std::regex(R"(\\\")")},
        {" ", std::regex(R"(</w>)")},
        {"\\", std::regex(R"(\\\\)")}};
    for (auto& p : post_process) {
      text = std::regex_replace(text, p.second, p.first);
    }
    vocab_to_token_map_[text] = std::stoi(sm[2]);
  };

  if (fin.is_open()) {
    // vocab.json is a single object; collapse all lines into one string.
    std::string line, text;
    while (getline(fin, line)) {
      text += line;
    }
    fin.close();

    // re_anchor matches the `<id digit>,"` seam between consecutive entries;
    // re_pattern extracts the token text (group 1) and id (group 2).
    std::regex re_anchor(R"(\d,\")");
    std::regex re_pattern(R"(\{?\"(.*)\":([\d]+)\}?)");
    auto begin = std::sregex_iterator(text.begin(), text.end(), re_anchor);
    auto end = std::sregex_iterator();
    size_t pos = 0;
    for (std::sregex_iterator iter = begin; iter != end; ++iter) {
      std::smatch match;
      // Include the digit at the anchor position so the id is kept intact.
      size_t len = iter->position() - pos + 1;
      std::string target = text.substr(pos, len);
      update_map(target, re_pattern);
      // Resume right after the matched digit (at the separating comma).
      pos = iter->position() + 1;
    }
    // process last vocabulary
    std::string target = text.substr(pos);
    update_map(target, re_pattern);
  }
  stats_.tokenizer_load_end_ms = time_in_ms();
  return Error::Ok;
}
171 
tokenize(std::string prompt)172 std::vector<int> Runner::tokenize(std::string prompt) {
173   std::string bos("<|startoftext|>"), eos("<|endoftext|>");
174   std::vector<std::string> vocabs;
175   vocabs.reserve(max_tokens_);
176   std::vector<int32_t> tokens(1, vocab_to_token_map_[bos]);
177 
178   // pretokenize
179   // ref: https://github.com/monatis/clip.cpp
180   //      https://huggingface.co/openai/clip-vit-base-patch32
181   std::string text;
182   std::regex re(
183       R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)");
184   std::smatch sm;
185   while (std::regex_search(prompt, sm, re)) {
186     for (auto& v : sm) {
187       vocabs.push_back(v);
188     }
189     prompt = sm.suffix();
190   }
191   for (std::string& v : vocabs) {
192     std::string word = (v[0] == ' ') ? v.substr(1) : v;
193     word += " ";
194     auto iter = vocab_to_token_map_.find(word);
195     if (iter != vocab_to_token_map_.end()) {
196       tokens.push_back(iter->second);
197       continue;
198     }
199     for (int i = 0; i < v.size(); ++i) {
200       for (int j = v.size() - 1; j >= i; --j) {
201         std::string token = v.substr(i, j - 1 + 1);
202         auto iter = vocab_to_token_map_.find(token);
203         if (iter != vocab_to_token_map_.end()) {
204           tokens.push_back(iter->second);
205           i = j + 1;
206           break;
207         } else if (j == i) {
208           ET_LOG(Error, "unknown token found: %s", token.c_str());
209         }
210       }
211     }
212   }
213   tokens.push_back(vocab_to_token_map_[eos]);
214   return tokens;
215 }
216 
gen_latent_from_file()217 std::vector<float> Runner::gen_latent_from_file() {
218   std::vector<float> tensor_vector;
219   std::ifstream file("latents.raw", std::ios::binary);
220   if (!file.is_open()) {
221     ET_LOG(Error, "Error opening file!");
222     return tensor_vector;
223   }
224 
225   // Read the tensor data
226   float value;
227   while (file.read(reinterpret_cast<char*>(&value), sizeof(float))) {
228     tensor_vector.push_back(value);
229   }
230   file.close();
231   return tensor_vector;
232 }
233 
gen_random_latent(float sigma)234 std::vector<float> Runner::gen_random_latent(float sigma) {
235   std::random_device rnd_device;
236   std::mt19937 mersenne_engine{rnd_device()};
237   std::normal_distribution<float> dist{0.0f, 1.0f};
238 
239   constexpr int latent_size = 1 * 64 * 64 * 4;
240   std::vector<float> random_vector(latent_size);
241 
242   for (float& value : random_vector) {
243     value = dist(mersenne_engine) * sigma;
244   }
245   return random_vector;
246 }
247 
get_time_steps()248 std::vector<float> Runner::get_time_steps() {
249   std::vector<float> time_steps(num_time_steps_);
250   for (int i = 0; i < num_time_steps_; ++i) {
251     time_steps[i] = (num_train_timesteps_ - 1) *
252         (1.0f - static_cast<float>(i) / (num_time_steps_ - 1));
253   }
254   return time_steps;
255 }
256 
get_sigmas(const std::vector<float> & time_steps)257 std::vector<float> Runner::get_sigmas(const std::vector<float>& time_steps) {
258   float start = std::sqrt(beta_start_);
259   float end = std::sqrt(beta_end_);
260   std::vector<float> betas(num_train_timesteps_);
261   float step = (end - start) / (num_train_timesteps_ - 1);
262   for (int i = 0; i < num_train_timesteps_; ++i) {
263     float value = start + i * step;
264     betas[i] = 1 - (value * value);
265   }
266 
267   std::vector<float> alphas_cumprod(num_train_timesteps_);
268   float cumprod = 1.0;
269   for (int i = 0; i < num_train_timesteps_; ++i) {
270     cumprod *= betas[i];
271     alphas_cumprod[i] = cumprod;
272   }
273 
274   std::vector<float> sigmas(num_train_timesteps_);
275   for (int i = 0; i < num_train_timesteps_; ++i) {
276     sigmas[i] = std::sqrt((1.0 - alphas_cumprod[i]) / alphas_cumprod[i]);
277   }
278 
279   std::vector<float> res(time_steps.size());
280   for (size_t i = 0; i < time_steps.size(); ++i) {
281     float index =
282         static_cast<float>(i) * (sigmas.size() - 1) / (time_steps.size() - 1);
283     size_t lower_index = static_cast<size_t>(std::floor(index));
284     size_t upper_index = static_cast<size_t>(std::ceil(index));
285 
286     float weight = index - lower_index;
287     res[i] =
288         (1.0 - weight) * sigmas[lower_index] + weight * sigmas[upper_index];
289   }
290   std::reverse(res.begin(), res.end());
291   res.push_back(0);
292 
293   return res;
294 }
295 
scale_model_input(const std::vector<float> & latents,std::vector<float> & latent_model_input,float sigma)296 void Runner::scale_model_input(
297     const std::vector<float>& latents,
298     std::vector<float>& latent_model_input,
299     float sigma) {
300   for (int i = 0; i < latents.size(); i++) {
301     latent_model_input[i] = (latents[i] / std::sqrt(sigma * sigma + 1));
302   }
303 }
304 
quant_tensor(const std::vector<float> & fp_vec,std::vector<uint16_t> & quant_vec,float scale,int offset)305 void Runner::quant_tensor(
306     const std::vector<float>& fp_vec,
307     std::vector<uint16_t>& quant_vec,
308     float scale,
309     int offset) {
310   offset = abs(offset);
311   for (int i = 0; i < fp_vec.size(); i++) {
312     quant_vec[i] = static_cast<uint16_t>((fp_vec[i] / scale) + offset);
313   }
314 }
315 
dequant_tensor(const std::vector<uint16_t> & quant_vec,std::vector<float> & fp_vec,float scale,int offset)316 void Runner::dequant_tensor(
317     const std::vector<uint16_t>& quant_vec,
318     std::vector<float>& fp_vec,
319     float scale,
320     int offset) {
321   offset = abs(offset);
322   for (int i = 0; i < quant_vec.size(); i++) {
323     fp_vec[i] = (quant_vec[i] - offset) * scale;
324   }
325 }
326 
327 // Using the same algorithm as EulerDiscreteScheduler in python.
step(const std::vector<float> & model_output,const std::vector<float> & sigmas,std::vector<float> & sample,std::vector<float> & prev_sample,int step_index)328 void Runner::step(
329     const std::vector<float>& model_output,
330     const std::vector<float>& sigmas,
331     std::vector<float>& sample,
332     std::vector<float>& prev_sample,
333     int step_index) {
334   float sigma = sigmas[step_index];
335   float dt = sigmas[step_index + 1] - sigma;
336 
337   for (int i = 0; i < sample.size(); ++i) {
338     float sigma_hat = sample[i] - (sigma * model_output[i]);
339     prev_sample[i] = (sample[i] - sigma_hat) / sigma;
340     prev_sample[i] = sample[i] + (prev_sample[i] * dt);
341   }
342   sample = prev_sample;
343 }
344 
// Runs the full pipeline for `prompt`:
//   tokenize -> text encoder (cond + uncond) -> requantize embeddings ->
//   num_time_steps_ unet iterations with classifier-free guidance ->
//   VAE decode -> write the raw fp32 image to <output_path_>/output_0_0.raw.
// Assumes modules_/method_names_ order is [0]=encoder, [1]=unet, [2]=vae and
// that load() and parse_input_list() ran first.
// NOTE(review): the Error/Result returns of set_output()/execute() (`ret`,
// `cond_res`, `uncond_res`, `vae_res`) are never inspected — confirm that
// silently continuing after a failed execution is acceptable here.
Error Runner::generate(std::string prompt) {
  ET_LOG(Info, "Start generating");
  stats_.generate_start_ms = time_in_ms();

  // Start tokenize
  stats_.tokenizer_parsing_start_ms = time_in_ms();
  std::vector<int32_t> cond_tokens = tokenize(prompt);
  cond_tokens.resize(max_tokens_); // pad/truncate to the fixed length
  std::vector<int32_t> uncond_tokens = tokenize(""); // empty prompt branch
  uncond_tokens.resize(max_tokens_);
  stats_.tokenizer_parsing_end_ms = time_in_ms();

  std::vector<Result<MethodMeta>> method_metas = get_methods_meta();

  MethodMeta encoder_method_meta = method_metas[0].get();
  // Initialize text_encoder input tensors: cond/uncond tokenized_input[1,77]
  auto cond_tokens_tensor = from_blob(
      cond_tokens.data(),
      {1, 77},
      encoder_method_meta.input_tensor_meta(0)->scalar_type());
  auto uncond_tokens_tensor = from_blob(
      uncond_tokens.data(),
      {1, 77},
      encoder_method_meta.input_tensor_meta(0)->scalar_type());
  // Initialize text_encoder output tensors: cond/uncond embedding[1, 77, 1024]
  constexpr int emb_size = 1 * 77 * 1024;
  std::vector<uint16_t> cond_emb_vec(emb_size);
  std::vector<uint16_t> uncond_emb_vec(emb_size);
  std::vector<float> fp_emb_vec(emb_size); // scratch for requantization below
  auto cond_emb_tensor = from_blob(
      cond_emb_vec.data(),
      {1, 77, 1024},
      encoder_method_meta.output_tensor_meta(0)->scalar_type());
  auto uncond_emb_tensor = from_blob(
      uncond_emb_vec.data(),
      {1, 77, 1024},
      encoder_method_meta.output_tensor_meta(0)->scalar_type());
  // Point the encoder's output at cond_emb_vec, run it, then repoint at
  // uncond_emb_vec and run again — the output buffer is swapped, not copied.
  auto ret = modules_[0]->set_output(method_names_[0], cond_emb_tensor);
  long encoder_start = time_in_ms();
  auto cond_res = modules_[0]->execute(method_names_[0], cond_tokens_tensor);
  stats_.text_encoder_execution_time += (time_in_ms() - encoder_start);
  ret = modules_[0]->set_output(method_names_[0], uncond_emb_tensor);
  encoder_start = time_in_ms();
  auto uncond_res =
      modules_[0]->execute(method_names_[0], uncond_tokens_tensor);
  stats_.text_encoder_execution_time += (time_in_ms() - encoder_start);

  // Initialize unet parameters
  MethodMeta unet_method_meta = method_metas[1].get();
  std::vector<float> time_steps = get_time_steps();
  std::vector<float> sigmas = get_sigmas(time_steps);
  float max_sigma = *std::max_element(sigmas.begin(), sigmas.end());
  std::vector<float> latent;
  if (fix_latents_) {
    latent = gen_latent_from_file(); // deterministic latent for testing
  } else {
    latent = gen_random_latent(max_sigma);
  }
  std::vector<float> prev_sample(latent.size());

  // Initialize unet input tensors
  //  1. latent[1,64,64,4]
  //  2. time_embedding[1,1280]
  //  3. cond/uncond embedding[1,77,1024]
  std::vector<uint16_t> latent_model_input(latent.size());
  std::vector<float> fp_latent_model_input(latent.size());
  auto latent_tensor = from_blob(
      latent_model_input.data(),
      {1, 64, 64, 4},
      unet_method_meta.input_tensor_meta(0)->scalar_type());
  // One pre-loaded time-embedding tensor per denoising step (see
  // parse_input_list()).
  std::vector<TensorPtr> time_emb_tensors;
  time_emb_tensors.reserve(num_time_steps_);
  for (auto step_index = 0; step_index < num_time_steps_; step_index++) {
    time_emb_tensors.emplace_back(from_blob(
        time_emb_list_[step_index].data(),
        {1, 1280},
        unet_method_meta.input_tensor_meta(1)->scalar_type()));
  }
  // requantize text encoders output: dequantize with the encoder's
  // output scale/offset, re-quantize in place with the unet's input
  // scale/offset (the two models use different quantization params).
  dequant_tensor(
      cond_emb_vec,
      fp_emb_vec,
      text_encoder_output_scale_,
      text_encoder_output_offset_);
  quant_tensor(
      fp_emb_vec,
      cond_emb_vec,
      unet_input_text_emb_scale_,
      unet_input_text_emb_offset_);
  dequant_tensor(
      uncond_emb_vec,
      fp_emb_vec,
      text_encoder_output_scale_,
      text_encoder_output_offset_);
  quant_tensor(
      fp_emb_vec,
      uncond_emb_vec,
      unet_input_text_emb_scale_,
      unet_input_text_emb_offset_);

  // Initialize unet output tensors: text/uncond noise_pred[1,64,64,4]
  std::vector<uint16_t> noise_pred_text(latent.size());
  std::vector<uint16_t> noise_pred_uncond(latent.size());
  std::vector<float> fp_noise_pred_text(noise_pred_text.size());
  std::vector<float> fp_noise_pred_uncond(noise_pred_uncond.size());
  auto noise_pred_text_tensor = from_blob(
      noise_pred_text.data(),
      {1, 64, 64, 4},
      unet_method_meta.output_tensor_meta(0)->scalar_type());
  auto noise_pred_uncond_tensor = from_blob(
      noise_pred_uncond.data(),
      {1, 64, 64, 4},
      unet_method_meta.output_tensor_meta(0)->scalar_type());

  // Execute unet: each step runs the unet twice (cond and uncond), blends
  // the two noise predictions with guidance_scale_, and advances the latent
  // with one Euler scheduler step.
  for (int step_index = 0; step_index < num_time_steps_; step_index++) {
    long start_post_process = time_in_ms();
    scale_model_input(latent, fp_latent_model_input, sigmas[step_index]);

    quant_tensor(
        fp_latent_model_input,
        latent_model_input,
        unet_input_latent_scale_,
        unet_input_latent_offset_);

    stats_.unet_aggregate_post_processing_time +=
        (time_in_ms() - start_post_process);
    ret = modules_[1]->set_output(method_names_[1], noise_pred_text_tensor);
    long start_unet_execution = time_in_ms();
    auto cond_res = modules_[1]->execute(
        method_names_[1],
        {latent_tensor, time_emb_tensors[step_index], cond_emb_tensor});
    stats_.unet_aggregate_execution_time +=
        (time_in_ms() - start_unet_execution);
    ret = modules_[1]->set_output(method_names_[1], noise_pred_uncond_tensor);
    start_unet_execution = time_in_ms();
    auto uncond_res = modules_[1]->execute(
        method_names_[1],
        {latent_tensor,
         time_emb_tensors[step_index],
         uncond_emb_tensor}); // results in noise_pred_uncond_vec
    stats_.unet_aggregate_execution_time +=
        (time_in_ms() - start_unet_execution);

    // start unet post processing
    start_post_process = time_in_ms();

    dequant_tensor(
        noise_pred_text,
        fp_noise_pred_text,
        unet_output_scale_,
        unet_output_offset_);
    dequant_tensor(
        noise_pred_uncond,
        fp_noise_pred_uncond,
        unet_output_scale_,
        unet_output_offset_);

    // Classifier-free guidance: uncond + scale * (cond - uncond).
    for (int i = 0; i < fp_noise_pred_text.size(); i++) {
      fp_noise_pred_text[i] = fp_noise_pred_uncond[i] +
          guidance_scale_ * (fp_noise_pred_text[i] - fp_noise_pred_uncond[i]);
    }
    step(fp_noise_pred_text, sigmas, latent, prev_sample, step_index);
    stats_.unet_aggregate_post_processing_time +=
        (time_in_ms() - start_post_process);
  }

  // Start VAE
  MethodMeta vae_method_meta = method_metas[2].get();
  // Initialize vae input tensor : latent[1,64,64,4]
  std::vector<uint16_t> vae_input(latent.size());
  auto vae_input_tensor = from_blob(
      vae_input.data(),
      {1, 64, 64, 4},
      vae_method_meta.input_tensor_meta(0)->scalar_type());
  // Initialize vae output tensor: output[1,512,512,3]
  constexpr int image_size = 1 * 512 * 512 * 3;
  std::vector<uint16_t> q_out(image_size);
  std::vector<float> out(image_size);
  auto output_tensor = from_blob(
      q_out.data(),
      {1, 512, 512, 3},
      vae_method_meta.output_tensor_meta(0)->scalar_type());

  quant_tensor(latent, vae_input, vae_input_scale_, vae_input_offset_);

  ret = modules_[2]->set_output(method_names_[2], output_tensor);
  long start_vae_execution = time_in_ms();
  auto vae_res = modules_[2]->execute(method_names_[2], vae_input_tensor);
  stats_.vae_execution_time = (time_in_ms() - start_vae_execution);
  stats_.generate_end_ms = time_in_ms();

  // Dequant uint16 output to fp32 output
  dequant_tensor(q_out, out, vae_output_scale_, vae_output_offset_);

  // Saving outputs
  auto output_file_name = output_path_ + "/output_0_0.raw";
  std::ofstream fout(output_file_name.c_str(), std::ios::binary);
  fout.write(
      reinterpret_cast<const char*>(out.data()), out.size() * sizeof(float));
  fout.close();

  return Error::Ok;
}
549 
print_performance()550 Error Runner::print_performance() {
551   ET_LOG(Info, "\tTotal Number of steps:\t\t\t\t%d", num_time_steps_);
552 
553   ET_LOG(
554       Info,
555       "\tTokenizer Load Time:\t\t\t\t%f (seconds)",
556       ((double)(stats_.tokenizer_load_end_ms - stats_.tokenizer_load_start_ms) /
557        stats_.SCALING_FACTOR_UNITS_PER_SECOND));
558 
559   ET_LOG(
560       Info,
561       "\tModel Load Time:\t\t\t\t%f (seconds)",
562       ((double)(stats_.model_load_end_ms - stats_.model_load_start_ms) /
563        stats_.SCALING_FACTOR_UNITS_PER_SECOND));
564 
565   ET_LOG(
566       Info,
567       "\tGenerate Time(Tokenize + Encoder + UNet + VAE):\t%f (seconds)",
568       ((double)(stats_.generate_end_ms - stats_.generate_start_ms) /
569        stats_.SCALING_FACTOR_UNITS_PER_SECOND));
570 
571   ET_LOG(
572       Info,
573       "\tTokenize Time:\t\t\t\t\t%f (seconds)",
574       ((double)(stats_.tokenizer_parsing_end_ms -
575                 stats_.tokenizer_parsing_start_ms) /
576        stats_.SCALING_FACTOR_UNITS_PER_SECOND));
577 
578   ET_LOG(
579       Info,
580       "\tText Encoder Execution Time:\t\t\t%f (seconds)",
581       ((double)(stats_.text_encoder_execution_time) /
582        stats_.SCALING_FACTOR_UNITS_PER_SECOND));
583 
584   ET_LOG(
585       Info,
586       "\tUnet Aggregate (Cond + Uncond) Execution Time:\t%f (seconds)",
587       ((double)stats_.unet_aggregate_execution_time /
588        (stats_.SCALING_FACTOR_UNITS_PER_SECOND)));
589 
590   ET_LOG(
591       Info,
592       "\tUnet Average Execution Time:\t\t\t%f (seconds)",
593       ((double)(stats_.unet_aggregate_execution_time / (num_time_steps_ * 2)) /
594        (stats_.SCALING_FACTOR_UNITS_PER_SECOND)));
595 
596   ET_LOG(
597       Info,
598       "\tUnet Aggregate Post-Processing Time:\t\t%f (seconds)",
599       ((double)(stats_.unet_aggregate_post_processing_time) /
600        stats_.SCALING_FACTOR_UNITS_PER_SECOND));
601 
602   ET_LOG(
603       Info,
604       "\tUnet Average Post-Processing Time:\t\t%f (seconds)",
605       ((double)(stats_.unet_aggregate_post_processing_time /
606                 (num_time_steps_ * 2)) /
607        (stats_.SCALING_FACTOR_UNITS_PER_SECOND)));
608 
609   ET_LOG(
610       Info,
611       "\tVAE Execution Time:\t\t\t\t%f (seconds)",
612       ((double)(stats_.vae_execution_time) /
613        stats_.SCALING_FACTOR_UNITS_PER_SECOND));
614   return Error::Ok;
615 }
616 
617 } // namespace example
618