/*
 * Copyright (c) Qualcomm Innovation Center, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

// A simple stable diffusion runner that includes pre-processing and
// post-processing logic. The module takes a prompt string as input and emits
// the decoded image tensor as output.

#include <executorch/examples/qualcomm/qaihub_scripts/stable_diffusion/runner/runner.h>
#include <executorch/extension/llm/runner/util.h>
#include <executorch/extension/tensor/tensor.h>

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <random>
#include <regex>

#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/platform/log.h>

using executorch::extension::from_blob;
using executorch::extension::Module;
using executorch::extension::TensorPtr;
using executorch::extension::llm::time_in_ms;
using executorch::runtime::Error;
using executorch::runtime::MethodMeta;
using executorch::runtime::Result;

namespace example {

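// A note on the quantized I/O below: each scale/offset pair is a per-tensor
// affine quantization parameter for a uint16 model input or output (see
// quant_tensor/dequant_tensor), and is expected to match the parameters the
// corresponding model was exported with.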
Runner::Runner(
    const std::vector<std::string>& models_path,
    const int num_time_steps,
    const float guidance_scale,
    const float text_encoder_output_scale,
    const int text_encoder_output_offset,
    const float unet_input_latent_scale,
    const int unet_input_latent_offset,
    const float unet_input_text_emb_scale,
    const float unet_input_text_emb_offset,
    const float unet_output_scale,
    const int unet_output_offset,
    const float vae_input_scale,
    const int vae_input_offset,
    const float vae_output_scale,
    const int vae_output_offset,
    const std::string output_path,
    const bool fix_latents)
    : num_time_steps_(num_time_steps),
      guidance_scale_(guidance_scale),
      text_encoder_output_scale_(text_encoder_output_scale),
      text_encoder_output_offset_(text_encoder_output_offset),
      unet_input_latent_scale_(unet_input_latent_scale),
      unet_input_latent_offset_(unet_input_latent_offset),
      unet_input_text_emb_scale_(unet_input_text_emb_scale),
      unet_input_text_emb_offset_(unet_input_text_emb_offset),
      unet_output_scale_(unet_output_scale),
      unet_output_offset_(unet_output_offset),
      vae_input_scale_(vae_input_scale),
      vae_input_offset_(vae_input_offset),
      vae_output_scale_(vae_output_scale),
      vae_output_offset_(vae_output_offset),
      output_path_(output_path),
      fix_latents_(fix_latents) {
  for (size_t i = 0; i < models_path.size(); i++) {
    modules_.push_back(std::make_unique<Module>(
        models_path[i], Module::LoadMode::MmapUseMlockIgnoreErrors));
    ET_LOG(Info, "creating module: model_path=%s", models_path[i].c_str());
  }
}

std::vector<Result<MethodMeta>> Runner::get_methods_meta() {
  std::vector<Result<MethodMeta>> methods_meta;
  for (size_t i = 0; i < modules_.size(); ++i) {
    methods_meta.emplace_back(modules_[i]->method_meta(method_names_[i]));
  }
  return methods_meta;
}

bool Runner::is_loaded() const {
  bool loaded = true;
  for (const std::unique_ptr<Module>& module : modules_) {
    loaded &= module->is_loaded();
  }
  return loaded;
}

Error Runner::load() {
  if (is_loaded()) {
    return Error::Ok;
  }
  stats_.model_load_start_ms = time_in_ms();
  for (auto& module : modules_) {
    method_names_.emplace_back(*module->method_names()->begin());
    ET_CHECK_OK_OR_RETURN_ERROR(module->load_method(method_names_.back()));
  }
  stats_.model_load_end_ms = time_in_ms();
  return Error::Ok;
}

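// Reads the input list file, which is expected to hold one path per line to a
// pre-computed uint16 time-embedding binary, one for each denoising step.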
Error Runner::parse_input_list(std::string& path) {
  // Read one time-embedding binary per denoising step.
  std::ifstream input_list(path);
  time_emb_list_.reserve(num_time_steps_);
  ET_CHECK_MSG(input_list.is_open(), "Error opening input list file");
  std::string time_emb_file;
  for (int i = 0; i < num_time_steps_; i++) {
    std::getline(input_list, time_emb_file);
    std::ifstream is;
    is.open(time_emb_file, std::ios::binary);
    ET_CHECK_MSG(is.is_open(), "Error opening %s", time_emb_file.c_str());
    is.seekg(0, std::ios::end);
    size_t filesize = is.tellg();
    is.seekg(0, std::ios::beg);
    std::vector<uint16_t> time_emb;
    time_emb.resize(filesize / sizeof(uint16_t));
    is.read(reinterpret_cast<char*>(time_emb.data()), filesize);
    time_emb_list_.push_back(time_emb);
  }
  return Error::Ok;
}

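// Builds vocab_to_token_map_ from a CLIP-style vocab.json. The file is parsed
// with regexes instead of a JSON library, so it assumes the flat
// {"token": id, ...} layout produced by the CLIP tokenizer export.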
Error Runner::init_tokenizer(const std::string& vocab_json_path) {
  ET_LOG(Info, "Loading Tokenizer from json");
  stats_.tokenizer_load_start_ms = time_in_ms();
  std::ifstream fin(vocab_json_path);
  auto update_map = [this](std::string& target, std::regex& re) {
    std::smatch sm;
    std::regex_search(target, sm, re);
    // Unescape special characters and map the </w> end-of-word marker to a
    // trailing space; extend this if more corner cases are found.
    std::string text = sm[1];
    std::unordered_map<std::string, std::regex> post_process = {
        {"\"", std::regex(R"(\\\")")},
        {" ", std::regex(R"(</w>)")},
        {"\\", std::regex(R"(\\\\)")}};
    for (auto& p : post_process) {
      text = std::regex_replace(text, p.second, p.first);
    }
    vocab_to_token_map_[text] = std::stoi(sm[2]);
  };

  if (fin.is_open()) {
    std::string line, text;
    while (getline(fin, line)) {
      text += line;
    }
    fin.close();

    std::regex re_anchor(R"(\d,\")");
    std::regex re_pattern(R"(\{?\"(.*)\":([\d]+)\}?)");
    auto begin = std::sregex_iterator(text.begin(), text.end(), re_anchor);
    auto end = std::sregex_iterator();
    size_t pos = 0;
    for (std::sregex_iterator iter = begin; iter != end; ++iter) {
      size_t len = iter->position() - pos + 1;
      std::string target = text.substr(pos, len);
      update_map(target, re_pattern);
      pos = iter->position() + 1;
    }
    // process the last vocabulary entry
    std::string target = text.substr(pos);
    update_map(target, re_pattern);
  }
  stats_.tokenizer_load_end_ms = time_in_ms();
  return Error::Ok;
}

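// Tokenizes a prompt the way CLIP does: pre-tokenize with the word-splitting
// regex, look each word up with its end-of-word marker (stored as a trailing
// space, see init_tokenizer), and fall back to a greedy longest-substring
// match for words that miss. Returns <bos> + tokens + <eos>; the caller pads
// to max_tokens_.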
std::vector<int> Runner::tokenize(std::string prompt) {
  std::string bos("<|startoftext|>"), eos("<|endoftext|>");
  std::vector<std::string> vocabs;
  vocabs.reserve(max_tokens_);
  std::vector<int32_t> tokens(1, vocab_to_token_map_[bos]);

  // pretokenize
  // ref: https://github.com/monatis/clip.cpp
  //      https://huggingface.co/openai/clip-vit-base-patch32
  std::regex re(
      R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)");
  std::smatch sm;
  while (std::regex_search(prompt, sm, re)) {
    for (auto& v : sm) {
      vocabs.push_back(v);
    }
    prompt = sm.suffix();
  }
  for (std::string& v : vocabs) {
    std::string word = (v[0] == ' ') ? v.substr(1) : v;
    word += " ";
    auto iter = vocab_to_token_map_.find(word);
    if (iter != vocab_to_token_map_.end()) {
      tokens.push_back(iter->second);
      continue;
    }
    // Greedy longest-match fallback over substrings of v.
    for (int i = 0; i < v.size(); ++i) {
      for (int j = v.size() - 1; j >= i; --j) {
        std::string token = v.substr(i, j - i + 1);
        auto iter = vocab_to_token_map_.find(token);
        if (iter != vocab_to_token_map_.end()) {
          tokens.push_back(iter->second);
          i = j + 1;
          break;
        } else if (j == i) {
          ET_LOG(Error, "unknown token found: %s", token.c_str());
        }
      }
    }
  }
  tokens.push_back(vocab_to_token_map_[eos]);
  return tokens;
}


std::vector<float> Runner::gen_latent_from_file() {
  std::vector<float> tensor_vector;
  std::ifstream file("latents.raw", std::ios::binary);
  if (!file.is_open()) {
    ET_LOG(Error, "Error opening latents.raw");
    return tensor_vector;
  }

  // Read the tensor data
  float value;
  while (file.read(reinterpret_cast<char*>(&value), sizeof(float))) {
    tensor_vector.push_back(value);
  }
  file.close();
  return tensor_vector;
}

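// Draws the initial 1x64x64x4 latent from N(0, 1), scaled by sigma (the
// schedule's maximum sigma when called from generate()).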
std::vector<float> Runner::gen_random_latent(float sigma) {
  std::random_device rnd_device;
  std::mt19937 mersenne_engine{rnd_device()};
  std::normal_distribution<float> dist{0.0f, 1.0f};

  constexpr int latent_size = 1 * 64 * 64 * 4;
  std::vector<float> random_vector(latent_size);

  for (float& value : random_vector) {
    value = dist(mersenne_engine) * sigma;
  }
  return random_vector;
}

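// Returns num_time_steps_ inference timesteps linearly spaced from
// (num_train_timesteps_ - 1) down to 0, inclusive.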
std::vector<float> Runner::get_time_steps() {
  std::vector<float> time_steps(num_time_steps_);
  for (int i = 0; i < num_time_steps_; ++i) {
    time_steps[i] = (num_train_timesteps_ - 1) *
        (1.0f - static_cast<float>(i) / (num_time_steps_ - 1));
  }
  return time_steps;
}

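// Rebuilds the training noise schedule (scaled-linear betas between
// beta_start_ and beta_end_) and converts it to sigmas via
// sigma_t = sqrt((1 - alpha_bar_t) / alpha_bar_t), then linearly interpolates
// the sigmas onto the inference timesteps. The result is ordered from largest
// sigma to smallest, with a trailing 0.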
std::vector<float> Runner::get_sigmas(const std::vector<float>& time_steps) {
  float start = std::sqrt(beta_start_);
  float end = std::sqrt(beta_end_);
  // Scaled-linear schedule: beta_i = (sqrt(beta_start) + i * step)^2, and
  // alpha_i = 1 - beta_i.
  std::vector<float> alphas(num_train_timesteps_);
  float step = (end - start) / (num_train_timesteps_ - 1);
  for (int i = 0; i < num_train_timesteps_; ++i) {
    float value = start + i * step;
    alphas[i] = 1 - (value * value);
  }

  std::vector<float> alphas_cumprod(num_train_timesteps_);
  float cumprod = 1.0f;
  for (int i = 0; i < num_train_timesteps_; ++i) {
    cumprod *= alphas[i];
    alphas_cumprod[i] = cumprod;
  }

  std::vector<float> sigmas(num_train_timesteps_);
  for (int i = 0; i < num_train_timesteps_; ++i) {
    sigmas[i] = std::sqrt((1.0f - alphas_cumprod[i]) / alphas_cumprod[i]);
  }

  std::vector<float> res(time_steps.size());
  for (size_t i = 0; i < time_steps.size(); ++i) {
    float index =
        static_cast<float>(i) * (sigmas.size() - 1) / (time_steps.size() - 1);
    size_t lower_index = static_cast<size_t>(std::floor(index));
    size_t upper_index = static_cast<size_t>(std::ceil(index));

    float weight = index - lower_index;
    res[i] =
        (1.0f - weight) * sigmas[lower_index] + weight * sigmas[upper_index];
  }
  std::reverse(res.begin(), res.end());
  res.push_back(0);

  return res;
}

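// Scales the latent by 1 / sqrt(sigma^2 + 1) before each unet call,
// mirroring the Euler scheduler's scale_model_input.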
void Runner::scale_model_input(
    const std::vector<float>& latents,
    std::vector<float>& latent_model_input,
    float sigma) {
  for (size_t i = 0; i < latents.size(); i++) {
    latent_model_input[i] = (latents[i] / std::sqrt(sigma * sigma + 1));
  }
}

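// Affine (de)quantization helpers for the models' uint16 I/O:
//   q = x / scale + |offset|,  x = (q - |offset|) * scale.
// Offsets are expected to arrive as non-positive zero-points, hence the abs.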
void Runner::quant_tensor(
    const std::vector<float>& fp_vec,
    std::vector<uint16_t>& quant_vec,
    float scale,
    int offset) {
  offset = std::abs(offset);
  for (size_t i = 0; i < fp_vec.size(); i++) {
    quant_vec[i] = static_cast<uint16_t>((fp_vec[i] / scale) + offset);
  }
}

void Runner::dequant_tensor(
    const std::vector<uint16_t>& quant_vec,
    std::vector<float>& fp_vec,
    float scale,
    int offset) {
  offset = std::abs(offset);
  for (size_t i = 0; i < quant_vec.size(); i++) {
    fp_vec[i] = (quant_vec[i] - offset) * scale;
  }
}

// Uses the same algorithm as the Python EulerDiscreteScheduler.
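// One step amounts to:
//   x_0     = x_t - sigma_t * eps_theta          (predicted original sample)
//   d       = (x_t - x_0) / sigma_t              (derivative)
//   x_{t+1} = x_t + d * (sigma_{t+1} - sigma_t)  (Euler step)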
void Runner::step(
    const std::vector<float>& model_output,
    const std::vector<float>& sigmas,
    std::vector<float>& sample,
    std::vector<float>& prev_sample,
    int step_index) {
  float sigma = sigmas[step_index];
  float dt = sigmas[step_index + 1] - sigma;

  for (size_t i = 0; i < sample.size(); ++i) {
    float pred_original_sample = sample[i] - (sigma * model_output[i]);
    float derivative = (sample[i] - pred_original_sample) / sigma;
    prev_sample[i] = sample[i] + (derivative * dt);
  }
  sample = prev_sample;
}

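// Runs the full text-to-image pipeline: tokenize the prompt and an empty
// (unconditional) prompt, encode both with the text encoder, denoise the
// latent for num_time_steps_ unet iterations with classifier-free guidance,
// decode with the VAE, and write the fp32 512x512x3 image to output_path_ as
// raw bytes.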
Error Runner::generate(std::string prompt) {
  ET_LOG(Info, "Start generating");
  stats_.generate_start_ms = time_in_ms();

  // Tokenize the conditional and unconditional (empty) prompts, padded to
  // max_tokens_.
  stats_.tokenizer_parsing_start_ms = time_in_ms();
  std::vector<int32_t> cond_tokens = tokenize(prompt);
  cond_tokens.resize(max_tokens_);
  std::vector<int32_t> uncond_tokens = tokenize("");
  uncond_tokens.resize(max_tokens_);
  stats_.tokenizer_parsing_end_ms = time_in_ms();

  std::vector<Result<MethodMeta>> method_metas = get_methods_meta();

  MethodMeta encoder_method_meta = method_metas[0].get();
  // Initialize text_encoder input tensors: cond/uncond tokenized_input[1,77]
  auto cond_tokens_tensor = from_blob(
      cond_tokens.data(),
      {1, 77},
      encoder_method_meta.input_tensor_meta(0)->scalar_type());
  auto uncond_tokens_tensor = from_blob(
      uncond_tokens.data(),
      {1, 77},
      encoder_method_meta.input_tensor_meta(0)->scalar_type());
  // Initialize text_encoder output tensors: cond/uncond embedding[1,77,1024]
  constexpr int emb_size = 1 * 77 * 1024;
  std::vector<uint16_t> cond_emb_vec(emb_size);
  std::vector<uint16_t> uncond_emb_vec(emb_size);
  std::vector<float> fp_emb_vec(emb_size);
  auto cond_emb_tensor = from_blob(
      cond_emb_vec.data(),
      {1, 77, 1024},
      encoder_method_meta.output_tensor_meta(0)->scalar_type());
  auto uncond_emb_tensor = from_blob(
      uncond_emb_vec.data(),
      {1, 77, 1024},
      encoder_method_meta.output_tensor_meta(0)->scalar_type());
  auto ret = modules_[0]->set_output(method_names_[0], cond_emb_tensor);
  long encoder_start = time_in_ms();
  auto cond_res = modules_[0]->execute(method_names_[0], cond_tokens_tensor);
  stats_.text_encoder_execution_time += (time_in_ms() - encoder_start);
  ret = modules_[0]->set_output(method_names_[0], uncond_emb_tensor);
  encoder_start = time_in_ms();
  auto uncond_res =
      modules_[0]->execute(method_names_[0], uncond_tokens_tensor);
  stats_.text_encoder_execution_time += (time_in_ms() - encoder_start);

  // Initialize unet parameters
  MethodMeta unet_method_meta = method_metas[1].get();
  std::vector<float> time_steps = get_time_steps();
  std::vector<float> sigmas = get_sigmas(time_steps);
  float max_sigma = *std::max_element(sigmas.begin(), sigmas.end());
  std::vector<float> latent;
  if (fix_latents_) {
    latent = gen_latent_from_file();
  } else {
    latent = gen_random_latent(max_sigma);
  }
  std::vector<float> prev_sample(latent.size());

  // Initialize unet input tensors
  // 1. latent[1,64,64,4]
  // 2. time_embedding[1,1280]
  // 3. cond/uncond embedding[1,77,1024]
  std::vector<uint16_t> latent_model_input(latent.size());
  std::vector<float> fp_latent_model_input(latent.size());
  auto latent_tensor = from_blob(
      latent_model_input.data(),
      {1, 64, 64, 4},
      unet_method_meta.input_tensor_meta(0)->scalar_type());
  std::vector<TensorPtr> time_emb_tensors;
  time_emb_tensors.reserve(num_time_steps_);
  for (auto step_index = 0; step_index < num_time_steps_; step_index++) {
    time_emb_tensors.emplace_back(from_blob(
        time_emb_list_[step_index].data(),
        {1, 1280},
        unet_method_meta.input_tensor_meta(1)->scalar_type()));
  }
  // Requantize the text encoder outputs to the unet's text-embedding
  // quantization parameters.
  dequant_tensor(
      cond_emb_vec,
      fp_emb_vec,
      text_encoder_output_scale_,
      text_encoder_output_offset_);
  quant_tensor(
      fp_emb_vec,
      cond_emb_vec,
      unet_input_text_emb_scale_,
      unet_input_text_emb_offset_);
  dequant_tensor(
      uncond_emb_vec,
      fp_emb_vec,
      text_encoder_output_scale_,
      text_encoder_output_offset_);
  quant_tensor(
      fp_emb_vec,
      uncond_emb_vec,
      unet_input_text_emb_scale_,
      unet_input_text_emb_offset_);

  // Initialize unet output tensors: text/uncond noise_pred[1,64,64,4]
  std::vector<uint16_t> noise_pred_text(latent.size());
  std::vector<uint16_t> noise_pred_uncond(latent.size());
  std::vector<float> fp_noise_pred_text(noise_pred_text.size());
  std::vector<float> fp_noise_pred_uncond(noise_pred_uncond.size());
  auto noise_pred_text_tensor = from_blob(
      noise_pred_text.data(),
      {1, 64, 64, 4},
      unet_method_meta.output_tensor_meta(0)->scalar_type());
  auto noise_pred_uncond_tensor = from_blob(
      noise_pred_uncond.data(),
      {1, 64, 64, 4},
      unet_method_meta.output_tensor_meta(0)->scalar_type());

  // Execute unet
  for (int step_index = 0; step_index < num_time_steps_; step_index++) {
    long start_post_process = time_in_ms();
    scale_model_input(latent, fp_latent_model_input, sigmas[step_index]);

    quant_tensor(
        fp_latent_model_input,
        latent_model_input,
        unet_input_latent_scale_,
        unet_input_latent_offset_);

    stats_.unet_aggregate_post_processing_time +=
        (time_in_ms() - start_post_process);
    ret = modules_[1]->set_output(method_names_[1], noise_pred_text_tensor);
    long start_unet_execution = time_in_ms();
    auto cond_res = modules_[1]->execute(
        method_names_[1],
        {latent_tensor, time_emb_tensors[step_index], cond_emb_tensor});
    stats_.unet_aggregate_execution_time +=
        (time_in_ms() - start_unet_execution);
    ret = modules_[1]->set_output(method_names_[1], noise_pred_uncond_tensor);
    start_unet_execution = time_in_ms();
    auto uncond_res = modules_[1]->execute(
        method_names_[1],
        {latent_tensor,
         time_emb_tensors[step_index],
         uncond_emb_tensor}); // results in noise_pred_uncond_vec
    stats_.unet_aggregate_execution_time +=
        (time_in_ms() - start_unet_execution);

    // start unet post processing
    start_post_process = time_in_ms();

    dequant_tensor(
        noise_pred_text,
        fp_noise_pred_text,
        unet_output_scale_,
        unet_output_offset_);
    dequant_tensor(
        noise_pred_uncond,
        fp_noise_pred_uncond,
        unet_output_scale_,
        unet_output_offset_);

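    // Classifier-free guidance:
    //   noise = uncond + guidance_scale_ * (cond - uncond)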
    for (size_t i = 0; i < fp_noise_pred_text.size(); i++) {
      fp_noise_pred_text[i] = fp_noise_pred_uncond[i] +
          guidance_scale_ * (fp_noise_pred_text[i] - fp_noise_pred_uncond[i]);
    }
    step(fp_noise_pred_text, sigmas, latent, prev_sample, step_index);
    stats_.unet_aggregate_post_processing_time +=
        (time_in_ms() - start_post_process);
  }

  // Start VAE
  MethodMeta vae_method_meta = method_metas[2].get();
  // Initialize vae input tensor: latent[1,64,64,4]
  std::vector<uint16_t> vae_input(latent.size());
  auto vae_input_tensor = from_blob(
      vae_input.data(),
      {1, 64, 64, 4},
      vae_method_meta.input_tensor_meta(0)->scalar_type());
  // Initialize vae output tensor: output[1,512,512,3]
  constexpr int image_size = 1 * 512 * 512 * 3;
  std::vector<uint16_t> q_out(image_size);
  std::vector<float> out(image_size);
  auto output_tensor = from_blob(
      q_out.data(),
      {1, 512, 512, 3},
      vae_method_meta.output_tensor_meta(0)->scalar_type());

  quant_tensor(latent, vae_input, vae_input_scale_, vae_input_offset_);

  ret = modules_[2]->set_output(method_names_[2], output_tensor);
  long start_vae_execution = time_in_ms();
  auto vae_res = modules_[2]->execute(method_names_[2], vae_input_tensor);
  stats_.vae_execution_time = (time_in_ms() - start_vae_execution);
  stats_.generate_end_ms = time_in_ms();

  // Dequantize the uint16 output to fp32
  dequant_tensor(q_out, out, vae_output_scale_, vae_output_offset_);

  // Save the output image
  auto output_file_name = output_path_ + "/output_0_0.raw";
  std::ofstream fout(output_file_name.c_str(), std::ios::binary);
  fout.write(
      reinterpret_cast<const char*>(out.data()), out.size() * sizeof(float));
  fout.close();

  return Error::Ok;
}

Error Runner::print_performance() {
  ET_LOG(Info, "\tTotal Number of steps:\t\t\t\t%d", num_time_steps_);

  ET_LOG(
      Info,
      "\tTokenizer Load Time:\t\t\t\t%f (seconds)",
      ((double)(stats_.tokenizer_load_end_ms - stats_.tokenizer_load_start_ms) /
       stats_.SCALING_FACTOR_UNITS_PER_SECOND));

  ET_LOG(
      Info,
      "\tModel Load Time:\t\t\t\t%f (seconds)",
      ((double)(stats_.model_load_end_ms - stats_.model_load_start_ms) /
       stats_.SCALING_FACTOR_UNITS_PER_SECOND));

  ET_LOG(
      Info,
      "\tGenerate Time(Tokenize + Encoder + UNet + VAE):\t%f (seconds)",
      ((double)(stats_.generate_end_ms - stats_.generate_start_ms) /
       stats_.SCALING_FACTOR_UNITS_PER_SECOND));

  ET_LOG(
      Info,
      "\tTokenize Time:\t\t\t\t\t%f (seconds)",
      ((double)(stats_.tokenizer_parsing_end_ms -
                stats_.tokenizer_parsing_start_ms) /
       stats_.SCALING_FACTOR_UNITS_PER_SECOND));

  ET_LOG(
      Info,
      "\tText Encoder Execution Time:\t\t\t%f (seconds)",
      ((double)(stats_.text_encoder_execution_time) /
       stats_.SCALING_FACTOR_UNITS_PER_SECOND));

  ET_LOG(
      Info,
      "\tUnet Aggregate (Cond + Uncond) Execution Time:\t%f (seconds)",
      ((double)stats_.unet_aggregate_execution_time /
       (stats_.SCALING_FACTOR_UNITS_PER_SECOND)));

  // Cast before dividing by the step count to avoid integer truncation.
  ET_LOG(
      Info,
      "\tUnet Average Execution Time:\t\t\t%f (seconds)",
      (((double)stats_.unet_aggregate_execution_time /
        (num_time_steps_ * 2)) /
       (stats_.SCALING_FACTOR_UNITS_PER_SECOND)));

  ET_LOG(
      Info,
      "\tUnet Aggregate Post-Processing Time:\t\t%f (seconds)",
      ((double)(stats_.unet_aggregate_post_processing_time) /
       stats_.SCALING_FACTOR_UNITS_PER_SECOND));

  ET_LOG(
      Info,
      "\tUnet Average Post-Processing Time:\t\t%f (seconds)",
      (((double)stats_.unet_aggregate_post_processing_time /
        (num_time_steps_ * 2)) /
       (stats_.SCALING_FACTOR_UNITS_PER_SECOND)));

  ET_LOG(
      Info,
      "\tVAE Execution Time:\t\t\t\t%f (seconds)",
      ((double)(stats_.vae_execution_time) /
       stats_.SCALING_FACTOR_UNITS_PER_SECOND));
  return Error::Ok;
}

} // namespace example