xref: /aosp_15_r20/external/executorch/examples/models/phi-3-mini/runner.h (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // A simple phi-3-mini runner that includes preprocessing and post processing
10 // logic. The module takes in a string as input and emits a string as output.
11 
12 #pragma once
13 
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include <executorch/extension/llm/sampler/sampler.h>
#include <executorch/extension/llm/tokenizer/tokenizer.h>
#include <executorch/extension/module/module.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
21 
22 namespace example {
23 
/**
 * Text-in/text-out runner for phi-3-mini: owns the exported module, the
 * tokenizer, and the sampler, and drives prompt prefill plus token-by-token
 * decoding.
 */
class Runner {
 public:
  /**
   * Loads the model and tokenizer from disk and sets up sampling.
   *
   * @param[in] model_path Path to the exported ExecuTorch model file.
   * @param[in] tokenizer_path Path to the serialized tokenizer file.
   * @param[in] temperature Sampling temperature forwarded to the sampler
   * (presumably controls output randomness — see sampler.h). Defaults to
   * 0.8f.
   */
  explicit Runner(
      const std::string& model_path,
      const std::string& tokenizer_path,
      const float temperature = 0.8f);

  /**
   * Generates response for a given prompt.
   *
   * @param[in] prompt The prompt to generate a response for.
   * @param[in] max_seq_len The maximum length of the sequence to generate,
   * including prompt.
   */
  void generate(const std::string& prompt, std::size_t max_seq_len);

 private:
  // Selects the next token id from the model's output logits.
  // NOTE(review): selection strategy (argmax vs. temperature sampling via
  // sampler_) lives in the .cpp — confirm there.
  uint64_t logits_to_token(const exec_aten::Tensor& logits_tensor);

  // Runs the encoded prompt tokens through the model and returns a token id
  // — presumably the first generated token; verify against the .cpp.
  uint64_t prefill(std::vector<uint64_t>& tokens);

  // Executes one decode step: feeds `token` to the model and returns the
  // next token id.
  uint64_t run_model_step(uint64_t token);

  // Owned components, created in the constructor from the given paths.
  std::unique_ptr<executorch::extension::Module> module_;  // executes the exported model
  std::unique_ptr<executorch::extension::llm::Tokenizer> tokenizer_;  // text <-> token ids
  std::unique_ptr<executorch::extension::llm::Sampler> sampler_;  // picks tokens from logits
};
49 
50 } // namespace example
51