/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

// A simple phi-3-mini runner that includes preprocessing and post-processing
// logic. The module takes in a string as input and emits a string as output.

#pragma once

#include <memory>
#include <string>
#include <vector>

#include <executorch/extension/llm/sampler/sampler.h>
#include <executorch/extension/llm/tokenizer/tokenizer.h>
#include <executorch/extension/module/module.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>

namespace example {

class Runner {
 public:
  explicit Runner(
      const std::string& model_path,
      const std::string& tokenizer_path,
      const float temperature = 0.8f);

  /**
   * Generates a response for a given prompt.
   *
   * @param[in] prompt The prompt to generate a response for.
   * @param[in] max_seq_len The maximum length of the sequence to generate,
   * including the prompt.
   */
  void generate(const std::string& prompt, std::size_t max_seq_len);

 private:
  // Samples the next token from the model's output logits.
  uint64_t logits_to_token(const exec_aten::Tensor& logits_tensor);
  // Feeds the prompt tokens to the model and returns the first generated
  // token.
  uint64_t prefill(std::vector<uint64_t>& tokens);
  // Runs a single decode step on one token and returns the next token.
  uint64_t run_model_step(uint64_t token);

  std::unique_ptr<executorch::extension::Module> module_;
  std::unique_ptr<executorch::extension::llm::Tokenizer> tokenizer_;
  std::unique_ptr<executorch::extension::llm::Sampler> sampler_;
};

} // namespace example
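
// Example usage (a minimal sketch, kept in comment form so the header stays
// valid C++). The include spelling, the "phi-3-mini.pte" / "tokenizer.bin"
// paths, and the prompt/sequence-length values are illustrative assumptions,
// not artifacts shipped with this header; substitute the model and tokenizer
// files you exported yourself.
//
//   #include "runner.h"  // this header
//
//   int main() {
//     example::Runner runner(
//         "phi-3-mini.pte", "tokenizer.bin", /*temperature=*/0.8f);
//     // Prints the generated continuation for the prompt, capped at
//     // max_seq_len tokens including the prompt itself.
//     runner.generate("Tell me a story.", /*max_seq_len=*/128);
//     return 0;
//   }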