xref: /aosp_15_r20/external/executorch/extension/llm/runner/irunner.h (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // An interface for LLM runners. Developers can create their own runner that
10 // implements their own load and generation logic to run the model.
11 
12 #pragma once
13 
14 #include <functional>
15 #include <string>
16 
17 #include <executorch/extension/llm/runner/stats.h>
18 #include <executorch/extension/module/module.h>
19 
20 namespace executorch {
21 namespace extension {
22 namespace llm {
23 
class ET_EXPERIMENTAL IRunner {
 public:
  virtual ~IRunner() = default;

  /// Returns true once the model (and tokenizer) have been loaded and
  /// the runner is ready to generate.
  virtual bool is_loaded() const = 0;

  /// Loads the model and tokenizer; must complete successfully before
  /// generate() is called.
  /// @return Error status of the load operation.
  virtual ::executorch::runtime::Error load() = 0;

  /// Generates output tokens for the given prompt, streaming results
  /// back through the supplied callbacks.
  ///
  /// @param prompt Input text to condition generation on.
  /// @param seq_len Sequence-length bound for generation — presumably the
  ///     maximum total number of tokens; confirm with concrete runners.
  /// @param token_callback Invoked with each piece of generated text as it
  ///     is produced; may be left empty ({}) to skip streaming.
  /// @param stats_callback Invoked with generation Stats (see
  ///     extension/llm/runner/stats.h); may be left empty ({}).
  /// @param echo NOTE(review): presumably controls whether the prompt text
  ///     is echoed back through token_callback — confirm against
  ///     implementations.
  /// @param warming NOTE(review): presumably marks a warm-up run (e.g. so
  ///     implementations can treat its timing/output specially) — confirm
  ///     against implementations.
  /// @return Error status of the generation run.
  virtual ::executorch::runtime::Error generate(
      const std::string& prompt,
      int32_t seq_len,
      std::function<void(const std::string&)> token_callback = {},
      std::function<void(const ::executorch::extension::llm::Stats&)>
          stats_callback = {},
      bool echo = true,
      bool warming = false) = 0;

  /// Requests that an in-progress generate() call stop producing tokens.
  virtual void stop() = 0;
};
47 
48 } // namespace llm
49 } // namespace extension
50 } // namespace executorch
51