xref: /aosp_15_r20/external/executorch/extension/llm/runner/text_prefiller.h (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
// Given prompt tokens already encoded by a tokenizer, prefill the KV cache of
// an LLM.
11 
12 #pragma once
13 
14 #include <executorch/extension/llm/runner/text_decoder_runner.h>
15 #include <executorch/extension/llm/tokenizer/tokenizer.h>
16 #include <functional>
17 
18 namespace executorch {
19 namespace extension {
20 namespace llm {
21 
22 class ET_EXPERIMENTAL TextPrefiller {
23  public:
24   TextPrefiller(
25       TextDecoderRunner* text_decoder_runner,
26       bool use_kv_cache_,
27       bool enable_parallel_prefill);
28   /**
29    * Prefill an LLM Module with the given text input.
30    * @param prompt_tokens The text prompt tokens to the LLM Module. Encoded by
31    * tokenizer.
32    * @param start_pos The starting position in KV cache of the input in the LLM
33    * Module.
34    * @return The next token of the LLM Module after prefill.
35    */
36   ::executorch::runtime::Result<uint64_t> prefill(
37       std::vector<uint64_t>& prompt_tokens,
38       int64_t& start_pos);
39 
40  private:
41   TextDecoderRunner* text_decoder_runner_;
42   bool use_kv_cache_;
43   bool enable_parallel_prefill_;
44 };
45 
46 } // namespace llm
47 } // namespace extension
48 } // namespace executorch
49 
namespace torch {
namespace executor {
// Backward-compatibility alias: keeps `TextPrefiller` nameable as
// `::torch::executor::TextPrefiller` for code that still uses the old
// namespace.
// TODO(T197294990): Remove these deprecated aliases once all users have moved
// to the new `::executorch` namespaces.
using ::executorch::extension::llm::TextPrefiller;
} // namespace executor
} // namespace torch
57