1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under the BSD-style license found in the 6 * LICENSE file in the root directory of this source tree. 7 */ 8 9 // Given a text prompt, encode it using tokenizer and prefill the KV cache of a 10 // LLM. 11 12 #pragma once 13 14 #include <executorch/extension/llm/runner/text_decoder_runner.h> 15 #include <executorch/extension/llm/tokenizer/tokenizer.h> 16 #include <functional> 17 18 namespace executorch { 19 namespace extension { 20 namespace llm { 21 22 class ET_EXPERIMENTAL TextPrefiller { 23 public: 24 TextPrefiller( 25 TextDecoderRunner* text_decoder_runner, 26 bool use_kv_cache_, 27 bool enable_parallel_prefill); 28 /** 29 * Prefill an LLM Module with the given text input. 30 * @param prompt_tokens The text prompt tokens to the LLM Module. Encoded by 31 * tokenizer. 32 * @param start_pos The starting position in KV cache of the input in the LLM 33 * Module. 34 * @return The next token of the LLM Module after prefill. 35 */ 36 ::executorch::runtime::Result<uint64_t> prefill( 37 std::vector<uint64_t>& prompt_tokens, 38 int64_t& start_pos); 39 40 private: 41 TextDecoderRunner* text_decoder_runner_; 42 bool use_kv_cache_; 43 bool enable_parallel_prefill_; 44 }; 45 46 } // namespace llm 47 } // namespace extension 48 } // namespace executorch 49 50 namespace torch { 51 namespace executor { 52 // TODO(T197294990): Remove these deprecated aliases once all users have moved 53 // to the new `::executorch` namespaces. 54 using ::executorch::extension::llm::TextPrefiller; 55 } // namespace executor 56 } // namespace torch 57