1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
// Contains the configuration values used by mtk_llama_runner.cpp.
10 
11 #pragma once
12 
13 namespace mtk::vars {
14 using example::llm_helper::LLMType;
15 
16 // Sizes
17 const size_t PROMPT_TOKEN_BATCH_SIZE = 128;
18 const size_t CACHE_SIZE = 512;
19 const size_t HIDDEN_SIZE = 4096;
20 const size_t NUM_HEAD = 32;
21 const size_t NUM_LAYER = 32;
22 const size_t MAX_TOKEN_LENGTH = 8192;
23 const double ROT_EMB_BASE = 500000;
24 
25 // Types
26 const LLMType MODEL_INPUT_TYPE = LLMType::FP32;
27 const LLMType MODEL_OUTPUT_TYPE = LLMType::FP32;
28 const LLMType CACHE_TYPE = LLMType::FP32;
29 const LLMType MASK_TYPE = LLMType::FP32;
30 const LLMType ROT_EMB_TYPE = LLMType::FP32;
31 
32 // Paths
33 const std::string TOKENIZER_PATH =
34     "/data/local/tmp/et-mtk/llama3/tokenizer.model";
35 const std::string TOKEN_EMBEDDING_PATH =
36     "/data/local/tmp/et-mtk/llama3/embedding_llama3-8B-instruct_fp32.bin";
37 
38 // Comma-Separated Paths
39 const std::string PROMPT_MODEL_PATHS =
40     "/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_3.pte,";
41 
42 // Comma-Separated Paths
43 const std::string GEN_MODEL_PATHS =
44     "/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_3.pte,";
45 
46 } // namespace mtk::vars
47