1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under the BSD-style license found in the 6 * LICENSE file in the root directory of this source tree. 7 */ 8 9 // Contains values that are used by the mtk_llama_runner.cpp 10 11 #pragma once 12 13 namespace mtk::vars { 14 using example::llm_helper::LLMType; 15 16 // Sizes 17 const size_t PROMPT_TOKEN_BATCH_SIZE = 128; 18 const size_t CACHE_SIZE = 512; 19 const size_t HIDDEN_SIZE = 4096; 20 const size_t NUM_HEAD = 32; 21 const size_t NUM_LAYER = 32; 22 const size_t MAX_TOKEN_LENGTH = 8192; 23 const double ROT_EMB_BASE = 500000; 24 25 // Types 26 const LLMType MODEL_INPUT_TYPE = LLMType::FP32; 27 const LLMType MODEL_OUTPUT_TYPE = LLMType::FP32; 28 const LLMType CACHE_TYPE = LLMType::FP32; 29 const LLMType MASK_TYPE = LLMType::FP32; 30 const LLMType ROT_EMB_TYPE = LLMType::FP32; 31 32 // Paths 33 const std::string TOKENIZER_PATH = 34 "/data/local/tmp/et-mtk/llama3/tokenizer.model"; 35 const std::string TOKEN_EMBEDDING_PATH = 36 "/data/local/tmp/et-mtk/llama3/embedding_llama3-8B-instruct_fp32.bin"; 37 38 // Comma-Separated Paths 39 const std::string PROMPT_MODEL_PATHS = 40 "/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_128t512c_3.pte,"; 41 42 // Comma-Separated Paths 43 const std::string GEN_MODEL_PATHS = 44 "/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_0.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_1.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_2.pte,/data/local/tmp/et-mtk/llama3/llama3-8B-instruct_A16W4_4_chunks_1t512c_3.pte,"; 45 46 } // namespace mtk::vars 47