#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

if [[ $# -lt 8 ]]; then # 4 mandatory -flag value pairs (model, build_tool, dtype, mode)
  echo "Expecting at least 4 flag/value pairs"
  echo "Usage: $0 -model <name> -build_tool <buck2|cmake> -dtype <fp16|bf16|fp32> -mode <mode> [-upload <dir>]"
fi

while [[ $# -gt 0 ]]; do
  case "$1" in
    -model)
      MODEL_NAME="$2" # stories110M
      shift 2
      ;;
    -build_tool)
      BUILD_TOOL="$2" # buck2 or cmake
      shift 2
      ;;
    -dtype)
      DTYPE="$2" # fp16, bf16, or fp32
      shift 2
      ;;
    -mode)
      MODE="$2" # portable, xnnpack+custom, xnnpack+custom+qe, mps, coreml, or qnn
      shift 2
      ;;
    -upload)
      UPLOAD_DIR="$2"
      shift 2
      ;;
    *)
      echo "Unknown option: $1"
      usage
      ;;
  esac
done

# Default mode to xnnpack+custom if not set
MODE=${MODE:-"xnnpack+custom"}

# Default UPLOAD_DIR to empty string if not set
UPLOAD_DIR="${UPLOAD_DIR:-}"

if [[ -z "${MODEL_NAME:-}" ]]; then
  echo "Missing model name, exiting..."
  exit 1
fi

if [[ -z "${BUILD_TOOL:-}" ]]; then
  echo "Missing build tool (require buck2 or cmake), exiting..."
  exit 1
fi

if [[ -z "${DTYPE:-}" ]]; then
  echo "Missing dtype, choose fp16, bf16, or fp32, exiting..."
  exit 1
fi

if [[ -z "${MODE:-}" ]]; then
  echo "Missing mode, choose portable or xnnpack+custom, exiting..."
  exit 1
fi

if [[ "${MODE}" =~ .*xnnpack.* ]]; then
  XNNPACK=ON
else
  XNNPACK=OFF
fi

if [[ "${MODE}" =~ .*custom.* ]]; then
  CUSTOM=ON
else
  CUSTOM=OFF
fi

if [[ "${MODE}" =~ .*qe.* ]]; then
  QE=ON
else
  QE=OFF
fi

if [[ "${MODE}" =~ .*mps.* ]]; then
  MPS=ON
else
  MPS=OFF
fi

echo "MPS option ${MPS}"

if [[ "${MODE}" =~ .*coreml.* ]]; then
  COREML=ON
else
  COREML=OFF
fi

echo "COREML option ${COREML}"

if [[ "${MODE}" =~ .*qnn.* ]]; then
  QNN=ON
  EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
  export EXECUTORCH_ROOT
  export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
  export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
  export PYTHONPATH=".."
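  # The QNN flow imports the flatbuffers schemas and the prebuilt pybind
  # adaptor modules straight from the source tree, so they are copied into
  # place below. This assumes an earlier x86 build under build-x86/ and a
  # Python 3.10 runtime (matching the .cpython-310 suffix of the .so files).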
  cp schema/program.fbs exir/_serialize/program.fbs
  cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
  cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
  cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
else
  QNN=OFF
  QNN_SDK_ROOT=""
fi

echo "QNN option ${QNN}"
echo "QNN_SDK_ROOT: ${QNN_SDK_ROOT}"

if [[ -z "${BUCK:-}" ]]; then
  BUCK=buck2
fi

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

which "${PYTHON_EXECUTABLE}"

cmake_install_executorch_libraries() {
  echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
  rm -rf cmake-out
  retry cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Debug \
    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
    -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
    -DEXECUTORCH_BUILD_MPS="$MPS" \
    -DEXECUTORCH_BUILD_COREML="$COREML" \
    -DEXECUTORCH_BUILD_QNN="$QNN" \
    -DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
    -Bcmake-out .
  cmake --build cmake-out -j9 --target install --config Debug
}

cmake_build_llama_runner() {
  echo "Building llama runner"
  dir="examples/models/llama"
  retry cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Debug \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
    -Bcmake-out/${dir} \
    ${dir}
  cmake --build cmake-out/${dir} -j9 --config Debug
}

cleanup_files() {
  echo "Deleting downloaded and generated files"
  rm "${CHECKPOINT_FILE_NAME}"
  rm tokenizer.model
  rm tokenizer.bin
  rm "${EXPORTED_MODEL_NAME}"
  rm result.txt
  rm params.json
}

prepare_artifacts_upload() {
  if [ -n "${UPLOAD_DIR}" ]; then
    echo "Preparing to upload generated artifacts"
    zip -j model.zip "${EXPORTED_MODEL_NAME}" tokenizer.bin
    mkdir -p "${UPLOAD_DIR}"
    mv model.zip "${UPLOAD_DIR}"
  fi
}

# Download and create artifacts.
PARAMS="params.json"
CHECKPOINT_FILE_NAME=""
touch "${PARAMS}"
if [[ "${MODEL_NAME}" == "stories110M" ]]; then
  CHECKPOINT_FILE_NAME="stories110M.pt"
  download_stories_model_artifacts
else
  echo "Unsupported model name ${MODEL_NAME}"
  exit 1
fi

# Check dtype.
EXPORTED_MODEL_NAME="tinyllama_${MODE}_${DTYPE}"
if [[ "${DTYPE}" == "fp16" ]]; then
  EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}_h"
elif [[ "${DTYPE}" == "bf16" ]]; then
  EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}_bf"
elif [[ "${DTYPE}" == "fp32" ]]; then
  :
else
  echo "Unsupported dtype ${DTYPE}"
  exit 1
fi

# Export model.
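# For the default "xnnpack+custom" mode with -dtype fp32, the command assembled
# below expands to roughly the following (illustrative only; the exact flags
# depend on the mode and dtype selected above):
#
#   python -m examples.models.llama.export_llama \
#     -c stories110M.pt -p params.json -d fp32 \
#     -n tinyllama_xnnpack+custom_fp32.pte -kv \
#     -X --xnnpack-extended-ops -qmode 8da4w -G 128 --use_sdpa_with_kv_cache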
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
echo "Exporting ${EXPORTED_MODEL_NAME}"
EXPORT_ARGS="-c ${CHECKPOINT_FILE_NAME} -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
if [[ "${XNNPACK}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} -X --xnnpack-extended-ops -qmode 8da4w -G 128"
fi
if [[ "${CUSTOM}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} --use_sdpa_with_kv_cache"
fi
if [[ "${QE}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} --embedding-quantize 8,1024"
fi
if [[ "${MPS}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} -kv -v --mps --disable_dynamic_shape"
fi
if [[ "${COREML}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} -kv -v --coreml --disable_dynamic_shape"
fi
if [[ "${QNN}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
fi
# Export the model with the arguments assembled above.
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}

# Create tokenizer.bin.
echo "Creating tokenizer.bin"
$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin

RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=tokenizer.bin --prompt=Once --temperature=0 --seq_len=10 --warmup=1"
# Check build tool.
echo "Running ${EXPORTED_MODEL_NAME} in ${MODE} mode"
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
  # Run model.
  $BUCK run examples/models/llama:main -- ${RUNTIME_ARGS} > result.txt
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
  cmake_install_executorch_libraries
  cmake_build_llama_runner
  # Run llama runner.
  NOW=$(date +"%H:%M:%S")
  echo "Starting to run llama runner at ${NOW}"
  cmake-out/examples/models/llama/llama_main ${RUNTIME_ARGS} > result.txt
  NOW=$(date +"%H:%M:%S")
  echo "Finished at ${NOW}"
else
  echo "Invalid build tool ${BUILD_TOOL}. Only buck2 and cmake are supported."
  exit 1
fi
RESULT=$(cat result.txt)
# Check results. Only the prefix is compared, since generating the full
# expected output would take too long:
# "Once upon a time, there was a little girl named Lily. She loved to play outside" ...
EXPECTED_PREFIX="Once upon a time,"
if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
  echo "Expected result prefix: ${EXPECTED_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Success"

  prepare_artifacts_upload
  cleanup_files
else
  echo "Expected result prefix: ${EXPECTED_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Failure; result does not match the expected prefix"

  cleanup_files
  exit 1
fi
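# Example invocation (illustrative; the script path is hypothetical, and it
# must be run from the ExecuTorch repo root so that the relative schema/,
# examples/, and cmake-out paths resolve):
#
#   bash path/to/this_script.sh -model stories110M -build_tool cmake \
#     -dtype fp32 -mode xnnpack+custom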