xref: /aosp_15_r20/external/executorch/.ci/scripts/test_llama.sh (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1#!/bin/bash
2# Copyright (c) Meta Platforms, Inc. and affiliates.
3# All rights reserved.
4#
5# This source code is licensed under the BSD-style license found in the
6# LICENSE file in the root directory of this source tree.
7
8set -exu
9# shellcheck source=/dev/null
10source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
11
# Parse named command-line options. Each -flag consumes its value with
# `shift 2`. An unrecognized option prints a diagnostic and aborts.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -model)
      MODEL_NAME="$2" # stories110M
      shift 2
      ;;
    -build_tool)
      BUILD_TOOL="$2" # buck2 or cmake
      shift 2
      ;;
    -dtype)
      DTYPE="$2" # fp16, bf16, or fp32
      shift 2
      ;;
    -mode)
      MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
      shift 2
      ;;
    -upload)
      UPLOAD_DIR="$2"
      shift 2
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage
      # Bug fix: if `usage` (presumably defined in sourced utils.sh) does
      # not itself exit, nothing here shifts, so the loop would spin
      # forever on the same argument. Bail out explicitly.
      exit 1
      ;;
  esac
done
40
# Default mode to xnnpack+custom if not set.
MODE=${MODE:-"xnnpack+custom"}

# Default UPLOAD_DIR to empty string (no artifact upload) if not set.
UPLOAD_DIR="${UPLOAD_DIR:-}"

# NOTE: a former `$# -lt 4` positional-argument check was removed: the
# option loop above consumes every argument, so $# is always 0 here and
# the check printed a misleading warning on every run without exiting.
# The required options are validated individually right below instead.
# Validate that every required option was supplied. Each branch exits
# non-zero so CI fails fast on a bad invocation.
if [[ -z "${MODEL_NAME:-}" ]]; then
  echo "Missing model name, exiting..."
  exit 1
fi

if [[ -z "${BUILD_TOOL:-}" ]]; then
  echo "Missing build tool (require buck2 or cmake), exiting..."
  # Bug fix: this was a bare `exit`, which exits with the status of the
  # preceding echo (0), letting CI pass despite the missing argument.
  exit 1
fi

if [[ -z "${DTYPE:-}" ]]; then
  echo "Missing dtype, choose fp16, bf16, or fp32, exiting..."
  exit 1
fi

if [[ -z "${MODE:-}" ]]; then
  echo "Missing mode, choose portable or xnnpack, exiting..."
  exit 1
fi
70
# Derive backend/kernel feature toggles from the requested MODE string.
# A flag is ON when its keyword appears anywhere in MODE, else OFF.
if [[ "${MODE}" == *xnnpack* ]]; then XNNPACK=ON; else XNNPACK=OFF; fi
if [[ "${MODE}" == *custom* ]]; then CUSTOM=ON; else CUSTOM=OFF; fi
if [[ "${MODE}" == *qe* ]]; then QE=ON; else QE=OFF; fi
if [[ "${MODE}" == *mps* ]]; then MPS=ON; else MPS=OFF; fi

echo "MPS option ${MPS}"

if [[ "${MODE}" == *coreml* ]]; then COREML=ON; else COREML=OFF; fi

echo "COREML option ${COREML}"
104
# QNN (Qualcomm) backend setup: when MODE requests qnn, point the build at
# a pre-installed QNN SDK and stage Python bindings built out-of-tree.
if [[ "${MODE}" =~ .*qnn.* ]]; then
  QNN=ON
  # Repo root, resolved relative to this script (.ci/scripts/ -> repo root).
  export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
  # NOTE(review): SDK version and install prefix are hardcoded; assumes the
  # CI image provisioned QNN 2.25.0.240728 under /tmp/qnn — confirm against
  # the CI setup job.
  export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
  export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
  export PYTHONPATH=".."
  # Stage flatbuffer schemas next to the serializer package.
  cp schema/program.fbs exir/_serialize/program.fbs
  cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
  # NOTE(review): these .so names pin CPython 3.10 on x86_64 — this breaks
  # if the CI Python version changes; confirm against the build-x86 job.
  cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
  cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python

else
  QNN=OFF
  QNN_SDK_ROOT=""
fi

echo "QNN option ${QNN}"
echo "QNN_SDK_ROOT: ${QNN_SDK_ROOT}"
123
# Tool defaults: the environment may override the buck2 binary and the
# Python interpreter used for the export and tokenizer steps.
BUCK="${BUCK:-buck2}"
PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"

# Fail fast (via set -e) if the interpreter is not on PATH. `command -v`
# is the portable, builtin replacement for `which` and also prints the
# resolved path for the CI log.
command -v "${PYTHON_EXECUTABLE}"
133
# Configure and install the core ExecuTorch libraries into cmake-out,
# toggling backend support according to the flags derived from MODE.
# Relies on `retry` from the sourced utils.sh.
cmake_install_executorch_libraries() {
    echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
    rm -rf cmake-out
    local configure_flags=(
        -DCMAKE_INSTALL_PREFIX=cmake-out
        -DCMAKE_BUILD_TYPE=Debug
        -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON
        -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON
        -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON
        -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM"
        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON
        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON
        -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK"
        -DEXECUTORCH_BUILD_MPS="$MPS"
        -DEXECUTORCH_BUILD_COREML="$COREML"
        -DEXECUTORCH_BUILD_QNN="$QNN"
        -DQNN_SDK_ROOT="$QNN_SDK_ROOT"
        -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE"
    )
    retry cmake "${configure_flags[@]}" -Bcmake-out .
    cmake --build cmake-out -j9 --target install --config Debug
}
155
# Configure and build the llama runner example against the installed libs.
# Relies on `retry` from the sourced utils.sh; `dir` stays global as the
# original did.
cmake_build_llama_runner() {
    echo "Building llama runner"
    dir="examples/models/llama"
    local runner_flags=(
        -DCMAKE_INSTALL_PREFIX=cmake-out
        -DCMAKE_BUILD_TYPE=Debug
        -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM"
        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON
        -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK"
        -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE"
    )
    retry cmake "${runner_flags[@]}" -Bcmake-out/${dir} ${dir}
    cmake --build cmake-out/${dir} -j9 --config Debug
}
171
# Remove downloaded and generated artifacts from the working directory.
# Bug fix: uses `rm -f --` so a missing file (e.g. after a partial run)
# does not abort the whole script under `set -e`; the original bare `rm`
# calls would.
cleanup_files() {
  echo "Deleting downloaded and generated files"
  rm -f -- \
    "${CHECKPOINT_FILE_NAME}" \
    tokenizer.model \
    tokenizer.bin \
    "${EXPORTED_MODEL_NAME}" \
    result.txt \
    params.json
}
181
# Zip the exported model plus tokenizer.bin and stage the archive in
# UPLOAD_DIR so CI can collect it. No-op when UPLOAD_DIR is empty
# (i.e. -upload was not passed).
prepare_artifacts_upload() {
  if [ -n "${UPLOAD_DIR}" ]; then
    # Typo fix in the log message ("artifacs" -> "artifacts").
    echo "Preparing for uploading generated artifacts"
    zip -j model.zip "${EXPORTED_MODEL_NAME}" tokenizer.bin
    mkdir -p "${UPLOAD_DIR}"
    mv model.zip "${UPLOAD_DIR}"
  fi
}
190
# Download and create artifacts.
# params.json is created empty up front; NOTE(review):
# download_stories_model_artifacts comes from the sourced utils.sh —
# presumably it fetches the checkpoint, tokenizer.model, and populates
# params.json; confirm there.
PARAMS="params.json"
CHECKPOINT_FILE_NAME=""
touch "${PARAMS}"
if [[ "${MODEL_NAME}" == "stories110M" ]]; then
  CHECKPOINT_FILE_NAME="stories110M.pt"
  download_stories_model_artifacts
else
  # Only stories110M is wired up in this CI script.
  echo "Unsupported model name ${MODEL_NAME}"
  exit 1
fi
202
# Derive the exported model base name from mode and dtype, appending a
# short suffix for the half-precision variants.
EXPORTED_MODEL_NAME="tinyllama_${MODE}_${DTYPE}"
case "${DTYPE}" in
  fp16)
    EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}_h"
    ;;
  bf16)
    EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}_bf"
    ;;
  fp32)
    # fp32 keeps the bare name.
    ;;
  *)
    echo "Unsupported dtype ${DTYPE}"
    exit 1
    ;;
esac
215
# Export model.
# Build up the export_llama argument string; each enabled feature appends
# its own flags to the base set.
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
echo "Exporting ${EXPORTED_MODEL_NAME}"
EXPORT_ARGS="-c ${CHECKPOINT_FILE_NAME} -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
if [[ "${XNNPACK}" == "ON" ]]; then
  # XNNPACK delegate with 8da4w quantization, group size 128.
  EXPORT_ARGS="${EXPORT_ARGS} -X --xnnpack-extended-ops -qmode 8da4w -G 128"
fi
if [[ "${CUSTOM}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} --use_sdpa_with_kv_cache"
fi
if [[ "${QE}" == "ON" ]]; then
  # 8-bit embedding quantization, group size 1024.
  EXPORT_ARGS="${EXPORT_ARGS} --embedding-quantize 8,1024"
fi
if [[ "${MPS}" == "ON" ]]; then
  # NOTE(review): -kv is already in the base args; the repeat here (and in
  # the coreml/qnn branches) looks redundant — confirm export_llama
  # tolerates duplicate flags.
  EXPORT_ARGS="${EXPORT_ARGS} -kv -v --mps --disable_dynamic_shape"
fi
if [[ "${COREML}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} -kv -v --coreml --disable_dynamic_shape"
fi
if [[ "${QNN}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
fi
# Add dynamically linked library location
# EXPORT_ARGS is intentionally unquoted: it is a flat flag string that
# must word-split into separate arguments.
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
240
241# Create tokenizer.bin.
242echo "Creating tokenizer.bin"
243$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
244
245
246RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=tokenizer.bin --prompt=Once --temperature=0 --seq_len=10 --warmup=1"
# Check build tool and run the exported model, capturing stdout for the
# result check below.
# Bug fix: the old banner always said "in portable mode" regardless of the
# actual MODE.
echo "Running ${EXPORTED_MODEL_NAME} in ${MODE} mode"
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
  # Run model.
  # shellcheck source=/dev/null
  $BUCK run examples/models/llama:main -- ${RUNTIME_ARGS} > result.txt
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
  cmake_install_executorch_libraries
  cmake_build_llama_runner
  # Run llama runner
  NOW=$(date +"%H:%M:%S")
  echo "Starting to run llama runner at ${NOW}"
  # shellcheck source=/dev/null
  cmake-out/examples/models/llama/llama_main ${RUNTIME_ARGS} > result.txt
  NOW=$(date +"%H:%M:%S")
  echo "Finished at ${NOW}"
else
  # Bug fix: the old message claimed only buck2 was supported even though
  # the cmake path above is fully implemented.
  echo "Invalid build tool ${BUILD_TOOL}. Choose buck2 or cmake"
  exit 1
fi
RESULT=$(cat result.txt)
# Check results: generation must start with the expected prefix.
EXPECTED_PREFIX="Once upon a time,"
# Expected full result - may take too long to generate:
# "Once upon a time, there was a little girl named Lily. She loved to play outside" ...
# Both outcomes print the same two diagnostic lines, so emit them once.
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT}"
if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
  echo "Success"

  prepare_artifacts_upload
  cleanup_files
else
  echo "Failure; results not the same"

  cleanup_files
  exit 1
fi
287