# xref: /aosp_15_r20/external/pytorch/.ci/pytorch/common_utils.sh (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1#!/bin/bash
2
3# Common util **functions** that can be sourced in other scripts.
4
5# note: printf is used instead of echo to avoid backslash
6# processing and to properly handle values that begin with a '-'.
7
# Logging helpers.  printf (not echo) is used so that arguments that
# start with '-' or contain backslashes are printed verbatim.
log() {
  printf '%s\n' "$*"
}

# Print an error message, prefixed with "ERROR:", on stderr.
error() {
  log "ERROR: $*" >&2
}

# Print an error message on stderr and terminate the script with status 1.
fatal() {
  error "$@"
  exit 1
}
11
# Run "$@" up to four times, sleeping 10/20/40 seconds between attempts.
# Returns 0 on the first success, otherwise the exit code of the final
# attempt.
#
# Fixes in this revision:
#   - the old `(sleep N && "$@")` form ran retries 2-4 in subshells while
#     attempt 1 ran in the current shell, so environment changes made by a
#     retried shell function were inconsistently discarded; all attempts
#     now run in the current shell.
#   - the command is written once instead of four times.
retry () {
    local delay status
    if "$@"; then
        return 0
    fi
    for delay in 10 20 40; do
        sleep "$delay"
        if "$@"; then
            return 0
        else
            # $? at the top of the else branch is the failed attempt's code.
            status=$?
        fi
    done
    return "$status"
}
15
# compositional trap taken from https://stackoverflow.com/a/7287873/23845
# appends a command to a trap, preserving whatever command was already
# installed for that signal/event instead of replacing it
#
# - 1st arg:  code to add
# - remaining args:  names of traps (signal/event names) to modify
#
# note: trap_add_cmd and trap_add_name are intentionally left un-local
# here, so they remain visible inside the $( ) subshell below; they do
# leak as globals into the sourcing shell.
trap_add() {
    trap_add_cmd=$1; shift || fatal "${FUNCNAME[0]} usage error"
    for trap_add_name in "$@"; do
        trap -- "$(
            # helper fn to get existing trap command from output
            # of trap -p ("trap -- '<cmd>' NAME" -> $3 is <cmd>)
            extract_trap_cmd() { printf '%s\n' "$3"; }
            # print existing trap command with newline
            eval "extract_trap_cmd $(trap -p "${trap_add_name}")"
            # print the new trap command
            printf '%s\n' "${trap_add_cmd}"
        )" "${trap_add_name}" \
            || fatal "unable to add to trap ${trap_add_name}"
    done
}
# set the trace attribute for the above function.  this is
# required to modify DEBUG or RETURN traps because functions don't
# inherit them unless the trace attribute is set
declare -f -t trap_add
41
function assert_git_not_dirty() {
    # TODO: we should add an option to `build_amd.py` that reverts the repo to
    #       an unmodified state.
    # rocm/xla builds modify the checkout (see TODO above), so the check is
    # skipped for them.
    if [[ "$BUILD_ENVIRONMENT" == *rocm* || "$BUILD_ENVIRONMENT" == *xla* ]]; then
        return 0
    fi
    # Untracked third_party entries are expected; anything else is a failure.
    git_status=$(git status --porcelain | grep -v '?? third_party' || true)
    if [[ -n "$git_status" ]]; then
        echo "Build left local git repository checkout dirty"
        echo "git status --porcelain:"
        echo "${git_status}"
        exit 1
    fi
}
55
function pip_install_whl() {
  # Install locally built wheels (PyTorch and other build artifacts)
  # fully offline: no index lookups, no dependency resolution.
  python3 -m pip install --no-index --no-deps "$@"
}
61
function pip_install() {
  # Try up to three times with the progress bar disabled; very old pip
  # versions do not know "--progress-bar", so fall back to three plain
  # attempts.  The status of the last attempt is the function's status.
  pip install --progress-bar off "$@" && return 0
  pip install --progress-bar off "$@" && return 0
  pip install --progress-bar off "$@" && return 0
  pip install "$@" && return 0
  pip install "$@" && return 0
  pip install "$@"
}
68
function pip_uninstall() {
  # Attempt the uninstall a second time if the first try fails,
  # to paper over transient pip errors.
  if ! pip uninstall -y "$@"; then
    pip uninstall -y "$@"
  fi
}
73
# Run "$@" with errexit temporarily disabled, and make the command's exit
# code both this function's return value and the global $retcode.  Lets
# callers inspect a failure without tripping `set -e`.
# NOTE(review): this unconditionally re-enables `set -e` afterwards, even
# if the caller had errexit off — confirm callers always run with -e.
function get_exit_code() {
  set +e
  "$@"
  retcode=$?
  set -e
  return $retcode
}
81
function get_bazel() {
  # Download the cross-platform, dependency-free Python version of
  # Bazelisk to tools/bazel; Bazelisk then fetches the platform-specific
  # Bazel version given by .bazelversion.
  retry curl --location --output tools/bazel \
    https://raw.githubusercontent.com/bazelbuild/bazelisk/v1.16.0/bazelisk.py
  # Verify the download against a pinned SHA-1 before trusting it.
  echo 'd4369c3d293814d3188019c9f7527a948972d9f8  tools/bazel' \
    | shasum --algorithm=1 --check
  chmod u+x tools/bazel
}
92
93# This function is bazel specific because of the bug
94# in the bazel that requires some special paths massaging
95# as a workaround. See
96# https://github.com/bazelbuild/bazel/issues/10167
function install_sccache_nvcc_for_bazel() {
  # Keep the real compiler around so the wrapper written below can exec it.
  sudo mv /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc-real

  # Write the `/usr/local/cuda/bin/nvcc` wrapper script.  When the parent
  # process is not sccache, re-invoke nvcc through sccache; when invoked
  # by sccache itself, exec the bazel-visible real nvcc.  The backslash
  # escapes (\$) defer expansion to the generated script, not this one.
  cat << EOF | sudo tee /usr/local/cuda/bin/nvcc
#!/bin/sh
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
  exec sccache /usr/local/cuda/bin/nvcc "\$@"
else
  exec external/local_cuda/cuda/bin/nvcc-real "\$@"
fi
EOF

  sudo chmod +x /usr/local/cuda/bin/nvcc
}
112
# Install MonkeyType via the retrying pip helper.
function install_monkeytype() {
  pip_install MonkeyType
}
117
118
# Emit the pinned commit hash for project "$1", read from
# .github/ci_commit_pins/<name>.txt (relative to the current directory).
function get_pinned_commit() {
  cat ".github/ci_commit_pins/${1}.txt"
}
122
# Install torchaudio from its pinned commit; pass "cuda" as $1 to build
# for the CI CUDA architectures.
function install_torchaudio() {
  local audio_commit
  audio_commit=$(get_pinned_commit audio)
  if [[ "$1" == "cuda" ]]; then
    # TODO: This is better to be passed as a parameter from _linux-test workflow
    # so that it can be consistent with what is set in build
    TORCH_CUDA_ARCH_LIST="8.0;8.6" pip_install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git@${audio_commit}"
  else
    pip_install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git@${audio_commit}"
  fi
}
135
# Install torchdata and torchtext from their pinned commits
# (data first, then text).
function install_torchtext() {
  local pinned_data pinned_text
  pinned_data=$(get_pinned_commit data)
  pinned_text=$(get_pinned_commit text)
  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/data.git@${pinned_data}"
  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/text.git@${pinned_text}"
}
144
# Install torchvision from its pinned commit.  If LD_PRELOAD is active
# (presumably an ASAN runtime — see the bug link below), temporarily
# append a stub dlerror() shim for the duration of the build, then
# restore the original LD_PRELOAD afterwards.
function install_torchvision() {
  local orig_preload
  local commit
  commit=$(get_pinned_commit vision)
  # Save the caller's LD_PRELOAD so it can be restored after the build.
  orig_preload=${LD_PRELOAD}
  if [ -n "${LD_PRELOAD}" ]; then
    # Silence dlerror to work-around glibc ASAN bug, see https://sourceware.org/bugzilla/show_bug.cgi?id=27653#c9
    echo 'char* dlerror(void) { return "";}'|gcc -fpic -shared -o "${HOME}/dlerror.so" -x c -
    LD_PRELOAD=${orig_preload}:${HOME}/dlerror.so
  fi
  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git@${commit}"
  if [ -n "${LD_PRELOAD}" ]; then
    LD_PRELOAD=${orig_preload}
  fi
}
160
# Install a pinned tlparse into the user site and put its bin
# directory on PATH for the current (sourcing) shell.
function install_tlparse() {
  pip_install --user "tlparse==0.3.7"
  local user_base
  user_base=$(python -m site --user-base)
  PATH="${user_base}/bin:$PATH"
}
165
# Replace any nightly torchrec/fbgemm-gpu builds with builds from the
# pinned commits (fbgemm-gpu first, then torchrec).
function install_torchrec_and_fbgemm() {
  local torchrec_pin fbgemm_pin
  torchrec_pin=$(get_pinned_commit torchrec)
  fbgemm_pin=$(get_pinned_commit fbgemm)
  # Drop the nightlies first so the pinned builds win.
  pip_uninstall torchrec-nightly
  pip_uninstall fbgemm-gpu-nightly
  pip_install setuptools-git-versioning scikit-build pyre-extensions
  # See https://github.com/pytorch/pytorch/issues/106971
  CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_pin}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_pin}"
}
178
# Clone pytorch/xla (branch r2.4) into ./xla at the pinned commit.
# No-op when ./xla already exists.
function clone_pytorch_xla() {
  if [[ -d ./xla ]]; then
    return 0
  fi
  git clone --recursive -b r2.4 https://github.com/pytorch/xla.git
  pushd xla
  # pin the xla hash so that we don't get broken by changes to xla
  git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"
  git submodule sync
  git submodule update --init --recursive
  popd
}
190
# Clone pytorch/multipy (torch::deploy) next to the current checkout at
# its pinned commit, generate its runtime examples, and pip-install it
# with CUDA tests enabled.
function checkout_install_torchdeploy() {
  local multipy_pin
  multipy_pin=$(get_pinned_commit multipy)
  pushd ..
  git clone --recurse-submodules https://github.com/pytorch/multipy.git
  pushd multipy
  git checkout "${multipy_pin}"
  python multipy/runtime/example/generate_examples.py
  BUILD_CUDA_TESTS=1 pip install -e .
  popd
  popd
}
203
# Run the multipy (torch::deploy) CPU and GPU test binaries from the
# sibling multipy checkout produced by checkout_install_torchdeploy.
function test_torch_deploy() {
  pushd ..
  pushd multipy
  ./multipy/runtime/build/test_deploy
  ./multipy/runtime/build/test_deploy_gpu
  popd
  popd
}
212
# Clone pytorch/benchmark into ./torchbench at the pinned commit and run
# its installer.  With arguments, only the named models are installed;
# otherwise every model is installed.
function checkout_install_torchbench() {
  local bench_pin
  bench_pin=$(get_pinned_commit torchbench)
  git clone https://github.com/pytorch/benchmark torchbench
  pushd torchbench
  git checkout "$bench_pin"

  if [[ -n "$1" ]]; then
    python install.py --continue_on_fail models "$@"
  else
    # Occasionally the installation may fail on one model but it is ok to continue
    # to install and test other models
    python install.py --continue_on_fail
  fi
  popd
}
229
# Print sccache statistics to the build log; additionally dump them as
# JSON to a per-job file when OUR_GITHUB_JOB_ID is set.
function print_sccache_stats() {
  echo 'PyTorch Build Statistics'
  sccache --show-stats

  if [[ -z "${OUR_GITHUB_JOB_ID}" ]]; then
    echo "env var OUR_GITHUB_JOB_ID not set, will not write sccache stats to json"
  else
    sccache --show-stats --stats-format json | jq .stats \
      > "sccache-stats-${BUILD_ENVIRONMENT}-${OUR_GITHUB_JOB_ID}.json"
  fi
}
241