xref: /aosp_15_r20/external/pytorch/.ci/caffe2/test.sh (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1#!/bin/bash
2
3# shellcheck source=./common.sh
4source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
5
# ONNX builds need a few extra Python packages plus the ONNX copy that lives
# under third_party in the Jenkins workspace.
case "${BUILD_ENVIRONMENT}" in
  *onnx*)
    pip install click mock tabulate networkx==2.0
    pip -q install --user "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
    ;;
esac
10
# Skip tests in environments where they are not built/applicable
case "${BUILD_ENVIRONMENT}" in
  *-android*)
    echo 'Skipping tests'
    exit 0
    ;;
esac

# ROCm-specific preparation. Any name matching *-rocm* also matches *rocm*, so
# the debug export is nested inside the broader check.
if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
  if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then
    # temporary to locate some kernel issues on the CI nodes
    export HSAKMT_DEBUG_LEVEL=4
  fi

  # These additional packages are needed for circleci ROCm builds.
  # Need networkx 2.0 because bellman_ford was moved in 2.1. Scikit-image by
  # default installs the most recent networkx version, so we install this lower
  # version explicitly before scikit-image pulls it in as a dependency.
  pip install networkx==2.0
  # click - onnx
  pip install --progress-bar off click protobuf tabulate virtualenv mock typing-extensions
fi
29
# Find where cpp tests and Caffe2 itself are installed.
# Sets cpp_test_dir and ld_library_path in both cases; caffe2_pypath and
# python_installation are only set for Python builds.
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
  # For cmake only build we install everything under $INSTALL_PREFIX
  # (defined outside this section).
  cpp_test_dir="$INSTALL_PREFIX/cpp_test"
  ld_library_path="$INSTALL_PREFIX/lib"
else
  # For Python builds we install into python
  # cd to /usr first so the python import doesn't get confused by any 'caffe2'
  # directory in cwd
  caffe2_init_file="$(cd /usr && "$PYTHON" -c 'import os; import caffe2; print(os.path.realpath(caffe2.__file__))')"
  # Strip the file name and the 'caffe2' package directory to get the
  # directory the package is installed in. Every expansion is quoted so that an
  # installation path containing whitespace is not word-split.
  python_installation="$(dirname "$(dirname "$caffe2_init_file")")"
  caffe2_pypath="$python_installation/caffe2"
  cpp_test_dir="$python_installation/torch/test"
  ld_library_path="$python_installation/torch/lib"
fi
44
################################################################################
# C++ tests #
################################################################################

#######################################
# Run a single C++ test binary, honoring the skip list.
# Globals:   BUILD_ENVIRONMENT, ld_library_path, gtest_reports_dir (read)
# Arguments: $1 - path to the test executable
# Outputs:   whatever the test binary prints
# Returns:   0 when the binary is skipped, otherwise the binary's exit status
#######################################
run_cpp_test() {
  local test_bin=$1
  case "$test_bin" in
    # skip tests we know are hanging or bad
    */mkl_utils_test | */aten/integer_divider_test)
      return 0
      ;;
    */scalar_tensor_test | */basic | */native_test)
      # These are skipped on ROCm builds only.
      if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
        return 0
      fi
      LD_LIBRARY_PATH="$ld_library_path" "$test_bin"
      ;;
    */*_benchmark)
      LD_LIBRARY_PATH="$ld_library_path" "$test_bin" --benchmark_color=false
      ;;
    *)
      # Historical note: we use a mixture of gtest (caffe2) and Catch2 (ATen),
      # and Catch doesn't support multiple reporters
      # c.f. https://github.com/catchorg/Catch2/blob/master/docs/release-notes.md#223
      # so enabling XML output there means losing useful stdout output for
      # Jenkins.
      # NOTE(review): the original comment said the xml reporter is NOT used,
      # yet --gtest_output=xml is passed below; confirm which one is intended.
      LD_LIBRARY_PATH="$ld_library_path" \
        "$test_bin" --gtest_output=xml:"$gtest_reports_dir/$(basename "$test_bin").xml"
      ;;
  esac
}

# Only run cpp tests in the first shard, don't run cpp tests a second time in the second shard
if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
  echo "Running C++ tests.."
  # Collect the executables NUL-delimited so paths containing whitespace or
  # glob characters survive intact. The list is gathered up front so the test
  # binaries keep the script's stdin.
  cpp_tests=()
  while IFS= read -r -d '' test; do
    cpp_tests+=("$test")
  done < <(find "$cpp_test_dir" -executable -type f -print0)

  for test in "${cpp_tests[@]}"; do
    run_cpp_test "$test"
  done
fi
84
################################################################################
# Python tests #
################################################################################
# cmake-only builds have no Python tests to run, so stop here.
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
  exit 0
fi

# If pip is installed as root, we must use sudo.
# CircleCI docker images could install conda as jenkins user, or use the OS's python package.
# MAYBE_SUDO is only assigned when pip is owned by root and we are not root;
# otherwise it keeps whatever value it already had (normally unset/empty).
PIP=$(command -v pip)
# Quote the path so a pip location containing whitespace is passed to stat as
# one argument.
PIP_USER=$(stat --format '%U' "$PIP")
CURRENT_USER=$(id -u -n)
if [[ "$PIP_USER" == root && "$CURRENT_USER" != root ]]; then
  MAYBE_SUDO=sudo
fi
100
# Remove the pre-installed hypothesis and coverage so older versions can be
# used: newer versions remove the timeout parameter from settings, which
# ideep/conv_transpose_test.py uses.
# $MAYBE_SUDO is deliberately left unquoted so that an empty value expands to
# no word at all.
for stale_pkg in hypothesis coverage; do
  $MAYBE_SUDO pip -q uninstall -y "$stale_pkg"
done

# "pip install hypothesis==3.44.6" from official server is unreliable on
# CircleCI, so we host a copy on S3 instead.
# Each entry is "<requirement> <find-links URL>", installed in order.
pinned_wheels=(
  "attrs==18.1.0 https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl"
  "coverage==4.5.1 https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl"
  "hypothesis==3.44.6 https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl"
)
for pinned in "${pinned_wheels[@]}"; do
  $MAYBE_SUDO pip -q install "${pinned%% *}" -f "${pinned#* }"
done
111
# Collect additional tests to run (outside caffe2/python)
EXTRA_TESTS=()

# CUDA builds always include NCCL support
if [[ "$BUILD_ENVIRONMENT" == *-cuda* || "$BUILD_ENVIRONMENT" == *-rocm* ]]; then
  EXTRA_TESTS+=("$caffe2_pypath/contrib/nccl")
fi

# Extra pytest arguments used only on ROCm builds. Each "--ignore" and its path
# are stored as separate array elements so the array can be expanded quoted
# without relying on word splitting (which would break on paths containing
# whitespace or glob characters).
rocm_ignore_test=()
if [[ "$BUILD_ENVIRONMENT" == *-rocm* ]]; then
  # Currently these tests are failing on ROCM platform:

  # On ROCm, RCCL (distributed) development isn't complete.
  # https://github.com/ROCmSoftwarePlatform/rccl
  rocm_ignore_test+=(--ignore "$caffe2_pypath/python/data_parallel_model_test.py")

  # This test has been flaky in ROCm CI (but note the tests are
  # cpu-only so should be unrelated to ROCm)
  rocm_ignore_test+=(--ignore "$caffe2_pypath/python/operator_test/blobs_queue_db_test.py")
  # This test is skipped on Jenkins(compiled without MKL) and otherwise known flaky
  rocm_ignore_test+=(--ignore "$caffe2_pypath/python/ideep/convfusion_op_test.py")
  # This test is skipped on Jenkins(compiled without MKL) and causing segfault on Circle
  rocm_ignore_test+=(--ignore "$caffe2_pypath/python/ideep/pool_op_test.py")
fi

echo "Running Python tests.."
# locale setting is required by click package; use the first candidate that
# `locale -a` reports as available.
for loc in "en_US.utf8" "C.UTF-8"; do
  if locale -a | grep "$loc" >/dev/null 2>&1; then
    export LC_ALL="$loc"
    export LANG="$loc"
    break
  fi
done

# Some Caffe2 tests fail when run using AVX512 ISA, see https://github.com/pytorch/pytorch/issues/66111
export DNNL_MAX_CPU_ISA=AVX2

# Should still run even in the absence of SHARD_NUMBER
if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
  # TODO: remove the py pin when the linked issue is resolved.
  # py is temporary until https://github.com/Teemu/pytest-sugar/issues/241 is fixed
  pip install --user py==1.11.0
  pip install --user pytest-sugar
  # NB: Warnings are disabled because they make it harder to see what
  # the actual erroring test is
  "$PYTHON" \
    -m pytest \
    -x \
    -v \
    --disable-warnings \
    --junit-xml="$pytest_reports_dir/result.xml" \
    --ignore "$caffe2_pypath/python/test/executor_test.py" \
    --ignore "$caffe2_pypath/python/operator_test/matmul_op_test.py" \
    --ignore "$caffe2_pypath/python/operator_test/pack_ops_test.py" \
    --ignore "$caffe2_pypath/python/mkl/mkl_sbn_speed_test.py" \
    --ignore "$caffe2_pypath/python/trt/test_pt_onnx_trt.py" \
    "${rocm_ignore_test[@]}" \
    "$caffe2_pypath/python" \
    "${EXTRA_TESTS[@]}"
fi
173