#!/bin/bash
# Script used only in CD pipeline
#
# Installs the CUDA toolkit (linux-sbsa builds) together with cuDNN, NCCL and
# cuSparseLt into /usr/local/cuda, then prunes static libraries and GUI tools.
#
# Usage:   <script> 12.4
# Env:     OVERRIDE_GENCODE  optional; replaces the default -gencode flag list
#                            used for the generic static-library prune.

set -ex

NCCL_VERSION=v2.21.5-1

#######################################
# Download cuSparseLt 0.5.2.1 (linux-sbsa) and copy its headers and libraries
# into /usr/local/cuda. Works in ./tmp_cusparselt, which is removed afterwards.
#######################################
function install_cusparselt_052 {
  # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
  local archive="libcusparse_lt-linux-sbsa-0.5.2.1-archive"

  mkdir tmp_cusparselt && pushd tmp_cusparselt
  wget -q "https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/${archive}.tar.xz"
  tar xf "${archive}.tar.xz"
  cp -a "${archive}"/include/* /usr/local/cuda/include/
  cp -a "${archive}"/lib/* /usr/local/cuda/lib64/
  popd
  rm -rf tmp_cusparselt
}

#######################################
# Install CUDA 12.4.1, cuDNN 9.1, NCCL ${NCCL_VERSION} and cuSparseLt 0.5.2.
# Globals: NCCL_VERSION (read)
# Any previous /usr/local/cuda-12.4 and /usr/local/cuda are removed first.
#######################################
function install_124 {
  echo "Installing CUDA 12.4.1 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
  local cuda_installer="cuda_12.4.1_550.54.15_linux_sbsa.run"
  local cudnn_archive="cudnn-linux-sbsa-9.1.0.70_cuda12-archive"

  rm -rf /usr/local/cuda-12.4 /usr/local/cuda
  # install CUDA 12.4.1 in the same container
  wget -q "https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/${cuda_installer}"
  chmod +x "${cuda_installer}"
  "./${cuda_installer}" --toolkit --silent
  rm -f "${cuda_installer}"
  # Re-point /usr/local/cuda at the freshly installed toolkit.
  rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda

  # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
  mkdir tmp_cudnn && pushd tmp_cudnn
  wget -q "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/${cudnn_archive}.tar.xz" -O "${cudnn_archive}.tar.xz"
  tar xf "${cudnn_archive}.tar.xz"
  cp -a "${cudnn_archive}"/include/* /usr/local/cuda/include/
  cp -a "${cudnn_archive}"/lib/* /usr/local/cuda/lib64/
  popd
  rm -rf tmp_cudnn

  # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
  # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
  git clone -b "${NCCL_VERSION}" --depth 1 https://github.com/NVIDIA/nccl.git
  pushd nccl
  make -j src.build
  cp -a build/include/* /usr/local/cuda/include/
  cp -a build/lib/* /usr/local/cuda/lib64/
  popd
  rm -rf nccl

  install_cusparselt_052

  ldconfig
}

#######################################
# Prune the CUDA 12.4 static libraries in place with nvprune and delete the
# bundled visual tools.
# Globals: OVERRIDE_GENCODE (read, optional)
#          NVPRUNE, CUDA_LIB_DIR, GENCODE, GENCODE_CUDNN, CUDA_BASE (exported)
#######################################
function prune_124 {
  echo "Pruning CUDA 12.4"
  #####################################################################################
  # CUDA 12.4 prune static libs
  #####################################################################################
  export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
  export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"

  export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
  export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"

  # OVERRIDE_GENCODE replaces GENCODE only; GENCODE_CUDNN is left untouched.
  if [[ -n "${OVERRIDE_GENCODE}" ]]; then
    export GENCODE="${OVERRIDE_GENCODE}"
  fi

  # The flag lists are whitespace-separated strings; split them into arrays
  # once so every other expansion below can stay quoted.
  local -a gencode_flags=() gencode_cudnn_flags=()
  read -r -a gencode_flags <<< "${GENCODE}"
  read -r -a gencode_cudnn_flags <<< "${GENCODE_CUDNN}"

  # all CUDA libs except CuDNN and CuBLAS
  # Glob on "*.a" so only real static archives are visited; an unanchored
  # "\.a" match would also pick up names that merely contain ".a".
  local lib_path lib_name
  for lib_path in "${CUDA_LIB_DIR}"/*.a; do
    [[ -e "${lib_path}" ]] || continue # glob matched nothing
    lib_name="${lib_path##*/}"
    case "${lib_name}" in
      *culibos* | *cudart* | *cudnn* | *cublas* | *metis*) continue ;;
    esac
    echo "${lib_name}"
    "${NVPRUNE}" "${gencode_flags[@]}" "${lib_path}" -o "${lib_path}"
  done

  # prune CuBLAS with the GENCODE_CUDNN arch list (cuDNN archives are skipped
  # above and are not pruned here)
  "${NVPRUNE}" "${gencode_cudnn_flags[@]}" "${CUDA_LIB_DIR}/libcublas_static.a" -o "${CUDA_LIB_DIR}/libcublas_static.a"
  "${NVPRUNE}" "${gencode_cudnn_flags[@]}" "${CUDA_LIB_DIR}/libcublasLt_static.a" -o "${CUDA_LIB_DIR}/libcublasLt_static.a"

  #####################################################################################
  # CUDA 12.4 prune visual tools
  #####################################################################################
  export CUDA_BASE="/usr/local/cuda-12.4/"
  # ":?" aborts instead of expanding to "/<dir>" should CUDA_BASE ever be empty.
  rm -rf "${CUDA_BASE:?}/libnvvp" "${CUDA_BASE:?}/nsightee_plugins" "${CUDA_BASE:?}/nsight-compute-2024.1.0" "${CUDA_BASE:?}/nsight-systems-2023.4.4/"
}

#######################################
# Process each command-line argument in order.
# Arguments: CUDA versions to install; only "12.4" is supported.
# Exits with status 1 on the first unsupported argument.
#######################################
function main {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      12.4)
        install_124
        prune_124
        ;;
      *)
        echo "bad argument $1"
        exit 1
        ;;
    esac
    shift
  done
}

main "$@"