xref: /aosp_15_r20/external/pytorch/.ci/docker/common/install_cuda.sh (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1#!/bin/bash
2
# Shell options for the whole script:
#   errexit - stop at the first command that exits non-zero
#   xtrace  - print each command before it is executed
set -o errexit
set -o xtrace

# Versions of the components installed next to every CUDA toolkit.
# CUDNN_VERSION is spliced into the cuDNN archive name that gets downloaded;
# NCCL_VERSION is the git ref of NVIDIA/nccl that gets cloned and built.
CUDNN_VERSION="9.1.0.70"
NCCL_VERSION="v2.21.5-1"
7
# Download the cuSparseLt 0.4.0.7 linux-x86_64 redistributable archive and
# copy its headers and libraries into /usr/local/cuda.
# Arguments: none
# Side effects: writes into /usr/local/cuda/include and /usr/local/cuda/lib64;
#   uses a scratch directory under the current directory and removes it again.
function install_cusparselt_040 {
    # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
    local archive="libcusparse_lt-linux-x86_64-0.4.0.7-archive"
    local workdir

    # A unique scratch directory (created in the current directory) instead of
    # a fixed name: a leftover "tmp_cusparselt" from an earlier run can no
    # longer make the download and extraction happen in the caller's directory.
    # Declared separately from the assignment so a mktemp failure is not masked.
    workdir=$(mktemp -d tmp_cusparselt.XXXXXX)

    # Separate statements (not "a && b") so `set -e` stops on any failure.
    pushd "${workdir}"
    wget -q "https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${archive}.tar.xz"
    tar xf "${archive}.tar.xz"
    cp -a "${archive}"/include/* /usr/local/cuda/include/
    cp -a "${archive}"/lib/* /usr/local/cuda/lib64/
    popd
    rm -rf -- "${workdir:?}"
}
18
# Download the cuSparseLt 0.5.2.1 linux-x86_64 redistributable archive and
# copy its headers and libraries into /usr/local/cuda.
# Arguments: none
# Side effects: writes into /usr/local/cuda/include and /usr/local/cuda/lib64;
#   uses a scratch directory under the current directory and removes it again.
function install_cusparselt_052 {
    # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
    local archive="libcusparse_lt-linux-x86_64-0.5.2.1-archive"
    local workdir

    # A unique scratch directory (created in the current directory) instead of
    # a fixed name: a leftover "tmp_cusparselt" from an earlier run can no
    # longer make the download and extraction happen in the caller's directory.
    # Declared separately from the assignment so a mktemp failure is not masked.
    workdir=$(mktemp -d tmp_cusparselt.XXXXXX)

    # Separate statements (not "a && b") so `set -e` stops on any failure.
    pushd "${workdir}"
    wget -q "https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${archive}.tar.xz"
    tar xf "${archive}.tar.xz"
    cp -a "${archive}"/include/* /usr/local/cuda/include/
    cp -a "${archive}"/lib/* /usr/local/cuda/lib64/
    popd
    rm -rf -- "${workdir:?}"
}
29
# Download the cuSparseLt 0.6.2.3 linux-x86_64 redistributable archive and
# copy its headers and libraries into /usr/local/cuda.
# Arguments: none
# Side effects: writes into /usr/local/cuda/include and /usr/local/cuda/lib64;
#   uses a scratch directory under the current directory and removes it again.
function install_cusparselt_062 {
    # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
    local archive="libcusparse_lt-linux-x86_64-0.6.2.3-archive"
    local workdir

    # A unique scratch directory (created in the current directory) instead of
    # a fixed name: a leftover "tmp_cusparselt" from an earlier run can no
    # longer make the download and extraction happen in the caller's directory.
    # Declared separately from the assignment so a mktemp failure is not masked.
    workdir=$(mktemp -d tmp_cusparselt.XXXXXX)

    # Separate statements (not "a && b") so `set -e` stops on any failure.
    pushd "${workdir}"
    wget -q "https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${archive}.tar.xz"
    tar xf "${archive}.tar.xz"
    cp -a "${archive}"/include/* /usr/local/cuda/include/
    cp -a "${archive}"/lib/* /usr/local/cuda/lib64/
    popd
    rm -rf -- "${workdir:?}"
}
40
# Install the CUDA 11.8 toolkit into /usr/local/cuda-11.8, point the
# /usr/local/cuda symlink at it, then add cuDNN, a source-built NCCL and
# cuSparseLt 0.4.0 to that tree.
# Globals:   CUDNN_VERSION (read), NCCL_VERSION (read)
# Arguments: none
# Side effects: downloads into and builds under the current directory
#   (temporary files are removed afterwards) and runs ldconfig.
function install_118 {
    echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
    local cuda_runfile="cuda_11.8.0_520.61.05_linux.run"
    local cudnn_archive="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive"
    local cudnn_workdir

    rm -rf /usr/local/cuda-11.8 /usr/local/cuda
    # install CUDA 11.8.0 in the same container
    wget -q "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/${cuda_runfile}"
    chmod +x "${cuda_runfile}"
    "./${cuda_runfile}" --toolkit --silent
    rm -f "${cuda_runfile}"
    # Two statements rather than "rm && ln": inside an && list a failing rm
    # would be ignored by `set -e` and the symlink silently never created.
    rm -f /usr/local/cuda
    ln -s /usr/local/cuda-11.8 /usr/local/cuda

    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    # Unique scratch directory, and mkdir/cd kept as separate statements, so a
    # failure aborts here instead of letting the later "cd .." climb out of the
    # original working directory.
    cudnn_workdir=$(mktemp -d tmp_cudnn.XXXXXX)
    cd "${cudnn_workdir}"
    wget -q "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${cudnn_archive}.tar.xz" -O "${cudnn_archive}.tar.xz"
    tar xf "${cudnn_archive}.tar.xz"
    cp -a "${cudnn_archive}"/include/* /usr/local/cuda/include/
    cp -a "${cudnn_archive}"/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf -- "${cudnn_workdir:?}"

    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
    # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
    git clone -b "${NCCL_VERSION}" --depth 1 https://github.com/NVIDIA/nccl.git
    cd nccl
    make -j src.build
    cp -a build/include/* /usr/local/cuda/include/
    cp -a build/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf nccl

    install_cusparselt_040

    ldconfig
}
73
# Install the CUDA 12.1 toolkit into /usr/local/cuda-12.1, point the
# /usr/local/cuda symlink at it, then add cuDNN, a source-built NCCL and
# cuSparseLt 0.5.2 to that tree.
# Globals:   CUDNN_VERSION (read), NCCL_VERSION (read)
# Arguments: none
# Side effects: downloads into and builds under the current directory
#   (temporary files are removed afterwards) and runs ldconfig.
function install_121 {
    echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
    local cuda_runfile="cuda_12.1.1_530.30.02_linux.run"
    local cudnn_archive="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive"
    local cudnn_workdir

    rm -rf /usr/local/cuda-12.1 /usr/local/cuda
    # install CUDA 12.1.1 in the same container
    wget -q "https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/${cuda_runfile}"
    chmod +x "${cuda_runfile}"
    "./${cuda_runfile}" --toolkit --silent
    rm -f "${cuda_runfile}"
    # Two statements rather than "rm && ln": inside an && list a failing rm
    # would be ignored by `set -e` and the symlink silently never created.
    rm -f /usr/local/cuda
    ln -s /usr/local/cuda-12.1 /usr/local/cuda

    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    # Unique scratch directory, and mkdir/cd kept as separate statements, so a
    # failure aborts here instead of letting the later "cd .." climb out of the
    # original working directory.
    cudnn_workdir=$(mktemp -d tmp_cudnn.XXXXXX)
    cd "${cudnn_workdir}"
    wget -q "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${cudnn_archive}.tar.xz" -O "${cudnn_archive}.tar.xz"
    tar xf "${cudnn_archive}.tar.xz"
    cp -a "${cudnn_archive}"/include/* /usr/local/cuda/include/
    cp -a "${cudnn_archive}"/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf -- "${cudnn_workdir:?}"

    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
    # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
    git clone -b "${NCCL_VERSION}" --depth 1 https://github.com/NVIDIA/nccl.git
    cd nccl
    make -j src.build
    cp -a build/include/* /usr/local/cuda/include/
    cp -a build/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf nccl

    install_cusparselt_052

    ldconfig
}
106
# Install the CUDA 12.4 toolkit into /usr/local/cuda-12.4, point the
# /usr/local/cuda symlink at it, then add cuDNN, a source-built NCCL and
# cuSparseLt 0.6.2 to that tree.
# Globals:   CUDNN_VERSION (read), NCCL_VERSION (read)
# Arguments: none
# Side effects: downloads into and builds under the current directory
#   (temporary files are removed afterwards) and runs ldconfig.
function install_124 {
    # The message names cuSparseLt 0.6.2 because install_cusparselt_062 is what
    # actually runs below.
    echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
    local cuda_runfile="cuda_12.4.1_550.54.15_linux.run"
    local cudnn_archive="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive"
    local cudnn_workdir

    rm -rf /usr/local/cuda-12.4 /usr/local/cuda
    # install CUDA 12.4.1 in the same container
    wget -q "https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/${cuda_runfile}"
    chmod +x "${cuda_runfile}"
    "./${cuda_runfile}" --toolkit --silent
    rm -f "${cuda_runfile}"
    # Two statements rather than "rm && ln": inside an && list a failing rm
    # would be ignored by `set -e` and the symlink silently never created.
    rm -f /usr/local/cuda
    ln -s /usr/local/cuda-12.4 /usr/local/cuda

    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    # Unique scratch directory, and mkdir/cd kept as separate statements, so a
    # failure aborts here instead of letting the later "cd .." climb out of the
    # original working directory.
    cudnn_workdir=$(mktemp -d tmp_cudnn.XXXXXX)
    cd "${cudnn_workdir}"
    wget -q "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${cudnn_archive}.tar.xz" -O "${cudnn_archive}.tar.xz"
    tar xf "${cudnn_archive}.tar.xz"
    cp -a "${cudnn_archive}"/include/* /usr/local/cuda/include/
    cp -a "${cudnn_archive}"/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf -- "${cudnn_workdir:?}"

    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
    # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
    git clone -b "${NCCL_VERSION}" --depth 1 https://github.com/NVIDIA/nccl.git
    cd nccl
    make -j src.build
    cp -a build/include/* /usr/local/cuda/include/
    cp -a build/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf nccl

    install_cusparselt_062

    ldconfig
}
139
# Shrink the CUDA 11.8 install under /usr/local/cuda-11.8: run nvprune over
# the static libraries so they keep only the listed GPU architectures, then
# delete the bundled visual tools.
# Globals:
#   OVERRIDE_GENCODE        (read) if non-empty, replaces the default GENCODE
#   OVERRIDE_GENCODE_CUDNN  (read) if non-empty, replaces the default GENCODE_CUDNN
#   NVPRUNE, CUDA_LIB_DIR, GENCODE, GENCODE_CUDNN, CUDA_BASE (set and exported)
# Arguments: none
function prune_118 {
    echo "Pruning CUDA 11.8 and cuDNN"
    #####################################################################################
    # CUDA 11.8 prune static libs
    #####################################################################################
    export NVPRUNE="/usr/local/cuda-11.8/bin/nvprune"
    export CUDA_LIB_DIR="/usr/local/cuda-11.8/lib64"

    export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
    export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"

    if [[ -n "${OVERRIDE_GENCODE:-}" ]]; then
        export GENCODE=$OVERRIDE_GENCODE
    fi
    # Same override hook that prune_124 offers for the cuBLAS architecture list.
    if [[ -n "${OVERRIDE_GENCODE_CUDNN:-}" ]]; then
        export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
    fi

    # All static libs except culibos, cudart, cuDNN, cuBLAS and metis
    # (cuBLAS is pruned separately below because it needs arch 3.7 included).
    # Globbing "*.a" selects real static archives only; the earlier
    # `ls | grep "\.a"` matched any name that merely contained ".a".
    local lib_path lib_name
    for lib_path in "$CUDA_LIB_DIR"/*.a; do
        [[ -e "$lib_path" ]] || continue  # the glob matched nothing
        lib_name=${lib_path##*/}
        case "$lib_name" in
            *culibos* | *cudart* | *cudnn* | *cublas* | *metis*) continue ;;
        esac
        echo "$lib_name"
        # GENCODE is intentionally unquoted: it carries many space-separated flags.
        # shellcheck disable=SC2086
        "$NVPRUNE" $GENCODE "$lib_path" -o "$lib_path"
    done

    # Prune cuBLAS with the GENCODE_CUDNN list (no cuDNN library is pruned here).
    # shellcheck disable=SC2086
    "$NVPRUNE" $GENCODE_CUDNN "$CUDA_LIB_DIR/libcublas_static.a" -o "$CUDA_LIB_DIR/libcublas_static.a"
    # shellcheck disable=SC2086
    "$NVPRUNE" $GENCODE_CUDNN "$CUDA_LIB_DIR/libcublasLt_static.a" -o "$CUDA_LIB_DIR/libcublasLt_static.a"

    #####################################################################################
    # CUDA 11.8 prune visual tools
    #####################################################################################
    export CUDA_BASE="/usr/local/cuda-11.8/"
    rm -rf "$CUDA_BASE/libnvvp" "$CUDA_BASE/nsightee_plugins" "$CUDA_BASE/nsight-compute-2022.3.0" "$CUDA_BASE/nsight-systems-2022.4.2/"
}
170
# Shrink the CUDA 12.1 install under /usr/local/cuda-12.1: run nvprune over
# the static libraries so they keep only the listed GPU architectures, then
# delete the bundled visual tools.
# Globals:
#   OVERRIDE_GENCODE        (read) if non-empty, replaces the default GENCODE
#   OVERRIDE_GENCODE_CUDNN  (read) if non-empty, replaces the default GENCODE_CUDNN
#   NVPRUNE, CUDA_LIB_DIR, GENCODE, GENCODE_CUDNN, CUDA_BASE (set and exported)
# Arguments: none
function prune_121 {
    echo "Pruning CUDA 12.1"
    #####################################################################################
    # CUDA 12.1 prune static libs
    #####################################################################################
    export NVPRUNE="/usr/local/cuda-12.1/bin/nvprune"
    export CUDA_LIB_DIR="/usr/local/cuda-12.1/lib64"

    export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
    export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"

    if [[ -n "${OVERRIDE_GENCODE:-}" ]]; then
        export GENCODE=$OVERRIDE_GENCODE
    fi
    # Same override hook that prune_124 offers for the cuBLAS architecture list.
    if [[ -n "${OVERRIDE_GENCODE_CUDNN:-}" ]]; then
        export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
    fi

    # All static libs except culibos, cudart, cuDNN, cuBLAS and metis
    # (cuBLAS is pruned separately below with GENCODE_CUDNN).
    # Globbing "*.a" selects real static archives only; the earlier
    # `ls | grep "\.a"` matched any name that merely contained ".a".
    local lib_path lib_name
    for lib_path in "$CUDA_LIB_DIR"/*.a; do
        [[ -e "$lib_path" ]] || continue  # the glob matched nothing
        lib_name=${lib_path##*/}
        case "$lib_name" in
            *culibos* | *cudart* | *cudnn* | *cublas* | *metis*) continue ;;
        esac
        echo "$lib_name"
        # GENCODE is intentionally unquoted: it carries many space-separated flags.
        # shellcheck disable=SC2086
        "$NVPRUNE" $GENCODE "$lib_path" -o "$lib_path"
    done

    # Prune cuBLAS with the GENCODE_CUDNN list (no cuDNN library is pruned here).
    # shellcheck disable=SC2086
    "$NVPRUNE" $GENCODE_CUDNN "$CUDA_LIB_DIR/libcublas_static.a" -o "$CUDA_LIB_DIR/libcublas_static.a"
    # shellcheck disable=SC2086
    "$NVPRUNE" $GENCODE_CUDNN "$CUDA_LIB_DIR/libcublasLt_static.a" -o "$CUDA_LIB_DIR/libcublasLt_static.a"

    #####################################################################################
    # CUDA 12.1 prune visual tools
    #####################################################################################
    export CUDA_BASE="/usr/local/cuda-12.1/"
    rm -rf "$CUDA_BASE/libnvvp" "$CUDA_BASE/nsightee_plugins" "$CUDA_BASE/nsight-compute-2023.1.0" "$CUDA_BASE/nsight-systems-2023.1.2/"
}
201
# Shrink the CUDA 12.4 install under /usr/local/cuda-12.4: run nvprune over
# the static libraries so they keep only the listed GPU architectures, then
# delete the bundled visual tools.
# Globals:
#   OVERRIDE_GENCODE        (read) if non-empty, replaces the default GENCODE
#   OVERRIDE_GENCODE_CUDNN  (read) if non-empty, replaces the default GENCODE_CUDNN
#   NVPRUNE, CUDA_LIB_DIR, GENCODE, GENCODE_CUDNN, CUDA_BASE (set and exported)
# Arguments: none
function prune_124 {
    echo "Pruning CUDA 12.4"
    #####################################################################################
    # CUDA 12.4 prune static libs
    #####################################################################################
    export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
    export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"

    export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
    export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"

    if [[ -n "${OVERRIDE_GENCODE:-}" ]]; then
        export GENCODE=$OVERRIDE_GENCODE
    fi
    if [[ -n "${OVERRIDE_GENCODE_CUDNN:-}" ]]; then
        export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
    fi

    # All static libs except culibos, cudart, cuDNN, cuBLAS and metis
    # (cuBLAS is pruned separately below with GENCODE_CUDNN).
    # Globbing "*.a" selects real static archives only; the earlier
    # `ls | grep "\.a"` matched any name that merely contained ".a".
    local lib_path lib_name
    for lib_path in "$CUDA_LIB_DIR"/*.a; do
        [[ -e "$lib_path" ]] || continue  # the glob matched nothing
        lib_name=${lib_path##*/}
        case "$lib_name" in
            *culibos* | *cudart* | *cudnn* | *cublas* | *metis*) continue ;;
        esac
        echo "$lib_name"
        # GENCODE is intentionally unquoted: it carries many space-separated flags.
        # shellcheck disable=SC2086
        "$NVPRUNE" $GENCODE "$lib_path" -o "$lib_path"
    done

    # Prune cuBLAS with the GENCODE_CUDNN list (no cuDNN library is pruned here).
    # shellcheck disable=SC2086
    "$NVPRUNE" $GENCODE_CUDNN "$CUDA_LIB_DIR/libcublas_static.a" -o "$CUDA_LIB_DIR/libcublas_static.a"
    # shellcheck disable=SC2086
    "$NVPRUNE" $GENCODE_CUDNN "$CUDA_LIB_DIR/libcublasLt_static.a" -o "$CUDA_LIB_DIR/libcublasLt_static.a"

    #####################################################################################
    # CUDA 12.4 prune visual tools
    #####################################################################################
    export CUDA_BASE="/usr/local/cuda-12.4/"
    rm -rf "$CUDA_BASE/libnvvp" "$CUDA_BASE/nsightee_plugins" "$CUDA_BASE/nsight-compute-2024.1.0" "$CUDA_BASE/nsight-systems-2023.4.4/"
}
235
# Command-line entry point.
# Arguments: zero or more CUDA versions (11.8, 12.1 or 12.4). Each one is
#   installed and then pruned, in the order given. No arguments: nothing is done.
# Outputs:   "bad argument <arg>" on stderr for an unsupported version.
# Exit:      1 if any argument is unsupported.
main() {
    local requested

    # Reject unsupported versions up front, so a typo in a later argument does
    # not surface only after an earlier version was fully installed.
    for requested in "$@"; do
        case "$requested" in
            11.8 | 12.1 | 12.4) ;;
            *)
                echo "bad argument $requested" >&2
                exit 1
                ;;
        esac
    done

    for requested in "$@"; do
        case "$requested" in
            11.8)
                install_118
                prune_118
                ;;
            12.1)
                install_121
                prune_121
                ;;
            12.4)
                install_124
                prune_124
                ;;
        esac
    done
}

main "$@"
251