# xref: /aosp_15_r20/external/pytorch/.ci/pytorch/test.sh (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
#!/bin/bash

# PyTorch CI test driver: configures the environment based on
# $BUILD_ENVIRONMENT / $TEST_CONFIG and dispatches to the test_* helpers
# defined below.
#
# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.)

set -ex

# Suppress ANSI color escape sequences
export TERM=vt100

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

# Do not change workspace permissions for ROCm CI jobs
# as it can leave workspace with bad permissions for cancelled jobs
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
  # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
  WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
  # Restore the workspace to its pre-job owner on exit so cancelled jobs
  # do not leave files owned by the jenkins user behind.
  cleanup_workspace() {
    echo "sudo may print the following warning message that can be ignored. The chown command will still run."
    echo "    sudo: setrlimit(RLIMIT_STACK): Operation not permitted"
    echo "For more details refer to https://github.com/sudo-project/sudo/issues/42"
    sudo chown -R "$WORKSPACE_ORIGINAL_OWNER_ID" /var/lib/jenkins/workspace
  }
  # Disable shellcheck SC2064 as we want to parse the original owner immediately.
  # shellcheck disable=SC2064
  trap_add cleanup_workspace EXIT
  sudo chown -R jenkins /var/lib/jenkins/workspace
  git config --global --add safe.directory /var/lib/jenkins/workspace
fi

echo "Environment variables:"
env

# Torch was installed into site-packages by the build step; derive the
# install layout from the active Python interpreter.
TORCH_INSTALL_DIR=$(python -c "import site; print(site.getsitepackages()[0])")/torch
TORCH_BIN_DIR="$TORCH_INSTALL_DIR"/bin
TORCH_LIB_DIR="$TORCH_INSTALL_DIR"/lib
TORCH_TEST_DIR="$TORCH_INSTALL_DIR"/test
40
# Locations of the local (in-tree) build outputs.
BUILD_DIR="build"
BUILD_RENAMED_DIR="build_renamed"
BUILD_BIN_DIR="${BUILD_DIR}/bin"

# Default the sharding variables when the caller did not set them.
: "${SHARD_NUMBER:=1}"
: "${NUM_TEST_SHARDS:=1}"
48
# Run applicable tests under valgrind by default; specific build
# environments below opt out where valgrind is known to be broken.
export VALGRIND=ON
# export TORCH_INDUCTOR_INSTALL_GXX=ON
if [[ "$BUILD_ENVIRONMENT" == *clang9* ]]; then
  # clang9 appears to miscompile code involving c10::optional<c10::SymInt>,
  # such that valgrind complains along these lines:
  #
  # Conditional jump or move depends on uninitialised value(s)
  #    at 0x40303A: ~optional_base (Optional.h:281)
  #    by 0x40303A: call (Dispatcher.h:448)
  #    by 0x40303A: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, c10::optional<c10::SymInt>) (basic.cpp:10)
  #    by 0x403700: main (basic.cpp:16)
  #  Uninitialised value was created by a stack allocation
  #    at 0x402AAA: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, c10::optional<c10::SymInt>) (basic.cpp:6)
  #
  # The problem does not appear with gcc or newer versions of clang (we tested
  # clang14).  So we suppress valgrind testing for clang9 specifically.
  # You may need to suppress it for other versions of clang if they still have
  # the bug.
  #
  # A minimal repro for the valgrind error is below:
  #
  # #include <ATen/ATen.h>
  # #include <ATen/core/dispatch/Dispatcher.h>
  #
  # using namespace at;
  #
  # Tensor call(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<c10::SymInt> storage_offset) {
  #   auto op = c10::Dispatcher::singleton()
  #       .findSchemaOrThrow(at::_ops::as_strided::name, at::_ops::as_strided::overload_name)
  #       .typed<at::_ops::as_strided::schema>();
  #   return op.call(self, size, stride, storage_offset);
  # }
  #
  # int main(int argv) {
  #   Tensor b = empty({3, 4});
  #   auto z = call(b, b.sym_sizes(), b.sym_strides(), c10::nullopt);
  # }
  export VALGRIND=OFF
fi

if [[ "${PYTORCH_TEST_RERUN_DISABLED_TESTS}" == "1" ]] || [[ "${CONTINUE_THROUGH_ERROR}" == "1" ]]; then
  # When rerunning disable tests, do not generate core dumps as it could consume
  # the runner disk space when crashed tests are run multiple times. Running out
  # of space is a nasty issue because there is no space left to even download the
  # GHA to clean up the disk
  #
  # We also want to turn off core dump when CONTINUE_THROUGH_ERROR is set as there
  # is a small risk of having multiple core files generated. Arguably, they are not
  # that useful in this case anyway and the test will still continue
  ulimit -c 0

  # Note that by piping the core dump to a script set in /proc/sys/kernel/core_pattern
  # as documented in https://man7.org/linux/man-pages/man5/core.5.html, we could
  # dynamically stop generating more core file when the disk space drops below a
  # certain threshold. However, this is not supported inside Docker container atm
fi
105
# Get fully qualified path using realpath
if [[ "$BUILD_ENVIRONMENT" != *bazel* ]]; then
  CUSTOM_TEST_ARTIFACT_BUILD_DIR=$(realpath "${CUSTOM_TEST_ARTIFACT_BUILD_DIR:-"build/custom_test_artifacts"}")
fi

# Reduce set of tests to include when running run_test.py
if [[ -n $TESTS_TO_INCLUDE ]]; then
  echo "Setting INCLUDE_CLAUSE"
  # NOTE: consumed unquoted by the test_* helpers so it expands into
  # multiple run_test.py arguments.
  INCLUDE_CLAUSE="--include $TESTS_TO_INCLUDE"
fi

echo "Environment variables"
env

echo "Testing pytorch"

export LANG=C.UTF-8

PR_NUMBER=${PR_NUMBER:-${CIRCLE_PR_NUMBER:-}}

# The default config only needs a single GPU.
if [[ "$TEST_CONFIG" == 'default' ]]; then
  export CUDA_VISIBLE_DEVICES=0
  export HIP_VISIBLE_DEVICES=0
fi

# Distributed tests on ROCm need two visible devices.
if [[ "$TEST_CONFIG" == 'distributed' ]] && [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
  export HIP_VISIBLE_DEVICES=0,1
fi

if [[ "$TEST_CONFIG" == 'slow' ]]; then
  export PYTORCH_TEST_WITH_SLOW=1
  export PYTORCH_TEST_SKIP_FAST=1
fi

if [[ "$BUILD_ENVIRONMENT" == *slow-gradcheck* ]]; then
  export PYTORCH_TEST_WITH_SLOW_GRADCHECK=1
  # TODO: slow gradcheck tests run out of memory a lot recently, so setting this
  # to run them sequentially with only one process to mitigate the issue
  export PYTORCH_TEST_CUDA_MEM_LEAK_CHECK=1
fi

if [[ "$BUILD_ENVIRONMENT" == *cuda* || "$BUILD_ENVIRONMENT" == *rocm* ]]; then
  # Used so that only cuda/rocm specific versions of tests are generated
  # mainly used so that we're not spending extra cycles testing cpu
  # devices on expensive gpu machines
  export PYTORCH_TESTING_DEVICE_ONLY_FOR="cuda"
elif [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
  export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu"
  # setting PYTHON_TEST_EXTRA_OPTION
  export PYTHON_TEST_EXTRA_OPTION="--xpu"
fi

if [[ "$TEST_CONFIG" == *crossref* ]]; then
  export PYTORCH_TEST_WITH_CROSSREF=1
fi

if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
  # regression in ROCm 6.0 on MI50 CI runners due to hipblaslt; remove in 6.1
  export VALGRIND=OFF
  # Print GPU info
  rocminfo
  rocminfo | grep -E 'Name:.*\sgfx|Marketing'
fi

if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
  # Source Intel oneAPI environment script to enable xpu runtime related libraries
  # refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-5.html
  # shellcheck disable=SC1091
  source /opt/intel/oneapi/compiler/latest/env/vars.sh
  # Check XPU status before testing
  xpu-smi discovery
fi

if [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
  # JIT C++ extensions require ninja.
  pip_install --user "ninja==1.10.2"
  # ninja is installed in $HOME/.local/bin, e.g., /var/lib/jenkins/.local/bin for CI user jenkins
  # but this script should be runnable by any user, including root
  export PATH="$HOME/.local/bin:$PATH"
fi

if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then
  # TODO: revisit this once the CI is stabilized on aarch64 linux
  export VALGRIND=OFF
fi
191
# Install the tlparse log-parsing tool (helper defined in common.sh).
install_tlparse

# DANGER WILL ROBINSON.  The LD_PRELOAD here could cause you problems
# if you're not careful.  Check this if you made some changes and the
# ASAN test is not working
if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
    export ASAN_OPTIONS=detect_leaks=0:symbolize=1:detect_stack_use_after_return=true:strict_init_order=true:detect_odr_violation=1:detect_container_overflow=0:check_initialization_order=true:debug=true
    export UBSAN_OPTIONS=print_stacktrace=1:suppressions=$PWD/ubsan.supp
    export PYTORCH_TEST_WITH_ASAN=1
    export PYTORCH_TEST_WITH_UBSAN=1
    # TODO: Figure out how to avoid hard-coding these paths
    export ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-15/bin/llvm-symbolizer
    export TORCH_USE_RTLD_GLOBAL=1
    # NB: We load libtorch.so with RTLD_GLOBAL for UBSAN, unlike our
    # default behavior.
    #
    # The reason for this is that without RTLD_GLOBAL, if we load multiple
    # libraries that depend on libtorch (as is the case with C++ extensions), we
    # will get multiple copies of libtorch in our address space.  When UBSAN is
    # turned on, it will do a bunch of virtual pointer consistency checks which
    # won't work correctly.  When this happens, you get a violation like:
    #
    #    member call on address XXXXXX which does not point to an object of
    #    type 'std::_Sp_counted_base<__gnu_cxx::_Lock_policy::_S_atomic>'
    #    XXXXXX note: object is of type
    #    'std::_Sp_counted_ptr<torch::nn::LinearImpl*, (__gnu_cxx::_Lock_policy)2>'
    #
    # (NB: the textual types of the objects here are misleading, because
    # they actually line up; it just so happens that there's two copies
    # of the type info floating around in the address space, so they
    # don't pointer compare equal.  See also
    #   https://github.com/google/sanitizers/issues/1175
    #
    # UBSAN is kind of right here: if we relied on RTTI across C++ extension
    # modules they would indeed do the wrong thing;  but in our codebase, we
    # don't use RTTI (because it doesn't work in mobile).  To appease
    # UBSAN, however, it's better if we ensure all the copies agree!
    #
    # By the way, an earlier version of this code attempted to load
    # libtorch_python.so with LD_PRELOAD, which has a similar effect of causing
    # it to be loaded globally.  This isn't really a good idea though, because
    # it depends on a ton of dynamic libraries that most programs aren't gonna
    # have, and it applies to child processes.

    # TODO: get rid of the hardcoded path
    export LD_PRELOAD=/usr/lib/llvm-15/lib/clang/15.0.7/lib/linux/libclang_rt.asan-x86_64.so
    # Disable valgrind for asan
    export VALGRIND=OFF

    # Sanity check that torch imports, then verify the sanitizers actually
    # trip on intentional faults (a silent pass means they are misconfigured).
    (cd test && python -c "import torch; print(torch.__version__, torch.version.git_version)")
    echo "The next four invocations are expected to crash; if they don't that means ASAN/UBSAN is misconfigured"
    (cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_csrc_asan(3)")
    #(cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_csrc_ubsan(0)")
    (cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_vptr_ubsan()")
    (cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_aten_asan(3)")
fi
248
# The torch._C._crash_if_debug_asserts_fail() function should only fail if both of the following are true:
# 1. The build is in debug mode
# 2. The value 424242 is passed in
# This tests that the debug asserts are working correctly.
if [[ "$BUILD_ENVIRONMENT" == *-debug* ]]; then
    echo "We are in debug mode: $BUILD_ENVIRONMENT. Expect the python assertion to fail"
    (cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_debug_asserts_fail(424242)")
elif [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then
    # Noop when debug is disabled. Skip bazel jobs because torch isn't available there yet.
    echo "We are not in debug mode: $BUILD_ENVIRONMENT. Expect the assertion to pass"
    (cd test && python -c "import torch; torch._C._crash_if_debug_asserts_fail(424242)")
fi

# Map the "nogpu" test configs onto ATen CPU capability levels.
# NOTE(review): the mapping reads inverted on purpose — the NO_AVX2 config
# runs with the baseline ("default") kernels and the AVX512 config caps the
# dispatcher at avx2, presumably to exercise the next-lower ISA level.
# Confirm against the CI workflow definitions before changing.
if [[ $TEST_CONFIG == 'nogpu_NO_AVX2' ]]; then
  export ATEN_CPU_CAPABILITY=default
elif [[ $TEST_CONFIG == 'nogpu_AVX512' ]]; then
  export ATEN_CPU_CAPABILITY=avx2
fi
267
# Run the legacy JIT executor test suites and verify the checkout is
# left clean afterwards.
test_python_legacy_jit() {
  local -a legacy_suites=(test_jit_legacy test_jit_fuser_legacy)
  time python test/run_test.py --include "${legacy_suites[@]}" --verbose
  assert_git_not_dirty
}
272
#######################################
# Run one shard of the main Python test suite (minus JIT-executor and
# distributed tests).
# Arguments: $1 - 1-based shard index
# Globals:   NUM_TEST_SHARDS, INCLUDE_CLAUSE, PYTHON_TEST_EXTRA_OPTION
#######################################
test_python_shard() {
  if [[ -z "$NUM_TEST_SHARDS" ]]; then
    echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
    exit 1
  fi

  # INCLUDE_CLAUSE and PYTHON_TEST_EXTRA_OPTION are deliberately left
  # unquoted: an empty value must contribute no argument at all, and a
  # non-empty value expands to several flags.
  # shellcheck disable=SC2086
  time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose $PYTHON_TEST_EXTRA_OPTION

  assert_git_not_dirty
}
288
# Run the full (unsharded) Python test suite, excluding JIT-executor and
# distributed tests. INCLUDE_CLAUSE / PYTHON_TEST_EXTRA_OPTION are left
# unquoted on purpose so empty values add no arguments.
test_python() {
  # shellcheck disable=SC2086
  time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --verbose $PYTHON_TEST_EXTRA_OPTION
  assert_git_not_dirty
}
294
295
#######################################
# Run one shard of the test suite with Dynamo wrapping enabled.
# Arguments: $1 - 1-based shard index
# Globals:   NUM_TEST_SHARDS
#######################################
test_dynamo_shard() {
  if [[ -z "$NUM_TEST_SHARDS" ]]; then
    echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
    exit 1
  fi
  python tools/dynamo/verify_dynamo.py
  # PLEASE DO NOT ADD ADDITIONAL EXCLUDES HERE.
  # Instead, use @skipIfTorchDynamo on your tests.
  local -a excluded=(
    --exclude-inductor-tests
    --exclude-jit-executor
    --exclude-distributed-tests
    --exclude-torch-export-tests
  )
  time python test/run_test.py --dynamo "${excluded[@]}" --shard "$1" "$NUM_TEST_SHARDS" --verbose
  assert_git_not_dirty
}
313
# Run the multi-GPU inductor + distributed test selection. Each invocation
# targets a specific file (and often a specific -k test filter) so that only
# the multi-GPU-relevant subsets run on this expensive node type.
test_inductor_distributed() {
  # Smuggle a few multi-gpu tests here so that we don't have to request another large node
  echo "Testing multi_gpu tests in test_torchinductor"
  python test/run_test.py -i inductor/test_torchinductor.py -k test_multi_gpu --verbose
  python test/run_test.py -i inductor/test_aot_inductor.py -k test_non_default_cuda_device --verbose
  python test/run_test.py -i inductor/test_aot_inductor.py -k test_replicate_on_devices --verbose
  python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose
  python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose
  python test/run_test.py -i distributed/tensor/parallel/test_micro_pipeline_tp.py --verbose
  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose
  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_multi_group --verbose
  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_with_activation_checkpointing --verbose
  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_hsdp --verbose
  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_2d_transformer_checkpoint_resume --verbose
  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_gradient_accumulation --verbose
  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_state_dict.py -k test_dp_state_dict_save_load --verbose
  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_frozen.py --verbose
  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype --verbose
  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype --verbose
  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py -k test_clip_grad_norm_2d --verbose
  python test/run_test.py -i distributed/fsdp/test_fsdp_tp_integration.py -k test_fsdp_tp_integration --verbose

  # this runs on both single-gpu and multi-gpu instance. It should be smart about skipping tests that aren't supported
  # with if required # gpus aren't available
  python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives --verbose
  assert_git_not_dirty
}
341
#######################################
# Run one shard of the inductor test selection.
# Arguments: $1 - 1-based shard index
# Globals:   NUM_TEST_SHARDS
#######################################
test_inductor_shard() {
  if [[ -z "$NUM_TEST_SHARDS" ]]; then
    echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
    exit 1
  fi

  python tools/dynamo/verify_dynamo.py

  # Core suites executed with inductor as the backend.
  local -a core_suites=(test_modules test_ops test_ops_gradients test_torch)
  python test/run_test.py --inductor \
    --include "${core_suites[@]}" \
    --shard "$1" "$NUM_TEST_SHARDS" \
    --verbose

  # Do not add --inductor for the inductor unit tests themselves, otherwise
  # we will fail because of nested dynamo state.
  local -a inductor_suites=(inductor/test_torchinductor inductor/test_torchinductor_opinfo inductor/test_aot_inductor)
  python test/run_test.py \
    --include "${inductor_suites[@]}" \
    --shard "$1" "$NUM_TEST_SHARDS" \
    --verbose
}
360
# Rebuild torch with AOTInductor test support enabled, then run the
# AOTI C++ test binaries through run_test.py's --cpp mode.
test_inductor_aoti() {
  # docker build uses bdist_wheel which does not work with test_aot_inductor
  # TODO: need a faster way to build
  if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
    # We need to hipify before building again
    python3 tools/amd_build/build_amd.py
  fi
  BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
  CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
}
371
# Run the inductor cpp-wrapper tests and a single-model training benchmark
# with the ABI-compatible mode forced on, then check benchmark accuracy
# against the recorded expected results.
test_inductor_cpp_wrapper_abi_compatible() {
  export TORCHINDUCTOR_ABI_COMPATIBLE=1
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

  echo "Testing Inductor cpp wrapper mode with TORCHINDUCTOR_ABI_COMPATIBLE=1"
  # cpu stack allocation causes segfault and needs more investigation
  PYTORCH_TESTING_DEVICE_ONLY_FOR="" python test/run_test.py --include inductor/test_cpu_cpp_wrapper
  python test/run_test.py --include inductor/test_cuda_cpp_wrapper

  TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \
    --training --inductor --disable-cudagraphs --only vit_base_patch16_224 \
    --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv"
  python benchmarks/dynamo/check_accuracy.py \
    --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \
    --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv"
}
389
# "Global" flags for inductor benchmarking controlled by TEST_CONFIG
# For example 'dynamic_aot_eager_torchbench' TEST_CONFIG means we run
# the benchmark script with '--dynamic-shapes --backend aot_eager --device cuda'
# The matrix of test options is specified in .github/workflows/inductor.yml,
# .github/workflows/inductor-periodic.yml, and
# .github/workflows/inductor-perf-test-nightly.yml
# Populated by the TEST_CONFIG dispatch block below.
DYNAMO_BENCHMARK_FLAGS=()
397
# Run the "PR time" compile-time benchmarks and print the results.
pr_time_benchmarks() {

  # fbscribelogger is used by the benchmark harness for result logging.
  pip_install --user "fbscribelogger"

  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"
  # NOTE(review): the PYTHONPATH= prefix on `source` relies on bash making
  # the temporary assignment visible to commands run by the sourced script —
  # verify this holds for the bash version in the CI image before changing.
  PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_after.txt" "benchmarks/dynamo/pr_time_benchmarks/benchmarks"
  echo "benchmark results on current PR: "
  cat  "$TEST_REPORTS_DIR/pr_time_benchmarks_after.txt"

}
409
# Translate TEST_CONFIG into DYNAMO_BENCHMARK_FLAGS (backend, dynamic
# shapes, device). pr_time_benchmarks is special-cased: it runs and exits
# immediately without touching the benchmark flags.
if [[ "${TEST_CONFIG}" == *pr_time_benchmarks* ]]; then
  pr_time_benchmarks
  exit 0
elif [[ "${TEST_CONFIG}" == *dynamo_eager* ]]; then
  DYNAMO_BENCHMARK_FLAGS+=(--backend eager)
elif [[ "${TEST_CONFIG}" == *aot_eager* ]]; then
  DYNAMO_BENCHMARK_FLAGS+=(--backend aot_eager)
elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
  DYNAMO_BENCHMARK_FLAGS+=(--export-aot-inductor)
elif [[ "${TEST_CONFIG}" == *inductor* && "${TEST_CONFIG}" != *perf* ]]; then
  DYNAMO_BENCHMARK_FLAGS+=(--inductor)
fi

if [[ "${TEST_CONFIG}" == *dynamic* ]]; then
  DYNAMO_BENCHMARK_FLAGS+=(--dynamic-shapes --dynamic-batch-only)
fi

# Benchmarks default to cuda unless the config explicitly targets cpu.
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
  DYNAMO_BENCHMARK_FLAGS+=(--device cpu)
else
  DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
fi
432
#######################################
# Run the performance-dashboard benchmark matrix for one suite.
# Arguments: $1 - benchmark suite name (e.g. torchbench, huggingface);
#            remaining args are forwarded to the benchmark script.
# Globals:   DASHBOARD_TAG selects which mode/variant combinations run;
#            TEST_CONFIG selects device; TASKSET is an optional command
#            prefix (e.g. CPU pinning) set elsewhere.
# Outputs:   one CSV per (variant, suite, dtype, mode, device, target)
#            under test/test-reports.
#######################################
test_perf_for_dashboard() {
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

  local suite="$1"
  shift

  local backend=inductor
  local modes=()
  if [[ "$DASHBOARD_TAG" == *training-true* ]]; then
    modes+=(training)
  fi
  if [[ "$DASHBOARD_TAG" == *inference-true* ]]; then
    modes+=(inference)
  fi
  # TODO: All the accuracy tests can be skipped once the CI accuracy checking is stable enough
  local targets=(accuracy performance)

  local device=cuda
  if [[ "${TEST_CONFIG}" == *cpu* ]]; then
    if [[ "${TEST_CONFIG}" == *cpu_x86* ]]; then
      device=cpu_x86
    elif [[ "${TEST_CONFIG}" == *cpu_aarch64* ]]; then
      device=cpu_aarch64
    fi
    test_inductor_set_cpu_affinity
  elif [[ "${TEST_CONFIG}" == *cuda_a10g* ]]; then
    device=cuda_a10g
  fi

  for mode in "${modes[@]}"; do
    # Inference runs in bfloat16, training in amp.
    if [[ "$mode" == "inference" ]]; then
      dtype=bfloat16
    elif [[ "$mode" == "training" ]]; then
      dtype=amp
    fi
    for target in "${targets[@]}"; do
      local target_flag=("--${target}")
      if [[ "$target" == "performance" ]]; then
        target_flag+=( --cold-start-latency)
      elif [[ "$target" == "accuracy" ]]; then
        target_flag+=( --no-translation-validation)
      fi

      # Each DASHBOARD_TAG token below toggles one benchmark variant; the
      # output CSV name encodes the variant so the dashboard can tell them apart.
      if [[ "$DASHBOARD_TAG" == *default-true* ]]; then
        $TASKSET python "benchmarks/dynamo/$suite.py" \
            "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
            --output "$TEST_REPORTS_DIR/${backend}_no_cudagraphs_${suite}_${dtype}_${mode}_${device}_${target}.csv"
      fi
      if [[ "$DASHBOARD_TAG" == *cudagraphs-true* ]]; then
        $TASKSET python "benchmarks/dynamo/$suite.py" \
            "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" \
            --output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_${mode}_${device}_${target}.csv"
      fi
      if [[ "$DASHBOARD_TAG" == *dynamic-true* ]]; then
        $TASKSET python "benchmarks/dynamo/$suite.py" \
            "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --dynamic-shapes \
            --dynamic-batch-only "$@" \
            --output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_${device}_${target}.csv"
      fi
      if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]] && [[ "$mode" == "inference" ]]; then
        TORCHINDUCTOR_CPP_WRAPPER=1 $TASKSET python "benchmarks/dynamo/$suite.py" \
            "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
            --output "$TEST_REPORTS_DIR/${backend}_cpp_wrapper_${suite}_${dtype}_${mode}_${device}_${target}.csv"
      fi
      if [[ "$DASHBOARD_TAG" == *freezing_cudagraphs-true* ]] && [[ "$mode" == "inference" ]]; then
        $TASKSET python "benchmarks/dynamo/$suite.py" \
            "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" --freezing \
            --output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_freezing_${suite}_${dtype}_${mode}_${device}_${target}.csv"
      fi
      if [[ "$DASHBOARD_TAG" == *freeze_autotune_cudagraphs-true* ]] && [[ "$mode" == "inference" ]]; then
        TORCHINDUCTOR_MAX_AUTOTUNE=1 $TASKSET python "benchmarks/dynamo/$suite.py" \
            "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" --freezing \
            --output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_freezing_autotune_${suite}_${dtype}_${mode}_${device}_${target}.csv"
      fi
      if [[ "$DASHBOARD_TAG" == *aotinductor-true* ]] && [[ "$mode" == "inference" ]]; then
        if [[ "$target" == "accuracy" ]]; then
          # Also collect Export pass rate and display as a separate row
          $TASKSET python "benchmarks/dynamo/$suite.py" \
              "${target_flag[@]}" --"$mode" --"$dtype" --export --disable-cudagraphs "$@" \
              --output "$TEST_REPORTS_DIR/${backend}_export_${suite}_${dtype}_${mode}_${device}_${target}.csv"
        fi
        TORCHINDUCTOR_ABI_COMPATIBLE=1 $TASKSET python "benchmarks/dynamo/$suite.py" \
            "${target_flag[@]}" --"$mode" --"$dtype" --export-aot-inductor --disable-cudagraphs "$@" \
            --output "$TEST_REPORTS_DIR/${backend}_aot_inductor_${suite}_${dtype}_${mode}_${device}_${target}.csv"
      fi
      if [[ "$DASHBOARD_TAG" == *maxautotune-true* ]]; then
        TORCHINDUCTOR_MAX_AUTOTUNE=1 $TASKSET python "benchmarks/dynamo/$suite.py" \
            "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" \
            --output "$TEST_REPORTS_DIR/${backend}_max_autotune_${suite}_${dtype}_${mode}_${device}_${target}.csv"
      fi
      if [[ "$DASHBOARD_TAG" == *cudagraphs_low_precision-true* ]] && [[ "$mode" == "inference" ]]; then
        # TODO: This has a new dtype called quant and the benchmarks script needs to be updated to support this.
        # The tentative command is as follows. It doesn't work now, but it's ok because we only need mock data
        # to fill the dashboard.
        $TASKSET python "benchmarks/dynamo/$suite.py" \
          "${target_flag[@]}" --"$mode" --quant --backend "$backend" "$@" \
          --output "$TEST_REPORTS_DIR/${backend}_cudagraphs_low_precision_${suite}_quant_${mode}_${device}_${target}.csv" || true
        # Copy cudagraph results as mock data, easiest choice?
        cp "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_${mode}_${device}_${target}.csv" \
          "$TEST_REPORTS_DIR/${backend}_cudagraphs_low_precision_${suite}_quant_${mode}_${device}_${target}.csv"
      fi
    done
  done
}
538
#######################################
# Run a single dynamo benchmark invocation for one suite.
# Usage: test_single_dynamo_benchmark inductor_inference huggingface 0 --args-for-script
# Arguments: $1 - report name, $2 - suite, $3 - shard id (mandatory, may be
#            empty); remaining args forwarded to the benchmark script.
# Globals:   TEST_CONFIG selects perf vs. accuracy flow; NUM_TEST_SHARDS
#            enables partitioning; DYNAMO_BENCHMARK_FLAGS set above.
#######################################
test_single_dynamo_benchmark() {
  # Use test-reports directory under test folder will allow the CI to automatically pick up
  # the test reports and upload them to S3. Need to use full path here otherwise the script
  # will bark about file not found later on
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

  local name="$1"
  shift
  local suite="$1"
  shift
  # shard id is mandatory, even if it is not passed
  local shard_id="$1"
  shift

  local partition_flags=()
  if [[ -n "$NUM_TEST_SHARDS" && -n "$shard_id" ]]; then
    partition_flags=( --total-partitions "$NUM_TEST_SHARDS" --partition-id "$shard_id" )
  fi

  if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
    python "benchmarks/dynamo/$suite.py" \
      --ci --performance --disable-cudagraphs --inductor \
      "${DYNAMO_BENCHMARK_FLAGS[@]}" "$@" "${partition_flags[@]}" \
      --output "$TEST_REPORTS_DIR/${name}_${suite}.csv"
  elif [[ "${TEST_CONFIG}" == *perf* ]]; then
    test_perf_for_dashboard "$suite" \
      "${DYNAMO_BENCHMARK_FLAGS[@]}" "$@" "${partition_flags[@]}"
  else
    if [[ "${TEST_CONFIG}" == *aot_inductor* && "${TEST_CONFIG}" != *cpu_aot_inductor* ]]; then
      # Test AOTInductor with the ABI-compatible mode on CI
      # This can be removed once the ABI-compatible mode becomes default.
      # For CPU device, we prefer non ABI-compatible mode on CI when testing AOTInductor.
      export TORCHINDUCTOR_ABI_COMPATIBLE=1
    fi

    # Strip the ISA suffix so expected-accuracy CSV lookups below use the
    # base config name.
    if [[ "${TEST_CONFIG}" == *_avx2* ]]; then
      TEST_CONFIG=${TEST_CONFIG//_avx2/}
    fi
    if [[ "${TEST_CONFIG}" == *_avx512* ]]; then
      TEST_CONFIG=${TEST_CONFIG//_avx512/}
    fi
    python "benchmarks/dynamo/$suite.py" \
      --ci --accuracy --timing --explain \
      "${DYNAMO_BENCHMARK_FLAGS[@]}" \
      "$@" "${partition_flags[@]}" \
      --output "$TEST_REPORTS_DIR/${name}_${suite}.csv"
    python benchmarks/dynamo/check_accuracy.py \
      --actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
      --expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
    python benchmarks/dynamo/check_graph_breaks.py \
      --actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
      --expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
  fi
}
596
#######################################
# Run the gpt-fast micro-benchmarks and write a CSV to the reports dir.
# Globals: TEST_CONFIG (read) — cpu configs get CPU-affinity pinning first.
#######################################
test_inductor_micro_benchmark() {
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  # Fix: create the reports directory like every other benchmark helper in
  # this file does; otherwise the --output write below can fail on a fresh
  # checkout where test/test-reports does not exist yet.
  mkdir -p "$TEST_REPORTS_DIR"
  if [[ "${TEST_CONFIG}" == *cpu* ]]; then
    test_inductor_set_cpu_affinity
  fi
  python benchmarks/gpt_fast/benchmark.py --output "${TEST_REPORTS_DIR}/gpt_fast_benchmark.csv"
}
604
# Run the Halide-backend inductor tests and verify the checkout stays clean.
test_inductor_halide() {
  python test/run_test.py -i inductor/test_halide.py --verbose
  assert_git_not_dirty
}
609
#######################################
# Dispatch a dynamo benchmark suite to the right mode(s) based on TEST_CONFIG.
# Usage: test_dynamo_benchmark huggingface 0
# Arguments: $1 - suite name, $2 - shard id; remaining args forwarded.
#######################################
test_dynamo_benchmark() {
  TEST_REPORTS_DIR=$(pwd)/test/test-reports

  local suite="$1"
  shift
  local shard_id="$1"
  shift

  if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
    test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
  elif [[ "${TEST_CONFIG}" == *perf* ]]; then
    test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
  elif [[ "${TEST_CONFIG}" == *cpu* ]]; then
    # CPU accuracy flow: float32 unless the config requests amp, with an
    # optional --freezing flag.
    local dt="float32"
    if [[ "${TEST_CONFIG}" == *amp* ]]; then
      dt="amp"
    fi
    local -a freeze_flag=()
    if [[ "${TEST_CONFIG}" == *freezing* ]]; then
      freeze_flag=(--freezing)
    fi
    test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --"$dt" "${freeze_flag[@]}" "$@"
  elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
    # AOTInductor only runs the inference flow.
    test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
  else
    test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
    test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
  fi
}
642
# Smoke-test Inductor on a handful of TorchBench/HuggingFace models (CUDA):
# cpp-wrapper accuracy, training/inference perf thresholds, memory compression
# ratio, and warm-start accuracy.  Perf thresholds here need active upkeep.
test_inductor_torchbench_smoketest_perf() {
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

  # Test some models in the cpp wrapper mode.  All three runs share one CSV
  # (presumably appended to - the single check_accuracy call below reads it;
  # TODO confirm the benchmark script appends rather than truncates).
  TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
    --bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
  TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
    --bfloat16 --inference --inductor --only llama --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
  TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
    --bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
  python benchmarks/dynamo/check_accuracy.py \
    --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \
    --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv"

  # Training perf smoke test on hf_Bert; fails if the speedup drops below 1.4x.
  python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
    --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \
    --output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
  # The threshold value needs to be actively maintained to make this check useful
  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4

  # AOTInductor inference perf smoke test on nanogpt.
  TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
    --export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv"
  # The threshold value needs to be actively maintained to make this check useful
  # The perf number of nanogpt seems not very stable, e.g.
  # https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314,
  # and thus we lower its threshold to reduce flakiness. If this continues to be a problem,
  # we switch to use some other model.
  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9

  # Check memory compression ratio for a few models
  for test in hf_Albert timm_vision_transformer; do
    python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
      --disable-cudagraphs --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" \
      --only $test --output "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv"
    cat "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv"
    python benchmarks/dynamo/check_memory_compression_ratio.py --actual \
      "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv" \
      --expected benchmarks/dynamo/expected_ci_perf_inductor_torchbench.csv
  done

  # Perform some "warm-start" runs for a few huggingface models.
  for test in AlbertForQuestionAnswering AllenaiLongformerBase DistilBertForMaskedLM DistillGPT2 GoogleFnet YituTechConvBert; do
    python benchmarks/dynamo/huggingface.py --accuracy --training --amp --inductor --device cuda --warm-start-latency \
      --only $test --output "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv"
    python benchmarks/dynamo/check_accuracy.py \
      --actual "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv" \
      --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv"
  done
}
693
# Print the physical core count parsed from lscpu: sockets * cores-per-socket
# on x86, or clusters * cores-per-cluster on aarch64 (where lscpu reports
# cluster topology instead).
test_inductor_get_core_number() {
  local group_count cores_per_group
  if [[ "${TEST_CONFIG}" == *aarch64* ]]; then
    group_count=$(lscpu | grep 'Cluster(s):' | awk '{print $2}')
    cores_per_group=$(lscpu | grep 'Core(s) per cluster:' | awk '{print $4}')
  else
    group_count=$(lscpu | grep 'Socket(s):' | awk '{print $2}')
    cores_per_group=$(lscpu | grep 'Core(s) per socket:' | awk '{print $4}')
  fi
  echo $((group_count * cores_per_group))
}
701
# Configure the environment for CPU inductor benchmark runs.
# Side effects: exports LD_PRELOAD, MALLOC_CONF, OMP_NUM_THREADS and TASKSET
# (plus KMP_AFFINITY/KMP_BLOCKTIME on x86).  Callers prefix benchmark commands
# with $TASKSET to pin them to the first $OMP_NUM_THREADS cores.
test_inductor_set_cpu_affinity(){
  # set jemalloc as the allocator; prepend it to any existing LD_PRELOAD
  JEMALLOC_LIB="$(find /usr/lib -name libjemalloc.so.2)"
  export LD_PRELOAD="$JEMALLOC_LIB":"$LD_PRELOAD"
  export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1"

  if [[ "${TEST_CONFIG}" != *aarch64* ]]; then
    # Use Intel OpenMP for x86
    # NOTE(review): assumes libiomp5.so ships next to the active python's lib
    # directory (conda-style layout) - confirm for other environments.
    IOMP_LIB="$(dirname "$(which python)")/../lib/libiomp5.so"
    export LD_PRELOAD="$IOMP_LIB":"$LD_PRELOAD"
    export KMP_AFFINITY=granularity=fine,compact,1,0
    export KMP_BLOCKTIME=1
  fi
  cores=$(test_inductor_get_core_number)
  export OMP_NUM_THREADS=$cores
  # Pin to cores 0..cores-1.
  end_core=$((cores-1))
  export TASKSET="taskset -c 0-$end_core"
}
720
# CPU inference perf smoke tests for TorchBench models, driven by a CSV of
# per-model speedup targets, followed by a couple of ABI-compatible
# AOTInductor accuracy checks.
test_inductor_torchbench_cpu_smoketest_perf(){
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

  test_inductor_set_cpu_affinity
  MODELS_SPEEDUP_TARGET=benchmarks/dynamo/expected_ci_speedup_inductor_torchbench_cpu.csv

  # CSV columns, as consumed below: [0]=model name, [1]=backend, [2]=data
  # type, [3]=shape mode ("dynamic" or not), [4]=wrapper mode ("cpp" or not),
  # [5]=speedup target.  Comment lines (leading '#') are skipped.
  grep -v '^ *#' < "$MODELS_SPEEDUP_TARGET" | while IFS=',' read -r -a model_cfg
  do
    local model_name=${model_cfg[0]}
    local data_type=${model_cfg[2]}
    local speedup_target=${model_cfg[5]}
    local backend=${model_cfg[1]}
    # Toggle cpp-wrapper per model; unset so it doesn't leak to later rows.
    if [[ ${model_cfg[4]} == "cpp" ]]; then
      export TORCHINDUCTOR_CPP_WRAPPER=1
    else
      unset TORCHINDUCTOR_CPP_WRAPPER
    fi
    local output_name="$TEST_REPORTS_DIR/inductor_inference_${model_cfg[0]}_${model_cfg[1]}_${model_cfg[2]}_${model_cfg[3]}_cpu_smoketest.csv"

    if [[ ${model_cfg[3]} == "dynamic" ]]; then
      $TASKSET python benchmarks/dynamo/torchbench.py \
        --inference --performance --"$data_type" -dcpu -n50 --only "$model_name" --dynamic-shapes \
        --dynamic-batch-only --freezing --timeout 9000 --"$backend" --output "$output_name"
    else
      $TASKSET python benchmarks/dynamo/torchbench.py \
        --inference --performance --"$data_type" -dcpu -n50 --only "$model_name" \
        --freezing --timeout 9000 --"$backend" --output "$output_name"
    fi
    cat "$output_name"
    # The threshold value needs to be actively maintained to make this check useful.
    python benchmarks/dynamo/check_perf_csv.py -f "$output_name" -t "$speedup_target"
  done

  # Add a few ABI-compatible accuracy tests for CPU. These can be removed once we turn on ABI-compatible as default.
  TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/timm_models.py --device cpu --accuracy \
    --bfloat16 --inference --export-aot-inductor --disable-cudagraphs --only adv_inception_v3 \
    --output "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv"
  TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/timm_models.py --device cpu --accuracy \
    --bfloat16 --inference --export-aot-inductor --disable-cudagraphs --only beit_base_patch16_224 \
    --output "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv"
  python benchmarks/dynamo/check_accuracy.py \
    --actual "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv" \
    --expected "benchmarks/dynamo/ci_expected_accuracy/aot_inductor_timm_inference.csv"
}
766
# Run the TorchBench repo's own test suite from its checkout.
# TORCHBENCHPATH is presumably set by the CI environment/caller - not set here.
test_torchbench_gcp_smoketest(){
  pushd "${TORCHBENCHPATH}"
  python test.py -v
  popd
}
772
# Run the Gloo-with-TLS distributed test by sourcing its driver script
# (sourced, so it shares this shell's environment and `set -ex` options).
test_python_gloo_with_tls() {
  source "$(dirname "${BASH_SOURCE[0]}")/run_glootls_test.sh"
  assert_git_not_dirty
}
777
778
# Run the ATen C++ tests, either against the installed wheel's test binaries
# (IN_WHEEL_TEST set) or the in-tree build output.
test_aten() {
  # Some ATen tests (scalar_tensor_test, basic, native_test) are already
  # skipped by caffe2 in the rocm environment.
  echo "Running ATen tests with pytorch lib"

  if [[ -n "$IN_WHEEL_TEST" ]]; then
    echo "Running test with the install folder"
    # Rename the build folder while testing so nothing can accidentally
    # depend on it; it is restored below.
    mv "$BUILD_DIR" "$BUILD_RENAMED_DIR"
    TEST_BASE_DIR="$TORCH_TEST_DIR"
  else
    echo "Running test with the build folder"
    TEST_BASE_DIR="$BUILD_BIN_DIR"
  fi

  # NB: the ATen test binaries don't have RPATH set, so symlink the shared
  # libraries next to them where the dynamic linker can find them.  This is
  # a bit of a hack.
  local lib_prefix
  for lib_prefix in libc10 libcaffe2 libmkldnn libnccl libtorch; do
    ${SUDO} ln -sf "$TORCH_LIB_DIR"/${lib_prefix}* "$TEST_BASE_DIR"
  done

  ls "$TEST_BASE_DIR"
  aten/tools/run_tests.sh "$TEST_BASE_DIR"

  if [[ -n "$IN_WHEEL_TEST" ]]; then
    # Restore the build folder to avoid any impact on other tests
    mv "$BUILD_RENAMED_DIR" "$BUILD_DIR"
  fi

  assert_git_not_dirty
}
815
# Verify torch still works when numpy is unavailable: a stub 'fake_numpy'
# package is placed first on sys.path to shadow the real numpy.
test_without_numpy() {
  pushd "$(dirname "${BASH_SOURCE[0]}")"
  # x.numpy() must raise RuntimeError instead of crashing when numpy is absent.
  python -c "import sys;sys.path.insert(0, 'fake_numpy');from unittest import TestCase;import torch;x=torch.randn(3,3);TestCase().assertRaises(RuntimeError, lambda: x.numpy())"
  # Regression test for https://github.com/pytorch/pytorch/issues/66353
  python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch;print(torch.tensor([torch.tensor(0.), torch.tensor(1.)]))"
  # Regression test for https://github.com/pytorch/pytorch/issues/109387
  if [[ "${TEST_CONFIG}" == *dynamo* ]]; then
    python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch;torch.compile(lambda x:print(x))('Hello World')"
  fi
  popd
}
827
# Run the libtorch C++ tests: API tests on shard 1, JIT/lazy tests on shard 2,
# both when no shard argument is given.  Skipped entirely under the "slow"
# config, which repurposes the default config for slow tests instead.
test_libtorch() {
  local SHARD="$1"

  if [[ "$TEST_CONFIG" == "slow" ]]; then
    return 0
  fi

  echo "Testing libtorch"

  # Symlink shared libraries next to the test binaries so the dynamic linker
  # can resolve them.
  local lib
  for lib in libbackend_with_compiler.so libjitbackend_test.so libcaffe2_nvrtc.so; do
    ln -sf "$TORCH_LIB_DIR/$lib" "$TORCH_BIN_DIR"
  done
  for lib in libc10 libshm libtorch libnvfuser; do
    ln -sf "$TORCH_LIB_DIR"/${lib}* "$TORCH_BIN_DIR"
  done

  export CPP_TESTS_DIR="${TORCH_BIN_DIR}"

  if [[ -z "${SHARD}" || "${SHARD}" == "1" ]]; then
    test_libtorch_api
  fi
  if [[ -z "${SHARD}" || "${SHARD}" == "2" ]]; then
    test_libtorch_jit
  fi

  assert_git_not_dirty
}
856
# Run the JIT and lazy-tensor C++ test suites via run_test.py, with the model
# fixture set up before and torn down after.
test_libtorch_jit() {
  # Prepare the model used by test_jit, the model needs to be in the test directory
  # to get picked up by run_test
  pushd test
  python cpp/jit/tests_setup.py setup
  popd

  # Run jit and lazy tensor cpp tests together to finish them faster
  if [[ "$BUILD_ENVIRONMENT" == *cuda* && "$TEST_CONFIG" != *nogpu* ]]; then
    # LTC_TS_CUDA enables the lazy tensor core CUDA path for cpp/test_lazy.
    LTC_TS_CUDA=1 python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/test_lazy
  else
    # CUDA tests have already been skipped when CUDA is not available
    python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/test_lazy -k "not CUDA"
  fi

  # Cleaning up test artifacts in the test folder
  pushd test
  python cpp/jit/tests_setup.py shutdown
  popd
}
877
# Run the libtorch C++ API tests (test_api, test_tensorexpr, and
# static_runtime_test where applicable).  Downloads the MNIST fixture first.
test_libtorch_api() {
  # Download the MNIST dataset needed by the C++ API tests.
  # (Previous comment said "background download", but this call is synchronous.)
  MNIST_DIR="${PWD}/test/cpp/api/mnist"
  python tools/download_mnist.py --quiet -d "${MNIST_DIR}"

  if [[ "$BUILD_ENVIRONMENT" == *asan* || "$BUILD_ENVIRONMENT" == *slow-gradcheck* ]]; then
    # Under ASAN / slow-gradcheck, invoke the gtest binaries directly rather
    # than through run_test.py.
    TEST_REPORTS_DIR=test/test-reports/cpp-unittest/test_libtorch
    # Quote the expansion (SC2086) - previously unquoted.
    mkdir -p "$TEST_REPORTS_DIR"

    OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" "$TORCH_BIN_DIR"/test_api --gtest_filter='-IMethodTest.*' --gtest_output=xml:"$TEST_REPORTS_DIR"/test_api.xml
    "$TORCH_BIN_DIR"/test_tensorexpr --gtest_output=xml:"$TEST_REPORTS_DIR"/test_tensorexpr.xml
  else
    # Exclude IMethodTest that relies on torch::deploy, which will instead be ran in test_deploy
    OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_api -k "not IMethodTest"
    python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr
  fi

  if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* && "${BUILD_ENVIRONMENT}" != *asan* ]]; then
    # NB: This test is not under TORCH_BIN_DIR but under BUILD_BIN_DIR
    export CPP_TESTS_DIR="${BUILD_BIN_DIR}"
    python test/run_test.py --cpp --verbose -i cpp/static_runtime_test
  fi
}
901
# Execute every XPU/SYCL gtest binary found in the build bin directory,
# writing one XML report per binary into test/test-reports.
test_xpu_bin(){
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

  local candidate exe_name
  for candidate in "${BUILD_BIN_DIR}"/*{xpu,sycl}*; do
    case "$candidate" in
      # A path still containing '*' is an unexpanded glob (no match); also
      # skip shared/static libraries, which are not runnable tests.
      *\**|*.so|*.a) continue ;;
    esac
    exe_name=$(basename "$candidate")
    echo "Testing ${exe_name} ..."
    "$candidate" --gtest_output=xml:"$TEST_REPORTS_DIR"/"$exe_name".xml
  done
}
914
# Test NNC ahead-of-time compilation; each test only runs if its binary was
# built.
test_aot_compilation() {
  echo "Testing Ahead of Time compilation"
  # Symlink shared libraries next to the test binaries for the dynamic linker.
  local lib_prefix
  for lib_prefix in libc10 libtorch; do
    ln -sf "$TORCH_LIB_DIR"/${lib_prefix}* "$TORCH_BIN_DIR"
  done

  if [[ -f "$TORCH_BIN_DIR"/test_mobile_nnc ]]; then
    CPP_TESTS_DIR="${TORCH_BIN_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_mobile_nnc
  fi

  if [[ -f "$TORCH_BIN_DIR"/aot_model_compiler_test ]]; then
    # shellcheck disable=SC1091
    source test/mobile/nnc/test_aot_compile.sh
  fi
}
928
# Run the Vulkan API C++ tests against the SwiftShader software renderer
# (only in *vulkan* build environments).
test_vulkan() {
  if [[ "$BUILD_ENVIRONMENT" == *vulkan* ]]; then
    ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_TEST_DIR"
    ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_TEST_DIR"
    # Point the Vulkan loader at the SwiftShader software ICD.
    export VK_ICD_FILENAMES=/var/lib/jenkins/swiftshader/swiftshader/build/Linux/vk_swiftshader_icd.json
    CPP_TESTS_DIR="${TORCH_TEST_DIR}" LD_LIBRARY_PATH=/var/lib/jenkins/swiftshader/swiftshader/build/Linux/ python test/run_test.py --cpp --verbose -i cpp/vulkan_api_test
  fi
}
937
# Run the distributed Python tests for this shard, plus - on shard 1 of
# CUDA/ROCm builds - the distributed C++ store and process-group tests.
test_distributed() {
  echo "Testing distributed python tests"
  # shellcheck disable=SC2086
  time python test/run_test.py --distributed-tests --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" $INCLUDE_CLAUSE --verbose
  assert_git_not_dirty

  if [[ ("$BUILD_ENVIRONMENT" == *cuda* || "$BUILD_ENVIRONMENT" == *rocm*) && "$SHARD_NUMBER" == 1 ]]; then
    echo "Testing distributed C++ tests"
    # Symlink shared libraries next to the test binaries for the dynamic linker.
    ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR"
    ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR"

    export CPP_TESTS_DIR="${TORCH_BIN_DIR}"
    # These are distributed tests, so let's continue running them sequentially here to avoid
    # any surprise
    python test/run_test.py --cpp --verbose -i cpp/FileStoreTest
    python test/run_test.py --cpp --verbose -i cpp/HashStoreTest
    python test/run_test.py --cpp --verbose -i cpp/TCPStoreTest

    if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
      MPIEXEC=$(command -v mpiexec)
      if [[ -n "$MPIEXEC" ]]; then
        # NB: mpiexec only works directly with the C++ test binary here
        MPICMD="${MPIEXEC} -np 2 $TORCH_BIN_DIR/ProcessGroupMPITest"
        eval "$MPICMD"
      fi

      python test/run_test.py --cpp --verbose -i cpp/ProcessGroupGlooTest
      python test/run_test.py --cpp --verbose -i cpp/ProcessGroupNCCLTest
      python test/run_test.py --cpp --verbose -i cpp/ProcessGroupNCCLErrorsTest
    fi
  fi
}
970
# Run the RPC C++ test binary (cpp/test_cpp_rpc) through run_test.py.
test_rpc() {
  echo "Testing RPC C++ tests"
  # NB: the ending test_rpc must match the current function name for the current
  # test reporting process to function as expected.
  # Symlink shared libraries next to the test binaries for the dynamic linker.
  ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR"
  ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR"

  CPP_TESTS_DIR="${TORCH_BIN_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_cpp_rpc
}
980
# Custom backend round-trip test: the Python side exports a lowered module,
# then the prebuilt C++ test binary (copied in from the build artifacts)
# loads it back.
test_custom_backend() {
  echo "Testing custom backends"
  CUSTOM_BACKEND_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-backend-build"
  pushd test/custom_backend
  cp -a "$CUSTOM_BACKEND_BUILD" build
  # Run tests Python-side and export a lowered module.
  python test_custom_backend.py -v
  python backend.py --export-module-to=model.pt
  # Run tests C++-side and load the exported lowered module.
  build/test_custom_backend ./model.pt
  rm -f ./model.pt
  popd
  assert_git_not_dirty
}
995
# Custom TorchScript operator round-trip test: Python exports a script
# module, then the prebuilt C++ test binary loads it back.
test_custom_script_ops() {
  echo "Testing custom script operators"
  CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build"
  pushd test/custom_operator
  cp -a "$CUSTOM_OP_BUILD" build
  # Run tests Python-side and export a script module.
  python test_custom_ops.py -v
  python model.py --export-script-module=model.pt
  # Run tests C++-side and load the exported script module.
  build/test_custom_ops ./model.pt
  popd
  assert_git_not_dirty
}
1009
# JIT hooks C++ test: Python exports script modules with hooks attached,
# then the prebuilt C++ binary loads and exercises them.
test_jit_hooks() {
  echo "Testing jit hooks in cpp"
  HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build"
  pushd test/jit_hooks
  cp -a "$HOOK_BUILD" build
  # Run tests Python-side and export the script modules with hooks
  python model.py --export-script-module=model
  # Run tests C++-side and load the exported script modules
  build/test_jit_hooks ./model
  popd
  assert_git_not_dirty
}
1022
# Smoke-run the __torch_function__ overhead benchmarks: bench.py once, then
# the py-spy-based probe for each tensor subclass variant.
test_torch_function_benchmark() {
  echo "Testing __torch_function__ benchmarks"
  pushd benchmarks/overrides_benchmark
  python bench.py -n 1 -m 2
  local variant
  for variant in Tensor SubTensor WithTorchFunction SubWithTorchFunction; do
    python pyspybench.py "$variant" -n 1
  done
  popd
  assert_git_not_dirty
}
1034
# Clone and build torch/xla against the current torch build.  Helper
# functions (clone_pytorch_xla, apply_patches, retry, install_deps_pytorch_xla,
# build_torch_xla) come from common.sh / the sourced xla common.sh.
build_xla() {
  # xla test needs pytorch headers in torch/include
  pushd ..
  python -c "import os, torch, shutil; shutil.copytree(os.path.join(os.path.dirname(torch.__file__), 'include'), 'workspace/torch/include', dirs_exist_ok=True)"
  popd

  # xla test needs sccache setup.
  # shellcheck source=./common-build.sh
  source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"

  XLA_DIR=xla
  USE_CACHE=1
  clone_pytorch_xla
  # shellcheck disable=SC1091
  source "xla/.circleci/common.sh"

  # TODO: The torch pin #73164 is involved in the sev https://github.com/pytorch/pytorch/issues/86093
  # so this is temporarily removed until XLA fixes the weird logic in https://github.com/pytorch/xla/blob/master/scripts/apply_patches.sh#L17-L18
  rm "${XLA_DIR}/torch_patches/.torch_pin" || true

  apply_patches
  SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
  # These functions are defined in .circleci/common.sh in pytorch/xla repo
  retry install_deps_pytorch_xla $XLA_DIR $USE_CACHE
  CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch:${CMAKE_PREFIX_PATH}" XLA_SANDBOX_BUILD=1 build_torch_xla $XLA_DIR
  assert_git_not_dirty
}
1062
# Run the torch/xla test suite (expects build_xla to have run first).
test_xla() {
  # xla test needs sccache setup.
  # shellcheck source=./common-build.sh
  source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"

  clone_pytorch_xla
  # shellcheck disable=SC1091
  source "./xla/.circleci/common.sh"
  SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
  # Set LD_LIBRARY_PATH for C++ tests
  export LD_LIBRARY_PATH="/opt/conda/lib/:${LD_LIBRARY_PATH}"
  # run_torch_xla_tests is defined in xla/.circleci/common.sh (sourced above).
  CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch:${CMAKE_PREFIX_PATH}" XLA_SKIP_MP_OP_TESTS=1 XLA_SKIP_XLA_OP_TESTS=1 run_torch_xla_tests "$(pwd)" "$(pwd)/xla"
  assert_git_not_dirty
}
1077
# Run a single public-API test from test/test_public_bindings.py and verify
# that it FAILS and that its output mentions the injected invalid item.
# Arguments:
#   $1 - test name (passed to the test runner's -k filter)
#   $2 - name of the invalid item expected to appear in the failure output
#   $3 - human-readable description of the invalid item (for messages)
# Returns 0 when the test failed as expected, 1 otherwise.
# Fixes vs previous version: parameters/temps are now `local` (they leaked
# into the global scope), quoting is consistent, indent matches file style.
check_public_api_test_fails() {
  local test_name=$1
  local invalid_item_name=$2
  local invalid_item_desc=$3
  local test_output ret

  echo "Running public API test '${test_name}'..."
  # "&& ret=$? || ret=$?" captures the exit code without tripping `set -e`.
  test_output=$(python test/test_public_bindings.py -k "${test_name}" 2>&1) && ret=$? || ret=$?

  # Ensure the test fails as expected.
  if [[ "$ret" -eq 0 ]]; then
    cat << EOF
Expected the public API test '${test_name}' to fail after introducing
${invalid_item_desc}, but it succeeded! Check test/test_public_bindings.py
for any changes that may have broken the test.
EOF
    return 1
  fi

  # Ensure the invalid item is named in the test output.
  if ! echo "${test_output}" | grep -q "${invalid_item_name}"; then
    cat << EOF
Expected the public API test '${test_name}' to identify ${invalid_item_desc}, but
it didn't! It's possible the test may not have run. Check test/test_public_bindings.py
for any changes that may have broken the test.
EOF
    return 1
  fi

  echo "Success! '${test_name}' identified ${invalid_item_desc} ${invalid_item_name}."
  return 0
}
1111
1112# Do NOT run this test before any other tests, like test_python_shard, etc.
1113# Because this function uninstalls the torch built from branch and installs
1114# the torch built on its base commit.
# Validate the public-API guard tests against deliberately-injected failures,
# then check forward (new model, old code) and backward (old model, new code)
# compatibility of TorchScript models and operator schemas against the base
# commit.  DESTRUCTIVE: performs `git reset --hard` on the checkout and
# temporarily installs a base-commit torch into a throwaway venv.
test_forward_backward_compatibility() {
  set -x

  # First, validate public API tests in the torch built from branch.
  # Step 1. Make sure the public API test "test_correct_module_names" fails when a new file
  # introduces an invalid public API function.
  new_filename=$(mktemp XXXXXXXX.py -p "${TORCH_INSTALL_DIR}")

  BAD_PUBLIC_FUNC=$(
  cat << 'EOF'
def new_public_func():
  pass

# valid public API functions have __module__ set correctly
new_public_func.__module__ = None
EOF
  )

  echo "${BAD_PUBLIC_FUNC}" >> "${new_filename}"
  invalid_api="torch.$(basename -s '.py' "${new_filename}").new_public_func"
  echo "Created an invalid public API function ${invalid_api}..."

  # "&& ret=$? || ret=$?" captures the exit code without aborting under set -e,
  # so the injected file/module can be cleaned up before deciding to exit.
  check_public_api_test_fails \
      "test_correct_module_names" \
      "${invalid_api}" \
      "an invalid public API function" && ret=$? || ret=$?

  rm -v "${new_filename}"

  if [ "$ret" -ne 0 ]; then
      exit 1
  fi

  # Step 2. Make sure that the public API test "test_correct_module_names" fails when an existing
  # file is modified to introduce an invalid public API function.
  EXISTING_FILEPATH="${TORCH_INSTALL_DIR}/nn/parameter.py"
  cp -v "${EXISTING_FILEPATH}" "${EXISTING_FILEPATH}.orig"
  echo "${BAD_PUBLIC_FUNC}" >> "${EXISTING_FILEPATH}"
  invalid_api="torch.nn.parameter.new_public_func"
  echo "Appended an invalid public API function to existing file ${EXISTING_FILEPATH}..."

  check_public_api_test_fails \
      "test_correct_module_names" \
      "${invalid_api}" \
      "an invalid public API function" && ret=$? || ret=$?

  # Restore the original file regardless of the test outcome.
  mv -v "${EXISTING_FILEPATH}.orig" "${EXISTING_FILEPATH}"

  if [ "$ret" -ne 0 ]; then
      exit 1
  fi

  # Step 3. Make sure that the public API test "test_modules_can_be_imported" fails when a module
  # cannot be imported.
  new_module_dir=$(mktemp XXXXXXXX -d -p "${TORCH_INSTALL_DIR}")
  echo "invalid syntax garbage" > "${new_module_dir}/__init__.py"
  invalid_module_name="torch.$(basename "${new_module_dir}")"

  check_public_api_test_fails \
      "test_modules_can_be_imported" \
      "${invalid_module_name}" \
      "a non-importable module" && ret=$? || ret=$?

  rm -rv "${new_module_dir}"

  if [ "$ret" -ne 0 ]; then
      exit 1
  fi

  # Next, build torch from the merge base.
  REPO_DIR=$(pwd)
  if [[ "${BASE_SHA}" == "${SHA1}" ]]; then
    echo "On trunk, we should compare schemas with torch built from the parent commit"
    SHA_TO_COMPARE=$(git rev-parse "${SHA1}"^)
  else
    echo "On pull, we should compare schemas with torch built from the merge base"
    SHA_TO_COMPARE=$(git merge-base "${SHA1}" "${BASE_SHA}")
  fi
  export SHA_TO_COMPARE

  # create a dummy ts model at this version
  python test/create_dummy_torchscript_model.py /tmp/model_new.pt
  # Install the base-commit torch into a venv so it can be discarded later.
  python -m venv venv
  # shellcheck disable=SC1091
  . venv/bin/activate

  # build torch at the base commit to generate a base function schema for comparison
  git reset --hard "${SHA_TO_COMPARE}"
  git submodule sync && git submodule update --init --recursive
  echo "::group::Installing Torch From Base Commit"
  pip install -r requirements.txt
  # shellcheck source=./common-build.sh
  source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
  python setup.py bdist_wheel --bdist-dir="base_bdist_tmp" --dist-dir="base_dist"
  python -mpip install base_dist/*.whl
  echo "::endgroup::"

  pushd test/forward_backward_compatibility
  pip show torch
  python dump_all_function_schemas.py --filename nightly_schemas.txt

  # Switch the checkout back to the PR commit for the BC/FC checks below.
  git reset --hard "${SHA1}"
  git submodule sync && git submodule update --init --recursive
  # FC: verify new model can be load with old code.
  if ! python ../load_torchscript_model.py /tmp/model_new.pt; then
      echo "FC check failed: new model cannot be load in old code"
      return 1
  fi
  python ../create_dummy_torchscript_model.py /tmp/model_old.pt
  deactivate
  rm -r "${REPO_DIR}/venv" "${REPO_DIR}/base_dist"
  pip show torch
  python check_forward_backward_compatibility.py --existing-schemas nightly_schemas.txt
  # BC: verify old model can be load with new code
  if ! python ../load_torchscript_model.py /tmp/model_old.pt; then
      echo "BC check failed: old model cannot be load in new code"
      return 1
  fi
  popd
  set +x
  assert_git_not_dirty
}
1237
# Run the Bazel-built test targets: CPU builds run //c10/... (with and
# without gflags/glog) plus :all_tests; CUDA builds run an explicit list of
# C++ test targets.
test_bazel() {
  set -e

  # bazel test needs sccache setup.
  # shellcheck source=./common-build.sh
  source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"

  get_bazel

  if [[ "$CUDA_VERSION" == "cpu" ]]; then
    # Test //c10/... without Google flags and logging libraries. The
    # :all_tests target in the subsequent Bazel invocation tests
    # //c10/... with the Google libraries.
    tools/bazel test --config=cpu-only --test_timeout=480 --test_output=all --test_tag_filters=-gpu-required --test_filter=-*CUDA \
      --no//c10:use_gflags --no//c10:use_glog //c10/...

    tools/bazel test --config=cpu-only --test_timeout=480 --test_output=all --test_tag_filters=-gpu-required --test_filter=-*CUDA :all_tests
  else
    # Increase the test timeout to 480 like CPU tests because modules_test frequently timeout
    tools/bazel test --test_timeout=480 --test_output=errors \
      //:any_test \
      //:autograd_test \
      //:dataloader_test \
      //:dispatch_test \
      //:enum_test \
      //:expanding_array_test \
      //:fft_test \
      //:functional_test \
      //:grad_mode_test \
      //:inference_mode_test \
      //:init_test \
      //:jit_test \
      //:memory_test \
      //:meta_tensor_test \
      //:misc_test \
      //:moduledict_test \
      //:modulelist_test \
      //:modules_test \
      //:namespace_test \
      //:nested_test \
      //:nn_utils_test \
      //:operations_test \
      //:ordered_dict_test \
      //:parallel_benchmark_test \
      //:parameterdict_test \
      //:parameterlist_test \
      //:sequential_test \
      //:serialize_test \
      //:special_test \
      //:static_test \
      //:support_test \
      //:tensor_flatten_test \
      //:tensor_indexing_test \
      //:tensor_options_cuda_test \
      //:tensor_options_test \
      //:tensor_test \
      //:torch_dist_autograd_test \
      //:torch_include_test \
      //:transformer_test \
      //:test_bazel \
      //c10/cuda/test:test \
      //c10/test:core_tests \
      //c10/test:typeid_test \
      //c10/test:util/ssize_test \
      //c10/test:util_base_tests
  fi
}
1305
# Run the fastrnns pytest benchmarks (CUDA builds only, skipped under nogpu)
# for three fuser/executor combinations, and optionally upload the results to
# Scribe when not running under GitHub Actions.
test_benchmarks() {
  if [[ "$BUILD_ENVIRONMENT" == *cuda* && "$TEST_CONFIG" != *nogpu* ]]; then
    pip_install --user "pytest-benchmark==3.2.3"
    pip_install --user "requests"
    BENCHMARK_DATA="benchmarks/.data"
    # Quote the expansions below (SC2086) - previously unquoted.
    mkdir -p "${BENCHMARK_DATA}"
    pytest benchmarks/fastrnns/test_bench.py --benchmark-sort=Name --benchmark-json="${BENCHMARK_DATA}/fastrnns_default.json" --fuser=default --executor=default
    pytest benchmarks/fastrnns/test_bench.py --benchmark-sort=Name --benchmark-json="${BENCHMARK_DATA}/fastrnns_legacy_old.json" --fuser=old --executor=legacy
    pytest benchmarks/fastrnns/test_bench.py --benchmark-sort=Name --benchmark-json="${BENCHMARK_DATA}/fastrnns_profiling_te.json" --fuser=te --executor=profiling
    # TODO: Enable these for GHA once we have credentials for forked pull requests
    if [[ -z "${GITHUB_ACTIONS}" ]]; then
      python benchmarks/upload_scribe.py --pytest_bench_json "${BENCHMARK_DATA}/fastrnns_default.json"
      python benchmarks/upload_scribe.py --pytest_bench_json "${BENCHMARK_DATA}/fastrnns_legacy_old.json"
      python benchmarks/upload_scribe.py --pytest_bench_json "${BENCHMARK_DATA}/fastrnns_profiling_te.json"
    fi
    assert_git_not_dirty
  fi
}
1324
# Smoke-test that C++ extension AOT builds work in the current environment.
test_cpp_extensions() {
  # This is to test whether cpp extension build is compatible with current env. No need to test both ninja and no-ninja build
  time python test/run_test.py --include test_cpp_extensions_aot_ninja --verbose
  assert_git_not_dirty
}
1330
# Run the vec256 gtest binaries found in build/bin.  Skipped on ROCm.
test_vec256() {
  # This is to test vec256 instructions DEFAULT/AVX/AVX2 (platform dependent, some platforms might not support AVX/AVX2)
  if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
    echo "Testing vec256 instructions"
    # Resolve the report directory to an absolute path BEFORE pushd: the test
    # binaries run from build/bin, so a relative --gtest_output path would put
    # the XML under build/bin/test/... instead of the prepared directory.
    local vec256_reports_dir
    vec256_reports_dir="$(pwd)/test/test-reports/vec256"
    mkdir -p "$vec256_reports_dir"
    pushd build/bin
    vec256_tests=$(find . -maxdepth 1 -executable -name 'vec256_test*')
    for vec256_exec in $vec256_tests
    do
      # basename strips find's leading "./" so the XML file name is clean.
      $vec256_exec --gtest_output=xml:"$vec256_reports_dir/$(basename "$vec256_exec")".xml
    done
    popd
    assert_git_not_dirty
  fi
}
1346
# Delegate the docs build/test to its dedicated script.
test_docs_test() {
  .ci/pytorch/docs-test.sh
}
1350
# Build ExecuTorch (in /executorch) against this PR's PyTorch, then run its
# unit tests, C++ tests, and a model regression test, plus the torchgen
# edge-op registration test.
test_executorch() {
  echo "Install torchvision and torchaudio"
  install_torchvision
  install_torchaudio

  pushd /executorch

  export PYTHON_EXECUTABLE=python
  export EXECUTORCH_BUILD_PYBIND=ON
  export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"

  # NB: We need to rebuild ExecuTorch runner here because it depends on PyTorch
  # from the PR
  # shellcheck disable=SC1091
  source .ci/scripts/setup-linux.sh cmake

  echo "Run ExecuTorch unit tests"
  pytest -v -n auto
  # shellcheck disable=SC1091
  LLVM_PROFDATA=llvm-profdata-12 LLVM_COV=llvm-cov-12 bash test/run_oss_cpp_tests.sh

  echo "Run ExecuTorch regression tests for some models"
  # TODO(huydhn): Add more coverage here using ExecuTorch's gather models script
  # shellcheck disable=SC1091
  source .ci/scripts/test.sh mv3 cmake xnnpack-quantization-delegation ''

  popd

  # Test torchgen generated code for Executorch.
  echo "Testing ExecuTorch op registration"
  "$BUILD_BIN_DIR"/test_edge_op_registration

  assert_git_not_dirty
}
1385
# Curated test selection for linux-aarch64 CI: core Python tests plus a
# subset of Dynamo and Inductor suites known to be relevant on aarch64.
test_linux_aarch64(){
  # Core eager-mode tests
  python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
       test_transformers test_multiprocessing test_numpy_interop --verbose

  # Dynamo tests
  python test/run_test.py --include dynamo/test_compile dynamo/test_backends dynamo/test_comptime dynamo/test_config \
       dynamo/test_functions dynamo/test_fx_passes_pre_grad dynamo/test_interop dynamo/test_model_output dynamo/test_modules \
       dynamo/test_optimizers dynamo/test_recompile_ux dynamo/test_recompiles --verbose

  # Inductor tests
  python test/run_test.py --include inductor/test_torchinductor inductor/test_benchmark_fusion inductor/test_codecache \
       inductor/test_config inductor/test_control_flow inductor/test_coordinate_descent_tuner inductor/test_fx_fusion \
       inductor/test_group_batch_fusion inductor/test_inductor_freezing inductor/test_inductor_utils \
       inductor/test_inplacing_pass inductor/test_kernel_benchmark inductor/test_layout_optim \
       inductor/test_max_autotune inductor/test_memory_planning inductor/test_metrics inductor/test_multi_kernel inductor/test_pad_mm \
       inductor/test_pattern_matcher inductor/test_perf inductor/test_profiler inductor/test_select_algorithm inductor/test_smoke \
       inductor/test_split_cat_fx_passes inductor/test_standalone_compile inductor/test_torchinductor \
       inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes --verbose
}
1405
# Dump the torch build configuration and parallelization info up front so it
# appears in the CI log; skipped for libtorch and bazel builds, where an
# importable Python `torch` module is not expected to be present.
if [[ "${BUILD_ENVIRONMENT}" != *libtorch* && "${BUILD_ENVIRONMENT}" != *-bazel-* ]]; then
  (cd test && python -c "import torch; print(torch.__config__.show())")
  (cd test && python -c "import torch; print(torch.__config__.parallel_info())")
fi
# Top-level test dispatcher: selects exactly one test routine based on
# TEST_CONFIG / BUILD_ENVIRONMENT / SHARD_NUMBER. Branches are evaluated in
# order and the FIRST match wins, so ordering is significant — e.g. the
# *inductor_distributed* and *inductor-halide* checks must stay above the
# generic *inductor* check, and *torchbench* sub-configs are resolved inside
# the *torchbench* branch.
if [[ "${BUILD_ENVIRONMENT}" == *aarch64* && "${TEST_CONFIG}" != *perf_cpu_aarch64* ]]; then
  test_linux_aarch64
elif [[ "${TEST_CONFIG}" == *backward* ]]; then
  test_forward_backward_compatibility
  # Do NOT add tests after bc check tests, see its comment.
elif [[ "${TEST_CONFIG}" == *xla* ]]; then
  install_torchvision
  build_xla
  test_xla
elif [[ "${TEST_CONFIG}" == *executorch* ]]; then
  test_executorch
elif [[ "$TEST_CONFIG" == 'jit_legacy' ]]; then
  # Exact match (no globs): 'jit_legacy' is a dedicated config value.
  test_python_legacy_jit
elif [[ "${BUILD_ENVIRONMENT}" == *libtorch* ]]; then
  # TODO: run some C++ tests
  echo "no-op at the moment"
elif [[ "$TEST_CONFIG" == distributed ]]; then
  test_distributed
  # Only run RPC C++ tests on the first shard
  if [[ "${SHARD_NUMBER}" == 1 ]]; then
    test_rpc
  fi
elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
  test_inductor_distributed
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
  test_inductor_halide
elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
  test_inductor_micro_benchmark
elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
  # Dynamo benchmark shards are zero-based; SHARD_NUMBER is one-based.
  install_torchvision
  id=$((SHARD_NUMBER-1))
  test_dynamo_benchmark huggingface "$id"
elif [[ "${TEST_CONFIG}" == *timm* ]]; then
  install_torchvision
  id=$((SHARD_NUMBER-1))
  test_dynamo_benchmark timm_models "$id"
elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
  # Torchbench needs the full audio/text/vision stack installed first; the
  # CUDA variant of torchaudio is the default unless this is a cpu config.
  if [[ "${TEST_CONFIG}" == *cpu* ]]; then
    install_torchaudio cpu
  else
    install_torchaudio cuda
  fi
  install_torchtext
  install_torchvision
  TORCH_CUDA_ARCH_LIST="8.0;8.6" pip_install git+https://github.com/pytorch/ao.git
  # Zero-based shard id; consumed by the generic test_dynamo_benchmark
  # fallback branch below.
  id=$((SHARD_NUMBER-1))
  # https://github.com/opencv/opencv-python/issues/885
  pip_install opencv-python==4.8.0.74
  if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
    # Smoke-test perf run on a small, fixed model set.
    checkout_install_torchbench hf_Bert hf_Albert nanogpt timm_vision_transformer
    PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
  elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
    checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_edgecnn \
      llama_v2_7b_16h resnet50 timm_efficientnet mobilenet_v3_large timm_resnest \
      functorch_maml_omniglot yolov3 mobilenet_v2 resnext50_32x4d densenet121 mnasnet1_0
    PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_cpu_smoketest_perf
  elif [[ "${TEST_CONFIG}" == *torchbench_gcp_smoketest* ]]; then
    # GCP smoketest uses TORCHBENCHPATH (not PYTHONPATH) by convention.
    checkout_install_torchbench
    TORCHBENCHPATH=$(pwd)/torchbench test_torchbench_gcp_smoketest
  else
    # Full torchbench benchmark run for this shard.
    checkout_install_torchbench
    # Do this after checkout_install_torchbench to ensure we clobber any
    # nightlies that torchbench may pull in
    if [[ "${TEST_CONFIG}" != *cpu* ]]; then
      install_torchrec_and_fbgemm
    fi
    PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
  fi
elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper_abi_compatible* ]]; then
  install_torchvision
  test_inductor_cpp_wrapper_abi_compatible
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
  # Generic inductor config — must come after the more specific inductor-*
  # branches above.
  install_torchvision
  test_inductor_shard "${SHARD_NUMBER}"
  if [[ "${SHARD_NUMBER}" == 1 ]]; then
    if [[ "${BUILD_ENVIRONMENT}" != linux-jammy-py3.9-gcc11-build ]]; then
      test_inductor_distributed
    fi
  fi
elif [[ "${TEST_CONFIG}" == *dynamo* ]]; then
  install_torchvision
  test_dynamo_shard "${SHARD_NUMBER}"
  if [[ "${SHARD_NUMBER}" == 1 ]]; then
    test_aten
  fi
elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then
  # ROCm jobs with an explicit test list: run only the python shard + ATen.
  install_torchvision
  test_python_shard "$SHARD_NUMBER"
  test_aten
elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
  # Sharded default run, shard 1: carries the extra one-time checks
  # (numpy-less import test, ATen, first libtorch shard).
  test_without_numpy
  install_torchvision
  test_python_shard 1
  test_aten
  test_libtorch 1
  if [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
    test_xpu_bin
  fi
elif [[ "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
  # Sharded default run, shard 2: second libtorch shard plus the custom
  # ops/backend and benchmark smoke tests.
  install_torchvision
  test_python_shard 2
  test_libtorch 2
  test_aot_compilation
  test_custom_script_ops
  test_custom_backend
  test_torch_function_benchmark
elif [[ "${SHARD_NUMBER}" -gt 2 ]]; then
  # Handle arbitrary number of shards
  install_torchvision
  test_python_shard "$SHARD_NUMBER"
# NOTE(review): the vulkan/bazel/mobile branches below come AFTER the
# shard-number branches, so a vulkan/bazel build with NUM_TEST_SHARDS > 1
# would be captured by the shard branches first — presumably those builds
# always run unsharded; confirm against the CI workflow definitions.
elif [[ "${BUILD_ENVIRONMENT}" == *vulkan* ]]; then
  test_vulkan
elif [[ "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
  test_bazel
elif [[ "${BUILD_ENVIRONMENT}" == *-mobile-lightweight-dispatch* ]]; then
  test_libtorch
elif [[ "${TEST_CONFIG}" = docs_test ]]; then
  test_docs_test
elif [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
  install_torchvision
  test_python
  test_aten
  test_xpu_bin
else
  # Default (unsharded) run: the full battery.
  install_torchvision
  install_monkeytype
  test_python
  test_aten
  test_vec256
  test_libtorch
  test_aot_compilation
  test_custom_script_ops
  test_custom_backend
  test_torch_function_benchmark
  test_benchmarks
fi
1546