#!/bin/bash

# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.)

set -ex

# Suppress ANSI color escape sequences
export TERM=vt100

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

# Do not change workspace permissions for ROCm CI jobs
# as it can leave the workspace with bad permissions for cancelled jobs
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
  # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
  WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
  cleanup_workspace() {
    echo "sudo may print the following warning message that can be ignored. The chown command will still run."
    echo "    sudo: setrlimit(RLIMIT_STACK): Operation not permitted"
    echo "For more details refer to https://github.com/sudo-project/sudo/issues/42"
    sudo chown -R "$WORKSPACE_ORIGINAL_OWNER_ID" /var/lib/jenkins/workspace
  }
  # Disable shellcheck SC2064 as we want to parse the original owner immediately.
  # shellcheck disable=SC2064
  trap_add cleanup_workspace EXIT
  sudo chown -R jenkins /var/lib/jenkins/workspace
  git config --global --add safe.directory /var/lib/jenkins/workspace
fi

echo "Environment variables:"
env

TORCH_INSTALL_DIR=$(python -c "import site; print(site.getsitepackages()[0])")/torch
TORCH_BIN_DIR="$TORCH_INSTALL_DIR"/bin
TORCH_LIB_DIR="$TORCH_INSTALL_DIR"/lib
TORCH_TEST_DIR="$TORCH_INSTALL_DIR"/test

BUILD_DIR="build"
BUILD_RENAMED_DIR="build_renamed"
BUILD_BIN_DIR="$BUILD_DIR"/bin

# Set default values for these variables in case they are not set
SHARD_NUMBER="${SHARD_NUMBER:=1}"
NUM_TEST_SHARDS="${NUM_TEST_SHARDS:=1}"

export VALGRIND=ON
# export TORCH_INDUCTOR_INSTALL_GXX=ON
if [[ "$BUILD_ENVIRONMENT" == *clang9* ]]; then
  # clang9 appears to miscompile code involving c10::optional<c10::SymInt>,
  # such that valgrind complains along these lines:
  #
  #   Conditional jump or move depends on uninitialised value(s)
  #     at 0x40303A: ~optional_base (Optional.h:281)
  #     by 0x40303A: call (Dispatcher.h:448)
  #     by 0x40303A: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, c10::optional<c10::SymInt>) (basic.cpp:10)
  #     by 0x403700: main (basic.cpp:16)
  #   Uninitialised value was created by a stack allocation
  #     at 0x402AAA: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, c10::optional<c10::SymInt>) (basic.cpp:6)
  #
  # The problem does not appear with gcc or newer versions of clang (we tested
  # clang14). So we suppress valgrind testing for clang9 specifically.
  # You may need to suppress it for other versions of clang if they still have
  # the bug.
67 # 68 # A minimal repro for the valgrind error is below: 69 # 70 # #include <ATen/ATen.h> 71 # #include <ATen/core/dispatch/Dispatcher.h> 72 # 73 # using namespace at; 74 # 75 # Tensor call(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<c10::SymInt> storage_offset) { 76 # auto op = c10::Dispatcher::singleton() 77 # .findSchemaOrThrow(at::_ops::as_strided::name, at::_ops::as_strided::overload_name) 78 # .typed<at::_ops::as_strided::schema>(); 79 # return op.call(self, size, stride, storage_offset); 80 # } 81 # 82 # int main(int argv) { 83 # Tensor b = empty({3, 4}); 84 # auto z = call(b, b.sym_sizes(), b.sym_strides(), c10::nullopt); 85 # } 86 export VALGRIND=OFF 87fi 88 89if [[ "${PYTORCH_TEST_RERUN_DISABLED_TESTS}" == "1" ]] || [[ "${CONTINUE_THROUGH_ERROR}" == "1" ]]; then 90 # When rerunning disable tests, do not generate core dumps as it could consume 91 # the runner disk space when crashed tests are run multiple times. Running out 92 # of space is a nasty issue because there is no space left to even download the 93 # GHA to clean up the disk 94 # 95 # We also want to turn off core dump when CONTINUE_THROUGH_ERROR is set as there 96 # is a small risk of having multiple core files generated. Arguably, they are not 97 # that useful in this case anyway and the test will still continue 98 ulimit -c 0 99 100 # Note that by piping the core dump to a script set in /proc/sys/kernel/core_pattern 101 # as documented in https://man7.org/linux/man-pages/man5/core.5.html, we could 102 # dynamically stop generating more core file when the disk space drops below a 103 # certain threshold. However, this is not supported inside Docker container atm 104fi 105 106# Get fully qualified path using realpath 107if [[ "$BUILD_ENVIRONMENT" != *bazel* ]]; then 108 CUSTOM_TEST_ARTIFACT_BUILD_DIR=$(realpath "${CUSTOM_TEST_ARTIFACT_BUILD_DIR:-"build/custom_test_artifacts"}") 109fi 110 111# Reduce set of tests to include when running run_test.py 112if [[ -n $TESTS_TO_INCLUDE ]]; then 113 echo "Setting INCLUDE_CLAUSE" 114 INCLUDE_CLAUSE="--include $TESTS_TO_INCLUDE" 115fi 116 117echo "Environment variables" 118env 119 120echo "Testing pytorch" 121 122export LANG=C.UTF-8 123 124PR_NUMBER=${PR_NUMBER:-${CIRCLE_PR_NUMBER:-}} 125 126if [[ "$TEST_CONFIG" == 'default' ]]; then 127 export CUDA_VISIBLE_DEVICES=0 128 export HIP_VISIBLE_DEVICES=0 129fi 130 131if [[ "$TEST_CONFIG" == 'distributed' ]] && [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then 132 export HIP_VISIBLE_DEVICES=0,1 133fi 134 135if [[ "$TEST_CONFIG" == 'slow' ]]; then 136 export PYTORCH_TEST_WITH_SLOW=1 137 export PYTORCH_TEST_SKIP_FAST=1 138fi 139 140if [[ "$BUILD_ENVIRONMENT" == *slow-gradcheck* ]]; then 141 export PYTORCH_TEST_WITH_SLOW_GRADCHECK=1 142 # TODO: slow gradcheck tests run out of memory a lot recently, so setting this 143 # to run them sequentially with only one process to mitigate the issue 144 export PYTORCH_TEST_CUDA_MEM_LEAK_CHECK=1 145fi 146 147if [[ "$BUILD_ENVIRONMENT" == *cuda* || "$BUILD_ENVIRONMENT" == *rocm* ]]; then 148 # Used so that only cuda/rocm specific versions of tests are generated 149 # mainly used so that we're not spending extra cycles testing cpu 150 # devices on expensive gpu machines 151 export PYTORCH_TESTING_DEVICE_ONLY_FOR="cuda" 152elif [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then 153 export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu" 154 # setting PYTHON_TEST_EXTRA_OPTION 155 export PYTHON_TEST_EXTRA_OPTION="--xpu" 156fi 157 158if [[ "$TEST_CONFIG" == *crossref* ]]; then 159 
  export PYTORCH_TEST_WITH_CROSSREF=1
fi

if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
  # regression in ROCm 6.0 on MI50 CI runners due to hipblaslt; remove in 6.1
  export VALGRIND=OFF
  # Print GPU info
  rocminfo
  rocminfo | grep -E 'Name:.*\sgfx|Marketing'
fi

if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
  # Source the Intel oneAPI environment script to enable the xpu runtime related libraries;
  # refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-5.html
  # shellcheck disable=SC1091
  source /opt/intel/oneapi/compiler/latest/env/vars.sh
  # Check XPU status before testing
  xpu-smi discovery
fi

if [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
  # JIT C++ extensions require ninja.
  pip_install --user "ninja==1.10.2"
  # ninja is installed in $HOME/.local/bin, e.g., /var/lib/jenkins/.local/bin for CI user jenkins,
  # but this script should be runnable by any user, including root
  export PATH="$HOME/.local/bin:$PATH"
fi

if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then
  # TODO: revisit this once the CI is stabilized on aarch64 linux
  export VALGRIND=OFF
fi

install_tlparse

# DANGER WILL ROBINSON. The LD_PRELOAD here could cause you problems
# if you're not careful. Check this if you made some changes and the
# ASAN test is not working.
if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
  export ASAN_OPTIONS=detect_leaks=0:symbolize=1:detect_stack_use_after_return=true:strict_init_order=true:detect_odr_violation=1:detect_container_overflow=0:check_initialization_order=true:debug=true
  export UBSAN_OPTIONS=print_stacktrace=1:suppressions=$PWD/ubsan.supp
  export PYTORCH_TEST_WITH_ASAN=1
  export PYTORCH_TEST_WITH_UBSAN=1
  # TODO: Figure out how to avoid hard-coding these paths
  export ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-15/bin/llvm-symbolizer
  export TORCH_USE_RTLD_GLOBAL=1
  # NB: We load libtorch.so with RTLD_GLOBAL for UBSAN, unlike our
  # default behavior.
  #
  # The reason for this is that without RTLD_GLOBAL, if we load multiple
  # libraries that depend on libtorch (as is the case with C++ extensions), we
  # will get multiple copies of libtorch in our address space. When UBSAN is
  # turned on, it will do a bunch of virtual pointer consistency checks which
  # won't work correctly. When this happens, you get a violation like:
  #
  #   member call on address XXXXXX which does not point to an object of
  #   type 'std::_Sp_counted_base<__gnu_cxx::_Lock_policy::_S_atomic>'
  #   XXXXXX note: object is of type
  #   'std::_Sp_counted_ptr<torch::nn::LinearImpl*, (__gnu_cxx::_Lock_policy)2>'
  #
  # (NB: the textual types of the objects here are misleading, because
  # they actually line up; it just so happens that there are two copies
  # of the type info floating around in the address space, so they
  # don't pointer compare equal. See also
  # https://github.com/google/sanitizers/issues/1175.)
  #
  # UBSAN is kind of right here: if we relied on RTTI across C++ extension
  # modules they would indeed do the wrong thing; but in our codebase, we
  # don't use RTTI (because it doesn't work on mobile). To appease
  # UBSAN, however, it's better if we ensure all the copies agree!
  #
  # By the way, an earlier version of this code attempted to load
  # libtorch_python.so with LD_PRELOAD, which has a similar effect of causing
  # it to be loaded globally.
This isn't really a good idea though, because 233 # it depends on a ton of dynamic libraries that most programs aren't gonna 234 # have, and it applies to child processes. 235 236 # TODO: get rid of the hardcoded path 237 export LD_PRELOAD=/usr/lib/llvm-15/lib/clang/15.0.7/lib/linux/libclang_rt.asan-x86_64.so 238 # Disable valgrind for asan 239 export VALGRIND=OFF 240 241 (cd test && python -c "import torch; print(torch.__version__, torch.version.git_version)") 242 echo "The next four invocations are expected to crash; if they don't that means ASAN/UBSAN is misconfigured" 243 (cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_csrc_asan(3)") 244 #(cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_csrc_ubsan(0)") 245 (cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_vptr_ubsan()") 246 (cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_aten_asan(3)") 247fi 248 249# The torch._C._crash_if_debug_asserts_fail() function should only fail if both of the following are true: 250# 1. The build is in debug mode 251# 2. The value 424242 is passed in 252# This tests that the debug asserts are working correctly. 253if [[ "$BUILD_ENVIRONMENT" == *-debug* ]]; then 254 echo "We are in debug mode: $BUILD_ENVIRONMENT. Expect the python assertion to fail" 255 (cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_debug_asserts_fail(424242)") 256elif [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then 257 # Noop when debug is disabled. Skip bazel jobs because torch isn't available there yet. 258 echo "We are not in debug mode: $BUILD_ENVIRONMENT. Expect the assertion to pass" 259 (cd test && python -c "import torch; torch._C._crash_if_debug_asserts_fail(424242)") 260fi 261 262if [[ $TEST_CONFIG == 'nogpu_NO_AVX2' ]]; then 263 export ATEN_CPU_CAPABILITY=default 264elif [[ $TEST_CONFIG == 'nogpu_AVX512' ]]; then 265 export ATEN_CPU_CAPABILITY=avx2 266fi 267 268test_python_legacy_jit() { 269 time python test/run_test.py --include test_jit_legacy test_jit_fuser_legacy --verbose 270 assert_git_not_dirty 271} 272 273test_python_shard() { 274 if [[ -z "$NUM_TEST_SHARDS" ]]; then 275 echo "NUM_TEST_SHARDS must be defined to run a Python test shard" 276 exit 1 277 fi 278 279 # Bare --include flag is not supported and quoting for lint ends up with flag not being interpreted correctly 280 # shellcheck disable=SC2086 281 282 # modify LD_LIBRARY_PATH to ensure it has the conda env. 283 # This set of tests has been shown to be buggy without it for the split-build 284 time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose $PYTHON_TEST_EXTRA_OPTION 285 286 assert_git_not_dirty 287} 288 289test_python() { 290 # shellcheck disable=SC2086 291 time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --verbose $PYTHON_TEST_EXTRA_OPTION 292 assert_git_not_dirty 293} 294 295 296test_dynamo_shard() { 297 if [[ -z "$NUM_TEST_SHARDS" ]]; then 298 echo "NUM_TEST_SHARDS must be defined to run a Python test shard" 299 exit 1 300 fi 301 python tools/dynamo/verify_dynamo.py 302 # PLEASE DO NOT ADD ADDITIONAL EXCLUDES HERE. 303 # Instead, use @skipIfTorchDynamo on your tests. 
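  # As a reference only (this is not executed by the script): a single test is
  # usually opted out of the Dynamo config in the test file itself, with the
  # decorator from torch.testing._internal.common_utils, roughly:
  #
  #   @skipIfTorchDynamo("reason this test cannot run under dynamo")
  #   def test_example(self):
  #       ...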
304 time python test/run_test.py --dynamo \ 305 --exclude-inductor-tests \ 306 --exclude-jit-executor \ 307 --exclude-distributed-tests \ 308 --exclude-torch-export-tests \ 309 --shard "$1" "$NUM_TEST_SHARDS" \ 310 --verbose 311 assert_git_not_dirty 312} 313 314test_inductor_distributed() { 315 # Smuggle a few multi-gpu tests here so that we don't have to request another large node 316 echo "Testing multi_gpu tests in test_torchinductor" 317 python test/run_test.py -i inductor/test_torchinductor.py -k test_multi_gpu --verbose 318 python test/run_test.py -i inductor/test_aot_inductor.py -k test_non_default_cuda_device --verbose 319 python test/run_test.py -i inductor/test_aot_inductor.py -k test_replicate_on_devices --verbose 320 python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose 321 python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose 322 python test/run_test.py -i distributed/tensor/parallel/test_micro_pipeline_tp.py --verbose 323 python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose 324 python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_multi_group --verbose 325 python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_with_activation_checkpointing --verbose 326 python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_hsdp --verbose 327 python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_2d_transformer_checkpoint_resume --verbose 328 python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_gradient_accumulation --verbose 329 python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_state_dict.py -k test_dp_state_dict_save_load --verbose 330 python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_frozen.py --verbose 331 python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype --verbose 332 python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype --verbose 333 python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py -k test_clip_grad_norm_2d --verbose 334 python test/run_test.py -i distributed/fsdp/test_fsdp_tp_integration.py -k test_fsdp_tp_integration --verbose 335 336 # this runs on both single-gpu and multi-gpu instance. 
  # It should be smart about skipping tests that aren't supported
  # if the required number of GPUs isn't available.
  python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives --verbose
  assert_git_not_dirty
}

test_inductor_shard() {
  if [[ -z "$NUM_TEST_SHARDS" ]]; then
    echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
    exit 1
  fi

  python tools/dynamo/verify_dynamo.py
  python test/run_test.py --inductor \
    --include test_modules test_ops test_ops_gradients test_torch \
    --shard "$1" "$NUM_TEST_SHARDS" \
    --verbose

  # Do not add --inductor for the following inductor unit tests, otherwise we will fail because of nested dynamo state
  python test/run_test.py \
    --include inductor/test_torchinductor inductor/test_torchinductor_opinfo inductor/test_aot_inductor \
    --shard "$1" "$NUM_TEST_SHARDS" \
    --verbose
}

test_inductor_aoti() {
  # docker build uses bdist_wheel which does not work with test_aot_inductor
  # TODO: need a faster way to build
  if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
    # We need to hipify before building again
    python3 tools/amd_build/build_amd.py
  fi
  BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
  CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
}

test_inductor_cpp_wrapper_abi_compatible() {
  export TORCHINDUCTOR_ABI_COMPATIBLE=1
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

  echo "Testing Inductor cpp wrapper mode with TORCHINDUCTOR_ABI_COMPATIBLE=1"
  # cpu stack allocation causes segfault and needs more investigation
  PYTORCH_TESTING_DEVICE_ONLY_FOR="" python test/run_test.py --include inductor/test_cpu_cpp_wrapper
  python test/run_test.py --include inductor/test_cuda_cpp_wrapper

  TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \
    --training --inductor --disable-cudagraphs --only vit_base_patch16_224 \
    --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv"
  python benchmarks/dynamo/check_accuracy.py \
    --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \
    --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv"
}

# "Global" flags for inductor benchmarking controlled by TEST_CONFIG
# For example, the 'dynamic_aot_eager_torchbench' TEST_CONFIG means we run
# the benchmark script with '--dynamic-shapes --backend aot_eager --device cuda'.
# The matrix of test options is specified in .github/workflows/inductor.yml,
# .github/workflows/inductor-periodic.yml, and
# .github/workflows/inductor-perf-test-nightly.yml
DYNAMO_BENCHMARK_FLAGS=()

pr_time_benchmarks() {

  pip_install --user "fbscribelogger"

  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"
  PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_after.txt" "benchmarks/dynamo/pr_time_benchmarks/benchmarks"
  echo "benchmark results on current PR: "
  cat "$TEST_REPORTS_DIR/pr_time_benchmarks_after.txt"

}

if [[ "${TEST_CONFIG}" == *pr_time_benchmarks* ]]; then
  pr_time_benchmarks
  exit 0
elif [[ "${TEST_CONFIG}" == *dynamo_eager* ]]; then
  DYNAMO_BENCHMARK_FLAGS+=(--backend eager)
415elif [[ "${TEST_CONFIG}" == *aot_eager* ]]; then 416 DYNAMO_BENCHMARK_FLAGS+=(--backend aot_eager) 417elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then 418 DYNAMO_BENCHMARK_FLAGS+=(--export-aot-inductor) 419elif [[ "${TEST_CONFIG}" == *inductor* && "${TEST_CONFIG}" != *perf* ]]; then 420 DYNAMO_BENCHMARK_FLAGS+=(--inductor) 421fi 422 423if [[ "${TEST_CONFIG}" == *dynamic* ]]; then 424 DYNAMO_BENCHMARK_FLAGS+=(--dynamic-shapes --dynamic-batch-only) 425fi 426 427if [[ "${TEST_CONFIG}" == *cpu* ]]; then 428 DYNAMO_BENCHMARK_FLAGS+=(--device cpu) 429else 430 DYNAMO_BENCHMARK_FLAGS+=(--device cuda) 431fi 432 433test_perf_for_dashboard() { 434 TEST_REPORTS_DIR=$(pwd)/test/test-reports 435 mkdir -p "$TEST_REPORTS_DIR" 436 437 local suite="$1" 438 shift 439 440 local backend=inductor 441 local modes=() 442 if [[ "$DASHBOARD_TAG" == *training-true* ]]; then 443 modes+=(training) 444 fi 445 if [[ "$DASHBOARD_TAG" == *inference-true* ]]; then 446 modes+=(inference) 447 fi 448 # TODO: All the accuracy tests can be skipped once the CI accuracy checking is stable enough 449 local targets=(accuracy performance) 450 451 local device=cuda 452 if [[ "${TEST_CONFIG}" == *cpu* ]]; then 453 if [[ "${TEST_CONFIG}" == *cpu_x86* ]]; then 454 device=cpu_x86 455 elif [[ "${TEST_CONFIG}" == *cpu_aarch64* ]]; then 456 device=cpu_aarch64 457 fi 458 test_inductor_set_cpu_affinity 459 elif [[ "${TEST_CONFIG}" == *cuda_a10g* ]]; then 460 device=cuda_a10g 461 fi 462 463 for mode in "${modes[@]}"; do 464 if [[ "$mode" == "inference" ]]; then 465 dtype=bfloat16 466 elif [[ "$mode" == "training" ]]; then 467 dtype=amp 468 fi 469 for target in "${targets[@]}"; do 470 local target_flag=("--${target}") 471 if [[ "$target" == "performance" ]]; then 472 target_flag+=( --cold-start-latency) 473 elif [[ "$target" == "accuracy" ]]; then 474 target_flag+=( --no-translation-validation) 475 fi 476 477 if [[ "$DASHBOARD_TAG" == *default-true* ]]; then 478 $TASKSET python "benchmarks/dynamo/$suite.py" \ 479 "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \ 480 --output "$TEST_REPORTS_DIR/${backend}_no_cudagraphs_${suite}_${dtype}_${mode}_${device}_${target}.csv" 481 fi 482 if [[ "$DASHBOARD_TAG" == *cudagraphs-true* ]]; then 483 $TASKSET python "benchmarks/dynamo/$suite.py" \ 484 "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" \ 485 --output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_${mode}_${device}_${target}.csv" 486 fi 487 if [[ "$DASHBOARD_TAG" == *dynamic-true* ]]; then 488 $TASKSET python "benchmarks/dynamo/$suite.py" \ 489 "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --dynamic-shapes \ 490 --dynamic-batch-only "$@" \ 491 --output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_${device}_${target}.csv" 492 fi 493 if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]] && [[ "$mode" == "inference" ]]; then 494 TORCHINDUCTOR_CPP_WRAPPER=1 $TASKSET python "benchmarks/dynamo/$suite.py" \ 495 "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \ 496 --output "$TEST_REPORTS_DIR/${backend}_cpp_wrapper_${suite}_${dtype}_${mode}_${device}_${target}.csv" 497 fi 498 if [[ "$DASHBOARD_TAG" == *freezing_cudagraphs-true* ]] && [[ "$mode" == "inference" ]]; then 499 $TASKSET python "benchmarks/dynamo/$suite.py" \ 500 "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" --freezing \ 501 --output 
"$TEST_REPORTS_DIR/${backend}_with_cudagraphs_freezing_${suite}_${dtype}_${mode}_${device}_${target}.csv" 502 fi 503 if [[ "$DASHBOARD_TAG" == *freeze_autotune_cudagraphs-true* ]] && [[ "$mode" == "inference" ]]; then 504 TORCHINDUCTOR_MAX_AUTOTUNE=1 $TASKSET python "benchmarks/dynamo/$suite.py" \ 505 "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" --freezing \ 506 --output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_freezing_autotune_${suite}_${dtype}_${mode}_${device}_${target}.csv" 507 fi 508 if [[ "$DASHBOARD_TAG" == *aotinductor-true* ]] && [[ "$mode" == "inference" ]]; then 509 if [[ "$target" == "accuracy" ]]; then 510 # Also collect Export pass rate and display as a separate row 511 $TASKSET python "benchmarks/dynamo/$suite.py" \ 512 "${target_flag[@]}" --"$mode" --"$dtype" --export --disable-cudagraphs "$@" \ 513 --output "$TEST_REPORTS_DIR/${backend}_export_${suite}_${dtype}_${mode}_${device}_${target}.csv" 514 fi 515 TORCHINDUCTOR_ABI_COMPATIBLE=1 $TASKSET python "benchmarks/dynamo/$suite.py" \ 516 "${target_flag[@]}" --"$mode" --"$dtype" --export-aot-inductor --disable-cudagraphs "$@" \ 517 --output "$TEST_REPORTS_DIR/${backend}_aot_inductor_${suite}_${dtype}_${mode}_${device}_${target}.csv" 518 fi 519 if [[ "$DASHBOARD_TAG" == *maxautotune-true* ]]; then 520 TORCHINDUCTOR_MAX_AUTOTUNE=1 $TASKSET python "benchmarks/dynamo/$suite.py" \ 521 "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" \ 522 --output "$TEST_REPORTS_DIR/${backend}_max_autotune_${suite}_${dtype}_${mode}_${device}_${target}.csv" 523 fi 524 if [[ "$DASHBOARD_TAG" == *cudagraphs_low_precision-true* ]] && [[ "$mode" == "inference" ]]; then 525 # TODO: This has a new dtype called quant and the benchmarks script needs to be updated to support this. 526 # The tentative command is as follows. It doesn't work now, but it's ok because we only need mock data 527 # to fill the dashboard. 528 $TASKSET python "benchmarks/dynamo/$suite.py" \ 529 "${target_flag[@]}" --"$mode" --quant --backend "$backend" "$@" \ 530 --output "$TEST_REPORTS_DIR/${backend}_cudagraphs_low_precision_${suite}_quant_${mode}_${device}_${target}.csv" || true 531 # Copy cudagraph results as mock data, easiest choice? 532 cp "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_${mode}_${device}_${target}.csv" \ 533 "$TEST_REPORTS_DIR/${backend}_cudagraphs_low_precision_${suite}_quant_${mode}_${device}_${target}.csv" 534 fi 535 done 536 done 537} 538 539test_single_dynamo_benchmark() { 540 # Usage: test_single_dynamo_benchmark inductor_inference huggingface 0 --args-for-script 541 542 # Use test-reports directory under test folder will allow the CI to automatically pick up 543 # the test reports and upload them to S3. 
  # Need to use the full path here, otherwise the script
  # will bark about file not found later on.
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

  local name="$1"
  shift
  local suite="$1"
  shift
  # the shard id argument is mandatory, even if it may be empty
  local shard_id="$1"
  shift

  local partition_flags=()
  if [[ -n "$NUM_TEST_SHARDS" && -n "$shard_id" ]]; then
    partition_flags=( --total-partitions "$NUM_TEST_SHARDS" --partition-id "$shard_id" )
  fi

  if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
    python "benchmarks/dynamo/$suite.py" \
      --ci --performance --disable-cudagraphs --inductor \
      "${DYNAMO_BENCHMARK_FLAGS[@]}" "$@" "${partition_flags[@]}" \
      --output "$TEST_REPORTS_DIR/${name}_${suite}.csv"
  elif [[ "${TEST_CONFIG}" == *perf* ]]; then
    test_perf_for_dashboard "$suite" \
      "${DYNAMO_BENCHMARK_FLAGS[@]}" "$@" "${partition_flags[@]}"
  else
    if [[ "${TEST_CONFIG}" == *aot_inductor* && "${TEST_CONFIG}" != *cpu_aot_inductor* ]]; then
      # Test AOTInductor with the ABI-compatible mode on CI.
      # This can be removed once the ABI-compatible mode becomes the default.
      # For the CPU device, we prefer the non ABI-compatible mode on CI when testing AOTInductor.
      export TORCHINDUCTOR_ABI_COMPATIBLE=1
    fi

    if [[ "${TEST_CONFIG}" == *_avx2* ]]; then
      TEST_CONFIG=${TEST_CONFIG//_avx2/}
    fi
    if [[ "${TEST_CONFIG}" == *_avx512* ]]; then
      TEST_CONFIG=${TEST_CONFIG//_avx512/}
    fi
    python "benchmarks/dynamo/$suite.py" \
      --ci --accuracy --timing --explain \
      "${DYNAMO_BENCHMARK_FLAGS[@]}" \
      "$@" "${partition_flags[@]}" \
      --output "$TEST_REPORTS_DIR/${name}_${suite}.csv"
    python benchmarks/dynamo/check_accuracy.py \
      --actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
      --expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
    python benchmarks/dynamo/check_graph_breaks.py \
      --actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
      --expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
  fi
}

test_inductor_micro_benchmark() {
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  if [[ "${TEST_CONFIG}" == *cpu* ]]; then
    test_inductor_set_cpu_affinity
  fi
  python benchmarks/gpt_fast/benchmark.py --output "${TEST_REPORTS_DIR}/gpt_fast_benchmark.csv"
}

test_inductor_halide() {
  python test/run_test.py --include inductor/test_halide.py --verbose
  assert_git_not_dirty
}

test_dynamo_benchmark() {
  # Usage: test_dynamo_benchmark huggingface 0
  TEST_REPORTS_DIR=$(pwd)/test/test-reports

  local suite="$1"
  shift
  local shard_id="$1"
  shift

  if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
    test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
  elif [[ "${TEST_CONFIG}" == *perf* ]]; then
    test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
  else
    if [[ "${TEST_CONFIG}" == *cpu* ]]; then
      local dt="float32"
      if [[ "${TEST_CONFIG}" == *amp* ]]; then
        dt="amp"
      fi
      if [[ "${TEST_CONFIG}" == *freezing* ]]; then
        test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --"$dt" --freezing "$@"
      else
        test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --"$dt" "$@"
      fi
    elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
      test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
    else
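      # Default (non-CPU, non-AOTInductor) configs run both an inference pass
      # and a training pass of the benchmark suite.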
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@" 638 test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@" 639 fi 640 fi 641} 642 643test_inductor_torchbench_smoketest_perf() { 644 TEST_REPORTS_DIR=$(pwd)/test/test-reports 645 mkdir -p "$TEST_REPORTS_DIR" 646 647 # Test some models in the cpp wrapper mode 648 TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ 649 --bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" 650 TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ 651 --bfloat16 --inference --inductor --only llama --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" 652 TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ 653 --bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" 654 python benchmarks/dynamo/check_accuracy.py \ 655 --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \ 656 --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv" 657 658 python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \ 659 --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \ 660 --output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" 661 # The threshold value needs to be actively maintained to make this check useful 662 python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4 663 664 TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \ 665 --export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" 666 # The threshold value needs to be actively maintained to make this check useful 667 # The perf number of nanogpt seems not very stable, e.g. 668 # https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314, 669 # and thus we lower its threshold to reduce flakiness. If this continues to be a problem, 670 # we switch to use some other model. 671 python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9 672 673 # Check memory compression ratio for a few models 674 for test in hf_Albert timm_vision_transformer; do 675 python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \ 676 --disable-cudagraphs --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" \ 677 --only $test --output "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv" 678 cat "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv" 679 python benchmarks/dynamo/check_memory_compression_ratio.py --actual \ 680 "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv" \ 681 --expected benchmarks/dynamo/expected_ci_perf_inductor_torchbench.csv 682 done 683 684 # Perform some "warm-start" runs for a few huggingface models. 
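  # (--warm-start-latency is expected to run each benchmark a second time after
  # an initial cold run, so the accuracy check below should also cover the
  # cache-warm path.)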
685 for test in AlbertForQuestionAnswering AllenaiLongformerBase DistilBertForMaskedLM DistillGPT2 GoogleFnet YituTechConvBert; do 686 python benchmarks/dynamo/huggingface.py --accuracy --training --amp --inductor --device cuda --warm-start-latency \ 687 --only $test --output "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv" 688 python benchmarks/dynamo/check_accuracy.py \ 689 --actual "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv" \ 690 --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv" 691 done 692} 693 694test_inductor_get_core_number() { 695 if [[ "${TEST_CONFIG}" == *aarch64* ]]; then 696 echo "$(($(lscpu | grep 'Cluster(s):' | awk '{print $2}') * $(lscpu | grep 'Core(s) per cluster:' | awk '{print $4}')))" 697 else 698 echo "$(($(lscpu | grep 'Socket(s):' | awk '{print $2}') * $(lscpu | grep 'Core(s) per socket:' | awk '{print $4}')))" 699 fi 700} 701 702test_inductor_set_cpu_affinity(){ 703 #set jemalloc 704 JEMALLOC_LIB="$(find /usr/lib -name libjemalloc.so.2)" 705 export LD_PRELOAD="$JEMALLOC_LIB":"$LD_PRELOAD" 706 export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1" 707 708 if [[ "${TEST_CONFIG}" != *aarch64* ]]; then 709 # Use Intel OpenMP for x86 710 IOMP_LIB="$(dirname "$(which python)")/../lib/libiomp5.so" 711 export LD_PRELOAD="$IOMP_LIB":"$LD_PRELOAD" 712 export KMP_AFFINITY=granularity=fine,compact,1,0 713 export KMP_BLOCKTIME=1 714 fi 715 cores=$(test_inductor_get_core_number) 716 export OMP_NUM_THREADS=$cores 717 end_core=$((cores-1)) 718 export TASKSET="taskset -c 0-$end_core" 719} 720 721test_inductor_torchbench_cpu_smoketest_perf(){ 722 TEST_REPORTS_DIR=$(pwd)/test/test-reports 723 mkdir -p "$TEST_REPORTS_DIR" 724 725 test_inductor_set_cpu_affinity 726 MODELS_SPEEDUP_TARGET=benchmarks/dynamo/expected_ci_speedup_inductor_torchbench_cpu.csv 727 728 grep -v '^ *#' < "$MODELS_SPEEDUP_TARGET" | while IFS=',' read -r -a model_cfg 729 do 730 local model_name=${model_cfg[0]} 731 local data_type=${model_cfg[2]} 732 local speedup_target=${model_cfg[5]} 733 local backend=${model_cfg[1]} 734 if [[ ${model_cfg[4]} == "cpp" ]]; then 735 export TORCHINDUCTOR_CPP_WRAPPER=1 736 else 737 unset TORCHINDUCTOR_CPP_WRAPPER 738 fi 739 local output_name="$TEST_REPORTS_DIR/inductor_inference_${model_cfg[0]}_${model_cfg[1]}_${model_cfg[2]}_${model_cfg[3]}_cpu_smoketest.csv" 740 741 if [[ ${model_cfg[3]} == "dynamic" ]]; then 742 $TASKSET python benchmarks/dynamo/torchbench.py \ 743 --inference --performance --"$data_type" -dcpu -n50 --only "$model_name" --dynamic-shapes \ 744 --dynamic-batch-only --freezing --timeout 9000 --"$backend" --output "$output_name" 745 else 746 $TASKSET python benchmarks/dynamo/torchbench.py \ 747 --inference --performance --"$data_type" -dcpu -n50 --only "$model_name" \ 748 --freezing --timeout 9000 --"$backend" --output "$output_name" 749 fi 750 cat "$output_name" 751 # The threshold value needs to be actively maintained to make this check useful. 752 python benchmarks/dynamo/check_perf_csv.py -f "$output_name" -t "$speedup_target" 753 done 754 755 # Add a few ABI-compatible accuracy tests for CPU. These can be removed once we turn on ABI-compatible as default. 
  TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/timm_models.py --device cpu --accuracy \
    --bfloat16 --inference --export-aot-inductor --disable-cudagraphs --only adv_inception_v3 \
    --output "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv"
  TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/timm_models.py --device cpu --accuracy \
    --bfloat16 --inference --export-aot-inductor --disable-cudagraphs --only beit_base_patch16_224 \
    --output "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv"
  python benchmarks/dynamo/check_accuracy.py \
    --actual "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv" \
    --expected "benchmarks/dynamo/ci_expected_accuracy/aot_inductor_timm_inference.csv"
}

test_torchbench_gcp_smoketest(){
  pushd "${TORCHBENCHPATH}"
  python test.py -v
  popd
}

test_python_gloo_with_tls() {
  source "$(dirname "${BASH_SOURCE[0]}")/run_glootls_test.sh"
  assert_git_not_dirty
}


test_aten() {
  # Test ATen
  # The following test(s) of ATen have already been skipped by caffe2 in the rocm environment:
  #   scalar_tensor_test, basic, native_test
  echo "Running ATen tests with pytorch lib"

  if [[ -n "$IN_WHEEL_TEST" ]]; then
    echo "Running test with the install folder"
    # Rename the build folder while running the test to ensure the test
    # does not depend on the build folder
    mv "$BUILD_DIR" "$BUILD_RENAMED_DIR"
    TEST_BASE_DIR="$TORCH_TEST_DIR"
  else
    echo "Running test with the build folder"
    TEST_BASE_DIR="$BUILD_BIN_DIR"
  fi

  # NB: the ATen test binaries don't have RPATH set, so it's necessary to
  # put the dynamic libraries somewhere where the dynamic linker can find them.
  # This is a bit of a hack.
  ${SUDO} ln -sf "$TORCH_LIB_DIR"/libc10* "$TEST_BASE_DIR"
  ${SUDO} ln -sf "$TORCH_LIB_DIR"/libcaffe2* "$TEST_BASE_DIR"
  ${SUDO} ln -sf "$TORCH_LIB_DIR"/libmkldnn* "$TEST_BASE_DIR"
  ${SUDO} ln -sf "$TORCH_LIB_DIR"/libnccl* "$TEST_BASE_DIR"
  ${SUDO} ln -sf "$TORCH_LIB_DIR"/libtorch* "$TEST_BASE_DIR"

  ls "$TEST_BASE_DIR"
  aten/tools/run_tests.sh "$TEST_BASE_DIR"

  if [[ -n "$IN_WHEEL_TEST" ]]; then
    # Restore the build folder to avoid any impact on other tests
    mv "$BUILD_RENAMED_DIR" "$BUILD_DIR"
  fi

  assert_git_not_dirty
}

test_without_numpy() {
  pushd "$(dirname "${BASH_SOURCE[0]}")"
  python -c "import sys;sys.path.insert(0, 'fake_numpy');from unittest import TestCase;import torch;x=torch.randn(3,3);TestCase().assertRaises(RuntimeError, lambda: x.numpy())"
  # Regression test for https://github.com/pytorch/pytorch/issues/66353
  python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch;print(torch.tensor([torch.tensor(0.), torch.tensor(1.)]))"
  # Regression test for https://github.com/pytorch/pytorch/issues/109387
  if [[ "${TEST_CONFIG}" == *dynamo* ]]; then
    python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch;torch.compile(lambda x:print(x))('Hello World')"
  fi
  popd
}

test_libtorch() {
  local SHARD="$1"

  # The slow test config corresponds to a default test config that should run
  # the libtorch tests instead.
833 if [[ "$TEST_CONFIG" != "slow" ]]; then 834 echo "Testing libtorch" 835 ln -sf "$TORCH_LIB_DIR"/libbackend_with_compiler.so "$TORCH_BIN_DIR" 836 ln -sf "$TORCH_LIB_DIR"/libjitbackend_test.so "$TORCH_BIN_DIR" 837 ln -sf "$TORCH_LIB_DIR"/libcaffe2_nvrtc.so "$TORCH_BIN_DIR" 838 ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR" 839 ln -sf "$TORCH_LIB_DIR"/libshm* "$TORCH_BIN_DIR" 840 ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR" 841 ln -sf "$TORCH_LIB_DIR"/libnvfuser* "$TORCH_BIN_DIR" 842 843 export CPP_TESTS_DIR="${TORCH_BIN_DIR}" 844 845 if [[ -z "${SHARD}" || "${SHARD}" == "1" ]]; then 846 test_libtorch_api 847 fi 848 849 if [[ -z "${SHARD}" || "${SHARD}" == "2" ]]; then 850 test_libtorch_jit 851 fi 852 853 assert_git_not_dirty 854 fi 855} 856 857test_libtorch_jit() { 858 # Prepare the model used by test_jit, the model needs to be in the test directory 859 # to get picked up by run_test 860 pushd test 861 python cpp/jit/tests_setup.py setup 862 popd 863 864 # Run jit and lazy tensor cpp tests together to finish them faster 865 if [[ "$BUILD_ENVIRONMENT" == *cuda* && "$TEST_CONFIG" != *nogpu* ]]; then 866 LTC_TS_CUDA=1 python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/test_lazy 867 else 868 # CUDA tests have already been skipped when CUDA is not available 869 python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/test_lazy -k "not CUDA" 870 fi 871 872 # Cleaning up test artifacts in the test folder 873 pushd test 874 python cpp/jit/tests_setup.py shutdown 875 popd 876} 877 878test_libtorch_api() { 879 # Start background download 880 MNIST_DIR="${PWD}/test/cpp/api/mnist" 881 python tools/download_mnist.py --quiet -d "${MNIST_DIR}" 882 883 if [[ "$BUILD_ENVIRONMENT" == *asan* || "$BUILD_ENVIRONMENT" == *slow-gradcheck* ]]; then 884 TEST_REPORTS_DIR=test/test-reports/cpp-unittest/test_libtorch 885 mkdir -p $TEST_REPORTS_DIR 886 887 OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" "$TORCH_BIN_DIR"/test_api --gtest_filter='-IMethodTest.*' --gtest_output=xml:$TEST_REPORTS_DIR/test_api.xml 888 "$TORCH_BIN_DIR"/test_tensorexpr --gtest_output=xml:$TEST_REPORTS_DIR/test_tensorexpr.xml 889 else 890 # Exclude IMethodTest that relies on torch::deploy, which will instead be ran in test_deploy 891 OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_api -k "not IMethodTest" 892 python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr 893 fi 894 895 if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* && "${BUILD_ENVIRONMENT}" != *asan* ]]; then 896 # NB: This test is not under TORCH_BIN_DIR but under BUILD_BIN_DIR 897 export CPP_TESTS_DIR="${BUILD_BIN_DIR}" 898 python test/run_test.py --cpp --verbose -i cpp/static_runtime_test 899 fi 900} 901 902test_xpu_bin(){ 903 TEST_REPORTS_DIR=$(pwd)/test/test-reports 904 mkdir -p "$TEST_REPORTS_DIR" 905 906 for xpu_case in "${BUILD_BIN_DIR}"/*{xpu,sycl}*; do 907 if [[ "$xpu_case" != *"*"* && "$xpu_case" != *.so && "$xpu_case" != *.a ]]; then 908 case_name=$(basename "$xpu_case") 909 echo "Testing ${case_name} ..." 
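      # Each standalone XPU/SYCL gtest binary writes its own XML report into $TEST_REPORTS_DIR.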
910 "$xpu_case" --gtest_output=xml:"$TEST_REPORTS_DIR"/"$case_name".xml 911 fi 912 done 913} 914 915test_aot_compilation() { 916 echo "Testing Ahead of Time compilation" 917 ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR" 918 ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR" 919 920 if [ -f "$TORCH_BIN_DIR"/test_mobile_nnc ]; then 921 CPP_TESTS_DIR="${TORCH_BIN_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_mobile_nnc 922 fi 923 924 if [ -f "$TORCH_BIN_DIR"/aot_model_compiler_test ]; then 925 source test/mobile/nnc/test_aot_compile.sh 926 fi 927} 928 929test_vulkan() { 930 if [[ "$BUILD_ENVIRONMENT" == *vulkan* ]]; then 931 ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_TEST_DIR" 932 ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_TEST_DIR" 933 export VK_ICD_FILENAMES=/var/lib/jenkins/swiftshader/swiftshader/build/Linux/vk_swiftshader_icd.json 934 CPP_TESTS_DIR="${TORCH_TEST_DIR}" LD_LIBRARY_PATH=/var/lib/jenkins/swiftshader/swiftshader/build/Linux/ python test/run_test.py --cpp --verbose -i cpp/vulkan_api_test 935 fi 936} 937 938test_distributed() { 939 echo "Testing distributed python tests" 940 # shellcheck disable=SC2086 941 time python test/run_test.py --distributed-tests --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" $INCLUDE_CLAUSE --verbose 942 assert_git_not_dirty 943 944 if [[ ("$BUILD_ENVIRONMENT" == *cuda* || "$BUILD_ENVIRONMENT" == *rocm*) && "$SHARD_NUMBER" == 1 ]]; then 945 echo "Testing distributed C++ tests" 946 ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR" 947 ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR" 948 949 export CPP_TESTS_DIR="${TORCH_BIN_DIR}" 950 # These are distributed tests, so let's continue running them sequentially here to avoid 951 # any surprise 952 python test/run_test.py --cpp --verbose -i cpp/FileStoreTest 953 python test/run_test.py --cpp --verbose -i cpp/HashStoreTest 954 python test/run_test.py --cpp --verbose -i cpp/TCPStoreTest 955 956 if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then 957 MPIEXEC=$(command -v mpiexec) 958 if [[ -n "$MPIEXEC" ]]; then 959 # NB: mpiexec only works directly with the C++ test binary here 960 MPICMD="${MPIEXEC} -np 2 $TORCH_BIN_DIR/ProcessGroupMPITest" 961 eval "$MPICMD" 962 fi 963 964 python test/run_test.py --cpp --verbose -i cpp/ProcessGroupGlooTest 965 python test/run_test.py --cpp --verbose -i cpp/ProcessGroupNCCLTest 966 python test/run_test.py --cpp --verbose -i cpp/ProcessGroupNCCLErrorsTest 967 fi 968 fi 969} 970 971test_rpc() { 972 echo "Testing RPC C++ tests" 973 # NB: the ending test_rpc must match the current function name for the current 974 # test reporting process to function as expected. 975 ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR" 976 ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR" 977 978 CPP_TESTS_DIR="${TORCH_BIN_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_cpp_rpc 979} 980 981test_custom_backend() { 982 echo "Testing custom backends" 983 CUSTOM_BACKEND_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-backend-build" 984 pushd test/custom_backend 985 cp -a "$CUSTOM_BACKEND_BUILD" build 986 # Run tests Python-side and export a lowered module. 987 python test_custom_backend.py -v 988 python backend.py --export-module-to=model.pt 989 # Run tests C++-side and load the exported lowered module. 
990 build/test_custom_backend ./model.pt 991 rm -f ./model.pt 992 popd 993 assert_git_not_dirty 994} 995 996test_custom_script_ops() { 997 echo "Testing custom script operators" 998 CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build" 999 pushd test/custom_operator 1000 cp -a "$CUSTOM_OP_BUILD" build 1001 # Run tests Python-side and export a script module. 1002 python test_custom_ops.py -v 1003 python model.py --export-script-module=model.pt 1004 # Run tests C++-side and load the exported script module. 1005 build/test_custom_ops ./model.pt 1006 popd 1007 assert_git_not_dirty 1008} 1009 1010test_jit_hooks() { 1011 echo "Testing jit hooks in cpp" 1012 HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build" 1013 pushd test/jit_hooks 1014 cp -a "$HOOK_BUILD" build 1015 # Run tests Python-side and export the script modules with hooks 1016 python model.py --export-script-module=model 1017 # Run tests C++-side and load the exported script modules 1018 build/test_jit_hooks ./model 1019 popd 1020 assert_git_not_dirty 1021} 1022 1023test_torch_function_benchmark() { 1024 echo "Testing __torch_function__ benchmarks" 1025 pushd benchmarks/overrides_benchmark 1026 python bench.py -n 1 -m 2 1027 python pyspybench.py Tensor -n 1 1028 python pyspybench.py SubTensor -n 1 1029 python pyspybench.py WithTorchFunction -n 1 1030 python pyspybench.py SubWithTorchFunction -n 1 1031 popd 1032 assert_git_not_dirty 1033} 1034 1035build_xla() { 1036 # xla test needs pytorch headers in torch/include 1037 pushd .. 1038 python -c "import os, torch, shutil; shutil.copytree(os.path.join(os.path.dirname(torch.__file__), 'include'), 'workspace/torch/include', dirs_exist_ok=True)" 1039 popd 1040 1041 # xla test needs sccache setup. 1042 # shellcheck source=./common-build.sh 1043 source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh" 1044 1045 XLA_DIR=xla 1046 USE_CACHE=1 1047 clone_pytorch_xla 1048 # shellcheck disable=SC1091 1049 source "xla/.circleci/common.sh" 1050 1051 # TODO: The torch pin #73164 is involved in the sev https://github.com/pytorch/pytorch/issues/86093 1052 # so this is temporarily removed until XLA fixes the weird logic in https://github.com/pytorch/xla/blob/master/scripts/apply_patches.sh#L17-L18 1053 rm "${XLA_DIR}/torch_patches/.torch_pin" || true 1054 1055 apply_patches 1056 SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" 1057 # These functions are defined in .circleci/common.sh in pytorch/xla repo 1058 retry install_deps_pytorch_xla $XLA_DIR $USE_CACHE 1059 CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch:${CMAKE_PREFIX_PATH}" XLA_SANDBOX_BUILD=1 build_torch_xla $XLA_DIR 1060 assert_git_not_dirty 1061} 1062 1063test_xla() { 1064 # xla test needs sccache setup. 1065 # shellcheck source=./common-build.sh 1066 source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh" 1067 1068 clone_pytorch_xla 1069 # shellcheck disable=SC1091 1070 source "./xla/.circleci/common.sh" 1071 SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" 1072 # Set LD_LIBRARY_PATH for C++ tests 1073 export LD_LIBRARY_PATH="/opt/conda/lib/:${LD_LIBRARY_PATH}" 1074 CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch:${CMAKE_PREFIX_PATH}" XLA_SKIP_MP_OP_TESTS=1 XLA_SKIP_XLA_OP_TESTS=1 run_torch_xla_tests "$(pwd)" "$(pwd)/xla" 1075 assert_git_not_dirty 1076} 1077 1078function check_public_api_test_fails { 1079 test_name=$1 1080 invalid_item_name=$2 1081 invalid_item_desc=$3 1082 1083 echo "Running public API test '${test_name}'..." 
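  # The `&& ret=$? || ret=$?` pattern below captures the exit code of the test
  # invocation without letting a failure abort the script under `set -ex`.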
1084 test_output=$(python test/test_public_bindings.py -k "${test_name}" 2>&1) && ret=$? || ret=$? 1085 1086 # Ensure test fails correctly. 1087 if [ "$ret" -eq 0 ]; then 1088 cat << EOF 1089Expected the public API test '${test_name}' to fail after introducing 1090${invalid_item_desc}, but it succeeded! Check test/test_public_bindings.py 1091for any changes that may have broken the test. 1092EOF 1093 return 1 1094 fi 1095 1096 # Ensure invalid item is in the test output. 1097 echo "${test_output}" | grep -q "${invalid_item_name}" && ret=$? || ret=$? 1098 1099 if [ $ret -ne 0 ]; then 1100 cat << EOF 1101Expected the public API test '${test_name}' to identify ${invalid_item_desc}, but 1102it didn't! It's possible the test may not have run. Check test/test_public_bindings.py 1103for any changes that may have broken the test. 1104EOF 1105 return 1 1106 fi 1107 1108 echo "Success! '${test_name}' identified ${invalid_item_desc} ${invalid_item_name}." 1109 return 0 1110} 1111 1112# Do NOT run this test before any other tests, like test_python_shard, etc. 1113# Because this function uninstalls the torch built from branch and installs 1114# the torch built on its base commit. 1115test_forward_backward_compatibility() { 1116 set -x 1117 1118 # First, validate public API tests in the torch built from branch. 1119 # Step 1. Make sure the public API test "test_correct_module_names" fails when a new file 1120 # introduces an invalid public API function. 1121 new_filename=$(mktemp XXXXXXXX.py -p "${TORCH_INSTALL_DIR}") 1122 1123 BAD_PUBLIC_FUNC=$( 1124 cat << 'EOF' 1125def new_public_func(): 1126 pass 1127 1128# valid public API functions have __module__ set correctly 1129new_public_func.__module__ = None 1130EOF 1131 ) 1132 1133 echo "${BAD_PUBLIC_FUNC}" >> "${new_filename}" 1134 invalid_api="torch.$(basename -s '.py' "${new_filename}").new_public_func" 1135 echo "Created an invalid public API function ${invalid_api}..." 1136 1137 check_public_api_test_fails \ 1138 "test_correct_module_names" \ 1139 "${invalid_api}" \ 1140 "an invalid public API function" && ret=$? || ret=$? 1141 1142 rm -v "${new_filename}" 1143 1144 if [ "$ret" -ne 0 ]; then 1145 exit 1 1146 fi 1147 1148 # Step 2. Make sure that the public API test "test_correct_module_names" fails when an existing 1149 # file is modified to introduce an invalid public API function. 1150 EXISTING_FILEPATH="${TORCH_INSTALL_DIR}/nn/parameter.py" 1151 cp -v "${EXISTING_FILEPATH}" "${EXISTING_FILEPATH}.orig" 1152 echo "${BAD_PUBLIC_FUNC}" >> "${EXISTING_FILEPATH}" 1153 invalid_api="torch.nn.parameter.new_public_func" 1154 echo "Appended an invalid public API function to existing file ${EXISTING_FILEPATH}..." 1155 1156 check_public_api_test_fails \ 1157 "test_correct_module_names" \ 1158 "${invalid_api}" \ 1159 "an invalid public API function" && ret=$? || ret=$? 1160 1161 mv -v "${EXISTING_FILEPATH}.orig" "${EXISTING_FILEPATH}" 1162 1163 if [ "$ret" -ne 0 ]; then 1164 exit 1 1165 fi 1166 1167 # Step 3. Make sure that the public API test "test_modules_can_be_imported" fails when a module 1168 # cannot be imported. 1169 new_module_dir=$(mktemp XXXXXXXX -d -p "${TORCH_INSTALL_DIR}") 1170 echo "invalid syntax garbage" > "${new_module_dir}/__init__.py" 1171 invalid_module_name="torch.$(basename "${new_module_dir}")" 1172 1173 check_public_api_test_fails \ 1174 "test_modules_can_be_imported" \ 1175 "${invalid_module_name}" \ 1176 "a non-importable module" && ret=$? || ret=$? 

  rm -rv "${new_module_dir}"

  if [ "$ret" -ne 0 ]; then
    exit 1
  fi

  # Next, build torch from the merge base.
  REPO_DIR=$(pwd)
  if [[ "${BASE_SHA}" == "${SHA1}" ]]; then
    echo "On trunk, we should compare schemas with torch built from the parent commit"
    SHA_TO_COMPARE=$(git rev-parse "${SHA1}"^)
  else
    echo "On pull, we should compare schemas with torch built from the merge base"
    SHA_TO_COMPARE=$(git merge-base "${SHA1}" "${BASE_SHA}")
  fi
  export SHA_TO_COMPARE

  # create a dummy TorchScript model at this version
  python test/create_dummy_torchscript_model.py /tmp/model_new.pt
  python -m venv venv
  # shellcheck disable=SC1091
  . venv/bin/activate

  # build torch at the base commit to generate a base function schema for comparison
  git reset --hard "${SHA_TO_COMPARE}"
  git submodule sync && git submodule update --init --recursive
  echo "::group::Installing Torch From Base Commit"
  pip install -r requirements.txt
  # shellcheck source=./common-build.sh
  source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
  python setup.py bdist_wheel --bdist-dir="base_bdist_tmp" --dist-dir="base_dist"
  python -mpip install base_dist/*.whl
  echo "::endgroup::"

  pushd test/forward_backward_compatibility
  pip show torch
  python dump_all_function_schemas.py --filename nightly_schemas.txt

  git reset --hard "${SHA1}"
  git submodule sync && git submodule update --init --recursive
  # FC: verify that the new model can be loaded with the old code.
  if ! python ../load_torchscript_model.py /tmp/model_new.pt; then
    echo "FC check failed: the new model cannot be loaded with the old code"
    return 1
  fi
  python ../create_dummy_torchscript_model.py /tmp/model_old.pt
  deactivate
  rm -r "${REPO_DIR}/venv" "${REPO_DIR}/base_dist"
  pip show torch
  python check_forward_backward_compatibility.py --existing-schemas nightly_schemas.txt
  # BC: verify that the old model can be loaded with the new code.
  if ! python ../load_torchscript_model.py /tmp/model_old.pt; then
    echo "BC check failed: the old model cannot be loaded with the new code"
    return 1
  fi
  popd
  set +x
  assert_git_not_dirty
}

test_bazel() {
  set -e

  # bazel test needs sccache setup.
  # shellcheck source=./common-build.sh
  source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"

  get_bazel

  if [[ "$CUDA_VERSION" == "cpu" ]]; then
    # Test //c10/... without Google flags and logging libraries. The
    # :all_tests target in the subsequent Bazel invocation tests
    # //c10/... with the Google libraries.
    tools/bazel test --config=cpu-only --test_timeout=480 --test_output=all --test_tag_filters=-gpu-required --test_filter=-*CUDA \
      --no//c10:use_gflags --no//c10:use_glog //c10/...
1253 1254 tools/bazel test --config=cpu-only --test_timeout=480 --test_output=all --test_tag_filters=-gpu-required --test_filter=-*CUDA :all_tests 1255 else 1256 # Increase the test timeout to 480 like CPU tests because modules_test frequently timeout 1257 tools/bazel test --test_timeout=480 --test_output=errors \ 1258 //:any_test \ 1259 //:autograd_test \ 1260 //:dataloader_test \ 1261 //:dispatch_test \ 1262 //:enum_test \ 1263 //:expanding_array_test \ 1264 //:fft_test \ 1265 //:functional_test \ 1266 //:grad_mode_test \ 1267 //:inference_mode_test \ 1268 //:init_test \ 1269 //:jit_test \ 1270 //:memory_test \ 1271 //:meta_tensor_test \ 1272 //:misc_test \ 1273 //:moduledict_test \ 1274 //:modulelist_test \ 1275 //:modules_test \ 1276 //:namespace_test \ 1277 //:nested_test \ 1278 //:nn_utils_test \ 1279 //:operations_test \ 1280 //:ordered_dict_test \ 1281 //:parallel_benchmark_test \ 1282 //:parameterdict_test \ 1283 //:parameterlist_test \ 1284 //:sequential_test \ 1285 //:serialize_test \ 1286 //:special_test \ 1287 //:static_test \ 1288 //:support_test \ 1289 //:tensor_flatten_test \ 1290 //:tensor_indexing_test \ 1291 //:tensor_options_cuda_test \ 1292 //:tensor_options_test \ 1293 //:tensor_test \ 1294 //:torch_dist_autograd_test \ 1295 //:torch_include_test \ 1296 //:transformer_test \ 1297 //:test_bazel \ 1298 //c10/cuda/test:test \ 1299 //c10/test:core_tests \ 1300 //c10/test:typeid_test \ 1301 //c10/test:util/ssize_test \ 1302 //c10/test:util_base_tests 1303 fi 1304} 1305 1306test_benchmarks() { 1307 if [[ "$BUILD_ENVIRONMENT" == *cuda* && $TEST_CONFIG != *nogpu* ]]; then 1308 pip_install --user "pytest-benchmark==3.2.3" 1309 pip_install --user "requests" 1310 BENCHMARK_DATA="benchmarks/.data" 1311 mkdir -p ${BENCHMARK_DATA} 1312 pytest benchmarks/fastrnns/test_bench.py --benchmark-sort=Name --benchmark-json=${BENCHMARK_DATA}/fastrnns_default.json --fuser=default --executor=default 1313 pytest benchmarks/fastrnns/test_bench.py --benchmark-sort=Name --benchmark-json=${BENCHMARK_DATA}/fastrnns_legacy_old.json --fuser=old --executor=legacy 1314 pytest benchmarks/fastrnns/test_bench.py --benchmark-sort=Name --benchmark-json=${BENCHMARK_DATA}/fastrnns_profiling_te.json --fuser=te --executor=profiling 1315 # TODO: Enable these for GHA once we have credentials for forked pull requests 1316 if [[ -z "${GITHUB_ACTIONS}" ]]; then 1317 python benchmarks/upload_scribe.py --pytest_bench_json ${BENCHMARK_DATA}/fastrnns_default.json 1318 python benchmarks/upload_scribe.py --pytest_bench_json ${BENCHMARK_DATA}/fastrnns_legacy_old.json 1319 python benchmarks/upload_scribe.py --pytest_bench_json ${BENCHMARK_DATA}/fastrnns_profiling_te.json 1320 fi 1321 assert_git_not_dirty 1322 fi 1323} 1324 1325test_cpp_extensions() { 1326 # This is to test whether cpp extension build is compatible with current env. No need to test both ninja and no-ninja build 1327 time python test/run_test.py --include test_cpp_extensions_aot_ninja --verbose 1328 assert_git_not_dirty 1329} 1330 1331test_vec256() { 1332 # This is to test vec256 instructions DEFAULT/AVX/AVX2 (platform dependent, some platforms might not support AVX/AVX2) 1333 if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then 1334 echo "Testing vec256 instructions" 1335 mkdir -p test/test-reports/vec256 1336 pushd build/bin 1337 vec256_tests=$(find . 

test_benchmarks() {
  if [[ "$BUILD_ENVIRONMENT" == *cuda* && $TEST_CONFIG != *nogpu* ]]; then
    pip_install --user "pytest-benchmark==3.2.3"
    pip_install --user "requests"
    BENCHMARK_DATA="benchmarks/.data"
    mkdir -p ${BENCHMARK_DATA}
    pytest benchmarks/fastrnns/test_bench.py --benchmark-sort=Name --benchmark-json=${BENCHMARK_DATA}/fastrnns_default.json --fuser=default --executor=default
    pytest benchmarks/fastrnns/test_bench.py --benchmark-sort=Name --benchmark-json=${BENCHMARK_DATA}/fastrnns_legacy_old.json --fuser=old --executor=legacy
    pytest benchmarks/fastrnns/test_bench.py --benchmark-sort=Name --benchmark-json=${BENCHMARK_DATA}/fastrnns_profiling_te.json --fuser=te --executor=profiling
    # TODO: Enable these for GHA once we have credentials for forked pull requests
    if [[ -z "${GITHUB_ACTIONS}" ]]; then
      python benchmarks/upload_scribe.py --pytest_bench_json ${BENCHMARK_DATA}/fastrnns_default.json
      python benchmarks/upload_scribe.py --pytest_bench_json ${BENCHMARK_DATA}/fastrnns_legacy_old.json
      python benchmarks/upload_scribe.py --pytest_bench_json ${BENCHMARK_DATA}/fastrnns_profiling_te.json
    fi
    assert_git_not_dirty
  fi
}

test_cpp_extensions() {
  # This checks whether the C++ extension build is compatible with the current
  # env. There is no need to test both the ninja and no-ninja builds.
  time python test/run_test.py --include test_cpp_extensions_aot_ninja --verbose
  assert_git_not_dirty
}

test_vec256() {
  # This tests the vec256 instruction sets DEFAULT/AVX/AVX2 (platform dependent;
  # some platforms may not support AVX/AVX2).
  if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
    echo "Testing vec256 instructions"
    mkdir -p test/test-reports/vec256
    pushd build/bin
    vec256_tests=$(find . -maxdepth 1 -executable -name 'vec256_test*')
    for vec256_exec in $vec256_tests
    do
      $vec256_exec --gtest_output=xml:test/test-reports/vec256/"$vec256_exec".xml
    done
    popd
    assert_git_not_dirty
  fi
}

test_docs_test() {
  .ci/pytorch/docs-test.sh
}

test_executorch() {
  echo "Install torchvision and torchaudio"
  install_torchvision
  install_torchaudio

  pushd /executorch

  export PYTHON_EXECUTABLE=python
  export EXECUTORCH_BUILD_PYBIND=ON
  export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"

  # NB: We need to rebuild the ExecuTorch runner here because it depends on
  # PyTorch from the PR
  # shellcheck disable=SC1091
  source .ci/scripts/setup-linux.sh cmake

  echo "Run ExecuTorch unit tests"
  pytest -v -n auto
  # shellcheck disable=SC1091
  LLVM_PROFDATA=llvm-profdata-12 LLVM_COV=llvm-cov-12 bash test/run_oss_cpp_tests.sh

  echo "Run ExecuTorch regression tests for some models"
  # TODO(huydhn): Add more coverage here using ExecuTorch's gather models script
  # shellcheck disable=SC1091
  source .ci/scripts/test.sh mv3 cmake xnnpack-quantization-delegation ''

  popd

  # Test torchgen-generated code for ExecuTorch.
  echo "Testing ExecuTorch op registration"
  "$BUILD_BIN_DIR"/test_edge_op_registration

  assert_git_not_dirty
}

test_linux_aarch64() {
  python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
    test_transformers test_multiprocessing test_numpy_interop --verbose

  # Dynamo tests
  python test/run_test.py --include dynamo/test_compile dynamo/test_backends dynamo/test_comptime dynamo/test_config \
    dynamo/test_functions dynamo/test_fx_passes_pre_grad dynamo/test_interop dynamo/test_model_output dynamo/test_modules \
    dynamo/test_optimizers dynamo/test_recompile_ux dynamo/test_recompiles --verbose

  # Inductor tests
  python test/run_test.py --include inductor/test_torchinductor inductor/test_benchmark_fusion inductor/test_codecache \
    inductor/test_config inductor/test_control_flow inductor/test_coordinate_descent_tuner inductor/test_fx_fusion \
    inductor/test_group_batch_fusion inductor/test_inductor_freezing inductor/test_inductor_utils \
    inductor/test_inplacing_pass inductor/test_kernel_benchmark inductor/test_layout_optim \
    inductor/test_max_autotune inductor/test_memory_planning inductor/test_metrics inductor/test_multi_kernel inductor/test_pad_mm \
    inductor/test_pattern_matcher inductor/test_perf inductor/test_profiler inductor/test_select_algorithm inductor/test_smoke \
    inductor/test_split_cat_fx_passes inductor/test_standalone_compile inductor/test_torchinductor \
    inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes --verbose
}
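
# test_linux_aarch64 above (and likely much of the sharded dispatch below)
# funnels through test/run_test.py. For a one-off local run of a single test
# file, the same entry point can be used directly; a sketch, with test_torch
# taken from the include list above:
#
#   python test/run_test.py --include test_torch --verbose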

if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
  (cd test && python -c "import torch; print(torch.__config__.show())")
  (cd test && python -c "import torch; print(torch.__config__.parallel_info())")
fi
if [[ "${BUILD_ENVIRONMENT}" == *aarch64* && "${TEST_CONFIG}" != *perf_cpu_aarch64* ]]; then
  test_linux_aarch64
elif [[ "${TEST_CONFIG}" == *backward* ]]; then
  test_forward_backward_compatibility
  # Do NOT add tests after bc check tests, see its comment.
elif [[ "${TEST_CONFIG}" == *xla* ]]; then
  install_torchvision
  build_xla
  test_xla
elif [[ "${TEST_CONFIG}" == *executorch* ]]; then
  test_executorch
elif [[ "$TEST_CONFIG" == 'jit_legacy' ]]; then
  test_python_legacy_jit
elif [[ "${BUILD_ENVIRONMENT}" == *libtorch* ]]; then
  # TODO: run some C++ tests
  echo "no-op at the moment"
elif [[ "$TEST_CONFIG" == distributed ]]; then
  test_distributed
  # Only run RPC C++ tests on the first shard
  if [[ "${SHARD_NUMBER}" == 1 ]]; then
    test_rpc
  fi
elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
  test_inductor_distributed
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
  test_inductor_halide
elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
  test_inductor_micro_benchmark
elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
  install_torchvision
  id=$((SHARD_NUMBER-1))
  test_dynamo_benchmark huggingface "$id"
elif [[ "${TEST_CONFIG}" == *timm* ]]; then
  install_torchvision
  id=$((SHARD_NUMBER-1))
  test_dynamo_benchmark timm_models "$id"
elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
  if [[ "${TEST_CONFIG}" == *cpu* ]]; then
    install_torchaudio cpu
  else
    install_torchaudio cuda
  fi
  install_torchtext
  install_torchvision
  TORCH_CUDA_ARCH_LIST="8.0;8.6" pip_install git+https://github.com/pytorch/ao.git
  id=$((SHARD_NUMBER-1))
  # https://github.com/opencv/opencv-python/issues/885
  pip_install opencv-python==4.8.0.74
  if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
    checkout_install_torchbench hf_Bert hf_Albert nanogpt timm_vision_transformer
    PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
  elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
    checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_edgecnn \
      llama_v2_7b_16h resnet50 timm_efficientnet mobilenet_v3_large timm_resnest \
      functorch_maml_omniglot yolov3 mobilenet_v2 resnext50_32x4d densenet121 mnasnet1_0
    PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_cpu_smoketest_perf
  elif [[ "${TEST_CONFIG}" == *torchbench_gcp_smoketest* ]]; then
    checkout_install_torchbench
    TORCHBENCHPATH=$(pwd)/torchbench test_torchbench_gcp_smoketest
  else
    checkout_install_torchbench
    # Do this after checkout_install_torchbench to ensure we clobber any
    # nightlies that torchbench may pull in
    if [[ "${TEST_CONFIG}" != *cpu* ]]; then
      install_torchrec_and_fbgemm
    fi
    PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
  fi
elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper_abi_compatible* ]]; then
  install_torchvision
  test_inductor_cpp_wrapper_abi_compatible
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
  install_torchvision
  test_inductor_shard "${SHARD_NUMBER}"
  if [[ "${SHARD_NUMBER}" == 1 ]]; then
    if [[ "${BUILD_ENVIRONMENT}" != linux-jammy-py3.9-gcc11-build ]]; then
      test_inductor_distributed
    fi
  fi
elif [[ "${TEST_CONFIG}" == *dynamo* ]]; then
  install_torchvision
  test_dynamo_shard "${SHARD_NUMBER}"
  if [[ "${SHARD_NUMBER}" == 1 ]]; then
    test_aten
  fi
elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then
  install_torchvision
  test_python_shard "$SHARD_NUMBER"
  test_aten
elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
  test_without_numpy
  install_torchvision
  test_python_shard 1
  test_aten
  test_libtorch 1
  if [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
    test_xpu_bin
  fi
elif [[ "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
  install_torchvision
  test_python_shard 2
  test_libtorch 2
  test_aot_compilation
  test_custom_script_ops
  test_custom_backend
  test_torch_function_benchmark
elif [[ "${SHARD_NUMBER}" -gt 2 ]]; then
  # Handle an arbitrary number of shards
  install_torchvision
  test_python_shard "$SHARD_NUMBER"
elif [[ "${BUILD_ENVIRONMENT}" == *vulkan* ]]; then
  test_vulkan
elif [[ "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
  test_bazel
elif [[ "${BUILD_ENVIRONMENT}" == *-mobile-lightweight-dispatch* ]]; then
  test_libtorch
elif [[ "${TEST_CONFIG}" = docs_test ]]; then
  test_docs_test
elif [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
  install_torchvision
  test_python
  test_aten
  test_xpu_bin
else
  install_torchvision
  install_monkeytype
  test_python
  test_aten
  test_vec256
  test_libtorch
  test_aot_compilation
  test_custom_script_ops
  test_custom_backend
  test_torch_function_benchmark
  test_benchmarks
fi
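
# Example invocation (a sketch, not part of CI): the dispatch above keys off
# BUILD_ENVIRONMENT, TEST_CONFIG, SHARD_NUMBER, and NUM_TEST_SHARDS, which the
# CI Docker images and workflows normally set. A hypothetical local run of the
# default config on a single shard might look like (script path illustrative):
#
#   BUILD_ENVIRONMENT=<env-name> TEST_CONFIG=default SHARD_NUMBER=1 NUM_TEST_SHARDS=1 .ci/pytorch/test.sh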